refactor: use global reqwest client
parent
df43adcd35
commit
084660ab46
|
@ -775,36 +775,6 @@ dependencies = [
|
|||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "curl"
|
||||
version = "0.4.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "509bd11746c7ac09ebd19f0b17782eae80aadee26237658a6b4808afb5c11a22"
|
||||
dependencies = [
|
||||
"curl-sys",
|
||||
"libc",
|
||||
"openssl-probe",
|
||||
"openssl-sys",
|
||||
"schannel",
|
||||
"socket2",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "curl-sys"
|
||||
version = "0.4.61+curl-8.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14d05c10f541ae6f3bc5b3d923c20001f47db7d5f0b2bc6ad16490133842db79"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"libz-sys",
|
||||
"openssl-sys",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cxx"
|
||||
version = "1.0.94"
|
||||
|
@ -3503,7 +3473,6 @@ version = "1.5.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d261bbae112cb48a95d3cc9e8873a4e40933bc54ae8eddc1eef70e952dd3b232"
|
||||
dependencies = [
|
||||
"curl",
|
||||
"html5ever",
|
||||
"markup5ever_rcdom",
|
||||
"serde_json",
|
||||
|
|
|
@ -74,7 +74,7 @@ nonempty = "0.6.0"
|
|||
image = { version = "0.23.14", optional = true }
|
||||
webp = { version = "0.2.0", optional = true }
|
||||
|
||||
webpage = { version = "1.5.0", optional = true }
|
||||
webpage = { version = "1.5.0", optional = true, default-features = false}
|
||||
id3 = { version = "1.0.2", optional = true }
|
||||
kamadak-exif = { version = "0.5.4", optional = true }
|
||||
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
use anyhow::{anyhow, Result};
|
||||
use shadow_rs::{is_debug, shadow};
|
||||
|
||||
shadow!(build);
|
||||
use lazy_static::lazy_static;
|
||||
use shadow_rs::is_debug;
|
||||
|
||||
pub fn get_static_dir<S: AsRef<str>>(dir: S) -> Result<std::path::PathBuf> {
|
||||
let cwd = std::env::current_exe()?.parent().unwrap().to_path_buf();
|
||||
|
@ -17,3 +16,16 @@ pub fn get_static_dir<S: AsRef<str>>(dir: S) -> Result<std::path::PathBuf> {
|
|||
Err(anyhow!("Path {result:?} doesn't exist."))
|
||||
}
|
||||
}
|
||||
|
||||
lazy_static! {
    /// `User-Agent` value sent by [`REQWEST_CLIENT`]: the project name and the
    /// package version from the build metadata, separated by `" / "`.
    static ref APP_USER_AGENT: String = format!(
        "{} / {}",
        upend::common::build::PROJECT_NAME,
        upend::common::build::PKG_VERSION
    );

    /// Shared blocking HTTP client, initialized on first access and reused
    /// afterwards so callers do not build a new client per request.
    ///
    /// # Panics
    ///
    /// Panics on first access if the client cannot be constructed.
    pub static ref REQWEST_CLIENT: reqwest::blocking::Client = reqwest::blocking::Client::builder()
        .user_agent(APP_USER_AGENT.as_str())
        .build()
        // Construction failure leaves the application without any HTTP client;
        // state that explicitly instead of a bare `unwrap()` panic.
        .expect("failed to build the global reqwest client");
}
|
||||
|
|
|
@ -1,16 +1,16 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use super::Extractor;
|
||||
use crate::common::REQWEST_CLIENT;
|
||||
use anyhow::anyhow;
|
||||
use anyhow::Result;
|
||||
use upend::common::APP_USER_AGENT;
|
||||
use upend::{
|
||||
addressing::Address,
|
||||
database::{entry::Entry, stores::UpStore, UpEndConnection},
|
||||
util::jobs::{JobContainer, JobState},
|
||||
};
|
||||
|
||||
use webpage::{Webpage, WebpageOptions};
|
||||
use webpage::HTML;
|
||||
|
||||
pub struct WebExtractor;
|
||||
|
||||
|
@ -26,25 +26,21 @@ impl Extractor for WebExtractor {
|
|||
let mut job_handle =
|
||||
job_container.add_job(None, &format!("Getting info about {url:?}"))?;
|
||||
|
||||
let webpage_url = url.clone();
|
||||
let options = WebpageOptions {
|
||||
useragent: APP_USER_AGENT.to_string(),
|
||||
..WebpageOptions::default()
|
||||
};
|
||||
let webpage_get = Webpage::from_url(webpage_url.as_ref(), options);
|
||||
let response = REQWEST_CLIENT.get(url.clone()).send()?;
|
||||
let html = HTML::from_string(response.text()?, Some(url.to_string()));
|
||||
|
||||
if let Ok(webpage) = webpage_get {
|
||||
if let Ok(html) = html {
|
||||
let _ = job_handle.update_progress(50.0);
|
||||
|
||||
let mut entries = vec![
|
||||
webpage.html.title.map(|html_title| Entry {
|
||||
html.title.map(|html_title| Entry {
|
||||
entity: address.clone(),
|
||||
attribute: "HTML_TITLE".to_string(),
|
||||
value: html_title.into(),
|
||||
provenance: "SYSTEM EXTRACTOR".to_string(),
|
||||
timestamp: chrono::Utc::now().naive_utc(),
|
||||
}),
|
||||
webpage.html.description.map(|html_desc| Entry {
|
||||
html.description.map(|html_desc| Entry {
|
||||
entity: address.clone(),
|
||||
attribute: "HTML_DESCRIPTION".to_string(),
|
||||
value: html_desc.into(),
|
||||
|
@ -52,7 +48,7 @@ impl Extractor for WebExtractor {
|
|||
timestamp: chrono::Utc::now().naive_utc(),
|
||||
}),
|
||||
];
|
||||
for (key, value) in webpage.html.opengraph.properties {
|
||||
for (key, value) in html.opengraph.properties {
|
||||
entries.push(Some(Entry {
|
||||
entity: address.clone(),
|
||||
attribute: format!("OG_{}", key.to_uppercase()),
|
||||
|
@ -61,7 +57,7 @@ impl Extractor for WebExtractor {
|
|||
timestamp: chrono::Utc::now().naive_utc(),
|
||||
}))
|
||||
}
|
||||
for image in webpage.html.opengraph.images {
|
||||
for image in html.opengraph.images {
|
||||
entries.push(Some(Entry {
|
||||
entity: address.clone(),
|
||||
attribute: "OG_IMAGE".to_string(),
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#[macro_use]
|
||||
extern crate upend;
|
||||
use crate::common::{get_static_dir, REQWEST_CLIENT};
|
||||
use actix_cors::Cors;
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{middleware, App, HttpServer};
|
||||
|
@ -20,12 +21,11 @@ use tracing::trace;
|
|||
use tracing::{debug, error, info, warn};
|
||||
use tracing_subscriber::filter::{EnvFilter, LevelFilter};
|
||||
use upend::addressing::Address;
|
||||
use upend::common::APP_USER_AGENT;
|
||||
use upend::database::entry::EntryValue;
|
||||
use upend::util::hash::hash;
|
||||
|
||||
use upend::{
|
||||
common::{build, get_static_dir},
|
||||
common::build,
|
||||
config::UpEndConfig,
|
||||
database::{
|
||||
stores::{fs::FsStore, UpStore},
|
||||
|
@ -36,6 +36,7 @@ use upend::{
|
|||
|
||||
use crate::util::exec::block_background;
|
||||
|
||||
mod common;
|
||||
mod routes;
|
||||
mod util;
|
||||
|
||||
|
@ -200,10 +201,7 @@ async fn main() -> Result<()> {
|
|||
let api_url = url.join("/api/query")?;
|
||||
|
||||
debug!("Querying \"{}\"", api_url);
|
||||
let client = reqwest::blocking::Client::builder()
|
||||
.user_agent(APP_USER_AGENT.as_str())
|
||||
.build()?;
|
||||
let response = client.post(api_url).body(query).send()?;
|
||||
let response = REQWEST_CLIENT.post(api_url).body(query).send()?;
|
||||
|
||||
response.error_for_status_ref()?;
|
||||
|
||||
|
@ -254,10 +252,7 @@ async fn main() -> Result<()> {
|
|||
});
|
||||
|
||||
debug!("Inserting {:?} at \"{}\"", body, api_url);
|
||||
let client = reqwest::blocking::Client::builder()
|
||||
.user_agent(APP_USER_AGENT.as_str())
|
||||
.build()?;
|
||||
let response = client.put(api_url).json(&body).send()?;
|
||||
let response = REQWEST_CLIENT.put(api_url).json(&body).send()?;
|
||||
|
||||
match response.error_for_status_ref() {
|
||||
Ok(_) => {
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use crate::common::REQWEST_CLIENT;
|
||||
use crate::extractors;
|
||||
use crate::previews::PreviewStore;
|
||||
use crate::util::exec::block_background;
|
||||
|
@ -27,7 +28,7 @@ use std::time::{SystemTime, UNIX_EPOCH};
|
|||
use tempfile::NamedTempFile;
|
||||
use tracing::{debug, info, trace};
|
||||
use upend::addressing::{Address, Addressable};
|
||||
use upend::common::{build, APP_USER_AGENT};
|
||||
use upend::common::build;
|
||||
use upend::config::UpEndConfig;
|
||||
use upend::database::constants::{ADDED_ATTR, LABEL_ATTR};
|
||||
use upend::database::entry::{Entry, EntryValue, InvariantEntry};
|
||||
|
@ -856,22 +857,12 @@ pub async fn get_info(state: web::Data<State>) -> Result<HttpResponse, Error> {
|
|||
}
|
||||
|
||||
const MAX_EXTERNAL_SIZE: usize = 128_000_000;
|
||||
|
||||
#[tracing::instrument(skip(url), fields(url=%url))]
|
||||
async fn fetch_external(url: Url) -> Result<(bytes::Bytes, Option<String>), actix_web::Error> {
|
||||
let client = reqwest::Client::builder()
|
||||
.user_agent(APP_USER_AGENT.as_str())
|
||||
.build()
|
||||
.map_err(ErrorInternalServerError)?;
|
||||
|
||||
debug!("Fetching...");
|
||||
|
||||
let response = client
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.map_err(ErrorInternalServerError)?
|
||||
.error_for_status()
|
||||
let response = web::block(|| REQWEST_CLIENT.get(url).send())
|
||||
.await?
|
||||
.map_err(ErrorInternalServerError)?;
|
||||
|
||||
if let Some(content_length) = response.headers().get(reqwest::header::CONTENT_LENGTH) {
|
||||
|
@ -899,7 +890,7 @@ async fn fetch_external(url: Url) -> Result<(bytes::Bytes, Option<String>), acti
|
|||
.and_then(|cd| cd.get_filename().map(String::from));
|
||||
debug!("Got filename: {filename:?}");
|
||||
|
||||
let bytes = response.bytes().await.map_err(ErrorInternalServerError)?;
|
||||
let bytes = response.bytes().map_err(ErrorInternalServerError)?;
|
||||
debug!("Got {} bytes.", bytes.len());
|
||||
|
||||
Ok((bytes, filename))
|
||||
|
|
|
@ -1,23 +1,3 @@
|
|||
use anyhow::{anyhow, Result};
|
||||
use shadow_rs::{is_debug, shadow};
|
||||
use shadow_rs::shadow;
|
||||
|
||||
shadow!(build);
|
||||
|
||||
pub fn get_static_dir<S: AsRef<str>>(dir: S) -> Result<std::path::PathBuf> {
|
||||
let cwd = std::env::current_exe()?.parent().unwrap().to_path_buf();
|
||||
let base_path = if is_debug() {
|
||||
cwd.join("../../tmp/static")
|
||||
} else {
|
||||
cwd
|
||||
};
|
||||
let result = base_path.join(dir.as_ref());
|
||||
if result.exists() {
|
||||
Ok(result)
|
||||
} else {
|
||||
Err(anyhow!("Path {result:?} doesn't exist."))
|
||||
}
|
||||
}
|
||||
lazy_static! {
    /// User agent string for outgoing requests: the project name and the
    /// package version from the build metadata, joined by `" / "`.
    pub static ref APP_USER_AGENT: String =
        [build::PROJECT_NAME, build::PKG_VERSION].join(" / ");
}
|
||||
|
|
Loading…
Reference in New Issue