refactor: use global reqwest client

feat/type-attributes
Tomáš Mládek 2023-05-24 11:20:13 +02:00
parent df43adcd35
commit 084660ab46
7 changed files with 36 additions and 93 deletions

31
Cargo.lock generated
View File

@ -775,36 +775,6 @@ dependencies = [
"typenum",
]
[[package]]
name = "curl"
version = "0.4.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "509bd11746c7ac09ebd19f0b17782eae80aadee26237658a6b4808afb5c11a22"
dependencies = [
"curl-sys",
"libc",
"openssl-probe",
"openssl-sys",
"schannel",
"socket2",
"winapi",
]
[[package]]
name = "curl-sys"
version = "0.4.61+curl-8.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14d05c10f541ae6f3bc5b3d923c20001f47db7d5f0b2bc6ad16490133842db79"
dependencies = [
"cc",
"libc",
"libz-sys",
"openssl-sys",
"pkg-config",
"vcpkg",
"winapi",
]
[[package]]
name = "cxx"
version = "1.0.94"
@ -3503,7 +3473,6 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d261bbae112cb48a95d3cc9e8873a4e40933bc54ae8eddc1eef70e952dd3b232"
dependencies = [
"curl",
"html5ever",
"markup5ever_rcdom",
"serde_json",

View File

@ -74,7 +74,7 @@ nonempty = "0.6.0"
image = { version = "0.23.14", optional = true }
webp = { version = "0.2.0", optional = true }
webpage = { version = "1.5.0", optional = true }
webpage = { version = "1.5.0", optional = true, default-features = false}
id3 = { version = "1.0.2", optional = true }
kamadak-exif = { version = "0.5.4", optional = true }

View File

@ -1,7 +1,6 @@
use anyhow::{anyhow, Result};
use shadow_rs::{is_debug, shadow};
shadow!(build);
use lazy_static::lazy_static;
use shadow_rs::is_debug;
pub fn get_static_dir<S: AsRef<str>>(dir: S) -> Result<std::path::PathBuf> {
let cwd = std::env::current_exe()?.parent().unwrap().to_path_buf();
@ -17,3 +16,16 @@ pub fn get_static_dir<S: AsRef<str>>(dir: S) -> Result<std::path::PathBuf> {
Err(anyhow!("Path {result:?} doesn't exist."))
}
}
lazy_static! {
    /// Value of the `User-Agent` header used by [`REQWEST_CLIENT`], formatted as
    /// `"<PROJECT_NAME> / <PKG_VERSION>"` from the constants in `upend::common::build`.
    static ref APP_USER_AGENT: String = format!(
        "{} / {}",
        upend::common::build::PROJECT_NAME,
        upend::common::build::PKG_VERSION
    );

    /// Global blocking HTTP client, configured with [`APP_USER_AGENT`] as its user agent.
    ///
    /// It is built lazily on first access.
    ///
    /// # Panics
    ///
    /// Panics on first access if the client builder fails; there is no caller to
    /// hand the error back to from a static initializer, so the failure is made
    /// explicit with a descriptive message.
    pub static ref REQWEST_CLIENT: reqwest::blocking::Client = reqwest::blocking::Client::builder()
        .user_agent(APP_USER_AGENT.as_str())
        .build()
        .expect("failed to build the global blocking reqwest client");
}

View File

@ -1,16 +1,16 @@
use std::sync::Arc;
use super::Extractor;
use crate::common::REQWEST_CLIENT;
use anyhow::anyhow;
use anyhow::Result;
use upend::common::APP_USER_AGENT;
use upend::{
addressing::Address,
database::{entry::Entry, stores::UpStore, UpEndConnection},
util::jobs::{JobContainer, JobState},
};
use webpage::{Webpage, WebpageOptions};
use webpage::HTML;
pub struct WebExtractor;
@ -26,25 +26,21 @@ impl Extractor for WebExtractor {
let mut job_handle =
job_container.add_job(None, &format!("Getting info about {url:?}"))?;
let webpage_url = url.clone();
let options = WebpageOptions {
useragent: APP_USER_AGENT.to_string(),
..WebpageOptions::default()
};
let webpage_get = Webpage::from_url(webpage_url.as_ref(), options);
let response = REQWEST_CLIENT.get(url.clone()).send()?;
let html = HTML::from_string(response.text()?, Some(url.to_string()));
if let Ok(webpage) = webpage_get {
if let Ok(html) = html {
let _ = job_handle.update_progress(50.0);
let mut entries = vec![
webpage.html.title.map(|html_title| Entry {
html.title.map(|html_title| Entry {
entity: address.clone(),
attribute: "HTML_TITLE".to_string(),
value: html_title.into(),
provenance: "SYSTEM EXTRACTOR".to_string(),
timestamp: chrono::Utc::now().naive_utc(),
}),
webpage.html.description.map(|html_desc| Entry {
html.description.map(|html_desc| Entry {
entity: address.clone(),
attribute: "HTML_DESCRIPTION".to_string(),
value: html_desc.into(),
@ -52,7 +48,7 @@ impl Extractor for WebExtractor {
timestamp: chrono::Utc::now().naive_utc(),
}),
];
for (key, value) in webpage.html.opengraph.properties {
for (key, value) in html.opengraph.properties {
entries.push(Some(Entry {
entity: address.clone(),
attribute: format!("OG_{}", key.to_uppercase()),
@ -61,7 +57,7 @@ impl Extractor for WebExtractor {
timestamp: chrono::Utc::now().naive_utc(),
}))
}
for image in webpage.html.opengraph.images {
for image in html.opengraph.images {
entries.push(Some(Entry {
entity: address.clone(),
attribute: "OG_IMAGE".to_string(),

View File

@ -1,5 +1,6 @@
#[macro_use]
extern crate upend;
use crate::common::{get_static_dir, REQWEST_CLIENT};
use actix_cors::Cors;
use actix_web::web::Data;
use actix_web::{middleware, App, HttpServer};
@ -20,12 +21,11 @@ use tracing::trace;
use tracing::{debug, error, info, warn};
use tracing_subscriber::filter::{EnvFilter, LevelFilter};
use upend::addressing::Address;
use upend::common::APP_USER_AGENT;
use upend::database::entry::EntryValue;
use upend::util::hash::hash;
use upend::{
common::{build, get_static_dir},
common::build,
config::UpEndConfig,
database::{
stores::{fs::FsStore, UpStore},
@ -36,6 +36,7 @@ use upend::{
use crate::util::exec::block_background;
mod common;
mod routes;
mod util;
@ -200,10 +201,7 @@ async fn main() -> Result<()> {
let api_url = url.join("/api/query")?;
debug!("Querying \"{}\"", api_url);
let client = reqwest::blocking::Client::builder()
.user_agent(APP_USER_AGENT.as_str())
.build()?;
let response = client.post(api_url).body(query).send()?;
let response = REQWEST_CLIENT.post(api_url).body(query).send()?;
response.error_for_status_ref()?;
@ -254,10 +252,7 @@ async fn main() -> Result<()> {
});
debug!("Inserting {:?} at \"{}\"", body, api_url);
let client = reqwest::blocking::Client::builder()
.user_agent(APP_USER_AGENT.as_str())
.build()?;
let response = client.put(api_url).json(&body).send()?;
let response = REQWEST_CLIENT.put(api_url).json(&body).send()?;
match response.error_for_status_ref() {
Ok(_) => {

View File

@ -1,3 +1,4 @@
use crate::common::REQWEST_CLIENT;
use crate::extractors;
use crate::previews::PreviewStore;
use crate::util::exec::block_background;
@ -27,7 +28,7 @@ use std::time::{SystemTime, UNIX_EPOCH};
use tempfile::NamedTempFile;
use tracing::{debug, info, trace};
use upend::addressing::{Address, Addressable};
use upend::common::{build, APP_USER_AGENT};
use upend::common::build;
use upend::config::UpEndConfig;
use upend::database::constants::{ADDED_ATTR, LABEL_ATTR};
use upend::database::entry::{Entry, EntryValue, InvariantEntry};
@ -856,22 +857,12 @@ pub async fn get_info(state: web::Data<State>) -> Result<HttpResponse, Error> {
}
const MAX_EXTERNAL_SIZE: usize = 128_000_000;
#[tracing::instrument(skip(url), fields(url=%url))]
async fn fetch_external(url: Url) -> Result<(bytes::Bytes, Option<String>), actix_web::Error> {
let client = reqwest::Client::builder()
.user_agent(APP_USER_AGENT.as_str())
.build()
.map_err(ErrorInternalServerError)?;
debug!("Fetching...");
let response = client
.get(url)
.send()
.await
.map_err(ErrorInternalServerError)?
.error_for_status()
let response = web::block(|| REQWEST_CLIENT.get(url).send())
.await?
.map_err(ErrorInternalServerError)?;
if let Some(content_length) = response.headers().get(reqwest::header::CONTENT_LENGTH) {
@ -899,7 +890,7 @@ async fn fetch_external(url: Url) -> Result<(bytes::Bytes, Option<String>), acti
.and_then(|cd| cd.get_filename().map(String::from));
debug!("Got filename: {filename:?}");
let bytes = response.bytes().await.map_err(ErrorInternalServerError)?;
let bytes = response.bytes().map_err(ErrorInternalServerError)?;
debug!("Got {} bytes.", bytes.len());
Ok((bytes, filename))

View File

@ -1,23 +1,3 @@
use anyhow::{anyhow, Result};
use shadow_rs::{is_debug, shadow};
use shadow_rs::shadow;
shadow!(build);
/// Resolves `dir` against the base directory for static files and returns the
/// resulting path if it exists.
///
/// The base directory is derived from the directory containing the running
/// executable: in debug builds (as reported by `is_debug()`) it is
/// `../../tmp/static` relative to that directory, otherwise it is that
/// directory itself.
///
/// # Errors
///
/// Returns an error if the executable's path cannot be determined, if that path
/// has no parent directory, or if the resolved path does not exist.
pub fn get_static_dir<S: AsRef<str>>(dir: S) -> Result<std::path::PathBuf> {
    let exe_path = std::env::current_exe()?;
    // Propagate a missing parent as an error instead of panicking: this function
    // already returns `Result`, so callers can handle it like any other failure.
    let cwd = exe_path
        .parent()
        .ok_or_else(|| anyhow!("Executable path {exe_path:?} has no parent directory."))?
        .to_path_buf();
    let base_path = if is_debug() {
        cwd.join("../../tmp/static")
    } else {
        cwd
    };
    let result = base_path.join(dir.as_ref());
    if result.exists() {
        Ok(result)
    } else {
        Err(anyhow!("Path {result:?} doesn't exist."))
    }
}
lazy_static! {
    /// User-agent string made of the project name and the package version,
    /// joined by `" / "`; both come from the `build` module.
    pub static ref APP_USER_AGENT: String = {
        let (name, version) = (build::PROJECT_NAME, build::PKG_VERSION);
        format!("{} / {}", name, version)
    };
}