diff --git a/Cargo.lock b/Cargo.lock index 26559a9..44e5b4f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3880,6 +3880,7 @@ dependencies = [ "tracing", "tracing-subscriber", "tree_magic_mini", + "url", "uuid", "walkdir", ] @@ -3933,6 +3934,7 @@ dependencies = [ "tracing-subscriber", "tree_magic_mini", "upend", + "url", "uuid", "walkdir", "webbrowser", @@ -3949,6 +3951,7 @@ dependencies = [ "form_urlencoded", "idna 0.3.0", "percent-encoding", + "serde", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 0c1c535..a31e826 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,7 @@ multihash = { version = "*", default-features = false, features = [ "identity", ] } uuid = { version = "0.8", features = ["v4"] } +url = { version = "2", features = ["serde"] } filebuffer = "0.4.0" tempfile = "^3.2.0" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index a5a22ef..5bef1b9 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -80,6 +80,7 @@ kamadak-exif = { version = "0.5.4", optional = true } shadow-rs = "0.17" reqwest = { version = "0.11.16", features = ["blocking", "json"] } +url = "2" [build-dependencies] shadow-rs = "0.17" diff --git a/cli/src/extractors/web.rs b/cli/src/extractors/web.rs index 5c0038b..1a480a4 100644 --- a/cli/src/extractors/web.rs +++ b/cli/src/extractors/web.rs @@ -26,7 +26,7 @@ impl Extractor for WebExtractor { job_container.add_job(None, &format!("Getting info about {url:?}"))?; let webpage_url = url.clone(); - let webpage_get = Webpage::from_url(&webpage_url, WebpageOptions::default()); + let webpage_get = Webpage::from_url(webpage_url.as_ref(), WebpageOptions::default()); if let Ok(webpage) = webpage_get { let _ = job_handle.update_progress(50.0); @@ -91,6 +91,7 @@ mod test { use upend::database::stores::fs::FsStore; use upend::util::jobs::JobContainer; + use url::Url; use super::*; use anyhow::Result; @@ -106,7 +107,7 @@ mod test { Arc::new(Box::new(FsStore::from_path(&temp_dir)?) as Box); let job_container = JobContainer::new(); - let address = Address::Url("https://upend.dev".into()); + let address = Address::Url(Url::parse("https://upend.dev").unwrap()); assert!(WebExtractor.is_needed(&address, &connection)?); WebExtractor.insert_info(&address, &connection, store, job_container)?; diff --git a/cli/src/main.rs b/cli/src/main.rs index 9f69789..f46b21d 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -241,7 +241,7 @@ fn main() -> Result<()> { let entity = match entity { entity if entity.starts_with('=') => hash_path(&entity[1..])?.to_string(), - entity if entity.starts_with("http") => Address::Url(entity).to_string(), + entity if entity.starts_with("http") => Address::Url(entity.parse()?).to_string(), _ => entity, }; diff --git a/cli/src/routes.rs b/cli/src/routes.rs index d22e6d6..0d87667 100644 --- a/cli/src/routes.rs +++ b/cli/src/routes.rs @@ -35,6 +35,7 @@ use upend::database::stores::{Blob, UpStore}; use upend::database::UpEndDatabase; use upend::util::hash::{b58_decode, b58_encode}; use upend::util::jobs; +use url::Url; use uuid::Uuid; #[cfg(feature = "desktop")] @@ -312,7 +313,7 @@ pub async fn get_object( Address::Hash(_) => ("Hash", None), Address::Uuid(_) => ("Uuid", None), Address::Attribute(attribute) => ("Attribute", Some(attribute)), - Address::Url(url) => ("Url", Some(url)), + Address::Url(url) => ("Url", Some(url.to_string())), }; Ok(HttpResponse::Ok().json(json!({ @@ -342,7 +343,11 @@ impl TryInto
for InAddress { // TODO: make this automatically derive from `Address` definition match t.as_str() { "Attribute" => Address::Attribute(c.ok_or(anyhow!("Missing attribute."))?), - "Url" => Address::Url(c.ok_or(anyhow!("Missing URL."))?), + "Url" => Address::Url(if let Some(string) = c { + Url::parse(&string)? + } else { + Err(anyhow!("Missing URL."))? + }), "Uuid" => match c { Some(c) => c.parse()?, None => Address::Uuid(Uuid::new_v4()), @@ -641,6 +646,7 @@ pub async fn delete_object( #[derive(Deserialize)] pub struct GetAddressRequest { attribute: Option, + // url: Option, } #[get("/api/address")] @@ -820,7 +826,7 @@ mod tests { #[test] fn test_in_address() -> Result<()> { - let address = Address::Url("https://upend.dev".into()); + let address = Address::Url(Url::parse("https://upend.dev").unwrap()); let in_address = InAddress::Address(address.to_string()); assert_eq!(address, in_address.try_into()?); diff --git a/src/addressing.rs b/src/addressing.rs index f3423be..1d5ac57 100644 --- a/src/addressing.rs +++ b/src/addressing.rs @@ -5,6 +5,7 @@ use serde::de::Visitor; use serde::{de, ser, Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; use std::str::FromStr; +use url::Url; use uuid::Uuid; #[derive(Clone, Eq, PartialEq, Hash)] @@ -12,7 +13,7 @@ pub enum Address { Hash(Hash), Uuid(Uuid), Attribute(String), - Url(String), + Url(Url), } // multihash SHA2-256 @@ -30,7 +31,9 @@ impl Address { Self::Attribute(attribute) => { Code::Identity.digest(&[&[b'A'], attribute.as_bytes()].concat()) } - Self::Url(url) => Code::Identity.digest(&[&[b'X'], url.as_bytes()].concat()), + Self::Url(url) => { + Code::Identity.digest(&[&[b'X'], url.to_string().as_bytes()].concat()) + } }; Ok(hash.to_bytes()) @@ -50,7 +53,7 @@ impl Address { digest_content.as_slice(), )?)), b'A' => Ok(Self::Attribute(String::from_utf8(digest_content)?)), - b'X' => Ok(Self::Url(String::from_utf8(digest_content)?)), + b'X' => Ok(Self::Url(Url::parse(&String::from_utf8(digest_content)?)?)), _ => Err(anyhow!("Error decoding address: Unknown identity marker.")), } } @@ -146,6 +149,7 @@ pub trait Addressable: Hashable { #[cfg(test)] mod tests { use anyhow::Result; + use url::Url; use uuid::Uuid; use crate::addressing::Address; @@ -180,7 +184,7 @@ mod tests { #[test] fn test_url_codec() -> Result<()> { - let addr = Address::Url(String::from("https://upend.dev")); + let addr = Address::Url(Url::parse("https://upend.dev").unwrap()); let encoded = addr.encode()?; let decoded = Address::decode(&encoded)?; assert_eq!(addr, decoded); diff --git a/src/database/entry.rs b/src/database/entry.rs index 57a61c5..57a1e8b 100644 --- a/src/database/entry.rs +++ b/src/database/entry.rs @@ -3,10 +3,10 @@ use crate::database::inner::models; use crate::util::hash::{b58_decode, hash, Hash, Hashable}; use anyhow::{anyhow, Result}; use chrono::NaiveDateTime; -use regex::Regex; use serde::{Deserialize, Serialize}; use std::convert::TryFrom; use std::io::{Cursor, Write}; +use url::Url; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Entry { @@ -79,7 +79,7 @@ impl TryFrom<&Entry> for models::Entry { identity: e.address()?.encode()?, entity_searchable: match &e.entity { Address::Attribute(attr) => Some(attr.clone()), - Address::Url(url) => Some(url.clone()), + Address::Url(url) => Some(url.to_string()), _ => None, }, entity: e.entity.encode()?, @@ -183,11 +183,8 @@ impl EntryValue { match string.parse::() { Ok(num) => EntryValue::Number(num), Err(_) => { - lazy_static! { - static ref URL_REGEX: Regex = Regex::new("^[a-zA-Z0-9_]+://").unwrap(); - } - if URL_REGEX.is_match(string) { - EntryValue::Address(Address::Url(string.to_string())) + if let Ok(url) = Url::parse(string) { + EntryValue::Address(Address::Url(url)) } else { EntryValue::String(string.to_string()) } @@ -230,6 +227,12 @@ impl std::str::FromStr for EntryValue { } } +impl From for EntryValue { + fn from(value: Url) -> Self { + EntryValue::Address(Address::Url(value)) + } +} + impl std::fmt::Display for EntryValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let (entry_type, entry_value) = match self { @@ -284,7 +287,7 @@ mod tests { let decoded = encoded.parse::()?; assert_eq!(entry, decoded); - let entry = EntryValue::Address(Address::Url("https://upend.dev".to_string())); + let entry = EntryValue::Address(Address::Url(Url::parse("https://upend.dev").unwrap())); let encoded = entry.to_string()?; let decoded = encoded.parse::()?; assert_eq!(entry, decoded); @@ -306,7 +309,7 @@ mod tests { fn test_into() { assert_eq!(EntryValue::String(String::from("UPEND")), "UPEND".into()); assert_eq!(EntryValue::Number(1337.93), 1337.93.into()); - let addr = Address::Url("https://upend.dev".into()); + let addr = Address::Url(Url::parse("https://upend.dev").unwrap()); assert_eq!(EntryValue::Address(addr.clone()), addr.into()); } @@ -322,7 +325,7 @@ mod tests { ); assert_eq!( EntryValue::guess_from("https://upend.dev"), - EntryValue::Address(Address::Url("https://upend.dev".to_string())) + EntryValue::Address(Address::Url(Url::parse("https://upend.dev").unwrap())) ); } } diff --git a/src/database/lang.rs b/src/database/lang.rs index ff439ed..6b2cc8b 100644 --- a/src/database/lang.rs +++ b/src/database/lang.rs @@ -308,6 +308,7 @@ impl FromStr for Query { mod test { use super::*; use anyhow::Result; + use url::Url; #[test] fn test_matches() -> Result<()> { @@ -321,7 +322,7 @@ mod test { })) ); - let address = Address::Url(String::from("https://upend.dev")); + let address = Address::Url(Url::parse("https://upend.dev").unwrap()); let query = format!("(matches @{address} ? ?)").parse::()?; assert_eq!( query,