diff --git a/Cargo.lock b/Cargo.lock
index 8f8971e..b4c750e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2918,6 +2918,7 @@ dependencies = [
  "once_cell",
  "opener",
  "rayon",
+ "regex",
  "serde",
  "serde_json",
  "tempfile",
diff --git a/Cargo.toml b/Cargo.toml
index fb4d98d..0f55528 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -43,6 +43,7 @@ chrono = { version = "0.4", features = ["serde"] }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 lexpr = "0.2.6"
+regex = "1"
 
 bs58 = "^0.4"
 filebuffer = "0.4.0"
diff --git a/src/database/entry.rs b/src/database/entry.rs
index 5e4a984..26d9360 100644
--- a/src/database/entry.rs
+++ b/src/database/entry.rs
@@ -2,6 +2,7 @@ use crate::addressing::{Address, Addressable};
 use crate::database::inner::models;
 use crate::util::hash::{b58_decode, hash, Hash, Hashable};
 use anyhow::{anyhow, Result};
+use regex::Regex;
 use serde::{Deserialize, Serialize};
 use std::convert::TryFrom;
 use std::io::{Cursor, Write};
@@ -161,6 +162,29 @@ impl EntryValue {
 
         Ok(format!("{}{}", type_char, content))
     }
+
+    /// Guess the most specific `EntryValue` variant for a free-form string.
+    ///
+    /// The string becomes a `Number` if it parses as one, an `Address::Url` if
+    /// it starts with a `scheme://` prefix, and a plain `String` otherwise.
+    pub fn guess_from<S: AsRef<str>>(string: S) -> Self {
+        let string = string.as_ref();
+        match string.parse::<f64>() {
+            Ok(num) => EntryValue::Number(num),
+            Err(_) => {
+                lazy_static! {
+                    // `+` lets the scheme span several characters, so prefixes
+                    // such as `http://` or `https://` are recognised.
+                    static ref URL_REGEX: Regex = Regex::new("^[a-zA-Z0-9_]+://").unwrap();
+                }
+                if URL_REGEX.is_match(string) {
+                    EntryValue::Address(Address::Url(string.to_string()))
+                } else {
+                    EntryValue::String(string.to_string())
+                }
+            }
+        }
+    }
 }
 
 impl std::str::FromStr for EntryValue {
diff --git a/src/extractors/audio.rs b/src/extractors/audio.rs
index fe2d639..b244262 100644
--- a/src/extractors/audio.rs
+++ b/src/extractors/audio.rs
@@ -1,7 +1,11 @@
 use super::Extractor;
 use crate::{
     addressing::Address,
-    database::{entry::Entry, UpEndConnection},
+    database::{
+        entry::{Entry, EntryValue},
+        UpEndConnection,
+    },
+    filesystem::FILE_MIME_KEY,
     util::jobs::{Job, JobContainer, State},
 };
 use anyhow::{anyhow, Result};
@@ -17,6 +21,19 @@ impl Extractor for ID3Extractor {
         job_container: Arc<RwLock<JobContainer>>,
     ) -> Result<Vec<Entry>> {
         if let Address::Hash(hash) = address {
+            let is_audio = connection.retrieve_object(address)?.iter().any(|e| {
+                if e.attribute == FILE_MIME_KEY {
+                    if let EntryValue::String(mime) = &e.value {
+                        return mime.starts_with("audio") || mime == "application/x-riff";
+                    }
+                }
+                false
+            });
+
+            if !is_audio {
+                return Ok(vec![]);
+            }
+
             let files = connection.retrieve_file(hash)?;
 
             if let Some(file) = files.get(0) {
@@ -26,8 +43,13 @@ impl Extractor for ID3Extractor {
                     .add_job(Job::new(
                         None,
                         &format!(
-                            "Getting ID3 info from {:?}",
-                            file.path.components().last().unwrap()
+                            "Getting ID3 info from \"{:}\"",
+                            file.path
+                                .components()
+                                .last()
+                                .unwrap()
+                                .as_os_str()
+                                .to_string_lossy()
                         ),
                     ))
                     .unwrap();
@@ -40,7 +62,10 @@ impl Extractor for ID3Extractor {
                         id3::Content::Text(text) => Some(Entry {
                             entity: address.clone(),
                             attribute: format!("ID3_{}", frame.id()),
-                            value: text.clone().into(),
+                            value: match frame.id() {
+                                "TYER" | "TBPM" => EntryValue::guess_from(text),
+                                _ => text.clone().into(),
+                            },
                         }),
                         _ => None,
                     })
diff --git a/src/filesystem.rs b/src/filesystem.rs
index 5883739..4ea0ca8 100644
--- a/src/filesystem.rs
+++ b/src/filesystem.rs
@@ -27,7 +27,7 @@ use walkdir::WalkDir;
 
 const BLOB_TYPE: &str = "BLOB";
 const ALIAS_KEY: &str = "ALIAS";
-const FILE_MIME_KEY: &str = "FILE_MIME";
+pub const FILE_MIME_KEY: &str = "FILE_MIME";
 const FILE_MTIME_KEY: &str = "FILE_MTIME";
 const FILE_SIZE_KEY: &str = "FILE_SIZE";
 