id3 improvements - extract year/bpm as numbers; fix id3 extract job name; exit early if file isn't audio

also add EntryValue::guess_from
feat/vaults
Tomáš Mládek 2022-03-01 21:00:29 +01:00
parent 82c7bfcb64
commit 4f36bff549
No known key found for this signature in database
GPG Key ID: 65E225C8B3E2ED8A
5 changed files with 50 additions and 5 deletions

1
Cargo.lock generated
View File

@ -2918,6 +2918,7 @@ dependencies = [
"once_cell", "once_cell",
"opener", "opener",
"rayon", "rayon",
"regex",
"serde", "serde",
"serde_json", "serde_json",
"tempfile", "tempfile",

View File

@ -43,6 +43,7 @@ chrono = { version = "0.4", features = ["serde"] }
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
lexpr = "0.2.6" lexpr = "0.2.6"
regex = "1"
bs58 = "^0.4" bs58 = "^0.4"
filebuffer = "0.4.0" filebuffer = "0.4.0"

View File

@ -2,6 +2,7 @@ use crate::addressing::{Address, Addressable};
use crate::database::inner::models; use crate::database::inner::models;
use crate::util::hash::{b58_decode, hash, Hash, Hashable}; use crate::util::hash::{b58_decode, hash, Hash, Hashable};
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Result};
use regex::Regex;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::convert::TryFrom; use std::convert::TryFrom;
use std::io::{Cursor, Write}; use std::io::{Cursor, Write};
@ -161,6 +162,23 @@ impl EntryValue {
Ok(format!("{}{}", type_char, content)) Ok(format!("{}{}", type_char, content))
} }
/// Guesses the most specific `EntryValue` variant for a piece of free-form text.
///
/// - Text that parses as a finite `f64` becomes `EntryValue::Number`.
/// - Text that begins with a URL scheme followed by `://` (e.g. `https://`)
///   becomes `EntryValue::Address(Address::Url(..))`.
/// - Anything else is kept verbatim as `EntryValue::String`.
pub fn guess_from<S: AsRef<str>>(string: S) -> Self {
    let string = string.as_ref();

    // `str::parse::<f64>` also accepts spellings such as "inf", "infinity" and
    // "nan"; treating those as numbers would turn ordinary words into
    // non-finite values, so only finite results count as a number.
    if let Ok(num) = string.parse::<f64>() {
        if num.is_finite() {
            return EntryValue::Number(num);
        }
    }

    lazy_static! {
        // One or more scheme characters followed by "://". The `+` quantifier
        // is essential: without it only single-character schemes match and
        // "http://..." is never recognised as a URL.
        static ref URL_REGEX: Regex = Regex::new("^[a-zA-Z0-9_+.-]+://").unwrap();
    }

    if URL_REGEX.is_match(string) {
        EntryValue::Address(Address::Url(string.to_string()))
    } else {
        EntryValue::String(string.to_string())
    }
}
} }
impl std::str::FromStr for EntryValue { impl std::str::FromStr for EntryValue {

View File

@ -1,7 +1,11 @@
use super::Extractor; use super::Extractor;
use crate::{ use crate::{
addressing::Address, addressing::Address,
database::{entry::Entry, UpEndConnection}, database::{
entry::{Entry, EntryValue},
UpEndConnection,
},
filesystem::FILE_MIME_KEY,
util::jobs::{Job, JobContainer, State}, util::jobs::{Job, JobContainer, State},
}; };
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Result};
@ -17,6 +21,19 @@ impl Extractor for ID3Extractor {
job_container: Arc<RwLock<JobContainer>>, job_container: Arc<RwLock<JobContainer>>,
) -> Result<Vec<Entry>> { ) -> Result<Vec<Entry>> {
if let Address::Hash(hash) = address { if let Address::Hash(hash) = address {
let is_audio = connection.retrieve_object(address)?.iter().any(|e| {
if e.attribute == FILE_MIME_KEY {
if let EntryValue::String(mime) = &e.value {
return mime.starts_with("audio") || mime == "application/x-riff";
}
}
false
});
if !is_audio {
return Ok(vec![]);
}
let files = connection.retrieve_file(hash)?; let files = connection.retrieve_file(hash)?;
if let Some(file) = files.get(0) { if let Some(file) = files.get(0) {
@ -26,8 +43,13 @@ impl Extractor for ID3Extractor {
.add_job(Job::new( .add_job(Job::new(
None, None,
&format!( &format!(
"Getting ID3 info from {:?}", "Getting ID3 info from \"{:}\"",
file.path.components().last().unwrap() file.path
.components()
.last()
.unwrap()
.as_os_str()
.to_string_lossy()
), ),
)) ))
.unwrap(); .unwrap();
@ -40,7 +62,10 @@ impl Extractor for ID3Extractor {
id3::Content::Text(text) => Some(Entry { id3::Content::Text(text) => Some(Entry {
entity: address.clone(), entity: address.clone(),
attribute: format!("ID3_{}", frame.id()), attribute: format!("ID3_{}", frame.id()),
value: text.clone().into(), value: match frame.id() {
"TYER" | "TBPM" => EntryValue::guess_from(text),
_ => text.clone().into(),
},
}), }),
_ => None, _ => None,
}) })

View File

@ -27,7 +27,7 @@ use walkdir::WalkDir;
const BLOB_TYPE: &str = "BLOB"; const BLOB_TYPE: &str = "BLOB";
const ALIAS_KEY: &str = "ALIAS"; const ALIAS_KEY: &str = "ALIAS";
const FILE_MIME_KEY: &str = "FILE_MIME"; pub const FILE_MIME_KEY: &str = "FILE_MIME";
const FILE_MTIME_KEY: &str = "FILE_MTIME"; const FILE_MTIME_KEY: &str = "FILE_MTIME";
const FILE_SIZE_KEY: &str = "FILE_SIZE"; const FILE_SIZE_KEY: &str = "FILE_SIZE";