upend/cli/src/extractors/audio.rs

192 lines
7.5 KiB
Rust

use std::io::Write;
use std::sync::Arc;
use super::Extractor;
use anyhow::{anyhow, Result};
use lazy_static::lazy_static;
use upend_base::{
addressing::Address,
constants::{ATTR_IN, ATTR_KEY, ATTR_LABEL, ATTR_OF},
entry::{Entry, EntryValue, InvariantEntry},
};
use upend_db::stores::Blob;
use upend_db::{
jobs::{JobContainer, JobState},
stores::{fs::FILE_MIME_KEY, UpStore},
BlobMode, OperationContext, UpEndConnection,
};
lazy_static! {
pub static ref ID3_TYPE_INVARIANT: InvariantEntry = InvariantEntry {
attribute: ATTR_KEY.parse().unwrap(),
value: "TYPE_ID3".into(),
};
pub static ref ID3_TYPE_LABEL: Entry = Entry {
entity: ID3_TYPE_INVARIANT.entity().unwrap(),
attribute: ATTR_LABEL.parse().unwrap(),
value: "ID3".into(),
provenance: "INVARIANT".to_string(),
user: None,
timestamp: chrono::Utc::now().naive_utc(),
};
}
pub struct ID3Extractor;
impl Extractor for ID3Extractor {
fn get(
&self,
address: &Address,
connection: &UpEndConnection,
store: Arc<Box<dyn UpStore + Send + Sync>>,
mut job_container: JobContainer,
context: OperationContext,
) -> Result<Vec<Entry>> {
if let Address::Hash(hash) = address {
let files = store.retrieve(hash)?;
if let Some(file) = files.first() {
let file_path = file.get_file_path();
let mut job_handle = job_container.add_job(
None,
&format!(
r#"Getting ID3 info from "{:}""#,
file_path
.components()
.last()
.unwrap()
.as_os_str()
.to_string_lossy()
),
)?;
let tags = id3::Tag::read_from_path(file_path)?;
let mut result: Vec<Entry> = vec![];
for frame in tags.frames() {
if let id3::Content::Text(text) = frame.content() {
result.extend(vec![
Entry {
entity: address.clone(),
attribute: format!("ID3_{}", frame.id()).parse()?,
value: match frame.id() {
"TYER" | "TBPM" => EntryValue::guess_from(text),
_ => text.clone().into(),
},
provenance: context.provenance.clone() + "EXTRACTOR",
user: context.user.clone(),
timestamp: chrono::Utc::now().naive_utc(),
},
Entry {
entity: Address::Attribute(format!("ID3_{}", frame.id()).parse()?),
attribute: ATTR_LABEL.parse().unwrap(),
value: format!("ID3: {}", frame.name()).into(),
provenance: context.provenance.clone() + "EXTRACTOR",
user: context.user.clone(),
timestamp: chrono::Utc::now().naive_utc(),
},
]);
}
}
let mut has_pictures = false;
for (idx, picture) in tags.pictures().enumerate() {
let tmp_dir = tempfile::tempdir()?;
let tmp_path = tmp_dir.path().join(format!("img-{}", idx));
let mut file = std::fs::File::create(&tmp_path)?;
file.write_all(&picture.data)?;
let hash = store.store(
connection,
Blob::from_filepath(&tmp_path),
None,
Some(BlobMode::StoreOnly),
context.clone(),
)?;
result.push(Entry {
entity: address.clone(),
attribute: "ID3_PICTURE".parse()?,
value: EntryValue::Address(Address::Hash(hash)),
provenance: context.provenance.clone() + "EXTRACTOR",
user: context.user.clone(),
timestamp: chrono::Utc::now().naive_utc(),
});
has_pictures = true;
}
if has_pictures {
result.push(Entry {
entity: Address::Attribute("ID3_PICTURE".parse()?),
attribute: ATTR_LABEL.parse().unwrap(),
value: "ID3 Embedded Image".into(),
provenance: context.provenance.clone() + "EXTRACTOR",
user: context.user.clone(),
timestamp: chrono::Utc::now().naive_utc(),
})
}
if !result.is_empty() {
result.extend(
result
.iter()
.filter(|e| e.attribute != ATTR_LABEL)
.map(|e| Entry {
entity: Address::Attribute(e.attribute.clone()),
attribute: ATTR_OF.parse().unwrap(),
value: EntryValue::Address(ID3_TYPE_INVARIANT.entity().unwrap()),
provenance: context.provenance.clone() + "EXTRACTOR",
user: context.user.clone(),
timestamp: chrono::Utc::now().naive_utc(),
})
.collect::<Vec<Entry>>(),
);
result.extend(vec![
(&ID3_TYPE_INVARIANT as &InvariantEntry).try_into().unwrap(),
ID3_TYPE_LABEL.clone(),
Entry {
entity: address.clone(),
attribute: ATTR_IN.parse().unwrap(),
value: EntryValue::Address(ID3_TYPE_INVARIANT.entity().unwrap()),
provenance: context.provenance.clone() + "EXTRACTOR",
user: context.user.clone(),
timestamp: chrono::Utc::now().naive_utc(),
},
]);
}
let _ = job_handle.update_state(JobState::Done);
Ok(result)
} else {
Err(anyhow!("Couldn't find file for {hash:?}!"))
}
} else {
Ok(vec![])
}
}
fn is_needed(&self, address: &Address, connection: &UpEndConnection) -> Result<bool> {
let is_audio = connection.retrieve_object(address)?.iter().any(|e| {
if e.attribute == FILE_MIME_KEY {
if let EntryValue::String(mime) = &e.value {
return mime.starts_with("audio") || mime == "application/x-riff";
}
}
false
});
if !is_audio {
return Ok(false);
}
let is_extracted = !connection
.query(format!("(matches @{} (contains \"ID3\") ?)", address).parse()?)?
.is_empty();
if is_extracted {
return Ok(false);
}
Ok(true)
}
}