upend/cli/src/extractors/media.rs

158 lines
5.5 KiB
Rust

use std::{process::Command, sync::Arc};
use super::Extractor;
use anyhow::{anyhow, Result};
use lazy_static::lazy_static;
use tracing::{debug, trace};
use upend_base::{
addressing::Address,
constants::{ATTR_IN, ATTR_KEY, ATTR_LABEL, ATTR_OF},
entry::{Entry, EntryValue, InvariantEntry},
};
use upend_db::{
jobs::{JobContainer, JobState},
stores::{fs::FILE_MIME_KEY, UpStore},
UpEndConnection,
};
const DURATION_KEY: &str = "MEDIA_DURATION";
lazy_static! {
pub static ref MEDIA_TYPE_INVARIANT: InvariantEntry = InvariantEntry {
attribute: ATTR_KEY.parse().unwrap(),
value: "TYPE_MEDIA".into(),
};
pub static ref MEDIA_TYPE_LABEL: Entry = Entry {
entity: MEDIA_TYPE_INVARIANT.entity().unwrap(),
attribute: ATTR_LABEL.parse().unwrap(),
value: "Multimedia".into(),
provenance: "INVARIANT".to_string(),
timestamp: chrono::Utc::now().naive_utc(),
};
pub static ref DURATION_OF_MEDIA: Entry = Entry {
entity: Address::Attribute(DURATION_KEY.parse().unwrap()),
attribute: ATTR_OF.parse().unwrap(),
value: EntryValue::Address(MEDIA_TYPE_INVARIANT.entity().unwrap()),
provenance: "INVARIANT".to_string(),
timestamp: chrono::Utc::now().naive_utc(),
};
}
pub struct MediaExtractor;
impl Extractor for MediaExtractor {
fn get(
&self,
address: &Address,
_connection: &UpEndConnection,
store: Arc<Box<dyn UpStore + Send + Sync>>,
mut job_container: JobContainer,
) -> Result<Vec<Entry>> {
if let Address::Hash(hash) = address {
let files = store.retrieve(hash)?;
if let Some(file) = files.first() {
let file_path = file.get_file_path();
let mut job_handle = job_container.add_job(
None,
&format!(
r#"Getting media info from "{:}""#,
file_path
.components()
.last()
.unwrap()
.as_os_str()
.to_string_lossy()
),
)?;
// https://superuser.com/a/945604/409504
let mut ffprobe = Command::new("ffprobe");
let command = ffprobe
.args(["-v", "error"])
.args(["-show_entries", "format=duration"])
.args(["-of", "default=noprint_wrappers=1:nokey=1"])
.arg(file_path);
trace!("Running `{:?}`", command);
let now = std::time::Instant::now();
let ffprobe_cmd = command.output()?;
debug!("Ran `{:?}`, took {}s", command, now.elapsed().as_secs_f32());
if !ffprobe_cmd.status.success() {
return Err(anyhow!(
"Failed to retrieve file duration: {:?}",
String::from_utf8_lossy(&ffprobe_cmd.stderr)
));
}
let duration = String::from_utf8(ffprobe_cmd.stdout)?
.trim()
.parse::<f64>()?;
let result = vec![
Entry {
entity: address.clone(),
attribute: DURATION_KEY.parse().unwrap(),
value: EntryValue::Number(duration),
provenance: "SYSTEM EXTRACTOR".to_string(),
timestamp: chrono::Utc::now().naive_utc(),
},
(&MEDIA_TYPE_INVARIANT as &InvariantEntry)
.try_into()
.unwrap(),
MEDIA_TYPE_LABEL.clone(),
DURATION_OF_MEDIA.clone(),
Entry {
entity: address.clone(),
attribute: ATTR_IN.parse().unwrap(),
value: EntryValue::Address(MEDIA_TYPE_INVARIANT.entity().unwrap()),
provenance: "SYSTEM EXTRACTOR".to_string(),
timestamp: chrono::Utc::now().naive_utc(),
},
];
let _ = job_handle.update_state(JobState::Done);
Ok(result)
} else {
Err(anyhow!("Couldn't find file for {hash:?}!"))
}
} else {
Ok(vec![])
}
}
fn is_needed(&self, address: &Address, connection: &UpEndConnection) -> Result<bool> {
let is_media = connection.retrieve_object(address)?.iter().any(|e| {
if e.attribute == FILE_MIME_KEY {
if let EntryValue::String(mime) = &e.value {
return mime.starts_with("audio") || mime.starts_with("video");
}
}
if e.attribute == ATTR_LABEL {
if let EntryValue::String(label) = &e.value {
let label = label.to_lowercase();
return label.ends_with(".ogg")
|| label.ends_with(".mp3")
|| label.ends_with(".wav");
}
}
false
});
if !is_media {
return Ok(false);
}
let is_extracted = !connection
.query(format!("(matches @{} (contains \"{}\") ?)", address, DURATION_KEY).parse()?)?
.is_empty();
if is_extracted {
return Ok(false);
}
Ok(true)
}
}