upend/cli/src/extractors/exif.rs

use std::sync::Arc;

use super::Extractor;
use anyhow::{anyhow, Result};
use lazy_static::lazy_static;
use upend_base::entry::Attribute;
use upend_base::{
    addressing::Address,
    constants::{ATTR_IN, ATTR_KEY, ATTR_LABEL, ATTR_OF},
    entry::{Entry, EntryValue, InvariantEntry},
};
use upend_db::{
    jobs::{JobContainer, JobState},
    stores::{fs::FILE_MIME_KEY, UpStore},
    OperationContext, UpEndConnection,
};
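
/// Extracts EXIF metadata from image files in the store and records each
/// EXIF field as an attribute entry on the file's entity.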
pub struct ExifExtractor;
// TODO: EXIF metadata is oftentimes a constant/enum value. What's the proper
// model for enum-like values in UpEnd?
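
// Invariant entries describing the abstract "EXIF" type; extracted attributes
// and the source files get linked to this entity.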
lazy_static! {
    pub static ref EXIF_TYPE_INVARIANT: InvariantEntry = InvariantEntry {
        attribute: ATTR_KEY.parse().unwrap(),
        value: "TYPE_EXIF".into(),
    };
    pub static ref EXIF_TYPE_LABEL: Entry = Entry {
        entity: EXIF_TYPE_INVARIANT.entity().unwrap(),
        attribute: ATTR_LABEL.parse().unwrap(),
        value: "EXIF".into(),
        provenance: "INVARIANT".to_string(),
        timestamp: chrono::Utc::now().naive_utc(),
        user: None,
    };
}

impl Extractor for ExifExtractor {
    fn get(
        &self,
        address: &Address,
        _connection: &UpEndConnection,
        store: Arc<Box<dyn UpStore + Send + Sync>>,
        mut job_container: JobContainer,
        context: OperationContext,
    ) -> Result<Vec<Entry>> {
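        // Only content-addressed (hash) entities refer to stored files;
        // other address kinds produce no EXIF entries.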
        if let Address::Hash(hash) = address {
            let files = store.retrieve(hash)?;

            if let Some(file) = files.first() {
                let file_path = file.get_file_path();

                let mut job_handle = job_container.add_job(
                    None,
                    &format!(
                        r#"Getting EXIF info from "{:}""#,
                        file_path
                            .components()
                            .last()
                            .unwrap()
                            .as_os_str()
                            .to_string_lossy()
                    ),
                )?;
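
                // Read the EXIF data out of the file with the `exif` crate.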
                let file = std::fs::File::open(file_path)?;
                let mut bufreader = std::io::BufReader::new(&file);
                let exifreader = exif::Reader::new();
                let exif = exifreader.read_from_container(&mut bufreader)?;
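
                // Each defined EXIF field yields two entries: its value under
                // an `EXIF_`-prefixed attribute, plus a human-readable label
                // for that attribute.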
                let mut result: Vec<Entry> = vec![];

                for field in exif
                    .fields()
                    .filter(|field| !matches!(field.value, exif::Value::Undefined(..)))
                {
                    if let Some(tag_description) = field.tag.description() {
                        let attribute: Attribute = format!("EXIF_{}", field.tag.1).parse()?;

                        result.extend(vec![
                            Entry {
                                entity: address.clone(),
                                attribute: attribute.clone(),
                                value: match field.tag {
                                    exif::Tag::ExifVersion => {
                                        EntryValue::String(format!("{}", field.display_value()))
                                    }
                                    _ => {
                                        EntryValue::guess_from(format!("{}", field.display_value()))
                                    }
                                },
                                provenance: context.provenance.clone() + "EXTRACTOR",
                                user: context.user.clone(),
                                timestamp: chrono::Utc::now().naive_utc(),
                            },
                            Entry {
                                entity: Address::Attribute(attribute),
                                attribute: ATTR_LABEL.parse().unwrap(),
                                value: format!("EXIF: {}", tag_description).into(),
                                provenance: context.provenance.clone() + "EXTRACTOR",
                                user: context.user.clone(),
                                timestamp: chrono::Utc::now().naive_utc(),
                            },
                        ]);
                    }
                }
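
                // If anything was extracted, link each produced attribute to
                // the EXIF type entity and mark the file itself as belonging
                // to that type.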
                if !result.is_empty() {
                    result.extend(
                        result
                            .iter()
                            .filter(|e| e.attribute != ATTR_LABEL)
                            .map(|e| Entry {
                                entity: Address::Attribute(e.attribute.clone()),
                                attribute: ATTR_OF.parse().unwrap(),
                                value: EntryValue::Address(EXIF_TYPE_INVARIANT.entity().unwrap()),
                                provenance: context.provenance.clone() + "EXTRACTOR",
                                user: context.user.clone(),
                                timestamp: chrono::Utc::now().naive_utc(),
                            })
                            .collect::<Vec<Entry>>(),
                    );

                    result.extend(vec![
                        (&EXIF_TYPE_INVARIANT as &InvariantEntry)
                            .try_into()
                            .unwrap(),
                        EXIF_TYPE_LABEL.clone(),
                        Entry {
                            entity: address.clone(),
                            attribute: ATTR_IN.parse().unwrap(),
                            value: EntryValue::Address(EXIF_TYPE_INVARIANT.entity().unwrap()),
                            provenance: context.provenance.clone() + "EXTRACTOR",
                            user: context.user.clone(),
                            timestamp: chrono::Utc::now().naive_utc(),
                        },
                    ]);
                }

                let _ = job_handle.update_state(JobState::Done);

                Ok(result)
            } else {
                Err(anyhow!("Couldn't find file for {hash:?}!"))
            }
        } else {
            Ok(vec![])
        }
    }
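
    /// Extraction applies to files with an "image" MIME type that don't
    /// already carry EXIF-derived attributes.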
    fn is_needed(&self, address: &Address, connection: &UpEndConnection) -> Result<bool> {
        let is_exif = connection.retrieve_object(address)?.iter().any(|e| {
            if e.attribute == FILE_MIME_KEY {
                if let EntryValue::String(mime) = &e.value {
                    return mime.starts_with("image");
                }
            }
            false
        });

        if !is_exif {
            return Ok(false);
        }
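
        // Skip files that already have an attribute matching "EXIF".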
        let is_extracted = !connection
            .query(format!("(matches @{} (contains \"EXIF\") ?)", address).parse()?)?
            .is_empty();

        if is_extracted {
            return Ok(false);
        }

        Ok(true)
    }
}