use std::sync::Arc; use super::Extractor; use anyhow::{anyhow, Result}; use lazy_static::lazy_static; use upend_base::entry::Attribute; use upend_base::{ addressing::Address, constants::{ATTR_IN, ATTR_KEY, ATTR_LABEL, ATTR_OF}, entry::{Entry, EntryValue, InvariantEntry}, }; use upend_db::{ jobs::{JobContainer, JobState}, stores::{fs::FILE_MIME_KEY, UpStore}, OperationContext, UpEndConnection, }; pub struct ExifExtractor; // TODO: EXIF metadata is oftentimes a constant/enum value. What's the proper // model for enum-like values in UpEnd? lazy_static! { pub static ref EXIF_TYPE_INVARIANT: InvariantEntry = InvariantEntry { attribute: ATTR_KEY.parse().unwrap(), value: "TYPE_EXIF".into(), }; pub static ref EXIF_TYPE_LABEL: Entry = Entry { entity: EXIF_TYPE_INVARIANT.entity().unwrap(), attribute: ATTR_LABEL.parse().unwrap(), value: "EXIF".into(), provenance: "INVARIANT".to_string(), timestamp: chrono::Utc::now().naive_utc(), user: None }; } impl Extractor for ExifExtractor { fn get( &self, address: &Address, _connection: &UpEndConnection, store: Arc>, mut job_container: JobContainer, context: OperationContext, ) -> Result> { if let Address::Hash(hash) = address { let files = store.retrieve(hash)?; if let Some(file) = files.first() { let file_path = file.get_file_path(); let mut job_handle = job_container.add_job( None, &format!( r#"Getting EXIF info from "{:}""#, file_path .components() .last() .unwrap() .as_os_str() .to_string_lossy() ), )?; let file = std::fs::File::open(file_path)?; let mut bufreader = std::io::BufReader::new(&file); let exifreader = exif::Reader::new(); let exif = exifreader.read_from_container(&mut bufreader)?; let mut result: Vec = vec![]; for field in exif .fields() .filter(|field| !matches!(field.value, exif::Value::Undefined(..))) { if let Some(tag_description) = field.tag.description() { let attribute: Attribute = format!("EXIF_{}", field.tag.1).parse()?; result.extend(vec![ Entry { entity: address.clone(), attribute: attribute.clone(), value: match field.tag { exif::Tag::ExifVersion => { EntryValue::String(format!("{}", field.display_value())) } _ => { EntryValue::guess_from(format!("{}", field.display_value())) } }, provenance: context.provenance.clone() + "EXTRACTOR", user: context.user.clone(), timestamp: chrono::Utc::now().naive_utc(), }, Entry { entity: Address::Attribute(attribute), attribute: ATTR_LABEL.parse().unwrap(), value: format!("EXIF: {}", tag_description).into(), provenance: context.provenance.clone() + "EXTRACTOR", user: context.user.clone(), timestamp: chrono::Utc::now().naive_utc(), }, ]); } } if !result.is_empty() { result.extend( result .iter() .filter(|e| e.attribute != ATTR_LABEL) .map(|e| Entry { entity: Address::Attribute(e.attribute.clone()), attribute: ATTR_OF.parse().unwrap(), value: EntryValue::Address(EXIF_TYPE_INVARIANT.entity().unwrap()), provenance: context.provenance.clone() + "EXTRACTOR", user: context.user.clone(), timestamp: chrono::Utc::now().naive_utc(), }) .collect::>(), ); result.extend(vec![ (&EXIF_TYPE_INVARIANT as &InvariantEntry) .try_into() .unwrap(), EXIF_TYPE_LABEL.clone(), Entry { entity: address.clone(), attribute: ATTR_IN.parse().unwrap(), value: EntryValue::Address(EXIF_TYPE_INVARIANT.entity().unwrap()), provenance: context.provenance.clone() + "EXTRACTOR", user: context.user.clone(), timestamp: chrono::Utc::now().naive_utc(), }, ]); } let _ = job_handle.update_state(JobState::Done); Ok(result) } else { Err(anyhow!("Couldn't find file for {hash:?}!")) } } else { Ok(vec![]) } } fn is_needed(&self, address: &Address, connection: &UpEndConnection) -> Result { let is_exif = connection.retrieve_object(address)?.iter().any(|e| { if e.attribute == FILE_MIME_KEY { if let EntryValue::String(mime) = &e.value { return mime.starts_with("image"); } } false }); if !is_exif { return Ok(false); } let is_extracted = !connection .query(format!("(matches @{} (contains \"EXIF\") ?)", address).parse()?)? .is_empty(); if is_extracted { return Ok(false); } Ok(true) } }