upend/src/extractors/photo.rs

125 lines
4.4 KiB
Rust

use std::sync::Arc;
use super::Extractor;
use crate::{
addressing::Address,
database::{
constants,
entry::{Entry, EntryValue},
stores::{fs::FILE_MIME_KEY, UpStore},
UpEndConnection,
},
util::jobs::{JobContainer, JobState},
};
use anyhow::{anyhow, Result};
/// Extractor that reads EXIF metadata from stored image files and turns each
/// described EXIF field into entries attached to the file's address.
pub struct ExifExtractor;
// TODO: EXIF metadata is oftentimes a constant/enum value. What's the proper
// model for enum-like values in UpEnd?
impl Extractor for ExifExtractor {
    /// Reads the EXIF fields of the file stored under `address` and converts
    /// them into entries.
    ///
    /// Only `Address::Hash` addresses are processed; any other kind of address
    /// yields an empty vector. For every EXIF field whose value is not
    /// `Undefined` and whose tag has a description, two entries are produced:
    ///
    /// * one on `address` with the attribute `EXIF_<tag number>` holding the
    ///   field's display value, and
    /// * one on that attribute itself, labelling it `EXIF: <tag description>`.
    ///
    /// Progress is reported through a job registered in `job_container`.
    ///
    /// # Errors
    ///
    /// Returns an error if the store lookup fails, if the store has no file
    /// for the hash, if the job cannot be registered, if the file cannot be
    /// opened, or if the EXIF reader fails on the file's contents.
    fn get(
        &self,
        address: &Address,
        _connection: &UpEndConnection,
        store: Arc<Box<dyn UpStore + Send + Sync>>,
        mut job_container: JobContainer,
    ) -> Result<Vec<Entry>> {
        // Nothing to extract for addresses that do not point at stored data.
        let hash = match address {
            Address::Hash(hash) => hash,
            _ => return Ok(vec![]),
        };

        let files = store.retrieve(hash)?;
        let stored_file = files
            .first()
            .ok_or_else(|| anyhow!("Couldn't find file for {hash:?}!"))?;
        let file_path = stored_file.get_file_path();

        // The job label shows only the last path component. A path without
        // any components must not panic here (the label is purely cosmetic),
        // so fall back to the whole path in that case.
        let file_name = file_path
            .components()
            .next_back()
            .map(|component| component.as_os_str().to_string_lossy())
            .unwrap_or_else(|| file_path.to_string_lossy());
        let mut job_handle = job_container.add_job(
            None,
            &format!(r#"Getting EXIF info from "{:}""#, file_name),
        )?;

        let opened_file = std::fs::File::open(file_path)?;
        let mut buffered = std::io::BufReader::new(&opened_file);
        let exif = exif::Reader::new().read_from_container(&mut buffered)?;

        let result: Vec<Entry> = exif
            .fields()
            .filter(|field| !matches!(field.value, exif::Value::Undefined(..)))
            .filter_map(|field| {
                // Fields whose tag has no description are skipped entirely.
                let tag_description = field.tag.description()?;
                let attribute = format!("EXIF_{}", field.tag.1);
                let display_value = field.display_value().to_string();

                // The EXIF version is always stored as a plain string rather
                // than being passed through `EntryValue::guess_from`.
                // NOTE(review): presumably so a value like "2.3" is not
                // guessed to be a number -- confirm.
                let value = match field.tag {
                    exif::Tag::ExifVersion => EntryValue::String(display_value),
                    _ => EntryValue::guess_from(display_value),
                };

                Some(vec![
                    Entry {
                        entity: address.clone(),
                        attribute: attribute.clone(),
                        value,
                        provenance: "SYSTEM EXTRACTOR".to_string(),
                        timestamp: chrono::Utc::now().naive_utc(),
                    },
                    Entry {
                        entity: Address::Attribute(attribute),
                        attribute: constants::LABEL_ATTR.into(),
                        value: format!("EXIF: {}", tag_description).into(),
                        provenance: "SYSTEM EXTRACTOR".to_string(),
                        timestamp: chrono::Utc::now().naive_utc(),
                    },
                ])
            })
            .flatten()
            .collect();

        // Best effort: failing to update the job state must not discard the
        // entries that were already extracted.
        let _ = job_handle.update_state(JobState::Done);

        Ok(result)
    }

    /// Decides whether EXIF extraction should run for `address`.
    ///
    /// Returns `Ok(true)` only if the object has a `FILE_MIME_KEY` entry whose
    /// string value starts with `"image"` and the `EXIF` query below returns
    /// no entries for `address`.
    ///
    /// # Errors
    ///
    /// Returns an error if retrieving the object, parsing the query, or
    /// running the query fails.
    fn is_needed(&self, address: &Address, connection: &UpEndConnection) -> Result<bool> {
        let is_photo = connection
            .retrieve_object(address)?
            .iter()
            .any(|entry| match &entry.value {
                EntryValue::String(mime) if entry.attribute == FILE_MIME_KEY => {
                    mime.starts_with("image")
                }
                _ => false,
            });
        if !is_photo {
            return Ok(false);
        }

        // Any entry matched by this query means extraction has already run.
        let already_extracted = connection
            .query(format!("(matches @{} (contains \"EXIF\") ?)", address).parse()?)?;
        Ok(already_extracted.is_empty())
    }
}