174 lines
6.5 KiB
Rust
174 lines
6.5 KiB
Rust
use std::sync::Arc;
|
|
|
|
use super::Extractor;
|
|
use anyhow::{anyhow, Result};
|
|
use lazy_static::lazy_static;
|
|
use upend_base::entry::Attribute;
|
|
use upend_base::{
|
|
addressing::Address,
|
|
constants::{ATTR_IN, ATTR_KEY, ATTR_LABEL, ATTR_OF},
|
|
entry::{Entry, EntryValue, InvariantEntry},
|
|
};
|
|
use upend_db::{
|
|
jobs::{JobContainer, JobState},
|
|
stores::{fs::FILE_MIME_KEY, UpStore},
|
|
OperationContext, UpEndConnection,
|
|
};
|
|
|
|
/// Extractor that turns the EXIF metadata of stored image files into entries.
pub struct ExifExtractor;
|
|
|
|
// TODO: EXIF metadata is oftentimes a constant/enum value. What's the proper
|
|
// model for enum-like values in UpEnd?
|
|
|
|
lazy_static! {
    /// Invariant entry (`ATTR_KEY` = `"TYPE_EXIF"`) whose entity address stands for
    /// the EXIF type that extracted attributes and files are linked to.
    pub static ref EXIF_TYPE_INVARIANT: InvariantEntry = InvariantEntry {
        attribute: ATTR_KEY.parse().unwrap(),
        value: "TYPE_EXIF".into(),
    };

    /// Label entry (`ATTR_LABEL` = `"EXIF"`) attached to the EXIF type entity.
    ///
    /// The timestamp is taken when this static is first dereferenced.
    pub static ref EXIF_TYPE_LABEL: Entry = {
        let type_address = EXIF_TYPE_INVARIANT.entity().unwrap();

        Entry {
            entity: type_address,
            attribute: ATTR_LABEL.parse().unwrap(),
            value: "EXIF".into(),
            provenance: String::from("INVARIANT"),
            timestamp: chrono::Utc::now().naive_utc(),
            user: None,
        }
    };
}
|
|
|
|
impl Extractor for ExifExtractor {
|
|
fn get(
|
|
&self,
|
|
address: &Address,
|
|
_connection: &UpEndConnection,
|
|
store: Arc<Box<dyn UpStore + Send + Sync>>,
|
|
mut job_container: JobContainer,
|
|
context: OperationContext,
|
|
) -> Result<Vec<Entry>> {
|
|
if let Address::Hash(hash) = address {
|
|
let files = store.retrieve(hash)?;
|
|
|
|
if let Some(file) = files.first() {
|
|
let file_path = file.get_file_path();
|
|
let mut job_handle = job_container.add_job(
|
|
None,
|
|
&format!(
|
|
r#"Getting EXIF info from "{:}""#,
|
|
file_path
|
|
.components()
|
|
.last()
|
|
.unwrap()
|
|
.as_os_str()
|
|
.to_string_lossy()
|
|
),
|
|
)?;
|
|
|
|
let file = std::fs::File::open(file_path)?;
|
|
let mut bufreader = std::io::BufReader::new(&file);
|
|
let exifreader = exif::Reader::new();
|
|
let exif = exifreader.read_from_container(&mut bufreader)?;
|
|
|
|
let mut result: Vec<Entry> = vec![];
|
|
|
|
for field in exif
|
|
.fields()
|
|
.filter(|field| !matches!(field.value, exif::Value::Undefined(..)))
|
|
{
|
|
if let Some(tag_description) = field.tag.description() {
|
|
let attribute: Attribute = format!("EXIF_{}", field.tag.1).parse()?;
|
|
|
|
result.extend(vec![
|
|
Entry {
|
|
entity: address.clone(),
|
|
attribute: attribute.clone(),
|
|
value: match field.tag {
|
|
exif::Tag::ExifVersion => {
|
|
EntryValue::String(format!("{}", field.display_value()))
|
|
}
|
|
_ => {
|
|
EntryValue::guess_from(format!("{}", field.display_value()))
|
|
}
|
|
},
|
|
provenance: context.provenance.clone() + "EXTRACTOR",
|
|
user: context.user.clone(),
|
|
timestamp: chrono::Utc::now().naive_utc(),
|
|
},
|
|
Entry {
|
|
entity: Address::Attribute(attribute),
|
|
attribute: ATTR_LABEL.parse().unwrap(),
|
|
value: format!("EXIF: {}", tag_description).into(),
|
|
provenance: context.provenance.clone() + "EXTRACTOR",
|
|
user: context.user.clone(),
|
|
timestamp: chrono::Utc::now().naive_utc(),
|
|
},
|
|
]);
|
|
}
|
|
}
|
|
|
|
if !result.is_empty() {
|
|
result.extend(
|
|
result
|
|
.iter()
|
|
.filter(|e| e.attribute != ATTR_LABEL)
|
|
.map(|e| Entry {
|
|
entity: Address::Attribute(e.attribute.clone()),
|
|
attribute: ATTR_OF.parse().unwrap(),
|
|
value: EntryValue::Address(EXIF_TYPE_INVARIANT.entity().unwrap()),
|
|
provenance: context.provenance.clone() + "EXTRACTOR",
|
|
user: context.user.clone(),
|
|
timestamp: chrono::Utc::now().naive_utc(),
|
|
})
|
|
.collect::<Vec<Entry>>(),
|
|
);
|
|
result.extend(vec![
|
|
(&EXIF_TYPE_INVARIANT as &InvariantEntry)
|
|
.try_into()
|
|
.unwrap(),
|
|
EXIF_TYPE_LABEL.clone(),
|
|
Entry {
|
|
entity: address.clone(),
|
|
attribute: ATTR_IN.parse().unwrap(),
|
|
value: EntryValue::Address(EXIF_TYPE_INVARIANT.entity().unwrap()),
|
|
provenance: context.provenance.clone() + "EXTRACTOR",
|
|
user: context.user.clone(),
|
|
timestamp: chrono::Utc::now().naive_utc(),
|
|
},
|
|
]);
|
|
}
|
|
|
|
let _ = job_handle.update_state(JobState::Done);
|
|
|
|
Ok(result)
|
|
} else {
|
|
Err(anyhow!("Couldn't find file for {hash:?}!"))
|
|
}
|
|
} else {
|
|
Ok(vec![])
|
|
}
|
|
}
|
|
|
|
fn is_needed(&self, address: &Address, connection: &UpEndConnection) -> Result<bool> {
|
|
let is_exif = connection.retrieve_object(address)?.iter().any(|e| {
|
|
if e.attribute == FILE_MIME_KEY {
|
|
if let EntryValue::String(mime) = &e.value {
|
|
return mime.starts_with("image");
|
|
}
|
|
}
|
|
false
|
|
});
|
|
|
|
if !is_exif {
|
|
return Ok(false);
|
|
}
|
|
|
|
let is_extracted = !connection
|
|
.query(format!("(matches @{} (contains \"EXIF\") ?)", address).parse()?)?
|
|
.is_empty();
|
|
|
|
if is_extracted {
|
|
return Ok(false);
|
|
}
|
|
|
|
Ok(true)
|
|
}
|
|
}
|