first iteration of EXIF data extraction

feat/vaults
Tomáš Mládek 2022-03-15 17:20:50 +01:00
parent b9ea04109e
commit c9a79492de
No known key found for this signature in database
GPG Key ID: 65E225C8B3E2ED8A
4 changed files with 131 additions and 1 deletions

16
Cargo.lock generated
View File

@ -1361,6 +1361,15 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "kamadak-exif"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70494964492bf8e491eb3951c5d70c9627eb7100ede6cc56d748b9a3f302cfb6"
dependencies = [
"mutate_once",
]
[[package]]
name = "kernel32-sys"
version = "0.2.2"
@ -1677,6 +1686,12 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "mutate_once"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16cf681a23b4d0a43fc35024c176437f9dcd818db34e0f42ab456a0ee5ad497b"
[[package]]
name = "net2"
version = "0.2.37"
@ -2908,6 +2923,7 @@ dependencies = [
"id3",
"image",
"is_executable",
"kamadak-exif",
"lazy_static",
"lexpr",
"libsqlite3-sys",

View File

@ -71,6 +71,7 @@ webp = { version = "0.2.0", optional = true }
webpage = { version = "1.4.0", optional = true }
id3 = { version = "1.0.2", optional = true }
kamadak-exif = { version = "0.5.4", optional = true }
[build-dependencies]
built = "0.5.1"
@ -82,9 +83,11 @@ default = [
"previews-image",
"extractors-web",
"extractors-audio",
"extractors-photo",
]
desktop = ["webbrowser", "opener", "is_executable"]
previews = []
previews-image = ["image", "webp"]
extractors-web = ["webpage"]
extractors-audio = ["id3"]
extractors-photo = ["kamadak-exif"]

View File

@ -17,6 +17,9 @@ pub mod web;
#[cfg(feature = "extractors-audio")]
pub mod audio;
#[cfg(feature = "extractors-photo")]
pub mod photo;
pub trait Extractor {
fn get(
&self,
@ -108,7 +111,12 @@ pub fn extract(
#[cfg(feature = "extractors-audio")]
{
entry_count += audio::ID3Extractor.insert_info(address, connection, job_container)?;
entry_count += audio::ID3Extractor.insert_info(address, connection, job_container.clone())?;
}
#[cfg(feature = "extractors-photo")]
{
entry_count += photo::ExifExtractor.insert_info(address, connection, job_container)?;
}
trace!("Extracting metadata for {address:?} - got {entry_count} entries.");

103
src/extractors/photo.rs Normal file
View File

@ -0,0 +1,103 @@
use super::Extractor;
use crate::{
addressing::Address,
database::{
constants,
entry::{Entry, EntryValue},
UpEndConnection,
},
filesystem::FILE_MIME_KEY,
util::jobs::{JobContainer, JobState},
};
use anyhow::{anyhow, Result};
/// Metadata extractor that reads EXIF fields from image files and turns them
/// into database entries through its `Extractor` implementation.
pub struct ExifExtractor;
// TODO: EXIF metadata is oftentimes a constant/enum value. What's the proper
// model for enum-like values in UpEnd?
/// EXIF extraction for objects whose stored MIME type marks them as images.
impl Extractor for ExifExtractor {
    /// Builds entries describing the EXIF fields of the file behind `address`.
    ///
    /// For every EXIF field whose tag has a known description, two entries are produced:
    ///
    /// * `EXIF_<tag number>` on `address`, holding the field's display value, and
    /// * a label entry on the `EXIF_<tag number>` attribute itself, reading
    ///   `EXIF: <tag description>`.
    ///
    /// Returns an empty vector when `address` is not a hash address, or when the object has
    /// no `FILE_MIME_KEY` string value starting with `"image"`.
    ///
    /// # Errors
    ///
    /// Fails if a database lookup fails, no file is known for the hash, the job cannot be
    /// registered, the file cannot be opened, or the EXIF reader rejects the file.
    fn get(
        &self,
        address: &Address,
        connection: &UpEndConnection,
        mut job_container: JobContainer,
    ) -> Result<Vec<Entry>> {
        // Only hash-addressed objects can be backed by a file on disk.
        let hash = match address {
            Address::Hash(hash) => hash,
            _ => return Ok(vec![]),
        };

        let is_photo = connection.retrieve_object(address)?.iter().any(|entry| {
            entry.attribute == FILE_MIME_KEY
                && matches!(&entry.value, EntryValue::String(mime) if mime.starts_with("image"))
        });
        if !is_photo {
            return Ok(vec![]);
        }

        let files = connection.retrieve_file(hash)?;
        let file = match files.first() {
            Some(file) => file,
            None => return Err(anyhow!("Couldn't find file for {hash:?}!")),
        };

        // The last path component is used as a human-readable job label. An empty path
        // yields an empty label instead of panicking.
        let file_name = file
            .path
            .components()
            .next_back()
            .map(|component| component.as_os_str().to_string_lossy().into_owned())
            .unwrap_or_default();
        let mut job_handle = job_container.add_job(
            None,
            &format!("Getting EXIF info from \"{:}\"", file_name),
        )?;

        // NOTE(review): any `?` below returns before the job is marked `Done`; whether the
        // job handle reports failure on its own is not visible from here -- confirm.
        let image_file = std::fs::File::open(&file.path)?;
        let mut reader = std::io::BufReader::new(&image_file);
        let exif = exif::Reader::new().read_from_container(&mut reader)?;

        let result: Vec<Entry> = exif
            .fields()
            .flat_map(|field| {
                // Fields whose tag has no description are skipped.
                let tag_description = match field.tag.description() {
                    Some(description) => description,
                    None => return vec![],
                };

                let attribute = format!("EXIF_{}", field.tag.1);
                // Render the value once; both branches below consume the same string.
                let display = field.display_value().to_string();
                let value = match field.tag {
                    // The version is always stored as a string rather than being passed
                    // through `guess_from`.
                    exif::Tag::ExifVersion => EntryValue::String(display),
                    _ => EntryValue::guess_from(display),
                };

                vec![
                    Entry {
                        entity: address.clone(),
                        attribute: attribute.clone(),
                        value,
                    },
                    Entry {
                        entity: Address::Attribute(attribute),
                        attribute: constants::LABEL_ATTR.into(),
                        value: format!("EXIF: {}", tag_description).into(),
                    },
                ]
            })
            .collect();

        // Failing to update the job state is deliberately ignored; the extracted entries
        // are still returned.
        let _ = job_handle.update_state(JobState::Done);

        Ok(result)
    }
}