upend/src/extractors/mod.rs

133 lines
3.5 KiB
Rust

use crate::{
    addressing::Address,
    database::{entry::Entry, stores::UpStore, UpEndConnection, UpEndDatabase},
    util::jobs::JobContainer,
};
use anyhow::Result;
use log::{info, trace, warn};
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use std::{
    borrow::Borrow,
    sync::{Arc, Mutex, RwLock},
};
// Extractor implementations. Each submodule is compiled only when its
// Cargo feature is enabled; `extract` below gates its calls the same way.

// Provides `WebExtractor` (feature `extractors-web`).
#[cfg(feature = "extractors-web")]
pub mod web;
// Provides `ID3Extractor` (feature `extractors-audio`).
#[cfg(feature = "extractors-audio")]
pub mod audio;
// Provides `ExifExtractor` (feature `extractors-photo`).
#[cfg(feature = "extractors-photo")]
pub mod photo;
/// A source of additional [`Entry`] records for a given [`Address`].
///
/// Implementors only have to provide [`Extractor::get`]; the remaining
/// methods have default implementations built on top of it.
pub trait Extractor {
    /// Produces the entries this extractor derives for `address`.
    ///
    /// The returned entries are not stored by this method; see
    /// [`Extractor::insert_info`] for the variant that inserts them.
    fn get(
        &self,
        address: &Address,
        connection: &UpEndConnection,
        store: Arc<Box<dyn UpStore + Send + Sync>>,
        job_container: JobContainer,
    ) -> Result<Vec<Entry>>;

    /// Tells whether extraction should run for `address` at all.
    ///
    /// The default implementation always answers `true`.
    fn is_needed(&self, _address: &Address, _connection: &UpEndConnection) -> Result<bool> {
        Ok(true)
    }

    /// Extracts entries for `address` and inserts them through `connection`.
    ///
    /// Returns the number of entries handed to `insert_entry`, or `0` when
    /// [`Extractor::is_needed`] reports that nothing has to be done.
    ///
    /// # Errors
    ///
    /// Propagates any error from `is_needed`, `get`, the transaction wrapper
    /// or an individual `insert_entry` call.
    fn insert_info(
        &self,
        address: &Address,
        connection: &UpEndConnection,
        store: Arc<Box<dyn UpStore + Send + Sync>>,
        job_container: JobContainer,
    ) -> Result<usize> {
        // Guard clause: skip all work when the extractor opts out.
        if !self.is_needed(address, connection)? {
            return Ok(0);
        }

        let extracted = self.get(address, connection, store, job_container)?;
        let inserted = extracted.len();

        // All inserts for one address go through a single `transaction` call.
        connection.transaction(|| {
            for entry in extracted {
                connection.insert_entry(entry)?;
            }
            Ok(inserted)
        })
    }
}
pub fn extract_all<D: Borrow<UpEndDatabase>>(
db: D,
store: Arc<Box<dyn UpStore + Send + Sync>>,
mut job_container: JobContainer,
) -> Result<usize> {
info!("Extracting metadata for all addresses.");
let db = db.borrow();
let job_handle = job_container.add_job("EXTRACT_ALL", "Extracting additional metadata...")?;
let all_addresses = db.connection()?.get_all_addresses()?;
let total = all_addresses.len() as f32;
let count = RwLock::new(0_usize);
let shared_job_handle = Arc::new(Mutex::new(job_handle));
let result = all_addresses
.par_iter()
.map(|address| {
let connection = db.connection()?;
let extract_result = extract(address, &connection, store.clone(), job_container.clone());
let mut cnt = count.write().unwrap();
*cnt += 1;
shared_job_handle
.lock()
.unwrap()
.update_progress(*cnt as f32 / total * 100.0)?;
extract_result
})
.flatten()
.sum();
info!(
"Done extracting metadata; processed {} addresses, added {} entries.",
all_addresses.len(),
result
);
Ok(result)
}
/// Runs every extractor enabled at compile time against a single `address`.
///
/// Extractors are invoked in the order web, audio, photo; each one is only
/// compiled in when its `extractors-*` feature is enabled. Returns the sum
/// of the entry counts reported by the extractors that ran.
///
/// # Errors
///
/// Returns the first extractor error encountered; extractors after the
/// failing one are not run.
pub fn extract(
    address: &Address,
    connection: &UpEndConnection,
    store: Arc<Box<dyn UpStore + Send + Sync>>,
    job_container: JobContainer,
) -> Result<usize> {
    trace!("Extracting metadata for {address:?}");

    let mut entry_count = 0;

    #[cfg(feature = "extractors-web")]
    {
        let from_web =
            web::WebExtractor.insert_info(address, connection, store.clone(), job_container.clone())?;
        entry_count += from_web;
    }

    #[cfg(feature = "extractors-audio")]
    {
        let from_audio = audio::ID3Extractor.insert_info(
            address,
            connection,
            store.clone(),
            job_container.clone(),
        )?;
        entry_count += from_audio;
    }

    #[cfg(feature = "extractors-photo")]
    {
        // Last user of `job_container`, so it is moved rather than cloned.
        let from_photo =
            photo::ExifExtractor.insert_info(address, connection, store.clone(), job_container)?;
        entry_count += from_photo;
    }

    trace!("Extracting metadata for {address:?} - got {entry_count} entries.");

    Ok(entry_count)
}