//! Metadata extractors for the UpEnd CLI (upend/cli/src/extractors/mod.rs).
use anyhow::Result;
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use std::{
borrow::Borrow,
sync::{Arc, Mutex, RwLock},
};
use tracing::{debug, info, trace};
use upend_base::{addressing::Address, entry::Entry};
use upend_db::{
jobs::JobContainer, stores::UpStore, OperationContext, UpEndConnection, UpEndDatabase,
};
#[cfg(feature = "extractors-web")]
pub mod web;
#[cfg(feature = "extractors-audio")]
pub mod audio;
#[cfg(feature = "extractors-exif")]
pub mod exif;
#[cfg(feature = "extractors-media")]
pub mod media;
/// A source of derived metadata: given an address, an extractor produces
/// additional [`Entry`] records to be stored alongside it.
///
/// Concrete implementors live in the feature-gated submodules
/// (`web`, `audio`, `exif`, `media`).
pub trait Extractor {
    /// Produce metadata entries for `address`.
    ///
    /// Implementors may consult the database via `connection`, fetch blob
    /// contents from `store`, and report long-running work through
    /// `job_container`.
    fn get(
        &self,
        address: &Address,
        connection: &UpEndConnection,
        store: Arc<Box<dyn UpStore + Send + Sync>>,
        job_container: JobContainer,
        context: OperationContext,
    ) -> Result<Vec<Entry>>;

    /// Report whether this extractor has any work to do for `address`.
    ///
    /// Defaults to `true`; implementors override this to skip addresses they
    /// cannot handle or have already processed.
    fn is_needed(&self, _address: &Address, _connection: &UpEndConnection) -> Result<bool> {
        Ok(true)
    }

    /// Run the extractor for `address` (if [`Self::is_needed`] says so) and
    /// persist the resulting entries in a single transaction.
    ///
    /// Returns the number of entries inserted; `0` when the extractor was
    /// skipped.
    fn insert_info(
        &self,
        address: &Address,
        connection: &UpEndConnection,
        store: Arc<Box<dyn UpStore + Send + Sync>>,
        job_container: JobContainer,
        context: OperationContext,
    ) -> Result<usize> {
        // Guard clause: nothing to do for this address.
        if !self.is_needed(address, connection)? {
            return Ok(0);
        }

        let entries = self.get(address, connection, store, job_container, context)?;
        trace!("For \"{address}\", got: {entries:?}");

        // All-or-nothing: either every entry lands or none does.
        connection.transaction(|| {
            let inserted = entries.len();
            for entry in entries {
                connection.insert_entry(entry)?;
            }
            Ok(inserted)
        })
    }
}
#[tracing::instrument(name = "Extract all metadata", skip_all)]
pub fn extract_all<D: Borrow<UpEndDatabase>>(
db: D,
store: Arc<Box<dyn UpStore + Send + Sync>>,
mut job_container: JobContainer,
context: OperationContext,
) -> Result<usize> {
info!("Extracting metadata for all addresses.");
let db = db.borrow();
let job_handle = job_container.add_job("EXTRACT_ALL", "Extracting additional metadata...")?;
let all_addresses = db.connection()?.get_all_addresses()?;
let total = all_addresses.len() as f32;
let count = RwLock::new(0_usize);
let shared_job_handle = Arc::new(Mutex::new(job_handle));
let result = all_addresses
.par_iter()
.map(|address| {
let connection = db.connection()?;
let entry_count = extract(
address,
&connection,
store.clone(),
job_container.clone(),
context.clone(),
);
let mut cnt = count.write().unwrap();
*cnt += 1;
shared_job_handle
.lock()
.unwrap()
.update_progress(*cnt as f32 / total * 100.0)?;
anyhow::Ok(entry_count)
})
.flatten()
.sum();
info!(
"Done extracting metadata; processed {} addresses, added {} entries.",
all_addresses.len(),
result
);
Ok(result)
}
/// Run every compiled-in extractor against a single `address`.
///
/// Each extractor runs best-effort: failures are logged at `debug` level
/// (keyed by the extractor's feature name) and do not prevent the remaining
/// extractors from running. Returns the total number of entries inserted.
#[tracing::instrument(skip(connection, store, job_container))]
pub fn extract(
    address: &Address,
    connection: &UpEndConnection,
    store: Arc<Box<dyn UpStore + Send + Sync>>,
    job_container: JobContainer,
    context: OperationContext,
) -> usize {
    let mut entry_count = 0;
    trace!("Extracting metadata for {address:?}");
    #[cfg(feature = "extractors-web")]
    {
        let extract_result = web::WebExtractor.insert_info(
            address,
            connection,
            store.clone(),
            job_container.clone(),
            context.clone(),
        );
        match extract_result {
            Ok(count) => entry_count += count,
            Err(err) => debug!("web: {}", err),
        }
    }
    #[cfg(feature = "extractors-audio")]
    {
        let extract_result = audio::ID3Extractor.insert_info(
            address,
            connection,
            store.clone(),
            job_container.clone(),
            context.clone(),
        );
        match extract_result {
            Ok(count) => entry_count += count,
            Err(err) => debug!("audio: {}", err),
        }
    }
    #[cfg(feature = "extractors-exif")]
    {
        let extract_result = exif::ExifExtractor.insert_info(
            address,
            connection,
            store.clone(),
            job_container.clone(),
            context.clone(),
        );
        match extract_result {
            Ok(count) => entry_count += count,
            // Log label matches the feature name, consistent with the
            // other extractors (was previously mislabeled "photo").
            Err(err) => debug!("exif: {}", err),
        }
    }
    #[cfg(feature = "extractors-media")]
    {
        // Last extractor: `job_container` and `context` can be moved rather
        // than cloned.
        let extract_result = media::MediaExtractor.insert_info(
            address,
            connection,
            store.clone(),
            job_container,
            context,
        );
        match extract_result {
            Ok(count) => entry_count += count,
            Err(err) => debug!("media: {}", err),
        }
    }
    trace!("Extracting metadata for {address:?} - got {entry_count} entries.");
    entry_count
}