use crate::{ addressing::Address, database::{entry::Entry, stores::UpStore, UpEndConnection, UpEndDatabase}, util::jobs::JobContainer, }; use anyhow::Result; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use std::{ borrow::Borrow, sync::{Arc, Mutex, RwLock}, }; use tracing::{debug, info, trace}; #[cfg(feature = "extractors-web")] pub mod web; #[cfg(feature = "extractors-audio")] pub mod audio; #[cfg(feature = "extractors-photo")] pub mod photo; #[cfg(feature = "extractors-media")] pub mod media; pub trait Extractor { fn get( &self, address: &Address, connection: &UpEndConnection, store: Arc>, job_container: JobContainer, ) -> Result>; fn is_needed(&self, _address: &Address, _connection: &UpEndConnection) -> Result { Ok(true) } fn insert_info( &self, address: &Address, connection: &UpEndConnection, store: Arc>, job_container: JobContainer, ) -> Result { if self.is_needed(address, connection)? { let entries = self.get(address, connection, store, job_container)?; connection.transaction(|| { let len = entries.len(); for entry in entries { connection.insert_entry(entry)?; } Ok(len) }) } else { Ok(0) } } } #[tracing::instrument(name="Extract all metadata", skip_all)] pub fn extract_all>( db: D, store: Arc>, mut job_container: JobContainer, ) -> Result { info!("Extracting metadata for all addresses."); let db = db.borrow(); let job_handle = job_container.add_job("EXTRACT_ALL", "Extracting additional metadata...")?; let all_addresses = db.connection()?.get_all_addresses()?; let total = all_addresses.len() as f32; let count = RwLock::new(0_usize); let shared_job_handle = Arc::new(Mutex::new(job_handle)); let result = all_addresses .par_iter() .map(|address| { let connection = db.connection()?; let entry_count = extract(address, &connection, store.clone(), job_container.clone()); let mut cnt = count.write().unwrap(); *cnt += 1; shared_job_handle .lock() .unwrap() .update_progress(*cnt as f32 / total * 100.0)?; anyhow::Ok(entry_count) }) .flatten() .sum(); info!( "Done extracting metadata; processed {} addresses, added {} entries.", all_addresses.len(), result ); Ok(result) } #[tracing::instrument(skip(connection, store, job_container))] pub fn extract( address: &Address, connection: &UpEndConnection, store: Arc>, job_container: JobContainer, ) -> usize { let mut entry_count = 0; trace!("Extracting metadata for {address:?}"); #[cfg(feature = "extractors-web")] { let extract_result = web::WebExtractor.insert_info( address, connection, store.clone(), job_container.clone(), ); match extract_result { Ok(count) => entry_count += count, Err(err) => debug!("web: {}", err), } } #[cfg(feature = "extractors-audio")] { let extract_result = audio::ID3Extractor.insert_info( address, connection, store.clone(), job_container.clone(), ); match extract_result { Ok(count) => entry_count += count, Err(err) => debug!("audio: {}", err), } } #[cfg(feature = "extractors-photo")] { let extract_result = photo::ExifExtractor.insert_info( address, connection, store.clone(), job_container.clone(), ); match extract_result { Ok(count) => entry_count += count, Err(err) => debug!("photo: {}", err), } } #[cfg(feature = "extractors-media")] { let extract_result = media::MediaExtractor.insert_info(address, connection, store.clone(), job_container); match extract_result { Ok(count) => entry_count += count, Err(err) => debug!("media: {}", err), } } trace!("Extracting metadata for {address:?} - got {entry_count} entries."); entry_count }