2022-02-15 13:32:46 +01:00
|
|
|
use crate::{
|
|
|
|
addressing::Address,
|
2022-09-13 19:16:22 +02:00
|
|
|
database::{entry::Entry, stores::UpStore, UpEndConnection, UpEndDatabase},
|
2022-03-02 01:14:23 +01:00
|
|
|
util::jobs::JobContainer,
|
2022-02-15 13:32:46 +01:00
|
|
|
};
|
|
|
|
use anyhow::Result;
|
2022-03-02 01:14:46 +01:00
|
|
|
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
|
|
|
|
use std::{
|
|
|
|
borrow::Borrow,
|
2022-03-02 01:14:23 +01:00
|
|
|
sync::{Arc, Mutex, RwLock},
|
2022-03-02 01:14:46 +01:00
|
|
|
};
|
2022-10-23 10:54:52 +02:00
|
|
|
use tracing::{debug, info, trace};
|
2022-02-10 11:38:45 +01:00
|
|
|
|
|
|
|
// Individual extractor implementations. Each one is gated behind its own
// Cargo feature so that its dependencies can be compiled out when unused.
#[cfg(feature = "extractors-web")]
pub mod web;

#[cfg(feature = "extractors-audio")]
pub mod audio;

#[cfg(feature = "extractors-photo")]
pub mod photo;

#[cfg(feature = "extractors-media")]
pub mod media;
|
|
|
|
|
2022-02-15 13:32:46 +01:00
|
|
|
pub trait Extractor {
|
2022-02-28 20:36:16 +01:00
|
|
|
fn get(
|
|
|
|
&self,
|
|
|
|
address: &Address,
|
2022-02-28 21:36:55 +01:00
|
|
|
connection: &UpEndConnection,
|
2022-09-13 19:16:22 +02:00
|
|
|
store: Arc<Box<dyn UpStore + Send + Sync>>,
|
2022-03-02 01:14:23 +01:00
|
|
|
job_container: JobContainer,
|
2022-02-28 20:36:16 +01:00
|
|
|
) -> Result<Vec<Entry>>;
|
|
|
|
|
|
|
|
fn is_needed(&self, _address: &Address, _connection: &UpEndConnection) -> Result<bool> {
|
|
|
|
Ok(true)
|
|
|
|
}
|
2022-02-15 13:32:46 +01:00
|
|
|
|
2022-02-28 19:49:42 +01:00
|
|
|
fn insert_info(
|
2022-02-15 13:32:46 +01:00
|
|
|
&self,
|
2022-02-28 20:36:16 +01:00
|
|
|
address: &Address,
|
|
|
|
connection: &UpEndConnection,
|
2022-09-13 19:16:22 +02:00
|
|
|
store: Arc<Box<dyn UpStore + Send + Sync>>,
|
2022-03-02 01:14:23 +01:00
|
|
|
job_container: JobContainer,
|
2022-02-28 21:36:55 +01:00
|
|
|
) -> Result<usize> {
|
2022-02-28 20:36:16 +01:00
|
|
|
if self.is_needed(address, connection)? {
|
2022-08-19 14:04:18 +02:00
|
|
|
let entries = self.get(address, connection, store, job_container)?;
|
2022-02-15 13:32:46 +01:00
|
|
|
|
2022-02-28 20:36:16 +01:00
|
|
|
connection.transaction(|| {
|
2022-02-28 21:36:55 +01:00
|
|
|
let len = entries.len();
|
2022-02-28 20:36:16 +01:00
|
|
|
for entry in entries {
|
|
|
|
connection.insert_entry(entry)?;
|
|
|
|
}
|
2022-02-28 21:36:55 +01:00
|
|
|
Ok(len)
|
2022-02-28 20:36:16 +01:00
|
|
|
})
|
|
|
|
} else {
|
2022-02-28 21:36:55 +01:00
|
|
|
Ok(0)
|
2022-02-28 20:36:16 +01:00
|
|
|
}
|
2022-02-15 13:32:46 +01:00
|
|
|
}
|
2022-02-28 22:43:23 +01:00
|
|
|
}
|
|
|
|
|
2022-10-23 13:46:06 +02:00
|
|
|
#[tracing::instrument(name="Extract all metadata", skip_all)]
|
2022-09-13 19:16:22 +02:00
|
|
|
pub fn extract_all<D: Borrow<UpEndDatabase>>(
|
2022-03-02 01:14:46 +01:00
|
|
|
db: D,
|
2022-09-13 19:16:22 +02:00
|
|
|
store: Arc<Box<dyn UpStore + Send + Sync>>,
|
2022-03-02 01:14:23 +01:00
|
|
|
mut job_container: JobContainer,
|
2022-03-02 01:14:46 +01:00
|
|
|
) -> Result<usize> {
|
|
|
|
info!("Extracting metadata for all addresses.");
|
|
|
|
|
|
|
|
let db = db.borrow();
|
2022-03-02 01:14:23 +01:00
|
|
|
let job_handle = job_container.add_job("EXTRACT_ALL", "Extracting additional metadata...")?;
|
|
|
|
|
|
|
|
let all_addresses = db.connection()?.get_all_addresses()?;
|
|
|
|
let total = all_addresses.len() as f32;
|
|
|
|
let count = RwLock::new(0_usize);
|
|
|
|
let shared_job_handle = Arc::new(Mutex::new(job_handle));
|
|
|
|
|
|
|
|
let result = all_addresses
|
|
|
|
.par_iter()
|
|
|
|
.map(|address| {
|
|
|
|
let connection = db.connection()?;
|
2022-10-23 10:50:14 +02:00
|
|
|
let entry_count = extract(address, &connection, store.clone(), job_container.clone());
|
2022-03-02 01:14:23 +01:00
|
|
|
|
|
|
|
let mut cnt = count.write().unwrap();
|
|
|
|
*cnt += 1;
|
|
|
|
|
|
|
|
shared_job_handle
|
|
|
|
.lock()
|
|
|
|
.unwrap()
|
|
|
|
.update_progress(*cnt as f32 / total * 100.0)?;
|
|
|
|
|
2022-10-23 10:50:14 +02:00
|
|
|
anyhow::Ok(entry_count)
|
2022-03-02 01:14:23 +01:00
|
|
|
})
|
|
|
|
.flatten()
|
|
|
|
.sum();
|
|
|
|
|
|
|
|
info!(
|
|
|
|
"Done extracting metadata; processed {} addresses, added {} entries.",
|
|
|
|
all_addresses.len(),
|
|
|
|
result
|
|
|
|
);
|
|
|
|
|
|
|
|
Ok(result)
|
2022-03-02 01:14:46 +01:00
|
|
|
}
|
|
|
|
|
2022-10-23 10:51:24 +02:00
|
|
|
#[tracing::instrument(skip(connection, store, job_container))]
|
2022-03-02 01:14:46 +01:00
|
|
|
pub fn extract(
|
2022-02-28 22:43:23 +01:00
|
|
|
address: &Address,
|
|
|
|
connection: &UpEndConnection,
|
2022-09-13 19:16:22 +02:00
|
|
|
store: Arc<Box<dyn UpStore + Send + Sync>>,
|
2022-03-02 01:14:23 +01:00
|
|
|
job_container: JobContainer,
|
2022-10-23 10:50:14 +02:00
|
|
|
) -> usize {
|
2022-02-28 22:43:23 +01:00
|
|
|
let mut entry_count = 0;
|
2022-03-02 01:14:46 +01:00
|
|
|
trace!("Extracting metadata for {address:?}");
|
2022-02-28 22:43:23 +01:00
|
|
|
|
|
|
|
#[cfg(feature = "extractors-web")]
|
|
|
|
{
|
2022-10-23 10:50:14 +02:00
|
|
|
let extract_result = web::WebExtractor.insert_info(
|
|
|
|
address,
|
|
|
|
connection,
|
|
|
|
store.clone(),
|
|
|
|
job_container.clone(),
|
|
|
|
);
|
|
|
|
|
|
|
|
match extract_result {
|
|
|
|
Ok(count) => entry_count += count,
|
2022-10-23 10:54:52 +02:00
|
|
|
Err(err) => debug!("web: {}", err),
|
2022-10-23 10:50:14 +02:00
|
|
|
}
|
2022-02-28 22:43:23 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(feature = "extractors-audio")]
|
|
|
|
{
|
2022-10-23 10:50:14 +02:00
|
|
|
let extract_result = audio::ID3Extractor.insert_info(
|
|
|
|
address,
|
|
|
|
connection,
|
|
|
|
store.clone(),
|
|
|
|
job_container.clone(),
|
|
|
|
);
|
|
|
|
|
|
|
|
match extract_result {
|
|
|
|
Ok(count) => entry_count += count,
|
2022-10-23 10:54:52 +02:00
|
|
|
Err(err) => debug!("audio: {}", err),
|
2022-10-23 10:50:14 +02:00
|
|
|
}
|
2022-03-15 17:20:50 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(feature = "extractors-photo")]
|
|
|
|
{
|
2022-10-23 10:50:14 +02:00
|
|
|
let extract_result = photo::ExifExtractor.insert_info(
|
|
|
|
address,
|
|
|
|
connection,
|
|
|
|
store.clone(),
|
|
|
|
job_container.clone(),
|
|
|
|
);
|
|
|
|
|
|
|
|
match extract_result {
|
|
|
|
Ok(count) => entry_count += count,
|
2022-10-23 10:54:52 +02:00
|
|
|
Err(err) => debug!("photo: {}", err),
|
2022-10-23 10:50:14 +02:00
|
|
|
}
|
2022-10-22 19:25:28 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(feature = "extractors-media")]
|
|
|
|
{
|
2022-10-23 10:50:14 +02:00
|
|
|
let extract_result =
|
|
|
|
media::MediaExtractor.insert_info(address, connection, store.clone(), job_container);
|
|
|
|
|
|
|
|
match extract_result {
|
|
|
|
Ok(count) => entry_count += count,
|
2022-10-23 10:54:52 +02:00
|
|
|
Err(err) => debug!("media: {}", err),
|
2022-10-23 10:50:14 +02:00
|
|
|
}
|
2022-02-28 22:43:23 +01:00
|
|
|
}
|
|
|
|
|
2022-03-02 01:14:46 +01:00
|
|
|
trace!("Extracting metadata for {address:?} - got {entry_count} entries.");
|
|
|
|
|
2022-10-23 10:50:14 +02:00
|
|
|
entry_count
|
2022-02-28 22:43:23 +01:00
|
|
|
}
|