diff --git a/src/database/inner/models.rs b/src/database/inner/models.rs
index 0db8861..dd91522 100644
--- a/src/database/inner/models.rs
+++ b/src/database/inner/models.rs
@@ -4,7 +4,7 @@ use serde::Serialize;
 use super::schema::{data, files};
 use crate::util::hash::Hash;
 
-#[derive(Queryable, Serialize, Debug)]
+#[derive(Queryable, Serialize, Clone, Debug)]
 pub struct File {
     pub id: i32,
     pub hash: Hash,
diff --git a/src/filesystem.rs b/src/filesystem.rs
index f204b9b..139c975 100644
--- a/src/filesystem.rs
+++ b/src/filesystem.rs
@@ -14,13 +14,12 @@ use crate::database::inner::models;
 use crate::database::{
     file_set_valid, insert_entry, insert_file, retrieve_all_files, DbPool, DATABASE_FILENAME,
 };
-use crate::util::hash::Hashable;
+use crate::util::hash::{Hash, Hashable};
 use crate::util::jobs::{Job, JobContainer, JobId, State};
-use anyhow::{anyhow, Error, Result};
+use anyhow::{Error, Result};
 use chrono::prelude::*;
 use diesel::Connection;
 use log::{error, info, warn};
-use once_cell::unsync::Lazy;
 use rayon::prelude::*;
 use serde_json::Value;
 use walkdir::WalkDir;
@@ -192,13 +191,13 @@ fn _process_directory_entry<P: AsRef<Path>>(
     info!("Processing: {:?}", path);
 
     // Prepare the data
-    let db_pool = Arc::clone(db_pool);
+    let connection = &db_pool.write().unwrap().get()?;
     let existing_files = Arc::clone(existing_files);
 
     let normalized_path = path.strip_prefix(&directory_path)?;
     let normalized_path_str = normalized_path.to_str().expect("path not valid unicode?!");
 
-    let file_hash = Lazy::new(|| path.hash());
+    let mut file_hash: Option<Hash> = None;
 
     // Get size & mtime for quick comparison
     let metadata = fs::metadata(&path)?;
@@ -214,33 +213,50 @@ fn _process_directory_entry<P: AsRef<Path>>(
         .ok();
 
     // Check if the path entry for this file already exists in database
-    {
-        // Only grab existing_files for the duration of this block
-        let mut existing_files = existing_files.write().unwrap();
+    let existing_files_read = existing_files.read().unwrap();
+    let maybe_existing_file = existing_files_read
+        .iter()
+        .find(|file| file.path == normalized_path_str)
+        .clone();
 
-        let maybe_existing_file = existing_files
-            .iter()
-            .enumerate()
-            .find(|(_, file)| file.path == normalized_path_str);
+    if let Some(existing_file) = maybe_existing_file {
+        let existing_file = existing_file.clone();
+        drop(existing_files_read);
+
+        let mut same = size == existing_file.size && mtime == existing_file.mtime;
+        if !same {
+            file_hash = Some(path.hash()?);
+            same = file_hash.as_ref().unwrap() == &existing_file.hash;
+        }
+
+        if same {
+            if !existing_file.valid {
+                file_set_valid(connection, existing_file.id, true)?;
+            }
 
-        if let Some((idx, existing_file)) = maybe_existing_file {
-            if (size == existing_file.size && mtime == existing_file.mtime)
-                || ((*file_hash).is_ok() && &existing_file.hash == (*file_hash).as_ref().unwrap())
-            {
-                if !existing_file.valid {
-                    file_set_valid(&db_pool.write().unwrap().get()?, existing_file.id, true)?;
-                }
-                existing_files.swap_remove(idx);
-                return Ok(UpdatePathOutcome::Unchanged(path));
+            let mut existing_files_write = existing_files.write().unwrap();
+            let maybe_existing_file = existing_files_write
+                .iter()
+                .enumerate()
+                .find(|(_, file)| file.path == normalized_path_str)
+                .map(|(idx, _)| idx);
+
+            if let Some(idx) = maybe_existing_file {
+                existing_files_write.swap_remove(idx);
+                return Ok(UpdatePathOutcome::Unchanged(path));
             }
         }
-    }
+    } else {
+        drop(existing_files_read);
+    }
 
     // If not, add it!
-    if let Err(err) = &*file_hash {
-        return Err(anyhow!(format!("Error hashing: {}", err)));
+    if file_hash.is_none() {
+        file_hash = Some(path.hash()?);
     }
-    let file_hash = (*file_hash).as_ref().unwrap().clone();
+    let file_hash = file_hash.unwrap();
 
     let new_file = models::NewFile {
         path: normalized_path_str.to_string(),
@@ -250,7 +266,7 @@ fn _process_directory_entry<P: AsRef<Path>>(
         mtime,
     };
 
-    insert_file(&db_pool.write().unwrap().get()?, new_file)?;
+    insert_file(connection, new_file)?;
 
     // Insert metadata
     let type_entry = Entry {
@@ -258,21 +274,21 @@ fn _process_directory_entry<P: AsRef<Path>>(
         attribute: String::from(IS_OF_TYPE_ATTR),
         value: EntryValue::Address(BLOB_TYPE_ADDR.clone()),
     };
-    insert_entry(&db_pool.write().unwrap().get()?, type_entry)?;
+    insert_entry(connection, type_entry)?;
 
     let size_entry = Entry {
         entity: Address::Hash(file_hash.clone()),
         attribute: FILE_SIZE_KEY.to_string(),
         value: EntryValue::Value(Value::from(size)),
     };
-    insert_entry(&db_pool.write().unwrap().get()?, size_entry)?;
+    insert_entry(connection, size_entry)?;
 
     let mime_entry = Entry {
         entity: Address::Hash(file_hash.clone()),
         attribute: FILE_MIME_KEY.to_string(),
         value: EntryValue::Value(Value::String(tree_magic::from_filepath(&path))),
     };
-    insert_entry(&db_pool.write().unwrap().get()?, mime_entry)?;
+    insert_entry(connection, mime_entry)?;
 
     // Finally, add the appropriate entries w/r/t virtual filesystem location
     let components = normalized_path.components().collect::<Vec<Component>>();
@@ -285,18 +301,16 @@ fn _process_directory_entry<P: AsRef<Path>>(
             }))
             .collect(),
     );
-    let resolved_path = resolve_path(&db_pool.write().unwrap().get()?, &upath, true)?;
+    let resolved_path = resolve_path(connection, &upath, true)?;
     let parent_dir = resolved_path.last().unwrap();
 
-    let _pool = &db_pool.write().unwrap();
-    let connection = _pool.get()?;
     connection.transaction::<_, Error, _>(|| {
         let dir_has_entry = Entry {
             entity: parent_dir.clone(),
            attribute: HIER_HAS_ATTR.to_string(),
            value: EntryValue::Address(Address::Hash(file_hash.clone())),
        };
-        let dir_has_entry_addr = insert_entry(&connection, dir_has_entry)?;
+        let dir_has_entry_addr = insert_entry(connection, dir_has_entry)?;
 
         let name_entry = Entry {
             entity: dir_has_entry_addr,
@@ -305,7 +319,7 @@ fn _process_directory_entry<P: AsRef<Path>>(
                 filename.as_os_str().to_string_lossy().to_string(),
             )),
         };
-        insert_entry(&connection, name_entry)?;
+        insert_entry(connection, name_entry)?;
 
         Ok(UpdatePathOutcome::Added(path.clone()))
     })