small optimizations in fs import
parent
d34c1def43
commit
2448169d64
|
@ -4,7 +4,7 @@ use serde::Serialize;
|
|||
use super::schema::{data, files};
|
||||
use crate::util::hash::Hash;
|
||||
|
||||
#[derive(Queryable, Serialize, Debug)]
|
||||
#[derive(Queryable, Serialize, Clone, Debug)]
|
||||
pub struct File {
|
||||
pub id: i32,
|
||||
pub hash: Hash,
|
||||
|
|
|
@ -14,13 +14,12 @@ use crate::database::inner::models;
|
|||
use crate::database::{
|
||||
file_set_valid, insert_entry, insert_file, retrieve_all_files, DbPool, DATABASE_FILENAME,
|
||||
};
|
||||
use crate::util::hash::Hashable;
|
||||
use crate::util::hash::{Hash, Hashable};
|
||||
use crate::util::jobs::{Job, JobContainer, JobId, State};
|
||||
use anyhow::{anyhow, Error, Result};
|
||||
use anyhow::{Error, Result};
|
||||
use chrono::prelude::*;
|
||||
use diesel::Connection;
|
||||
use log::{error, info, warn};
|
||||
use once_cell::unsync::Lazy;
|
||||
use rayon::prelude::*;
|
||||
use serde_json::Value;
|
||||
use walkdir::WalkDir;
|
||||
|
@ -192,13 +191,13 @@ fn _process_directory_entry<P: AsRef<Path>>(
|
|||
info!("Processing: {:?}", path);
|
||||
|
||||
// Prepare the data
|
||||
let db_pool = Arc::clone(db_pool);
|
||||
let connection = &db_pool.write().unwrap().get()?;
|
||||
let existing_files = Arc::clone(existing_files);
|
||||
|
||||
let normalized_path = path.strip_prefix(&directory_path)?;
|
||||
let normalized_path_str = normalized_path.to_str().expect("path not valid unicode?!");
|
||||
|
||||
let file_hash = Lazy::new(|| path.hash());
|
||||
let mut file_hash: Option<Hash> = None;
|
||||
|
||||
// Get size & mtime for quick comparison
|
||||
let metadata = fs::metadata(&path)?;
|
||||
|
@ -214,33 +213,50 @@ fn _process_directory_entry<P: AsRef<Path>>(
|
|||
.ok();
|
||||
|
||||
// Check if the path entry for this file already exists in database
|
||||
{
|
||||
// Only grab existing_files for the duration of this block
|
||||
let mut existing_files = existing_files.write().unwrap();
|
||||
let existing_files_read = existing_files.read().unwrap();
|
||||
let maybe_existing_file = existing_files_read
|
||||
.iter()
|
||||
.find(|file| file.path == normalized_path_str)
|
||||
.clone();
|
||||
|
||||
let maybe_existing_file = existing_files
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(_, file)| file.path == normalized_path_str);
|
||||
if let Some(existing_file) = maybe_existing_file {
|
||||
let existing_file = existing_file.clone();
|
||||
drop(existing_files_read);
|
||||
|
||||
let mut same = size == existing_file.size && mtime == existing_file.mtime;
|
||||
if !same {
|
||||
file_hash = Some(path.hash()?);
|
||||
same = file_hash.as_ref().unwrap() == &existing_file.hash;
|
||||
}
|
||||
|
||||
if same {
|
||||
if !existing_file.valid {
|
||||
file_set_valid(connection, existing_file.id, true)?;
|
||||
}
|
||||
|
||||
if let Some((idx, existing_file)) = maybe_existing_file {
|
||||
if (size == existing_file.size && mtime == existing_file.mtime)
|
||||
|| ((*file_hash).is_ok() && &existing_file.hash == (*file_hash).as_ref().unwrap())
|
||||
{
|
||||
if !existing_file.valid {
|
||||
file_set_valid(&db_pool.write().unwrap().get()?, existing_file.id, true)?;
|
||||
let mut existing_files_write = existing_files.write().unwrap();
|
||||
let maybe_existing_file = existing_files_write
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(_, file)| file.path == normalized_path_str)
|
||||
.map(|(idx, _)| idx);
|
||||
|
||||
if let Some(idx) = maybe_existing_file {
|
||||
existing_files_write.swap_remove(idx);
|
||||
return Ok(UpdatePathOutcome::Unchanged(path));
|
||||
}
|
||||
existing_files.swap_remove(idx);
|
||||
return Ok(UpdatePathOutcome::Unchanged(path));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
drop(existing_files_read);
|
||||
}
|
||||
|
||||
// If not, add it!
|
||||
if let Err(err) = &*file_hash {
|
||||
return Err(anyhow!(format!("Error hashing: {}", err)));
|
||||
if file_hash.is_none() {
|
||||
file_hash = Some(path.hash()?);
|
||||
}
|
||||
let file_hash = (*file_hash).as_ref().unwrap().clone();
|
||||
let file_hash = file_hash.unwrap();
|
||||
|
||||
let new_file = models::NewFile {
|
||||
path: normalized_path_str.to_string(),
|
||||
|
@ -250,7 +266,7 @@ fn _process_directory_entry<P: AsRef<Path>>(
|
|||
mtime,
|
||||
};
|
||||
|
||||
insert_file(&db_pool.write().unwrap().get()?, new_file)?;
|
||||
insert_file(connection, new_file)?;
|
||||
|
||||
// Insert metadata
|
||||
let type_entry = Entry {
|
||||
|
@ -258,21 +274,21 @@ fn _process_directory_entry<P: AsRef<Path>>(
|
|||
attribute: String::from(IS_OF_TYPE_ATTR),
|
||||
value: EntryValue::Address(BLOB_TYPE_ADDR.clone()),
|
||||
};
|
||||
insert_entry(&db_pool.write().unwrap().get()?, type_entry)?;
|
||||
insert_entry(connection, type_entry)?;
|
||||
|
||||
let size_entry = Entry {
|
||||
entity: Address::Hash(file_hash.clone()),
|
||||
attribute: FILE_SIZE_KEY.to_string(),
|
||||
value: EntryValue::Value(Value::from(size)),
|
||||
};
|
||||
insert_entry(&db_pool.write().unwrap().get()?, size_entry)?;
|
||||
insert_entry(connection, size_entry)?;
|
||||
|
||||
let mime_entry = Entry {
|
||||
entity: Address::Hash(file_hash.clone()),
|
||||
attribute: FILE_MIME_KEY.to_string(),
|
||||
value: EntryValue::Value(Value::String(tree_magic::from_filepath(&path))),
|
||||
};
|
||||
insert_entry(&db_pool.write().unwrap().get()?, mime_entry)?;
|
||||
insert_entry(connection, mime_entry)?;
|
||||
|
||||
// Finally, add the appropriate entries with respect to the virtual filesystem location
|
||||
let components = normalized_path.components().collect::<Vec<Component>>();
|
||||
|
@ -285,18 +301,16 @@ fn _process_directory_entry<P: AsRef<Path>>(
|
|||
}))
|
||||
.collect(),
|
||||
);
|
||||
let resolved_path = resolve_path(&db_pool.write().unwrap().get()?, &upath, true)?;
|
||||
let resolved_path = resolve_path(connection, &upath, true)?;
|
||||
let parent_dir = resolved_path.last().unwrap();
|
||||
|
||||
let _pool = &db_pool.write().unwrap();
|
||||
let connection = _pool.get()?;
|
||||
connection.transaction::<_, Error, _>(|| {
|
||||
let dir_has_entry = Entry {
|
||||
entity: parent_dir.clone(),
|
||||
attribute: HIER_HAS_ATTR.to_string(),
|
||||
value: EntryValue::Address(Address::Hash(file_hash.clone())),
|
||||
};
|
||||
let dir_has_entry_addr = insert_entry(&connection, dir_has_entry)?;
|
||||
let dir_has_entry_addr = insert_entry(connection, dir_has_entry)?;
|
||||
|
||||
let name_entry = Entry {
|
||||
entity: dir_has_entry_addr,
|
||||
|
@ -305,7 +319,7 @@ fn _process_directory_entry<P: AsRef<Path>>(
|
|||
filename.as_os_str().to_string_lossy().to_string(),
|
||||
)),
|
||||
};
|
||||
insert_entry(&connection, name_entry)?;
|
||||
insert_entry(connection, name_entry)?;
|
||||
|
||||
Ok(UpdatePathOutcome::Added(path.clone()))
|
||||
})
|
||||
|
|
Loading…
Reference in New Issue