diff --git a/Cargo.lock b/Cargo.lock index 77b02ee..6df21d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -346,6 +346,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom 0.2.3", + "once_cell", + "version_check 0.9.3", +] + [[package]] name = "aho-corasick" version = "0.7.18" @@ -1029,6 +1040,9 @@ name = "hashbrown" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +dependencies = [ + "ahash", +] [[package]] name = "heck" @@ -1243,6 +1257,15 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "lru" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c748cfe47cb8da225c37595b3108bea1c198c84aaae8ea0ba76d01dda9fc803" +dependencies = [ + "hashbrown", +] + [[package]] name = "lru-cache" version = "0.1.2" @@ -2410,6 +2433,7 @@ dependencies = [ "lexpr", "libsqlite3-sys", "log", + "lru", "nonempty", "once_cell", "rayon", diff --git a/Cargo.toml b/Cargo.toml index 7f189ff..e09799d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ rayon = "1.4.0" futures-util = "~0.3.12" lazy_static = "1.4.0" once_cell = "1.7.2" +lru = "0.7.0" diesel = { version = "1.4", features = ["sqlite", "r2d2", "chrono", "serde_json"] } diesel_migrations = "1.4" diff --git a/src/addressing.rs b/src/addressing.rs index 8534df3..5f3c6d3 100644 --- a/src/addressing.rs +++ b/src/addressing.rs @@ -10,7 +10,7 @@ use thiserror::private::DisplayAsDisplay; use unsigned_varint::encode; use uuid::Uuid; -#[derive(Clone, PartialEq)] +#[derive(Clone, Eq, PartialEq, Hash)] pub enum Address { Hash(Hash), Uuid(Uuid), 
diff --git a/src/database/hierarchies.rs b/src/database/hierarchies.rs index 91e8327..2f44105 100644 --- a/src/database/hierarchies.rs +++ b/src/database/hierarchies.rs @@ -1,9 +1,11 @@ use std::convert::TryFrom; +use std::sync::{Arc, Mutex}; use anyhow::{anyhow, Result}; use diesel::sqlite::Sqlite; use diesel::Connection; use log::trace; +use lru::LruCache; use serde_json::Value; use uuid::Uuid; @@ -15,7 +17,7 @@ use crate::database::entry::{Entry, EntryValue}; use crate::database::lang::{EntryQuery, Query, QueryComponent, QueryPart}; use crate::database::{bulk_retrieve_objects, insert_entry, query, DbPool}; -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct UNode(String); impl UNode { @@ -245,6 +247,38 @@ pub fn resolve_path>( Ok(result) } +pub type ResolveCache = LruCache<(Option
<Address>, UNode), Address>; + +pub fn resolve_path_cached<C: Connection<Backend = Sqlite>>( + connection: &C, + path: &UPath, + create: bool, + cache: &Arc<Mutex<ResolveCache>>, +) -> Result<Vec<Address>> { + let mut result: Vec<Address>
= vec![]; + let mut path_stack = path.0.to_vec(); + + path_stack.reverse(); + while !path_stack.is_empty() { + let node = path_stack.pop().unwrap(); + let parent = result.last().cloned(); + let key = (parent.clone(), node.clone()); + let mut cache_lock = cache.lock().unwrap(); + let cached_address = cache_lock.get(&key); + if let Some(address) = cached_address { + result.push(address.clone()); + } else { + drop(cache_lock); + let address = + fetch_or_create_dir(connection, parent, node, create)?; + result.push(address.clone()); + cache.lock().unwrap().put(key, address); + } + } + + Ok(result) +} + pub fn initialize_hier(pool: &DbPool) -> Result<()> { insert_entry(&pool.get()?, Entry::try_from(&*HIER_INVARIANT)?)?; upend_insert_addr!(&pool.get()?, HIER_ADDR, IS_OF_TYPE_ATTR, TYPE_ADDR); diff --git a/src/filesystem.rs b/src/filesystem.rs index 420d517..08594a3 100644 --- a/src/filesystem.rs +++ b/src/filesystem.rs @@ -1,6 +1,6 @@ use std::convert::TryFrom; use std::path::{Component, Path, PathBuf}; -use std::sync::{Arc, RwLock}; +use std::sync::{Arc, Mutex, RwLock}; use std::time::{Instant, UNIX_EPOCH}; use std::{fs, iter}; @@ -9,7 +9,7 @@ use crate::database::constants::{ HIER_HAS_ATTR, IS_OF_TYPE_ATTR, TYPE_ADDR, TYPE_BASE_ATTR, TYPE_HAS_ATTR, }; use crate::database::entry::{Entry, EntryValue, InvariantEntry}; -use crate::database::hierarchies::{resolve_path, UNode, UPath}; +use crate::database::hierarchies::{resolve_path_cached, ResolveCache, UNode, UPath}; use crate::database::inner::models; use crate::database::{ file_set_valid, insert_entry, insert_file, retrieve_all_files, DbPool, DATABASE_FILENAME, @@ -20,6 +20,7 @@ use anyhow::{Error, Result}; use chrono::prelude::*; use diesel::Connection; use log::{error, info, warn}; +use lru::LruCache; use rayon::prelude::*; use serde_json::Value; use walkdir::WalkDir; @@ -112,11 +113,18 @@ fn _rescan_vault>( // Actual processing let count = RwLock::new(0_usize); + let resolve_cache = 
Arc::new(Mutex::new(LruCache::new(256))); let total = path_entries.len() as f32; let path_results: Vec = path_entries .into_par_iter() .map(|path| { - let result = _process_directory_entry(&rw_pool, path, &absolute_path, &existing_files)?; + let result = _process_directory_entry( + &rw_pool, + &resolve_cache, + path, + &absolute_path, + &existing_files, + )?; let mut cnt = count.write().unwrap(); *cnt += 1; @@ -184,6 +192,7 @@ fn _rescan_vault>( fn _process_directory_entry>( db_pool: &Arc>, + resolve_cache: &Arc<Mutex<ResolveCache>>, path: PathBuf, directory_path: &P, existing_files: &Arc>>, @@ -300,7 +309,7 @@ fn _process_directory_entry>( })) .collect(), ); - let resolved_path = resolve_path(connection, &upath, true)?; + let resolved_path = resolve_path_cached(connection, &upath, true, resolve_cache)?; let parent_dir = resolved_path.last().unwrap(); connection.transaction::<_, Error, _>(|| { diff --git a/src/util/hash.rs b/src/util/hash.rs index 38b70a5..58b0732 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -11,7 +11,7 @@ use serde::{ser, Serialize, Serializer}; use std::path::PathBuf; use tiny_keccak::{Hasher, KangarooTwelve}; -#[derive(Debug, Clone, PartialEq, FromSqlRow)] +#[derive(Debug, Clone, Eq, PartialEq, FromSqlRow, Hash)] pub struct Hash(pub Vec<u8>); impl AsRef<[u8]> for Hash {