Add an LRU cache to the `resolve_path` function

feat/vaults
Tomáš Mládek 2021-12-04 18:33:40 +01:00
parent 4403f518c9
commit dd765ee94b
6 changed files with 75 additions and 7 deletions

24
Cargo.lock generated
View File

@ -346,6 +346,17 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom 0.2.3",
"once_cell",
"version_check 0.9.3",
]
[[package]]
name = "aho-corasick"
version = "0.7.18"
@ -1029,6 +1040,9 @@ name = "hashbrown"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
dependencies = [
"ahash",
]
[[package]]
name = "heck"
@ -1243,6 +1257,15 @@ dependencies = [
"cfg-if 1.0.0",
]
[[package]]
name = "lru"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c748cfe47cb8da225c37595b3108bea1c198c84aaae8ea0ba76d01dda9fc803"
dependencies = [
"hashbrown",
]
[[package]]
name = "lru-cache"
version = "0.1.2"
@ -2410,6 +2433,7 @@ dependencies = [
"lexpr",
"libsqlite3-sys",
"log",
"lru",
"nonempty",
"once_cell",
"rayon",

View File

@ -21,6 +21,7 @@ rayon = "1.4.0"
futures-util = "~0.3.12"
lazy_static = "1.4.0"
once_cell = "1.7.2"
lru = "0.7.0"
diesel = { version = "1.4", features = ["sqlite", "r2d2", "chrono", "serde_json"] }
diesel_migrations = "1.4"

View File

@ -10,7 +10,7 @@ use thiserror::private::DisplayAsDisplay;
use unsigned_varint::encode;
use uuid::Uuid;
#[derive(Clone, PartialEq)]
#[derive(Clone, Eq, PartialEq, Hash)]
pub enum Address {
Hash(Hash),
Uuid(Uuid),

View File

@ -1,9 +1,11 @@
use std::convert::TryFrom;
use std::sync::{Arc, Mutex};
use anyhow::{anyhow, Result};
use diesel::sqlite::Sqlite;
use diesel::Connection;
use log::trace;
use lru::LruCache;
use serde_json::Value;
use uuid::Uuid;
@ -15,7 +17,7 @@ use crate::database::entry::{Entry, EntryValue};
use crate::database::lang::{EntryQuery, Query, QueryComponent, QueryPart};
use crate::database::{bulk_retrieve_objects, insert_entry, query, DbPool};
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct UNode(String);
impl UNode {
@ -245,6 +247,38 @@ pub fn resolve_path<C: Connection<Backend = Sqlite>>(
Ok(result)
}
/// Shared LRU cache mapping a `(parent, node)` pair to the resolved `Address`
/// of that node under that parent.
pub type ResolveCache = LruCache<(Option<Address>, UNode), Address>;

/// Resolve a `UPath` into the chain of `Address`es of its nodes, consulting
/// (and populating) a shared LRU cache keyed on `(parent, node)` pairs.
///
/// Behaves like `resolve_path`, but skips the database round-trip for pairs
/// already present in `cache`. When `create` is true, missing nodes are
/// created via `fetch_or_create_dir` (same as `resolve_path`).
///
/// The mutex guarding `cache` is held only for the lookup / insert itself —
/// never across the database call — so concurrent resolvers are not
/// serialized on the cache.
///
/// # Errors
/// Propagates any error from `fetch_or_create_dir`.
///
/// # Panics
/// Panics if the cache mutex is poisoned (another thread panicked while
/// holding it).
pub fn resolve_path_cached<C: Connection<Backend = Sqlite>>(
    connection: &C,
    path: &UPath,
    create: bool,
    cache: &Arc<Mutex<ResolveCache>>,
) -> Result<Vec<Address>> {
    // The result has exactly one address per path node.
    let mut result: Vec<Address> = Vec::with_capacity(path.0.len());
    // Iterate nodes front-to-back directly; no need to clone the whole
    // vector into a stack and pop it in reverse.
    for node in &path.0 {
        // The previously resolved address (if any) is this node's parent.
        let parent = result.last().cloned();
        let key = (parent.clone(), node.clone());
        // Fast path: probe the cache under a short-lived lock. `get` also
        // refreshes the entry's LRU recency, so it needs the lock anyway;
        // clone the hit out so the lock is released immediately.
        let cached = cache.lock().unwrap().get(&key).cloned();
        if let Some(address) = cached {
            result.push(address);
        } else {
            // Slow path: hit the database (possibly creating the node)
            // WITHOUT holding the cache lock.
            let address = fetch_or_create_dir(connection, parent, node.clone(), create)?;
            result.push(address.clone());
            cache.lock().unwrap().put(key, address);
        }
    }
    Ok(result)
}
pub fn initialize_hier(pool: &DbPool) -> Result<()> {
insert_entry(&pool.get()?, Entry::try_from(&*HIER_INVARIANT)?)?;
upend_insert_addr!(&pool.get()?, HIER_ADDR, IS_OF_TYPE_ATTR, TYPE_ADDR);

View File

@ -1,6 +1,6 @@
use std::convert::TryFrom;
use std::path::{Component, Path, PathBuf};
use std::sync::{Arc, RwLock};
use std::sync::{Arc, Mutex, RwLock};
use std::time::{Instant, UNIX_EPOCH};
use std::{fs, iter};
@ -9,7 +9,7 @@ use crate::database::constants::{
HIER_HAS_ATTR, IS_OF_TYPE_ATTR, TYPE_ADDR, TYPE_BASE_ATTR, TYPE_HAS_ATTR,
};
use crate::database::entry::{Entry, EntryValue, InvariantEntry};
use crate::database::hierarchies::{resolve_path, UNode, UPath};
use crate::database::hierarchies::{resolve_path_cached, ResolveCache, UNode, UPath};
use crate::database::inner::models;
use crate::database::{
file_set_valid, insert_entry, insert_file, retrieve_all_files, DbPool, DATABASE_FILENAME,
@ -20,6 +20,7 @@ use anyhow::{Error, Result};
use chrono::prelude::*;
use diesel::Connection;
use log::{error, info, warn};
use lru::LruCache;
use rayon::prelude::*;
use serde_json::Value;
use walkdir::WalkDir;
@ -112,11 +113,18 @@ fn _rescan_vault<T: AsRef<Path>>(
// Actual processing
let count = RwLock::new(0_usize);
let resolve_cache = Arc::new(Mutex::new(LruCache::new(256)));
let total = path_entries.len() as f32;
let path_results: Vec<UpdatePathResult> = path_entries
.into_par_iter()
.map(|path| {
let result = _process_directory_entry(&rw_pool, path, &absolute_path, &existing_files)?;
let result = _process_directory_entry(
&rw_pool,
&resolve_cache,
path,
&absolute_path,
&existing_files,
)?;
let mut cnt = count.write().unwrap();
*cnt += 1;
@ -184,6 +192,7 @@ fn _rescan_vault<T: AsRef<Path>>(
fn _process_directory_entry<P: AsRef<Path>>(
db_pool: &Arc<RwLock<DbPool>>,
resolve_cache: &Arc<Mutex<ResolveCache>>,
path: PathBuf,
directory_path: &P,
existing_files: &Arc<RwLock<Vec<models::File>>>,
@ -300,7 +309,7 @@ fn _process_directory_entry<P: AsRef<Path>>(
}))
.collect(),
);
let resolved_path = resolve_path(connection, &upath, true)?;
let resolved_path = resolve_path_cached(connection, &upath, true, resolve_cache)?;
let parent_dir = resolved_path.last().unwrap();
connection.transaction::<_, Error, _>(|| {

View File

@ -11,7 +11,7 @@ use serde::{ser, Serialize, Serializer};
use std::path::PathBuf;
use tiny_keccak::{Hasher, KangarooTwelve};
#[derive(Debug, Clone, PartialEq, FromSqlRow)]
#[derive(Debug, Clone, Eq, PartialEq, FromSqlRow, Hash)]
pub struct Hash(pub Vec<u8>);
impl AsRef<[u8]> for Hash {