upend hierarchies - first step

separate hierarchies.rs module from filesystem.rs, simplify and fix path handling
feat/vaults
Tomáš Mládek 2021-08-18 11:06:36 +02:00
parent 0b45f29319
commit 816f04fc86
5 changed files with 336 additions and 339 deletions

View File

@ -9,10 +9,18 @@ pub const TYPE_ID_ATTR: &str = "TYPE_ID";
pub const TYPE_INSTANCES_ATTR: &str = "TYPE_INSTANCES";
pub const IS_OF_TYPE_ATTR: &str = "IS";
pub const HIER_TYPE: &str = "HIER";
pub const HIER_HAS_ATTR: &str = "HAS";
lazy_static! {
pub static ref TYPE_INVARIANT: InvariantEntry = InvariantEntry {
attribute: String::from(TYPE_IS_ATTR),
value: EntryValue::Value(serde_json::Value::from(TYPE_TYPE)),
};
pub static ref TYPE_ADDR: Address = TYPE_INVARIANT.entity().unwrap();
pub static ref HIER_INVARIANT: InvariantEntry = InvariantEntry {
attribute: String::from(TYPE_IS_ATTR),
value: EntryValue::Value(serde_json::Value::from(HIER_TYPE)),
};
pub static ref HIER_ADDR: Address = HIER_INVARIANT.entity().unwrap();
}

292
src/database/hierarchies.rs Normal file
View File

@ -0,0 +1,292 @@
use std::convert::TryFrom;
use anyhow::{anyhow, Result};
use diesel::sqlite::Sqlite;
use diesel::Connection;
use log::trace;
use serde_json::Value;
use uuid::Uuid;
use crate::addressing::Address;
use crate::database::constants::{
HIER_ADDR, HIER_HAS_ATTR, HIER_INVARIANT, IS_OF_TYPE_ATTR, TYPE_ADDR, TYPE_HAS_ATTR,
};
use crate::database::entry::{Entry, EntryValue};
use crate::database::lang::{EntryQuery, Query, QueryComponent, QueryPart};
use crate::database::{bulk_retrieve_objects, insert_entry, query, DbPool};
#[derive(Debug, Clone, PartialEq)]
pub struct UNode(pub String);
#[derive(Debug, Clone, PartialEq)]
pub struct UPath(pub Vec<UNode>);
impl std::str::FromStr for UPath {
type Err = anyhow::Error;
fn from_str(string: &str) -> Result<Self, Self::Err> {
if string.is_empty() {
Ok(UPath(vec![]))
} else {
let result: Vec<UNode> = string
.trim_end_matches('/')
.split('/')
.map(|part| UNode(String::from(part)))
.collect();
for directory in &result {
if directory.0.is_empty() {
return Err(anyhow!("INVALID PATH: Directory name cannot be empty!"));
}
}
Ok(UPath(result))
}
}
}
impl std::fmt::Display for UNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
impl std::fmt::Display for UPath {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{}",
self.0
.iter()
.map(|node| node.to_string())
.collect::<Vec<String>>()
.join("/")
)
}
}
trait EntryList {
fn extract_addresses(&self) -> Vec<Address>;
}
impl EntryList for Vec<Entry> {
fn extract_addresses(&self) -> Vec<Address> {
self.iter()
.filter_map(|e| {
if let EntryValue::Address(address) = &e.value {
Some(address.clone())
} else {
None
}
})
.collect()
}
}
pub fn list_roots<C: Connection<Backend = Sqlite>>(connection: &C) -> Result<Vec<Address>> {
let all_directories: Vec<Entry> = query(
connection,
Query::SingleQuery(QueryPart::Matches(EntryQuery {
entity: QueryComponent::Any,
attribute: QueryComponent::Exact(IS_OF_TYPE_ATTR.to_string()),
value: QueryComponent::Exact(EntryValue::Address(HIER_ADDR.clone())),
})),
)?;
let directories_with_parents: Vec<Address> = query(
connection,
Query::SingleQuery(QueryPart::Matches(EntryQuery {
entity: QueryComponent::Any,
attribute: QueryComponent::Exact(HIER_HAS_ATTR.to_string()),
value: QueryComponent::Any,
})),
)?
.extract_addresses();
Ok(all_directories
.into_iter()
.filter(|entry| !directories_with_parents.contains(&entry.entity))
.map(|e| e.entity)
.collect())
}
pub async fn list_node<C: Connection<Backend = Sqlite>>(
connection: &C,
path: &UPath,
) -> Result<Vec<Entry>> {
let entry_addresses = if path.0.is_empty() {
list_roots(connection)?
} else {
let resolved_path: Vec<Address> = resolve_path(connection, path, false)?;
let last = resolved_path.last().unwrap();
query(
connection,
Query::SingleQuery(QueryPart::Matches(EntryQuery {
entity: QueryComponent::Exact(last.clone()),
attribute: QueryComponent::Exact(HIER_HAS_ATTR.to_string()),
value: QueryComponent::Any,
})),
)?
.extract_addresses()
};
Ok(bulk_retrieve_objects(connection, entry_addresses)?
.into_iter()
// .filter(|e| [DIR_KEY, FILENAME_KEY, FILE_IDENTITY_KEY].contains(&e.attribute.as_str()))
.collect::<Vec<Entry>>())
}
pub fn fetch_or_create_dir<C: Connection<Backend = Sqlite>>(
connection: &C,
parent: Option<Address>,
directory: UNode,
create: bool,
) -> Result<Address> {
match parent.clone() {
Some(address) => trace!("FETCHING/CREATING {}/{:#}", address, directory),
None => trace!("FETCHING/CREATING /{:#}", directory),
}
let matching_directories: Vec<Address> = query(
connection,
Query::SingleQuery(QueryPart::Matches(EntryQuery {
entity: QueryComponent::Any,
attribute: QueryComponent::Exact(String::from(HIER_HAS_ATTR)),
value: QueryComponent::Exact(EntryValue::Value(Value::String(directory.0.clone()))),
})),
)?
.into_iter()
.map(|e: Entry| e.entity)
.collect();
let valid_directories: Vec<Address> = match parent.clone() {
Some(address) => {
let parent_has: Vec<Address> = query(
connection,
Query::SingleQuery(QueryPart::Matches(EntryQuery {
entity: QueryComponent::Exact(address),
attribute: QueryComponent::Exact(String::from(HIER_HAS_ATTR)),
value: QueryComponent::Any,
})),
)?
.extract_addresses();
matching_directories
.into_iter()
.filter(|a| parent_has.contains(a))
.collect()
}
None => {
let roots = list_roots(connection)?;
matching_directories
.into_iter()
.filter(|a| roots.contains(a))
.collect()
}
};
match valid_directories.len() {
0 => {
if create {
let new_directory_address = Address::Uuid(Uuid::new_v4());
let type_entry = Entry {
entity: new_directory_address.clone(),
attribute: String::from(IS_OF_TYPE_ATTR),
value: EntryValue::Address(HIER_ADDR.clone()),
};
insert_entry(connection, type_entry)?;
let directory_entry = Entry {
entity: new_directory_address.clone(),
attribute: String::from(HIER_HAS_ATTR),
value: EntryValue::Value(Value::String(directory.0)),
};
insert_entry(connection, directory_entry)?;
if let Some(parent_addr) = parent {
let has_entry = Entry {
entity: parent_addr,
attribute: String::from(HIER_HAS_ATTR),
value: EntryValue::Address(new_directory_address.clone()),
};
insert_entry(connection, has_entry)?;
}
Ok(new_directory_address)
} else {
Err(anyhow!("Directory does not exist."))
}
}
1 => Ok(valid_directories[0].clone()),
_ => Err(anyhow!(
"Invalid database state - more than one directory matches the query!"
)),
}
}
pub fn resolve_path<C: Connection<Backend = Sqlite>>(
connection: &C,
path: &UPath,
create: bool,
) -> Result<Vec<Address>> {
let mut result: Vec<Address> = vec![];
let mut path_stack = path.0.to_vec();
path_stack.reverse();
while !path_stack.is_empty() {
let dir_address = fetch_or_create_dir(
connection,
result.last().cloned(),
path_stack.pop().unwrap(),
create,
)?;
result.push(dir_address);
}
Ok(result)
}
pub fn initialize_hier(pool: &DbPool) -> Result<()> {
insert_entry(&pool.get()?, Entry::try_from(&*HIER_INVARIANT)?)?;
upend_insert_addr!(&pool.get()?, HIER_ADDR, IS_OF_TYPE_ATTR, TYPE_ADDR);
upend_insert_val!(&pool.get()?, HIER_ADDR, TYPE_HAS_ATTR, HIER_HAS_ATTR);
Ok(())
}
#[cfg(test)]
mod tests {
use anyhow::Result;
use super::{UNode, UPath};
#[test]
fn test_path_codec() {
let path = UPath(vec![
UNode("top".to_string()),
UNode("foo".to_string()),
UNode("bar".to_string()),
UNode("baz".to_string()),
]);
let str_path = path.to_string();
assert!(str_path.len() > 0);
let decoded_path: Result<UPath> = str_path.parse();
assert!(decoded_path.is_ok());
assert_eq!(path, decoded_path.unwrap());
}
#[test]
fn test_validation() {
let valid_path: Result<UPath> = "a/b/c/d/e/f/g".parse();
assert!(valid_path.is_ok());
let invalid_path: Result<UPath> = "a/b/c//d/e/f/g".parse();
assert!(invalid_path.is_err());
let invalid_path: Result<UPath> = "a//b/c//d/e/f///g".parse();
assert!(invalid_path.is_err());
}
}

View File

@ -5,6 +5,7 @@ mod macros;
pub mod constants;
pub mod entry;
pub mod hierarchies;
pub mod inner;
pub mod lang;
@ -24,6 +25,7 @@ use diesel::prelude::*;
use diesel::r2d2::{self, ConnectionManager};
use diesel::result::{DatabaseErrorKind, Error};
use diesel::sqlite::{Sqlite, SqliteConnection};
use hierarchies::initialize_hier;
use log::{debug, trace};
use std::convert::TryFrom;
use std::fs;
@ -132,6 +134,7 @@ pub fn bulk_retrieve_objects<C: Connection<Backend = Sqlite>>(
)
// .or_filter(value.eq(EntryValue::Address(object_address).to_str()?))
.load::<models::Entry>(connection)?;
let entries = matches
.iter()
.map(Entry::try_from)
@ -270,6 +273,7 @@ pub fn open_upend<P: AsRef<Path>>(
trace!("Initializing types...");
initialize_types(&pool)?;
initialize_hier(&pool)?;
Ok(OpenResult { pool, new })
}

View File

@ -1,36 +1,34 @@
use crate::addressing::Address;
use crate::database::constants::{
IS_OF_TYPE_ATTR, TYPE_ADDR, TYPE_HAS_ATTR, TYPE_ID_ATTR, TYPE_INSTANCES_ATTR, TYPE_IS_ATTR,
TYPE_REQUIRES_ATTR,
};
use crate::database::entry::{Entry, EntryValue, InvariantEntry};
use crate::database::inner::models;
use crate::database::lang::{EntryQuery, Query, QueryComponent, QueryPart};
use crate::database::{
bulk_retrieve_objects, file_set_valid, insert_entry, insert_file, query, retrieve_all_files,
DbPool, DATABASE_FILENAME,
};
use crate::util::hash::Hashable;
use crate::util::jobs::{Job, JobContainer, JobId, State};
use anyhow::{anyhow, Error, Result};
use chrono::prelude::*;
use diesel::sqlite::Sqlite;
use diesel::Connection;
use log::{error, info, trace, warn};
use once_cell::unsync::Lazy;
use rayon::prelude::*;
use serde_json::Value;
use std::convert::TryFrom;
use std::path::{Component, Path, PathBuf};
use std::sync::{Arc, RwLock};
use std::time::{Instant, UNIX_EPOCH};
use std::{fs, iter};
use crate::addressing::Address;
use crate::database::constants::{
HIER_HAS_ATTR, IS_OF_TYPE_ATTR, TYPE_ADDR, TYPE_HAS_ATTR, TYPE_ID_ATTR, TYPE_INSTANCES_ATTR,
TYPE_IS_ATTR, TYPE_REQUIRES_ATTR,
};
use crate::database::entry::{Entry, EntryValue, InvariantEntry};
use crate::database::hierarchies::{resolve_path, UNode, UPath};
use crate::database::inner::models;
use crate::database::{
file_set_valid, insert_entry, insert_file, retrieve_all_files, DbPool, DATABASE_FILENAME,
};
use crate::util::hash::Hashable;
use crate::util::jobs::{Job, JobContainer, JobId, State};
use anyhow::{anyhow, Error, Result};
use chrono::prelude::*;
use diesel::Connection;
use log::{error, info, warn};
use once_cell::unsync::Lazy;
use rayon::prelude::*;
use serde_json::Value;
use uuid::Uuid;
use walkdir::WalkDir;
const DIR_TYPE: &str = "FS_DIR";
const DIR_KEY: &str = "DIR";
const DIR_HAS_KEY: &str = "DIR_HAS";
lazy_static! {
static ref DIR_TYPE_INVARIANT: InvariantEntry = InvariantEntry {
attribute: String::from(TYPE_IS_ATTR),
@ -89,277 +87,11 @@ fn initialize_types(pool: &DbPool) -> Result<()> {
insert_entry(&pool.get()?, Entry::try_from(&*DIR_TYPE_INVARIANT)?)?;
upend_insert_addr!(&pool.get()?, DIR_TYPE_ADDR, IS_OF_TYPE_ATTR, TYPE_ADDR);
upend_insert_val!(&pool.get()?, DIR_TYPE_ADDR, TYPE_ID_ATTR, DIR_KEY);
upend_insert_val!(&pool.get()?, DIR_TYPE_ADDR, TYPE_HAS_ATTR, DIR_HAS_KEY);
upend_insert_val!(&pool.get()?, DIR_TYPE_ADDR, TYPE_HAS_ATTR, HIER_HAS_ATTR);
Ok(())
}
#[derive(Debug, Clone, PartialEq)]
pub struct UDirectory {
name: String,
}
#[derive(Debug, Clone, PartialEq)]
pub struct UPath(Vec<UDirectory>);
const TOP_SEPARATOR: &str = "//";
impl std::str::FromStr for UPath {
type Err = anyhow::Error;
fn from_str(string: &str) -> Result<Self, Self::Err> {
if string.is_empty() {
Ok(UPath(vec![]))
} else {
let result = match string.find(TOP_SEPARATOR) {
Some(head_idx) => {
let (head, rest) = string.split_at(head_idx);
let mut result: Vec<UDirectory> = vec![UDirectory {
name: String::from(head),
}];
result.append(
rest[TOP_SEPARATOR.len()..rest.len()]
.trim_end_matches('/')
.split('/')
.map(|part| UDirectory {
name: String::from(part),
})
.collect::<Vec<UDirectory>>()
.as_mut(),
);
result
}
None => string
.trim_end_matches('/')
.split('/')
.map(|part| UDirectory {
name: String::from(part),
})
.collect(),
};
for directory in &result {
if directory.name.is_empty() {
return Err(anyhow!("INVALID PATH: Directory name cannot be empty!"));
}
}
Ok(UPath(result))
}
}
}
impl std::fmt::Display for UDirectory {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.name)
}
}
impl std::fmt::Display for UPath {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.0.len() {
0 => write!(f, ""),
1 => write!(f, "{}", self.0.first().unwrap().name),
_ => {
let (head, tail) = self.0.split_first().unwrap();
write!(
f,
"{}//{}",
head.name,
tail.iter()
.map(|udir| udir.name.clone())
.collect::<Vec<String>>()
.join("/")
)
}
}
}
}
trait EntryList {
fn extract_addresses(&self) -> Vec<Address>;
}
impl EntryList for Vec<Entry> {
fn extract_addresses(&self) -> Vec<Address> {
self.iter()
.filter_map(|e| {
if let EntryValue::Address(address) = &e.value {
Some(address.clone())
} else {
None
}
})
.collect()
}
}
pub fn list_roots<C: Connection<Backend = Sqlite>>(connection: &C) -> Result<Vec<Address>> {
let all_directories: Vec<Entry> = query(
connection,
Query::SingleQuery(QueryPart::Matches(EntryQuery {
entity: QueryComponent::Any,
attribute: QueryComponent::Exact(DIR_KEY.to_string()),
value: QueryComponent::Any,
})),
)?;
let directories_with_parents: Vec<Address> = query(
connection,
Query::SingleQuery(QueryPart::Matches(EntryQuery {
entity: QueryComponent::Any,
attribute: QueryComponent::Exact(DIR_HAS_KEY.to_string()),
value: QueryComponent::Any,
})),
)?
.extract_addresses();
Ok(all_directories
.into_iter()
.filter(|entry| !directories_with_parents.contains(&entry.entity))
.map(|e| e.entity)
.collect())
}
pub async fn list_directory<C: Connection<Backend = Sqlite>>(
connection: &C,
path: &UPath,
) -> Result<Vec<Entry>> {
let entry_addresses = match path.0.len() {
0 => list_roots(connection)?,
_ => {
let resolved_path: Vec<Address> = resolve_path(connection, path, false)?;
let last = resolved_path.last().unwrap();
query(
connection,
Query::SingleQuery(QueryPart::Matches(EntryQuery {
entity: QueryComponent::Exact(last.clone()),
attribute: QueryComponent::Exact(DIR_HAS_KEY.to_string()),
value: QueryComponent::Any,
})),
)?
.extract_addresses()
}
};
Ok(bulk_retrieve_objects(connection, entry_addresses)?
.into_iter()
.filter(|e| [DIR_KEY, FILENAME_KEY, FILE_IDENTITY_KEY].contains(&e.attribute.as_str()))
.collect::<Vec<Entry>>())
}
pub fn fetch_or_create_dir<C: Connection<Backend = Sqlite>>(
connection: &C,
parent: Option<Address>,
directory: UDirectory,
create: bool,
) -> Result<Address> {
match parent.clone() {
Some(address) => trace!("FETCHING/CREATING {}/{:#}", address, directory),
None => trace!("FETCHING/CREATING /{:#}", directory),
}
let matching_directories: Vec<Address> = query(
connection,
Query::SingleQuery(QueryPart::Matches(EntryQuery {
entity: QueryComponent::Any,
attribute: QueryComponent::Exact(String::from(DIR_KEY)),
value: QueryComponent::Exact(EntryValue::Value(Value::String(directory.name.clone()))),
})),
)?
.into_iter()
.map(|e: Entry| e.entity)
.collect();
let valid_directories: Vec<Address> = match parent.clone() {
Some(address) => {
let parent_has: Vec<Address> = query(
connection,
Query::SingleQuery(QueryPart::Matches(EntryQuery {
entity: QueryComponent::Exact(address),
attribute: QueryComponent::Exact(String::from(DIR_HAS_KEY)),
value: QueryComponent::Any,
})),
)?
.extract_addresses();
matching_directories
.into_iter()
.filter(|a| parent_has.contains(a))
.collect()
}
None => {
let roots = list_roots(connection)?;
matching_directories
.into_iter()
.filter(|a| roots.contains(a))
.collect()
}
};
match valid_directories.len() {
0 => {
if create {
let new_directory_address = Address::Uuid(Uuid::new_v4());
let type_entry = Entry {
entity: new_directory_address.clone(),
attribute: String::from(IS_OF_TYPE_ATTR),
value: EntryValue::Address(DIR_TYPE_ADDR.clone()),
};
insert_entry(connection, type_entry)?;
let directory_entry = Entry {
entity: new_directory_address.clone(),
attribute: String::from(DIR_KEY),
value: EntryValue::Value(Value::String(directory.name)),
};
insert_entry(connection, directory_entry)?;
if let Some(parent_addr) = parent {
let has_entry = Entry {
entity: parent_addr,
attribute: String::from(DIR_HAS_KEY),
value: EntryValue::Address(new_directory_address.clone()),
};
insert_entry(connection, has_entry)?;
}
Ok(new_directory_address)
} else {
Err(anyhow!("Directory does not exist."))
}
}
1 => Ok(valid_directories[0].clone()),
_ => Err(anyhow!(
"Invalid database state - more than one directory matches the query!"
)),
}
}
pub fn resolve_path<C: Connection<Backend = Sqlite>>(
connection: &C,
path: &UPath,
create: bool,
) -> Result<Vec<Address>> {
let mut result: Vec<Address> = vec![];
let mut path_stack = path.0.to_vec();
path_stack.reverse();
while !path_stack.is_empty() {
let dir_address = fetch_or_create_dir(
connection,
result.last().cloned(),
path_stack.pop().unwrap(),
create,
)?;
result.push(dir_address);
}
Ok(result)
}
pub async fn rescan_vault(
pool: DbPool,
directory: PathBuf,
@ -603,13 +335,13 @@ fn _process_directory_entry<P: AsRef<Path>>(
let (filename, dir_path) = components.split_last().unwrap();
let upath = UPath(
iter::once(UDirectory {
name: "NATIVE".to_string(),
})
.chain(dir_path.iter().map(|component| UDirectory {
name: component.as_os_str().to_string_lossy().to_string(),
}))
.collect(),
iter::once(UNode("NATIVE".to_string()))
.chain(
dir_path
.iter()
.map(|component| UNode(component.as_os_str().to_string_lossy().to_string())),
)
.collect(),
);
let resolved_path = resolve_path(&db_pool.write().unwrap().get()?, &upath, true)?;
let parent_dir = resolved_path.last().unwrap();
@ -643,7 +375,7 @@ fn _process_directory_entry<P: AsRef<Path>>(
let dir_has_entry = Entry {
entity: parent_dir.clone(),
attribute: DIR_HAS_KEY.to_string(),
attribute: HIER_HAS_ATTR.to_string(),
value: EntryValue::Address(file_address),
};
insert_entry(&connection, dir_has_entry)?;
@ -651,42 +383,3 @@ fn _process_directory_entry<P: AsRef<Path>>(
Ok(UpdatePathOutcome::Added(path.clone()))
})
}
#[cfg(test)]
mod tests {
use anyhow::Result;
use crate::filesystem::{UDirectory, UPath};
#[test]
fn test_path_codec() {
let path = UPath(vec![
UDirectory {
name: "top".to_string(),
},
UDirectory {
name: "foo".to_string(),
},
UDirectory {
name: "bar".to_string(),
},
UDirectory {
name: "baz".to_string(),
},
]);
let str_path = path.to_string();
assert!(str_path.len() > 0);
let decoded_path: Result<UPath> = str_path.parse();
assert!(decoded_path.is_ok());
assert_eq!(path, decoded_path.unwrap());
}
#[test]
fn test_validation() {
let invalid_path: Result<UPath> = "a//b/c//d/e/f///g".parse();
assert!(invalid_path.is_err())
}
}

View File

@ -1,10 +1,10 @@
use crate::addressing::{Address, Addressable};
use crate::database::entry::{Entry, InEntry};
use crate::database::hierarchies::{list_node, UPath};
use crate::database::lang::Query;
use crate::database::{
get_latest_files, insert_entry, query, remove_object, retrieve_file, retrieve_object, DbPool,
};
use crate::filesystem::{list_directory, UPath};
use crate::util::hash::{decode, encode};
use crate::util::jobs::JobContainer;
use actix_files::NamedFile;
@ -173,7 +173,7 @@ pub async fn list_hier(
) -> Result<HttpResponse, Error> {
let connection = state.db_pool.get().map_err(ErrorInternalServerError)?;
let upath: UPath = path.into_inner().parse().map_err(ErrorBadRequest)?;
let entries: Vec<Entry> = list_directory(&connection, &upath)
let entries: Vec<Entry> = list_node(&connection, &upath)
.await
.map_err(ErrorNotFound)?; // todo: 500 if actual error occurs