2020-09-12 14:27:45 +02:00
|
|
|
use crate::addressing::Address;
|
2020-09-12 22:50:14 +02:00
|
|
|
use crate::database::{
|
|
|
|
DbExecutor, Entry, EntryValue, InnerEntry, InsertEntry, QueryEntries, RetrieveByHash,
|
|
|
|
};
|
|
|
|
use crate::hash::{ComputeHash, HasherWorker};
|
2020-09-12 14:27:45 +02:00
|
|
|
use crate::models;
|
|
|
|
use anyhow::{anyhow, Result};
|
2020-09-12 22:50:14 +02:00
|
|
|
use log::{info, trace, warn};
|
2020-09-12 14:27:45 +02:00
|
|
|
use serde::export::Formatter;
|
|
|
|
use serde_json::Value;
|
2020-09-12 22:50:14 +02:00
|
|
|
use std::path::{Component, Path, PathBuf};
|
|
|
|
use std::{fs, iter};
|
2020-09-12 14:27:45 +02:00
|
|
|
use walkdir::WalkDir;
|
2020-08-27 00:11:50 +02:00
|
|
|
|
|
|
|
use actix::prelude::*;
|
2020-09-06 12:32:17 +02:00
|
|
|
use chrono::prelude::*;
|
2020-09-12 14:27:45 +02:00
|
|
|
use uuid::Uuid;
|
|
|
|
|
|
|
|
/// Entry key tagging an address as a directory; the entry's value holds the directory's name.
const DIR_KEY: &str = "DIR";
|
|
|
|
/// Entry key linking a directory to an address it contains (a child directory or a file).
const DIR_HAS_KEY: &str = "DIR_HAS";
|
2020-09-12 23:07:50 +02:00
|
|
|
|
|
|
|
/// Entry key tying a file's UUID address to its content-hash address.
const FILE_IDENTITY_KEY: &str = "FILE_IS";
|
|
|
|
/// Entry key storing a file's on-disk filename (the final path component).
const FILENAME_KEY: &str = "FILE_NAME";
|
2020-08-27 00:11:50 +02:00
|
|
|
|
2020-09-12 22:50:14 +02:00
|
|
|
/// A single named segment of a virtual path (see `UPath`).
#[derive(Debug, Clone, PartialEq)]
pub struct UDirectory {
    name: String,
}
|
2020-08-27 00:11:50 +02:00
|
|
|
|
2020-09-12 22:50:14 +02:00
|
|
|
#[derive(Debug, Clone, PartialEq)]
|
2020-09-12 14:27:45 +02:00
|
|
|
pub struct UPath(Vec<UDirectory>);
|
|
|
|
|
|
|
|
/// Separator between the root segment and the rest of a stringified `UPath`.
const TOP_SEPARATOR: &str = "//";
|
|
|
|
|
|
|
|
impl std::str::FromStr for UPath {
|
|
|
|
type Err = anyhow::Error;
|
|
|
|
|
|
|
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
|
|
|
if s.len() == 0 {
|
|
|
|
Ok(UPath(vec![]))
|
|
|
|
} else {
|
|
|
|
match s.find(TOP_SEPARATOR) {
|
|
|
|
Some(head_idx) => {
|
|
|
|
let (head, rest) = s.split_at(head_idx);
|
|
|
|
let mut result: Vec<UDirectory> = Vec::new();
|
|
|
|
result.push(UDirectory {
|
|
|
|
name: String::from(head),
|
|
|
|
});
|
|
|
|
result.append(
|
|
|
|
rest[TOP_SEPARATOR.len()..rest.len()]
|
|
|
|
.split("/")
|
|
|
|
.map(|part| UDirectory {
|
|
|
|
name: String::from(part),
|
|
|
|
})
|
|
|
|
.collect::<Vec<UDirectory>>()
|
|
|
|
.as_mut(),
|
|
|
|
);
|
|
|
|
Ok(UPath(result))
|
|
|
|
}
|
|
|
|
None => Ok(UPath(
|
|
|
|
s.split("/")
|
|
|
|
.map(|part| UDirectory {
|
|
|
|
name: String::from(part),
|
|
|
|
})
|
|
|
|
.collect(),
|
|
|
|
)),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl std::fmt::Display for UDirectory {
|
|
|
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
|
|
write!(f, "{}", self.name)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl std::fmt::Display for UPath {
|
|
|
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
|
|
match self.0.len() {
|
|
|
|
0 => write!(f, ""),
|
|
|
|
1 => write!(f, "{}", self.0.first().unwrap().name),
|
|
|
|
_ => {
|
|
|
|
let (head, tail) = self.0.split_first().unwrap();
|
|
|
|
write!(
|
|
|
|
f,
|
|
|
|
"{}//{}",
|
|
|
|
head.name,
|
|
|
|
tail.iter()
|
|
|
|
.map(|udir| udir.name.clone())
|
|
|
|
.collect::<Vec<String>>()
|
|
|
|
.join("/")
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub async fn fetch_or_create_dir(
|
|
|
|
db_executor: &Addr<crate::database::DbExecutor>,
|
|
|
|
parent: Option<Address>,
|
|
|
|
directory: UDirectory,
|
|
|
|
) -> Result<Address> {
|
2020-09-12 22:50:14 +02:00
|
|
|
match parent.clone() {
|
|
|
|
Some(address) => trace!("FETCHING/CREATING {}/{:#}", address, directory),
|
|
|
|
None => trace!("FETCHING/CREATING /{:#}", directory),
|
|
|
|
}
|
|
|
|
|
2020-09-12 14:27:45 +02:00
|
|
|
let dir_value = EntryValue::Value(Value::String(directory.name));
|
|
|
|
let directories: Vec<Address> = db_executor
|
|
|
|
.send(QueryEntries {
|
|
|
|
target: None,
|
|
|
|
key: Some(String::from(DIR_KEY)),
|
|
|
|
value: Some(dir_value.clone()),
|
|
|
|
})
|
|
|
|
.await??
|
|
|
|
.into_iter()
|
|
|
|
.map(|e: Entry| e.target)
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
let valid_directories: Vec<Address> = match parent.clone() {
|
|
|
|
Some(address) => {
|
|
|
|
let parent_has: Vec<Address> = db_executor
|
|
|
|
.send(QueryEntries {
|
|
|
|
target: Some(address),
|
|
|
|
key: Some(String::from(DIR_HAS_KEY)),
|
|
|
|
value: None,
|
|
|
|
})
|
|
|
|
.await??
|
|
|
|
.into_iter()
|
2020-09-12 22:50:14 +02:00
|
|
|
.filter_map(|e: Entry| {
|
|
|
|
if let EntryValue::Address(address) = e.value {
|
|
|
|
Some(address)
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
})
|
2020-09-12 14:27:45 +02:00
|
|
|
.collect();
|
|
|
|
|
2020-09-12 22:50:14 +02:00
|
|
|
let valid = directories
|
2020-09-12 14:27:45 +02:00
|
|
|
.into_iter()
|
|
|
|
.filter(|a| parent_has.contains(a))
|
2020-09-12 22:50:14 +02:00
|
|
|
.collect();
|
|
|
|
|
|
|
|
valid
|
2020-09-12 14:27:45 +02:00
|
|
|
}
|
|
|
|
None => directories,
|
|
|
|
};
|
|
|
|
|
|
|
|
match valid_directories.len() {
|
|
|
|
0 => {
|
|
|
|
let new_directory_address = Address::UUID(Uuid::new_v4());
|
|
|
|
let directory_entry = InnerEntry {
|
|
|
|
target: new_directory_address.clone(),
|
|
|
|
key: String::from(DIR_KEY),
|
|
|
|
value: dir_value,
|
|
|
|
};
|
|
|
|
let _ = db_executor
|
|
|
|
.send(InsertEntry {
|
|
|
|
entry: directory_entry,
|
|
|
|
})
|
|
|
|
.await??;
|
|
|
|
|
|
|
|
if parent.is_some() {
|
|
|
|
let has_entry = InnerEntry {
|
|
|
|
target: parent.unwrap(),
|
|
|
|
key: String::from(DIR_HAS_KEY),
|
|
|
|
value: EntryValue::Address(new_directory_address.clone()),
|
|
|
|
};
|
|
|
|
let _ = db_executor.send(InsertEntry { entry: has_entry }).await??;
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(new_directory_address)
|
|
|
|
}
|
|
|
|
1 => Ok(valid_directories[0].clone()),
|
|
|
|
_ => Err(anyhow!(
|
|
|
|
"Invalid database state - more than one directory matches the query!"
|
|
|
|
)),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-09-12 22:50:14 +02:00
|
|
|
pub async fn resolve_path_with_parents(
|
|
|
|
db_executor: &Addr<DbExecutor>,
|
|
|
|
path: &UPath,
|
|
|
|
) -> Result<Vec<Address>> {
|
|
|
|
let mut result: Vec<Address> = vec![];
|
|
|
|
let mut path_stack = path.0.to_vec();
|
|
|
|
|
|
|
|
path_stack.reverse();
|
|
|
|
while path_stack.len() > 0 {
|
|
|
|
let dir_address = fetch_or_create_dir(
|
|
|
|
db_executor,
|
|
|
|
result.last().cloned(),
|
|
|
|
path_stack.pop().unwrap(),
|
|
|
|
)
|
|
|
|
.await?;
|
|
|
|
result.push(dir_address);
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(result)
|
2020-09-12 14:27:45 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
async fn _reimport_directory<T: AsRef<Path>>(
|
2020-08-27 00:11:50 +02:00
|
|
|
directory: T,
|
|
|
|
db_executor: &Addr<crate::database::DbExecutor>,
|
|
|
|
hasher_worker: &Addr<HasherWorker>,
|
|
|
|
) -> Result<()> {
|
2020-09-12 22:50:14 +02:00
|
|
|
for path in WalkDir::new(&directory)
|
2020-08-27 00:11:50 +02:00
|
|
|
.into_iter()
|
|
|
|
.filter_map(|e| e.ok())
|
|
|
|
.filter(|e| e.path().is_file())
|
2020-09-12 22:50:14 +02:00
|
|
|
.map(|e| e.into_path())
|
2020-08-27 00:11:50 +02:00
|
|
|
{
|
2020-09-12 22:50:14 +02:00
|
|
|
info!("Processing: {:?}", path);
|
2020-08-27 00:11:50 +02:00
|
|
|
|
2020-09-12 22:50:14 +02:00
|
|
|
let metadata = fs::metadata(&path)?;
|
2020-08-27 00:11:50 +02:00
|
|
|
let size = metadata.len() as i64;
|
|
|
|
if size < 0 {
|
2020-09-12 22:50:14 +02:00
|
|
|
panic!("File {} too large?!", path.display());
|
2020-08-27 00:11:50 +02:00
|
|
|
}
|
|
|
|
|
2020-09-12 22:50:14 +02:00
|
|
|
let digest = hasher_worker
|
|
|
|
.send(ComputeHash {
|
|
|
|
path: path.to_path_buf(),
|
|
|
|
})
|
|
|
|
.await??;
|
2020-08-27 00:11:50 +02:00
|
|
|
|
2020-09-12 22:50:14 +02:00
|
|
|
let existing_file: Option<String> = db_executor
|
|
|
|
.send(RetrieveByHash {
|
|
|
|
hash: digest.clone(),
|
|
|
|
})
|
|
|
|
.await??;
|
|
|
|
|
|
|
|
if existing_file.is_none() {
|
|
|
|
let new_file = models::NewFile {
|
|
|
|
path: path.to_str().expect("path not valid unicode?!").to_string(),
|
|
|
|
hash: (digest.clone()).0,
|
|
|
|
created: NaiveDateTime::from_timestamp(Utc::now().timestamp(), 0),
|
|
|
|
size,
|
|
|
|
};
|
|
|
|
|
|
|
|
db_executor
|
|
|
|
.send(crate::database::InsertFile { file: new_file })
|
|
|
|
.await??;
|
|
|
|
}
|
|
|
|
|
|
|
|
let components = path.components().collect::<Vec<Component>>();
|
|
|
|
let (filename, dir_path) = components.split_last().unwrap();
|
|
|
|
|
2020-09-12 23:07:50 +02:00
|
|
|
let file_address = Address::UUID(Uuid::new_v4());
|
|
|
|
|
2020-09-12 22:50:14 +02:00
|
|
|
let name_entry = InnerEntry {
|
2020-09-12 23:07:50 +02:00
|
|
|
target: file_address.clone(),
|
2020-09-12 22:50:14 +02:00
|
|
|
key: FILENAME_KEY.to_string(),
|
|
|
|
value: EntryValue::Value(Value::String(
|
|
|
|
filename.as_os_str().to_string_lossy().to_string(),
|
|
|
|
)),
|
2020-08-27 00:11:50 +02:00
|
|
|
};
|
2020-09-12 22:50:14 +02:00
|
|
|
db_executor
|
|
|
|
.send(crate::database::InsertEntry { entry: name_entry })
|
|
|
|
.await??;
|
2020-08-27 00:11:50 +02:00
|
|
|
|
2020-09-12 23:07:50 +02:00
|
|
|
let identity_entry = InnerEntry {
|
|
|
|
target: file_address.clone(),
|
|
|
|
key: FILE_IDENTITY_KEY.to_string(),
|
|
|
|
value: EntryValue::Address(Address::Hash(digest.clone())),
|
|
|
|
};
|
|
|
|
db_executor
|
|
|
|
.send(crate::database::InsertEntry {
|
|
|
|
entry: identity_entry,
|
|
|
|
})
|
|
|
|
.await??;
|
|
|
|
|
2020-09-12 22:50:14 +02:00
|
|
|
let upath = UPath(
|
|
|
|
iter::once(UDirectory {
|
|
|
|
name: "NATIVE".to_string(),
|
|
|
|
})
|
|
|
|
.chain(dir_path.iter().map(|component| UDirectory {
|
|
|
|
name: component.as_os_str().to_string_lossy().to_string(),
|
|
|
|
}))
|
|
|
|
.collect(),
|
|
|
|
);
|
|
|
|
let resolved_path = resolve_path_with_parents(db_executor, &upath).await?;
|
|
|
|
let parent_dir = resolved_path.last().unwrap();
|
|
|
|
let dir_has_entry = InnerEntry {
|
|
|
|
target: parent_dir.clone(),
|
|
|
|
key: DIR_HAS_KEY.to_string(),
|
2020-09-12 23:07:50 +02:00
|
|
|
value: EntryValue::Address(file_address),
|
2020-09-12 22:50:14 +02:00
|
|
|
};
|
|
|
|
db_executor
|
|
|
|
.send(crate::database::InsertEntry {
|
|
|
|
entry: dir_has_entry,
|
|
|
|
})
|
|
|
|
.await??;
|
2020-08-27 00:11:50 +02:00
|
|
|
}
|
|
|
|
info!("Finished updating {}.", directory.as_ref().display());
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2020-09-12 14:27:45 +02:00
|
|
|
pub async fn reimport_directory(
|
2020-08-30 22:11:32 +02:00
|
|
|
directory: PathBuf,
|
|
|
|
db_executor: Addr<crate::database::DbExecutor>,
|
|
|
|
hasher_worker: Addr<HasherWorker>,
|
|
|
|
) {
|
2020-09-12 14:27:45 +02:00
|
|
|
let result = _reimport_directory(directory, &db_executor, &hasher_worker).await;
|
2020-08-30 22:14:24 +02:00
|
|
|
if result.is_err() {
|
2020-09-12 22:50:14 +02:00
|
|
|
warn!("Update did not succeed! {}", result.err().unwrap());
|
2020-08-30 22:11:32 +02:00
|
|
|
}
|
|
|
|
}
|
2020-09-12 14:27:45 +02:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use crate::filesystem::{UDirectory, UPath};
    use anyhow::Result;

    /// Round-trips a multi-segment UPath through Display and FromStr.
    #[test]
    fn test_path_codec() {
        // Build the fixture from a name list instead of four repeated
        // struct literals.
        let path = UPath(
            ["top", "foo", "bar", "baz"]
                .iter()
                .map(|name| UDirectory {
                    name: name.to_string(),
                })
                .collect(),
        );

        let str_path = path.to_string();
        assert!(!str_path.is_empty());

        let decoded_path: Result<UPath> = str_path.parse();
        assert!(decoded_path.is_ok());

        assert_eq!(path, decoded_path.unwrap());
    }
}
|