2021-03-20 16:49:01 +01:00
|
|
|
use crate::addressing::Address;
|
|
|
|
use crate::database::{
|
|
|
|
bulk_retrieve_objects, file_set_valid, insert_entry, insert_file, query, retrieve_all_files,
|
|
|
|
DbPool, Entry, EntryQuery, EntryValue, InvariantEntry, Query, QueryComponent, QueryPart,
|
2021-06-06 23:50:27 +02:00
|
|
|
DATABASE_FILENAME, IS_OF_TYPE_ATTR, TYPE_ADDR, TYPE_HAS_ATTR, TYPE_ID_ATTR,
|
|
|
|
TYPE_INSTANCED_ATTR, TYPE_IS_ATTR, TYPE_REQUIRES_ATTR,
|
2021-03-20 16:49:01 +01:00
|
|
|
};
|
|
|
|
use crate::hash::Hashable;
|
|
|
|
use crate::jobs::{Job, JobContainer, JobId};
|
|
|
|
use crate::models;
|
|
|
|
use crate::models::File;
|
2021-03-18 22:42:03 +01:00
|
|
|
use anyhow::{anyhow, Error, Result};
|
|
|
|
use chrono::prelude::*;
|
|
|
|
use diesel::sqlite::Sqlite;
|
|
|
|
use diesel::Connection;
|
2021-03-20 16:49:01 +01:00
|
|
|
use log::{error, info, trace, warn};
|
2021-05-28 23:36:00 +02:00
|
|
|
use once_cell::unsync::Lazy;
|
2021-03-18 22:42:03 +01:00
|
|
|
use rayon::prelude::*;
|
|
|
|
use serde_json::Value;
|
2021-03-18 23:59:55 +01:00
|
|
|
use std::convert::TryFrom;
|
|
|
|
use std::path::{Component, Path, PathBuf};
|
|
|
|
use std::sync::{Arc, RwLock};
|
|
|
|
use std::time::{Instant, UNIX_EPOCH};
|
|
|
|
use std::{fs, iter};
|
2021-03-18 22:42:03 +01:00
|
|
|
use uuid::Uuid;
|
|
|
|
use walkdir::WalkDir;
|
|
|
|
|
2021-03-14 22:16:28 +01:00
|
|
|
const DIR_TYPE: &str = "FS_DIR";
|
2020-09-12 14:27:45 +02:00
|
|
|
const DIR_KEY: &str = "DIR";
|
|
|
|
const DIR_HAS_KEY: &str = "DIR_HAS";
|
2021-03-15 22:32:04 +01:00
|
|
|
lazy_static! {
|
|
|
|
static ref DIR_TYPE_INVARIANT: InvariantEntry = InvariantEntry {
|
2021-03-18 23:59:55 +01:00
|
|
|
attribute: String::from(TYPE_IS_ATTR),
|
2021-03-15 22:32:04 +01:00
|
|
|
value: EntryValue::Value(Value::from(DIR_TYPE)),
|
|
|
|
};
|
2021-03-18 22:42:03 +01:00
|
|
|
static ref DIR_TYPE_ADDR: Address = DIR_TYPE_INVARIANT.entity().unwrap();
|
2021-03-15 22:32:04 +01:00
|
|
|
}
|
2020-09-12 23:07:50 +02:00
|
|
|
|
2021-06-04 15:14:58 +02:00
|
|
|
const BLOB_TYPE: &str = "BLOB";
|
2021-03-14 22:16:28 +01:00
|
|
|
const FILE_TYPE: &str = "FS_FILE";
|
2020-09-12 23:07:50 +02:00
|
|
|
const FILE_IDENTITY_KEY: &str = "FILE_IS";
|
|
|
|
const FILENAME_KEY: &str = "FILE_NAME";
|
2021-03-22 23:07:39 +01:00
|
|
|
const FILE_MIME_KEY: &str = "FILE_MIME";
|
2021-03-25 21:29:49 +01:00
|
|
|
const FILE_MTIME_KEY: &str = "FILE_MTIME";
|
|
|
|
const FILE_SIZE_KEY: &str = "FILE_SIZE";
|
2021-03-15 22:32:04 +01:00
|
|
|
lazy_static! {
|
2021-06-04 15:14:58 +02:00
|
|
|
static ref BLOB_TYPE_INVARIANT: InvariantEntry = InvariantEntry {
|
|
|
|
attribute: String::from(TYPE_IS_ATTR),
|
|
|
|
value: EntryValue::Value(Value::from(BLOB_TYPE)),
|
|
|
|
};
|
|
|
|
static ref BLOB_TYPE_ADDR: Address = BLOB_TYPE_INVARIANT.entity().unwrap();
|
2021-03-15 22:32:04 +01:00
|
|
|
static ref FILE_TYPE_INVARIANT: InvariantEntry = InvariantEntry {
|
2021-03-18 23:59:55 +01:00
|
|
|
attribute: String::from(TYPE_IS_ATTR),
|
2021-03-15 22:32:04 +01:00
|
|
|
value: EntryValue::Value(Value::from(FILE_TYPE)),
|
|
|
|
};
|
2021-03-18 22:42:03 +01:00
|
|
|
static ref FILE_TYPE_ADDR: Address = FILE_TYPE_INVARIANT.entity().unwrap();
|
2021-03-15 22:32:04 +01:00
|
|
|
}
|
2020-08-27 00:11:50 +02:00
|
|
|
|
2021-03-18 23:59:55 +01:00
|
|
|
fn initialize_types(pool: &DbPool) -> Result<()> {
|
2021-06-04 15:14:58 +02:00
|
|
|
// BLOB_TYPE
|
|
|
|
insert_entry(&pool.get()?, Entry::try_from(&*BLOB_TYPE_INVARIANT)?)?;
|
|
|
|
insert_entry(
|
|
|
|
&pool.get()?,
|
|
|
|
Entry {
|
|
|
|
entity: BLOB_TYPE_ADDR.clone(),
|
|
|
|
attribute: String::from(IS_OF_TYPE_ATTR),
|
|
|
|
value: EntryValue::Address(TYPE_ADDR.clone()),
|
|
|
|
},
|
|
|
|
)?;
|
2021-06-06 23:50:27 +02:00
|
|
|
insert_entry(
|
|
|
|
&pool.get()?,
|
|
|
|
Entry {
|
|
|
|
entity: BLOB_TYPE_ADDR.clone(),
|
|
|
|
attribute: String::from(TYPE_INSTANCED_ATTR),
|
|
|
|
value: EntryValue::Value(Value::from(FILE_TYPE)),
|
|
|
|
},
|
|
|
|
)?;
|
2021-06-04 15:14:58 +02:00
|
|
|
|
2021-03-18 23:59:55 +01:00
|
|
|
// FILE_TYPE
|
|
|
|
insert_entry(&pool.get()?, Entry::try_from(&*FILE_TYPE_INVARIANT)?)?;
|
|
|
|
insert_entry(
|
|
|
|
&pool.get()?,
|
|
|
|
Entry {
|
|
|
|
entity: FILE_TYPE_ADDR.clone(),
|
|
|
|
attribute: String::from(IS_OF_TYPE_ATTR),
|
|
|
|
value: EntryValue::Address(TYPE_ADDR.clone()),
|
|
|
|
},
|
|
|
|
)?;
|
|
|
|
insert_entry(
|
|
|
|
&pool.get()?,
|
|
|
|
Entry {
|
|
|
|
entity: FILE_TYPE_ADDR.clone(),
|
|
|
|
attribute: String::from(TYPE_ID_ATTR),
|
|
|
|
value: EntryValue::Value(Value::from(FILENAME_KEY)),
|
|
|
|
},
|
|
|
|
)?;
|
|
|
|
insert_entry(
|
|
|
|
&pool.get()?,
|
|
|
|
Entry {
|
|
|
|
entity: FILE_TYPE_ADDR.clone(),
|
|
|
|
attribute: String::from(TYPE_REQUIRES_ATTR),
|
|
|
|
value: EntryValue::Value(Value::from(FILE_IDENTITY_KEY)),
|
|
|
|
},
|
|
|
|
)?;
|
2021-03-22 23:07:39 +01:00
|
|
|
insert_entry(
|
|
|
|
&pool.get()?,
|
|
|
|
Entry {
|
|
|
|
entity: FILE_TYPE_ADDR.clone(),
|
|
|
|
attribute: String::from(TYPE_HAS_ATTR),
|
|
|
|
value: EntryValue::Value(Value::from(FILE_MIME_KEY)),
|
|
|
|
},
|
|
|
|
)?;
|
2021-03-18 23:59:55 +01:00
|
|
|
|
|
|
|
// DIR_TYPE
|
|
|
|
insert_entry(&pool.get()?, Entry::try_from(&*DIR_TYPE_INVARIANT)?)?;
|
|
|
|
insert_entry(
|
|
|
|
&pool.get()?,
|
|
|
|
Entry {
|
|
|
|
entity: DIR_TYPE_ADDR.clone(),
|
|
|
|
attribute: String::from(IS_OF_TYPE_ATTR),
|
|
|
|
value: EntryValue::Address(TYPE_ADDR.clone()),
|
|
|
|
},
|
|
|
|
)?;
|
|
|
|
insert_entry(
|
|
|
|
&pool.get()?,
|
|
|
|
Entry {
|
|
|
|
entity: DIR_TYPE_ADDR.clone(),
|
|
|
|
attribute: String::from(TYPE_ID_ATTR),
|
|
|
|
value: EntryValue::Value(Value::from(DIR_KEY)),
|
|
|
|
},
|
|
|
|
)?;
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2020-09-12 22:50:14 +02:00
|
|
|
/// A single named level in the virtual (in-database) directory tree.
#[derive(Debug, Clone, PartialEq)]
pub struct UDirectory {
    // Directory name; empty names are rejected when parsing a `UPath`.
    name: String,
}
|
2020-08-27 00:11:50 +02:00
|
|
|
|
2020-09-12 22:50:14 +02:00
|
|
|
#[derive(Debug, Clone, PartialEq)]
|
2020-09-12 14:27:45 +02:00
|
|
|
pub struct UPath(Vec<UDirectory>);
|
|
|
|
|
|
|
|
const TOP_SEPARATOR: &str = "//";
|
|
|
|
|
|
|
|
impl std::str::FromStr for UPath {
|
|
|
|
type Err = anyhow::Error;
|
|
|
|
|
2020-09-13 19:20:32 +02:00
|
|
|
fn from_str(string: &str) -> Result<Self, Self::Err> {
|
2020-09-14 01:16:01 +02:00
|
|
|
if string.is_empty() {
|
2020-09-12 14:27:45 +02:00
|
|
|
Ok(UPath(vec![]))
|
|
|
|
} else {
|
2020-09-13 19:20:32 +02:00
|
|
|
let result = match string.find(TOP_SEPARATOR) {
|
2020-09-12 14:27:45 +02:00
|
|
|
Some(head_idx) => {
|
2020-09-13 19:20:32 +02:00
|
|
|
let (head, rest) = string.split_at(head_idx);
|
2021-04-24 00:08:17 +02:00
|
|
|
let mut result: Vec<UDirectory> = vec![UDirectory {
|
2020-09-12 14:27:45 +02:00
|
|
|
name: String::from(head),
|
2021-04-24 00:08:17 +02:00
|
|
|
}];
|
2020-09-12 14:27:45 +02:00
|
|
|
result.append(
|
|
|
|
rest[TOP_SEPARATOR.len()..rest.len()]
|
2020-09-13 20:10:18 +02:00
|
|
|
.trim_end_matches('/')
|
2020-09-14 01:16:01 +02:00
|
|
|
.split('/')
|
2020-09-12 14:27:45 +02:00
|
|
|
.map(|part| UDirectory {
|
|
|
|
name: String::from(part),
|
|
|
|
})
|
|
|
|
.collect::<Vec<UDirectory>>()
|
|
|
|
.as_mut(),
|
|
|
|
);
|
2020-09-13 19:20:32 +02:00
|
|
|
|
|
|
|
result
|
|
|
|
}
|
|
|
|
None => string
|
2020-09-13 20:10:18 +02:00
|
|
|
.trim_end_matches('/')
|
2020-09-14 01:16:01 +02:00
|
|
|
.split('/')
|
2020-09-13 19:20:32 +02:00
|
|
|
.map(|part| UDirectory {
|
|
|
|
name: String::from(part),
|
|
|
|
})
|
|
|
|
.collect(),
|
|
|
|
};
|
|
|
|
|
|
|
|
for directory in &result {
|
2020-09-14 01:16:01 +02:00
|
|
|
if directory.name.is_empty() {
|
2020-09-13 19:20:32 +02:00
|
|
|
return Err(anyhow!("INVALID PATH: Directory name cannot be empty!"));
|
2020-09-12 14:27:45 +02:00
|
|
|
}
|
|
|
|
}
|
2020-09-13 19:20:32 +02:00
|
|
|
|
|
|
|
Ok(UPath(result))
|
2020-09-12 14:27:45 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl std::fmt::Display for UDirectory {
|
2021-02-18 19:20:52 +01:00
|
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
2020-09-12 14:27:45 +02:00
|
|
|
write!(f, "{}", self.name)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl std::fmt::Display for UPath {
|
2021-02-18 19:20:52 +01:00
|
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
2020-09-12 14:27:45 +02:00
|
|
|
match self.0.len() {
|
|
|
|
0 => write!(f, ""),
|
|
|
|
1 => write!(f, "{}", self.0.first().unwrap().name),
|
|
|
|
_ => {
|
|
|
|
let (head, tail) = self.0.split_first().unwrap();
|
|
|
|
write!(
|
|
|
|
f,
|
|
|
|
"{}//{}",
|
|
|
|
head.name,
|
|
|
|
tail.iter()
|
|
|
|
.map(|udir| udir.name.clone())
|
|
|
|
.collect::<Vec<String>>()
|
|
|
|
.join("/")
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-09-13 20:10:18 +02:00
|
|
|
trait EntryList {
|
|
|
|
fn extract_addresses(&self) -> Vec<Address>;
|
|
|
|
}
|
|
|
|
|
|
|
|
impl EntryList for Vec<Entry> {
|
|
|
|
fn extract_addresses(&self) -> Vec<Address> {
|
2020-09-14 01:16:01 +02:00
|
|
|
self.iter()
|
2020-09-13 20:10:18 +02:00
|
|
|
.filter_map(|e| {
|
|
|
|
if let EntryValue::Address(address) = &e.value {
|
|
|
|
Some(address.clone())
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.collect()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-04 23:24:52 +02:00
|
|
|
pub fn list_roots<C: Connection<Backend = Sqlite>>(connection: &C) -> Result<Vec<Address>> {
|
2021-02-20 12:39:03 +01:00
|
|
|
let all_directories: Vec<Entry> = query(
|
2020-09-15 19:26:47 +02:00
|
|
|
connection,
|
2021-02-20 12:39:03 +01:00
|
|
|
Query::SingleQuery(QueryPart::Matches(EntryQuery {
|
2021-02-07 20:18:55 +01:00
|
|
|
entity: QueryComponent::Any,
|
|
|
|
attribute: QueryComponent::Exact(DIR_KEY.to_string()),
|
2020-09-29 00:30:00 +02:00
|
|
|
value: QueryComponent::Any,
|
2021-02-20 12:39:03 +01:00
|
|
|
})),
|
2020-09-15 19:26:47 +02:00
|
|
|
)?;
|
2020-09-13 20:10:18 +02:00
|
|
|
|
2021-02-20 12:39:03 +01:00
|
|
|
let directories_with_parents: Vec<Address> = query(
|
2020-09-15 19:26:47 +02:00
|
|
|
connection,
|
2021-02-20 12:39:03 +01:00
|
|
|
Query::SingleQuery(QueryPart::Matches(EntryQuery {
|
2021-02-07 20:18:55 +01:00
|
|
|
entity: QueryComponent::Any,
|
|
|
|
attribute: QueryComponent::Exact(DIR_HAS_KEY.to_string()),
|
2020-09-29 00:30:00 +02:00
|
|
|
value: QueryComponent::Any,
|
2021-02-20 12:39:03 +01:00
|
|
|
})),
|
2020-09-15 19:26:47 +02:00
|
|
|
)?
|
|
|
|
.extract_addresses();
|
2020-09-13 20:10:18 +02:00
|
|
|
|
|
|
|
Ok(all_directories
|
|
|
|
.into_iter()
|
2021-03-14 10:44:13 +01:00
|
|
|
.filter(|entry| !directories_with_parents.contains(&entry.entity))
|
2021-04-04 23:24:52 +02:00
|
|
|
.map(|e| e.entity)
|
2020-09-13 20:10:18 +02:00
|
|
|
.collect())
|
|
|
|
}
|
|
|
|
|
2020-09-15 19:26:47 +02:00
|
|
|
pub async fn list_directory<C: Connection<Backend = Sqlite>>(
|
|
|
|
connection: &C,
|
|
|
|
path: &UPath,
|
|
|
|
) -> Result<Vec<Entry>> {
|
2020-09-13 20:10:18 +02:00
|
|
|
let entry_addresses = match path.0.len() {
|
2021-04-04 23:24:52 +02:00
|
|
|
0 => list_roots(connection)?,
|
2020-09-13 20:10:18 +02:00
|
|
|
_ => {
|
2020-09-20 19:28:44 +02:00
|
|
|
let resolved_path: Vec<Address> = resolve_path(connection, path, false)?;
|
2020-09-13 20:10:18 +02:00
|
|
|
let last = resolved_path.last().unwrap();
|
|
|
|
|
2021-02-20 12:39:03 +01:00
|
|
|
query(
|
2020-09-15 19:26:47 +02:00
|
|
|
connection,
|
2021-02-20 12:39:03 +01:00
|
|
|
Query::SingleQuery(QueryPart::Matches(EntryQuery {
|
2021-02-07 20:18:55 +01:00
|
|
|
entity: QueryComponent::Exact(last.clone()),
|
|
|
|
attribute: QueryComponent::Exact(DIR_HAS_KEY.to_string()),
|
2020-09-29 00:30:00 +02:00
|
|
|
value: QueryComponent::Any,
|
2021-02-20 12:39:03 +01:00
|
|
|
})),
|
2020-09-15 19:26:47 +02:00
|
|
|
)?
|
|
|
|
.extract_addresses()
|
2020-09-13 20:10:18 +02:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2020-09-25 02:45:17 +02:00
|
|
|
Ok(bulk_retrieve_objects(connection, entry_addresses)?
|
|
|
|
.into_iter()
|
2021-03-14 10:44:13 +01:00
|
|
|
.filter(|e| [DIR_KEY, FILENAME_KEY, FILE_IDENTITY_KEY].contains(&e.attribute.as_str()))
|
2020-09-25 02:45:17 +02:00
|
|
|
.collect::<Vec<Entry>>())
|
2020-09-13 20:10:18 +02:00
|
|
|
}
|
|
|
|
|
2020-09-20 19:28:44 +02:00
|
|
|
pub fn fetch_or_create_dir<C: Connection<Backend = Sqlite>>(
|
2020-09-15 19:26:47 +02:00
|
|
|
connection: &C,
|
2020-09-12 14:27:45 +02:00
|
|
|
parent: Option<Address>,
|
|
|
|
directory: UDirectory,
|
2020-09-13 20:10:18 +02:00
|
|
|
create: bool,
|
2020-09-12 14:27:45 +02:00
|
|
|
) -> Result<Address> {
|
2020-09-12 22:50:14 +02:00
|
|
|
match parent.clone() {
|
|
|
|
Some(address) => trace!("FETCHING/CREATING {}/{:#}", address, directory),
|
|
|
|
None => trace!("FETCHING/CREATING /{:#}", directory),
|
|
|
|
}
|
|
|
|
|
2021-04-04 18:44:01 +02:00
|
|
|
let matching_directories: Vec<Address> = query(
|
2020-09-15 19:26:47 +02:00
|
|
|
connection,
|
2021-02-20 12:39:03 +01:00
|
|
|
Query::SingleQuery(QueryPart::Matches(EntryQuery {
|
2021-02-07 20:18:55 +01:00
|
|
|
entity: QueryComponent::Any,
|
|
|
|
attribute: QueryComponent::Exact(String::from(DIR_KEY)),
|
2021-04-04 18:44:01 +02:00
|
|
|
value: QueryComponent::Exact(EntryValue::Value(Value::String(directory.name.clone()))),
|
2021-02-20 12:39:03 +01:00
|
|
|
})),
|
2020-09-15 19:26:47 +02:00
|
|
|
)?
|
|
|
|
.into_iter()
|
2021-03-14 10:44:13 +01:00
|
|
|
.map(|e: Entry| e.entity)
|
2020-09-15 19:26:47 +02:00
|
|
|
.collect();
|
2020-09-12 14:27:45 +02:00
|
|
|
|
|
|
|
let valid_directories: Vec<Address> = match parent.clone() {
|
|
|
|
Some(address) => {
|
2021-02-20 12:39:03 +01:00
|
|
|
let parent_has: Vec<Address> = query(
|
2020-09-15 19:26:47 +02:00
|
|
|
connection,
|
2021-02-20 12:39:03 +01:00
|
|
|
Query::SingleQuery(QueryPart::Matches(EntryQuery {
|
2021-02-07 20:18:55 +01:00
|
|
|
entity: QueryComponent::Exact(address),
|
|
|
|
attribute: QueryComponent::Exact(String::from(DIR_HAS_KEY)),
|
2020-09-29 00:30:00 +02:00
|
|
|
value: QueryComponent::Any,
|
2021-02-20 12:39:03 +01:00
|
|
|
})),
|
2020-09-15 19:26:47 +02:00
|
|
|
)?
|
|
|
|
.extract_addresses();
|
2020-09-12 14:27:45 +02:00
|
|
|
|
2021-04-04 18:44:01 +02:00
|
|
|
matching_directories
|
2020-09-12 14:27:45 +02:00
|
|
|
.into_iter()
|
|
|
|
.filter(|a| parent_has.contains(a))
|
2020-09-14 01:16:01 +02:00
|
|
|
.collect()
|
2020-09-12 14:27:45 +02:00
|
|
|
}
|
2021-04-04 23:24:52 +02:00
|
|
|
None => {
|
|
|
|
let roots = list_roots(connection)?;
|
|
|
|
matching_directories
|
|
|
|
.into_iter()
|
|
|
|
.filter(|a| roots.contains(a))
|
|
|
|
.collect()
|
|
|
|
}
|
2020-09-12 14:27:45 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
match valid_directories.len() {
|
|
|
|
0 => {
|
2020-09-13 20:10:18 +02:00
|
|
|
if create {
|
2021-04-24 00:08:17 +02:00
|
|
|
let new_directory_address = Address::Uuid(Uuid::new_v4());
|
2021-03-14 22:16:28 +01:00
|
|
|
let type_entry = Entry {
|
|
|
|
entity: new_directory_address.clone(),
|
2021-03-18 23:59:55 +01:00
|
|
|
attribute: String::from(IS_OF_TYPE_ATTR),
|
2021-03-15 22:32:04 +01:00
|
|
|
value: EntryValue::Address(DIR_TYPE_ADDR.clone()),
|
2021-03-14 22:16:28 +01:00
|
|
|
};
|
|
|
|
insert_entry(connection, type_entry)?;
|
|
|
|
|
2021-02-19 20:27:30 +01:00
|
|
|
let directory_entry = Entry {
|
2021-03-14 10:44:13 +01:00
|
|
|
entity: new_directory_address.clone(),
|
|
|
|
attribute: String::from(DIR_KEY),
|
2021-04-04 18:44:01 +02:00
|
|
|
value: EntryValue::Value(Value::String(directory.name)),
|
2020-09-12 14:27:45 +02:00
|
|
|
};
|
2021-03-14 22:16:28 +01:00
|
|
|
insert_entry(connection, directory_entry)?;
|
2020-09-13 20:10:18 +02:00
|
|
|
|
2020-09-20 19:32:28 +02:00
|
|
|
if let Some(parent_addr) = parent {
|
2021-02-19 20:27:30 +01:00
|
|
|
let has_entry = Entry {
|
2021-03-14 10:44:13 +01:00
|
|
|
entity: parent_addr,
|
|
|
|
attribute: String::from(DIR_HAS_KEY),
|
2020-09-13 20:10:18 +02:00
|
|
|
value: EntryValue::Address(new_directory_address.clone()),
|
|
|
|
};
|
2021-03-14 22:16:28 +01:00
|
|
|
insert_entry(connection, has_entry)?;
|
2020-09-13 20:10:18 +02:00
|
|
|
}
|
2020-09-12 14:27:45 +02:00
|
|
|
|
2020-09-13 20:10:18 +02:00
|
|
|
Ok(new_directory_address)
|
|
|
|
} else {
|
|
|
|
Err(anyhow!("Directory does not exist."))
|
|
|
|
}
|
2020-09-12 14:27:45 +02:00
|
|
|
}
|
|
|
|
1 => Ok(valid_directories[0].clone()),
|
|
|
|
_ => Err(anyhow!(
|
|
|
|
"Invalid database state - more than one directory matches the query!"
|
|
|
|
)),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-09-20 19:28:44 +02:00
|
|
|
pub fn resolve_path<C: Connection<Backend = Sqlite>>(
|
2020-09-15 19:26:47 +02:00
|
|
|
connection: &C,
|
2020-09-12 22:50:14 +02:00
|
|
|
path: &UPath,
|
2020-09-13 20:10:18 +02:00
|
|
|
create: bool,
|
2020-09-12 22:50:14 +02:00
|
|
|
) -> Result<Vec<Address>> {
|
|
|
|
let mut result: Vec<Address> = vec![];
|
|
|
|
let mut path_stack = path.0.to_vec();
|
|
|
|
|
|
|
|
path_stack.reverse();
|
2020-09-14 01:16:01 +02:00
|
|
|
while !path_stack.is_empty() {
|
2020-09-12 22:50:14 +02:00
|
|
|
let dir_address = fetch_or_create_dir(
|
2020-09-15 19:26:47 +02:00
|
|
|
connection,
|
2020-09-12 22:50:14 +02:00
|
|
|
result.last().cloned(),
|
|
|
|
path_stack.pop().unwrap(),
|
2020-09-13 20:10:18 +02:00
|
|
|
create,
|
2020-09-20 19:28:44 +02:00
|
|
|
)?;
|
2020-09-12 22:50:14 +02:00
|
|
|
result.push(dir_address);
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(result)
|
2020-09-12 14:27:45 +02:00
|
|
|
}
|
2020-09-25 02:45:17 +02:00
|
|
|
|
2021-03-06 22:14:17 +01:00
|
|
|
pub async fn rescan_vault(
|
2021-02-20 17:36:19 +01:00
|
|
|
pool: DbPool,
|
|
|
|
directory: PathBuf,
|
|
|
|
job_container: Arc<RwLock<JobContainer>>,
|
|
|
|
) {
|
|
|
|
let job_id = job_container
|
|
|
|
.write()
|
|
|
|
.unwrap()
|
|
|
|
.add_job(Job::new("REIMPORT", "Reimporting vault..."))
|
|
|
|
.unwrap();
|
|
|
|
|
|
|
|
let result =
|
2021-03-06 22:14:17 +01:00
|
|
|
actix_web::web::block(move || _rescan_vault(pool, directory, job_container, job_id)).await;
|
2020-09-20 16:29:16 +02:00
|
|
|
if result.is_err() {
|
2020-09-20 19:28:44 +02:00
|
|
|
let err = result.err().unwrap();
|
2021-02-07 20:18:55 +01:00
|
|
|
error!("Update did not succeed! {:?}", err);
|
2020-09-20 16:29:16 +02:00
|
|
|
}
|
|
|
|
}
|
2020-09-25 02:45:17 +02:00
|
|
|
|
2020-09-30 01:31:48 +02:00
|
|
|
type UpdatePathResult = Result<UpdatePathOutcome>;
|
|
|
|
|
|
|
|
enum UpdatePathOutcome {
|
|
|
|
Added(PathBuf),
|
|
|
|
Unchanged(PathBuf),
|
|
|
|
Removed(PathBuf),
|
|
|
|
}
|
|
|
|
|
2021-03-06 22:14:17 +01:00
|
|
|
fn _rescan_vault<T: AsRef<Path>>(
|
2020-09-30 01:31:48 +02:00
|
|
|
pool: DbPool,
|
|
|
|
directory: T,
|
2021-02-20 17:36:19 +01:00
|
|
|
job_container: Arc<RwLock<JobContainer>>,
|
|
|
|
job_id: JobId,
|
2020-09-30 01:31:48 +02:00
|
|
|
) -> Result<Vec<UpdatePathResult>> {
|
2020-09-23 23:11:50 +02:00
|
|
|
let start = Instant::now();
|
|
|
|
|
2021-03-14 22:16:28 +01:00
|
|
|
// Initialize types, etc...
|
2021-03-18 23:59:55 +01:00
|
|
|
initialize_types(&pool)?;
|
2021-03-14 22:16:28 +01:00
|
|
|
|
|
|
|
// Walk through the vault, find all paths
|
2020-09-20 19:28:44 +02:00
|
|
|
let path_entries: Vec<PathBuf> = WalkDir::new(&directory)
|
2020-08-27 00:11:50 +02:00
|
|
|
.into_iter()
|
|
|
|
.filter_map(|e| e.ok())
|
2020-09-22 00:41:59 +02:00
|
|
|
.filter(|e| e.path().is_file() && e.file_name() != DATABASE_FILENAME)
|
2020-09-20 19:42:53 +02:00
|
|
|
.map(|e| fs::canonicalize(e.into_path()).unwrap())
|
2020-09-13 20:43:45 +02:00
|
|
|
.collect();
|
|
|
|
|
2021-03-14 22:16:28 +01:00
|
|
|
// Prepare for processing
|
2020-09-22 00:41:59 +02:00
|
|
|
let rw_pool = Arc::new(RwLock::new(pool.clone()));
|
2020-09-20 16:29:16 +02:00
|
|
|
let absolute_path = fs::canonicalize(&directory)?;
|
2020-09-22 00:41:59 +02:00
|
|
|
let existing_files = Arc::new(RwLock::new(retrieve_all_files(&pool.get()?)?));
|
|
|
|
|
2021-03-14 22:16:28 +01:00
|
|
|
// Actual processing
|
2021-02-20 17:36:19 +01:00
|
|
|
let count = RwLock::new(0_usize);
|
|
|
|
let total = path_entries.len() as f32;
|
2020-09-30 01:31:48 +02:00
|
|
|
let path_results: Vec<UpdatePathResult> = path_entries
|
2020-09-20 19:28:44 +02:00
|
|
|
.into_par_iter()
|
2020-09-22 00:41:59 +02:00
|
|
|
.map(|path| {
|
2021-02-20 17:36:19 +01:00
|
|
|
let result = _process_directory_entry(&rw_pool, path, &absolute_path, &existing_files)?;
|
|
|
|
|
|
|
|
let mut cnt = count.write().unwrap();
|
|
|
|
*cnt += 1;
|
|
|
|
|
|
|
|
job_container
|
|
|
|
.write()
|
|
|
|
.unwrap()
|
|
|
|
.update_progress(&job_id, *cnt as f32 / total * 100.0)
|
|
|
|
.unwrap();
|
|
|
|
|
|
|
|
Ok(result)
|
2020-09-22 00:41:59 +02:00
|
|
|
})
|
|
|
|
.collect();
|
|
|
|
|
2020-09-30 01:31:48 +02:00
|
|
|
let cleanup_results: Vec<UpdatePathResult> = existing_files
|
2020-09-22 00:41:59 +02:00
|
|
|
.write()
|
|
|
|
.unwrap()
|
|
|
|
.iter()
|
|
|
|
.filter(|f| f.valid)
|
|
|
|
.map(|file| {
|
|
|
|
let connection = pool.get()?;
|
|
|
|
connection.transaction::<_, Error, _>(|| {
|
2021-03-14 22:16:28 +01:00
|
|
|
file_set_valid(&connection, file.id, false)?;
|
2020-09-22 00:41:59 +02:00
|
|
|
// remove_object(&connection, )?
|
2020-09-30 01:31:48 +02:00
|
|
|
Ok(UpdatePathOutcome::Removed(PathBuf::from(file.path.clone())))
|
2020-09-22 00:41:59 +02:00
|
|
|
})
|
|
|
|
})
|
|
|
|
.collect();
|
|
|
|
|
2021-03-20 16:49:01 +01:00
|
|
|
let mut failed: Vec<&Error> = vec![];
|
|
|
|
let mut created = 0;
|
|
|
|
let mut unchanged = 0;
|
|
|
|
let mut deleted = 0;
|
|
|
|
|
|
|
|
for result in &path_results {
|
|
|
|
match result {
|
|
|
|
Ok(result) => match result {
|
|
|
|
UpdatePathOutcome::Added(_) => created += 1,
|
|
|
|
UpdatePathOutcome::Unchanged(_) => unchanged += 1,
|
|
|
|
UpdatePathOutcome::Removed(_) => deleted += 1,
|
|
|
|
},
|
|
|
|
Err(err) => failed.push(err),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if !failed.is_empty() {
|
|
|
|
warn!(
|
|
|
|
"{} path updates failed! ({})",
|
|
|
|
failed.len(),
|
|
|
|
failed
|
|
|
|
.iter()
|
|
|
|
.map(|e| e.to_string())
|
|
|
|
.collect::<Vec<String>>()
|
|
|
|
.join(", ")
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2020-09-23 23:11:50 +02:00
|
|
|
info!(
|
2021-03-20 16:49:01 +01:00
|
|
|
"Finished updating {} ({} created, {} deleted, {} left unchanged). Took {}s.",
|
2020-09-23 23:11:50 +02:00
|
|
|
directory.as_ref().display(),
|
2021-03-20 16:49:01 +01:00
|
|
|
created,
|
|
|
|
deleted,
|
|
|
|
unchanged,
|
2020-09-23 23:11:50 +02:00
|
|
|
start.elapsed().as_secs()
|
|
|
|
);
|
2020-08-27 00:11:50 +02:00
|
|
|
|
2020-09-22 00:41:59 +02:00
|
|
|
Ok(path_results
|
|
|
|
.into_iter()
|
|
|
|
.chain(cleanup_results.into_iter())
|
|
|
|
.collect())
|
2020-08-27 00:11:50 +02:00
|
|
|
}
|
|
|
|
|
2020-09-20 20:14:05 +02:00
|
|
|
fn _process_directory_entry<P: AsRef<Path>>(
|
2020-09-22 00:41:59 +02:00
|
|
|
db_pool: &Arc<RwLock<DbPool>>,
|
2020-09-20 16:29:16 +02:00
|
|
|
path: PathBuf,
|
|
|
|
directory_path: &P,
|
2020-09-22 00:41:59 +02:00
|
|
|
existing_files: &Arc<RwLock<Vec<File>>>,
|
2020-09-30 01:31:48 +02:00
|
|
|
) -> UpdatePathResult {
|
2020-09-20 16:29:16 +02:00
|
|
|
info!("Processing: {:?}", path);
|
|
|
|
|
2021-03-25 21:29:49 +01:00
|
|
|
// Prepare the data
|
2020-09-22 00:41:59 +02:00
|
|
|
let db_pool = Arc::clone(&db_pool);
|
|
|
|
let existing_files = Arc::clone(&existing_files);
|
|
|
|
|
|
|
|
let normalized_path = path.strip_prefix(&directory_path)?;
|
|
|
|
let normalized_path_str = normalized_path.to_str().expect("path not valid unicode?!");
|
2020-09-20 20:14:05 +02:00
|
|
|
|
2021-05-28 23:36:00 +02:00
|
|
|
let digest = Lazy::new(|| path.hash());
|
|
|
|
|
2021-03-25 21:29:49 +01:00
|
|
|
// Get size & mtime for quick comparison
|
2020-09-20 16:29:16 +02:00
|
|
|
let metadata = fs::metadata(&path)?;
|
|
|
|
let size = metadata.len() as i64;
|
|
|
|
if size < 0 {
|
|
|
|
panic!("File {} too large?!", path.display());
|
2020-08-30 22:11:32 +02:00
|
|
|
}
|
2020-09-22 00:41:59 +02:00
|
|
|
let mtime = metadata
|
|
|
|
.modified()
|
|
|
|
.map(|t| {
|
|
|
|
NaiveDateTime::from_timestamp(t.duration_since(UNIX_EPOCH).unwrap().as_secs() as i64, 0)
|
|
|
|
})
|
|
|
|
.ok();
|
|
|
|
|
2021-03-25 21:29:49 +01:00
|
|
|
// Check if the path entry for this file already exists in database
|
2020-09-22 00:41:59 +02:00
|
|
|
{
|
2021-03-25 21:29:49 +01:00
|
|
|
// Only grab existing_files for the duration of this block
|
2020-09-22 00:41:59 +02:00
|
|
|
let mut existing_files = existing_files.write().unwrap();
|
|
|
|
|
|
|
|
let maybe_existing_file = existing_files
|
|
|
|
.iter()
|
|
|
|
.enumerate()
|
2020-09-30 01:33:36 +02:00
|
|
|
.find(|(_, file)| file.path == normalized_path_str);
|
2020-09-22 00:41:59 +02:00
|
|
|
|
|
|
|
if let Some((idx, existing_file)) = maybe_existing_file {
|
2021-05-28 23:36:00 +02:00
|
|
|
if (size == existing_file.size && mtime == existing_file.mtime)
|
|
|
|
|| ((*digest).is_ok() && &existing_file.hash == (*digest).as_ref().unwrap())
|
|
|
|
{
|
2020-09-22 00:41:59 +02:00
|
|
|
if !existing_file.valid {
|
2021-03-25 21:29:49 +01:00
|
|
|
file_set_valid(&db_pool.write().unwrap().get()?, existing_file.id, true)?;
|
2020-09-22 00:41:59 +02:00
|
|
|
}
|
2021-03-25 21:29:49 +01:00
|
|
|
existing_files.swap_remove(idx);
|
2020-09-30 01:31:48 +02:00
|
|
|
return Ok(UpdatePathOutcome::Unchanged(path));
|
2020-09-22 00:41:59 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-09-20 16:29:16 +02:00
|
|
|
|
2021-03-25 21:29:49 +01:00
|
|
|
// If not, add it!
|
2021-05-28 23:36:00 +02:00
|
|
|
if let Err(err) = &*digest {
|
|
|
|
return Err(anyhow!(format!("Error hashing: {}", err)));
|
|
|
|
}
|
|
|
|
let digest = (*digest).as_ref().unwrap().clone();
|
2020-09-20 16:29:16 +02:00
|
|
|
|
|
|
|
let new_file = models::NewFile {
|
2020-09-22 00:41:59 +02:00
|
|
|
path: normalized_path_str.to_string(),
|
2020-09-20 16:29:16 +02:00
|
|
|
hash: (digest.clone()).0,
|
2020-09-22 00:41:59 +02:00
|
|
|
added: NaiveDateTime::from_timestamp(Utc::now().timestamp(), 0),
|
2020-09-20 16:29:16 +02:00
|
|
|
size,
|
2020-09-22 00:41:59 +02:00
|
|
|
mtime,
|
2020-09-20 16:29:16 +02:00
|
|
|
};
|
|
|
|
|
2021-03-14 22:16:28 +01:00
|
|
|
insert_file(&db_pool.write().unwrap().get()?, new_file)?;
|
2020-09-20 16:29:16 +02:00
|
|
|
|
2021-03-25 21:29:49 +01:00
|
|
|
// Insert metadata
|
2021-06-04 15:14:58 +02:00
|
|
|
let type_entry = Entry {
|
|
|
|
entity: Address::Hash(digest.clone()),
|
|
|
|
attribute: String::from(IS_OF_TYPE_ATTR),
|
|
|
|
value: EntryValue::Address(BLOB_TYPE_ADDR.clone()),
|
|
|
|
};
|
|
|
|
insert_entry(&db_pool.write().unwrap().get()?, type_entry)?;
|
|
|
|
|
2021-03-25 21:29:49 +01:00
|
|
|
let size_entry = Entry {
|
|
|
|
entity: Address::Hash(digest.clone()),
|
|
|
|
attribute: FILE_SIZE_KEY.to_string(),
|
|
|
|
value: EntryValue::Value(Value::from(size)),
|
|
|
|
};
|
|
|
|
insert_entry(&db_pool.write().unwrap().get()?, size_entry)?;
|
|
|
|
|
|
|
|
if let Some(mtime) = mtime {
|
|
|
|
let mtime_entry = Entry {
|
|
|
|
entity: Address::Hash(digest.clone()),
|
|
|
|
attribute: FILE_MTIME_KEY.to_string(),
|
|
|
|
value: EntryValue::Value(Value::from(mtime.timestamp())),
|
|
|
|
};
|
|
|
|
insert_entry(&db_pool.write().unwrap().get()?, mtime_entry)?;
|
|
|
|
}
|
|
|
|
let mime_entry = Entry {
|
|
|
|
entity: Address::Hash(digest.clone()),
|
|
|
|
attribute: FILE_MIME_KEY.to_string(),
|
|
|
|
value: EntryValue::Value(Value::String(tree_magic::from_filepath(&path))),
|
|
|
|
};
|
|
|
|
insert_entry(&db_pool.write().unwrap().get()?, mime_entry)?;
|
|
|
|
|
|
|
|
// Finally, add the appropriate entries w/r/t virtual filesystem location
|
2020-09-22 00:41:59 +02:00
|
|
|
let components = normalized_path.components().collect::<Vec<Component>>();
|
2020-09-20 16:29:16 +02:00
|
|
|
let (filename, dir_path) = components.split_last().unwrap();
|
|
|
|
|
|
|
|
let upath = UPath(
|
|
|
|
iter::once(UDirectory {
|
|
|
|
name: "NATIVE".to_string(),
|
|
|
|
})
|
|
|
|
.chain(dir_path.iter().map(|component| UDirectory {
|
|
|
|
name: component.as_os_str().to_string_lossy().to_string(),
|
|
|
|
}))
|
|
|
|
.collect(),
|
|
|
|
);
|
2020-09-22 00:41:59 +02:00
|
|
|
let resolved_path = resolve_path(&db_pool.write().unwrap().get()?, &upath, true)?;
|
2020-09-20 16:29:16 +02:00
|
|
|
let parent_dir = resolved_path.last().unwrap();
|
|
|
|
|
2020-09-22 00:41:59 +02:00
|
|
|
let _pool = &db_pool.write().unwrap();
|
2020-09-20 20:14:05 +02:00
|
|
|
let connection = _pool.get()?;
|
2020-09-20 17:17:43 +02:00
|
|
|
connection.transaction::<_, Error, _>(|| {
|
2021-04-24 00:08:17 +02:00
|
|
|
let file_address = Address::Uuid(Uuid::new_v4());
|
2021-03-14 22:16:28 +01:00
|
|
|
let type_entry = Entry {
|
|
|
|
entity: file_address.clone(),
|
2021-03-18 23:59:55 +01:00
|
|
|
attribute: String::from(IS_OF_TYPE_ATTR),
|
2021-03-15 22:32:04 +01:00
|
|
|
value: EntryValue::Address(FILE_TYPE_ADDR.clone()),
|
2021-03-14 22:16:28 +01:00
|
|
|
};
|
|
|
|
insert_entry(&connection, type_entry)?;
|
2020-09-20 17:17:43 +02:00
|
|
|
|
2021-02-19 20:27:30 +01:00
|
|
|
let name_entry = Entry {
|
2021-03-14 10:44:13 +01:00
|
|
|
entity: file_address.clone(),
|
|
|
|
attribute: FILENAME_KEY.to_string(),
|
2020-09-20 17:17:43 +02:00
|
|
|
value: EntryValue::Value(Value::String(
|
|
|
|
filename.as_os_str().to_string_lossy().to_string(),
|
|
|
|
)),
|
|
|
|
};
|
2021-03-14 22:16:28 +01:00
|
|
|
insert_entry(&connection, name_entry)?;
|
2020-09-20 17:17:43 +02:00
|
|
|
|
2021-02-19 20:27:30 +01:00
|
|
|
let identity_entry = Entry {
|
2021-03-14 10:44:13 +01:00
|
|
|
entity: file_address.clone(),
|
|
|
|
attribute: FILE_IDENTITY_KEY.to_string(),
|
2020-09-20 17:17:43 +02:00
|
|
|
value: EntryValue::Address(Address::Hash(digest.clone())),
|
|
|
|
};
|
2021-03-14 22:16:28 +01:00
|
|
|
insert_entry(&connection, identity_entry)?;
|
2020-09-20 17:17:43 +02:00
|
|
|
|
2021-02-19 20:27:30 +01:00
|
|
|
let dir_has_entry = Entry {
|
2021-03-14 10:44:13 +01:00
|
|
|
entity: parent_dir.clone(),
|
|
|
|
attribute: DIR_HAS_KEY.to_string(),
|
2020-09-20 17:17:43 +02:00
|
|
|
value: EntryValue::Address(file_address),
|
|
|
|
};
|
2021-03-14 22:16:28 +01:00
|
|
|
insert_entry(&connection, dir_has_entry)?;
|
2020-09-20 17:17:43 +02:00
|
|
|
|
2020-09-30 01:31:48 +02:00
|
|
|
Ok(UpdatePathOutcome::Added(path.clone()))
|
2020-09-20 17:17:43 +02:00
|
|
|
})
|
2020-08-30 22:11:32 +02:00
|
|
|
}
|
2020-09-12 14:27:45 +02:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use anyhow::Result;

    use crate::filesystem::{UDirectory, UPath};

    /// A multi-level path must round-trip through `Display` and `FromStr`.
    #[test]
    fn test_path_codec() {
        let path = UPath(
            ["top", "foo", "bar", "baz"]
                .iter()
                .map(|name| UDirectory {
                    name: name.to_string(),
                })
                .collect(),
        );

        let str_path = path.to_string();
        assert!(str_path.len() > 0);

        let decoded_path: Result<UPath> = str_path.parse();
        assert!(decoded_path.is_ok());

        assert_eq!(path, decoded_path.unwrap());
    }

    /// Empty segment names (produced by repeated slashes) must be rejected.
    #[test]
    fn test_validation() {
        let invalid_path: Result<UPath> = "a//b/c//d/e/f///g".parse();
        assert!(invalid_path.is_err())
    }
}
|