From 44d76aa8d4a49b32f8364a2abbc85b0049fb4b1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Ml=C3=A1dek?=
Date: Sun, 30 Jun 2024 13:30:40 +0200
Subject: [PATCH] refactor(cli): Blob is no longer tied just to filepaths

(this lays the foundation for future backing by e.g. S3, also in-memory
blobs)
---
 cli/src/extractors/audio.rs | 11 ++---
 cli/src/extractors/exif.rs  |  2 +-
 cli/src/extractors/media.rs |  2 +-
 cli/src/previews/mod.rs     | 12 +++---
 cli/src/routes.rs           | 12 +++---
 db/src/stores/fs/mod.rs     | 20 +++++----
 db/src/stores/mod.rs        | 82 +++++++++++++++++++++++++++++++++----
 db/src/util.rs              |  5 +++
 8 files changed, 108 insertions(+), 38 deletions(-)

diff --git a/cli/src/extractors/audio.rs b/cli/src/extractors/audio.rs
index a421414..015ae9f 100644
--- a/cli/src/extractors/audio.rs
+++ b/cli/src/extractors/audio.rs
@@ -9,7 +9,6 @@ use upend_base::{
     constants::{ATTR_IN, ATTR_KEY, ATTR_LABEL, ATTR_OF},
     entry::{Entry, EntryValue, InvariantEntry},
 };
-use upend_db::stores::Blob;
 use upend_db::{
     jobs::{JobContainer, JobState},
     stores::{fs::FILE_MIME_KEY, UpStore},
@@ -46,7 +45,7 @@ impl Extractor for ID3Extractor {
         let files = store.retrieve(hash)?;
 
         if let Some(file) = files.first() {
-            let file_path = file.get_file_path();
+            let file_path = file.get_file_path()?;
             let mut job_handle = job_container.add_job(
                 None,
                 &format!(
@@ -98,7 +97,7 @@ impl Extractor for ID3Extractor {
                     file.write_all(&picture.data)?;
                     let hash = store.store(
                         connection,
-                        Blob::from_filepath(&tmp_path),
+                        tmp_path.into(),
                         None,
                         Some(BlobMode::StoreOnly),
                         context.clone(),
@@ -182,10 +181,6 @@ impl Extractor for ID3Extractor {
             .query(format!("(matches @{} (contains \"ID3\") ?)", address).parse()?)?
             .is_empty();
 
-        if is_extracted {
-            return Ok(false);
-        }
-
-        Ok(true)
+        return Ok(!is_extracted);
     }
 }
diff --git a/cli/src/extractors/exif.rs b/cli/src/extractors/exif.rs
index 9c73825..ddcffcc 100644
--- a/cli/src/extractors/exif.rs
+++ b/cli/src/extractors/exif.rs
@@ -48,7 +48,7 @@ impl Extractor for ExifExtractor {
         let files = store.retrieve(hash)?;
 
         if let Some(file) = files.first() {
-            let file_path = file.get_file_path();
+            let file_path = file.get_file_path()?;
             let mut job_handle = job_container.add_job(
                 None,
                 &format!(
diff --git a/cli/src/extractors/media.rs b/cli/src/extractors/media.rs
index 0f15e5f..ef8e130 100644
--- a/cli/src/extractors/media.rs
+++ b/cli/src/extractors/media.rs
@@ -55,7 +55,7 @@ impl Extractor for MediaExtractor {
         let files = store.retrieve(hash)?;
 
         if let Some(file) = files.first() {
-            let file_path = file.get_file_path();
+            let file_path = file.get_file_path()?;
             let mut job_handle = job_container.add_job(
                 None,
                 &format!(
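
[Note, not part of the patch] The extractor changes above only swap `Blob::from_filepath(...)`
for the new `From` conversions and make `get_file_path()` fallible. As a sketch of where this
could go next, per the commit message: with an in-memory `Blob::Data` variant available, the
ID3 cover-art branch could hand the picture bytes straight to the store and skip the temporary
file. This assumes the bindings from the hunk above (`store`, `connection`, `context`,
`picture`) and the same `UpStore::store` signature; error handling is elided.

    // Hypothetical follow-up, not in this commit: store the cover art without a temp file.
    let _hash = store.store(
        connection,
        picture.data.clone().into(), // Vec<u8> -> Blob::Data via the new From impl
        None,
        Some(BlobMode::StoreOnly),
        context.clone(),
    );
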
diff --git a/cli/src/previews/mod.rs b/cli/src/previews/mod.rs
index 66418cf..1f2a6a3 100644
--- a/cli/src/previews/mod.rs
+++ b/cli/src/previews/mod.rs
@@ -91,7 +91,7 @@ impl PreviewStore {
             trace!("Calculating preview for {hash:?}...");
             let files = self.store.retrieve(&hash)?;
             if let Some(file) = files.first() {
-                let file_path = file.get_file_path();
+                let file_path = file.get_file_path()?;
                 let mut job_handle = job_container.add_job(
                     None,
                     &format!("Creating preview for {:?}", file_path.file_name().unwrap()),
@@ -102,21 +102,21 @@ impl PreviewStore {
                 let mime_type: Option<String> = if mime_type.is_some() {
                     mime_type
                 } else {
-                    tree_magic_mini::from_filepath(file_path).map(|m| m.into())
+                    tree_magic_mini::from_filepath(&file_path).map(|m| m.into())
                 };
 
                 let preview = match mime_type {
                     Some(tm) if tm.starts_with("text") => {
-                        TextPath(file_path).get_thumbnail(options)
+                        TextPath(&file_path).get_thumbnail(options)
                     }
                     Some(tm) if tm.starts_with("video") || tm == "application/x-matroska" => {
-                        VideoPath(file_path).get_thumbnail(options)
+                        VideoPath(&file_path).get_thumbnail(options)
                     }
                     Some(tm) if tm.starts_with("audio") || tm == "application/x-riff" => {
-                        AudioPath(file_path).get_thumbnail(options)
+                        AudioPath(&file_path).get_thumbnail(options)
                     }
                     Some(tm) if tm.starts_with("image") => {
-                        ImagePath(file_path).get_thumbnail(options)
+                        ImagePath(&file_path).get_thumbnail(options)
                     }
                     Some(unknown) => Err(anyhow!("No capability for {:?} thumbnails.", unknown)),
                     _ => Err(anyhow!("Unknown file type, or file doesn't exist.")),
diff --git a/cli/src/routes.rs b/cli/src/routes.rs
index f519033..2359cf8 100644
--- a/cli/src/routes.rs
+++ b/cli/src/routes.rs
@@ -38,8 +38,8 @@ use upend_base::hash::{b58_decode, b58_encode, sha256hash};
 use upend_base::lang::Query;
 use upend_db::hierarchies::{list_roots, resolve_path, UHierPath};
 use upend_db::jobs;
+use upend_db::stores::UpStore;
 use upend_db::stores::UpdateOptions;
-use upend_db::stores::{Blob, UpStore};
 use upend_db::BlobMode;
 use upend_db::OperationContext;
 use upend_db::UpEndDatabase;
@@ -225,7 +225,7 @@ pub async fn get_raw(
         .await?
         .map_err(ErrorInternalServerError)?;
     if let Some(blob) = blobs.first() {
-        let file_path = blob.get_file_path();
+        let file_path = blob.get_file_path().map_err(ErrorInternalServerError)?;
 
         if query.native.is_none() {
             return Ok(Either::Left(
@@ -253,7 +253,7 @@ pub async fn get_raw(
         info!("Opening {:?}...", file_path);
         let mut response = HttpResponse::NoContent();
         let path = if !file_path.is_executable() || state.config.trust_executables {
-            file_path
+            &file_path
         } else {
             response
                 .append_header((
@@ -317,10 +317,10 @@ pub async fn head_raw(
         .await?
         .map_err(ErrorInternalServerError)?;
     if let Some(blob) = blobs.first() {
-        let file_path = blob.get_file_path();
+        let file_path = blob.get_file_path().map_err(ErrorInternalServerError)?;
         let mut response = HttpResponse::NoContent();
-        if let Some(mime_type) = tree_magic_mini::from_filepath(file_path) {
+        if let Some(mime_type) = tree_magic_mini::from_filepath(&file_path) {
            if let Ok(mime) = mime_type.parse::() {
                return Ok(response.content_type(mime).finish());
            }
        }
@@ -672,7 +672,7 @@ pub async fn put_blob(
             _store
                 .store(
                     &connection,
-                    Blob::from_filepath(file.path()),
+                    file.path().into(),
                     _filename,
                     options.blob_mode,
                     OperationContext {
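
[Note, not part of the patch] In the HTTP handlers, `get_file_path()` is now fallible, which the
routes above handle by mapping the error to an internal server error. A possible follow-up
sketch for `head_raw`, not implied by this commit: when a blob has no backing file (the new
`Blob::Data` case), the MIME type could be sniffed from the bytes via the new `Blob::read()`
instead of from the path. Assumes `blob` and `response` as bound in the hunk above and that
`tree_magic_mini::from_u8` is available in the pinned version of that crate.

    // Hypothetical fallback for file-less blobs: sniff the bytes, not the path.
    let bytes = blob.read().map_err(ErrorInternalServerError)?;
    let mime_type = tree_magic_mini::from_u8(&bytes);
    return Ok(response.content_type(mime_type).finish());
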
diff --git a/db/src/stores/fs/mod.rs b/db/src/stores/fs/mod.rs
index d3c0359..ae5f452 100644
--- a/db/src/stores/fs/mod.rs
+++ b/db/src/stores/fs/mod.rs
@@ -3,7 +3,7 @@ use self::db::files;
 use super::{Blob, StoreError, UpStore, UpdateOptions, UpdatePathOutcome};
 use crate::hierarchies::{resolve_path, resolve_path_cached, ResolveCache, UHierPath, UNode};
 use crate::jobs::{JobContainer, JobHandle};
-use crate::util::hash_at_path;
+use crate::util::{hash, hash_at_path};
 use crate::{
     BlobMode, ConnectionOptions, LoggingHandler, OperationContext, UpEndConnection, UpEndDatabase,
     UPEND_SUBDIR,
@@ -628,15 +628,13 @@ impl FsStore {
 
 impl From<db::OutFile> for Blob {
     fn from(of: db::OutFile) -> Self {
-        Blob { file_path: of.path }
+        of.path.into()
     }
 }
 
 impl From<db::File> for Blob {
     fn from(f: db::File) -> Self {
-        Blob {
-            file_path: PathBuf::from(f.path),
-        }
+        PathBuf::from(f.path).into()
     }
 }
 
@@ -667,8 +665,11 @@ impl UpStore for FsStore {
         blob_mode: Option<BlobMode>,
         context: OperationContext,
     ) -> Result {
-        let file_path = blob.get_file_path();
-        let hash = hash_at_path(file_path).map_err(|e| StoreError::Unknown(e.to_string()))?;
+        let hash = hash(
+            blob.read()
+                .map_err(|e| StoreError::Unknown(e.to_string()))?,
+        )
+        .map_err(|e| StoreError::Unknown(e.to_string()))?;
 
         let existing_files = self.retrieve(&hash)?;
 
@@ -687,7 +688,8 @@ impl UpStore for FsStore {
         };
 
         let final_path = self.path.join(final_name);
-        fs::copy(file_path, &final_path).map_err(|e| StoreError::Unknown(e.to_string()))?;
+        blob.copy_to(&final_path)
+            .map_err(|e| StoreError::Unknown(e.to_string()))?;
 
         let upath = if let Some(bm) = blob_mode {
             self.path_to_upath(&final_path, bm)
@@ -1112,7 +1114,7 @@ mod test {
         paths.iter().for_each(|path| {
             let upath: UHierPath = path.parse().unwrap();
             assert!(
-                resolve_path(&connection, &upath, false, OperationContext::default()).is_ok(),
+                resolve_path(connection, &upath, false, OperationContext::default()).is_ok(),
                 "Failed: {}",
                 upath
             );
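
[Note, not part of the patch] The FsStore change above is the behavioral core of the refactor:
hashing now goes through blob.read() rather than the backing path, and writing goes through
blob.copy_to(); note that unlike hash_at_path, this buffers the whole blob in memory first.
A hypothetical regression test for the invariant this relies on, written as if it lived among
the db crate's tests; it assumes the hash type implements PartialEq and Debug, which is not
shown in the patch.

    // Hypothetical test, not in this commit: the in-memory hashing path used by
    // FsStore::store should agree with the old hash_at_path for file-backed blobs.
    #[test]
    fn hashing_a_blob_matches_hashing_its_file() -> anyhow::Result<()> {
        let tmp = std::env::temp_dir().join("blob_hash_equivalence.bin");
        std::fs::write(&tmp, b"some bytes")?;
        let blob: crate::stores::Blob = tmp.clone().into();
        let via_blob = crate::util::hash(blob.read().map_err(|e| anyhow::anyhow!(e.to_string()))?)?;
        let via_path = crate::util::hash_at_path(&tmp)?;
        assert_eq!(via_blob, via_path);
        Ok(())
    }
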
diff --git a/db/src/stores/mod.rs b/db/src/stores/mod.rs
index 5b01c60..9c062d0 100644
--- a/db/src/stores/mod.rs
+++ b/db/src/stores/mod.rs
@@ -1,3 +1,4 @@
+use std::io::Read;
 use std::path::{Path, PathBuf};
 
 use super::{UpEndConnection, UpEndDatabase};
@@ -28,19 +29,86 @@ impl std::error::Error for StoreError {}
 
 type Result<T> = std::result::Result<T, StoreError>;
 
-pub struct Blob {
-    file_path: PathBuf,
+pub enum Blob {
+    File(PathBuf),
+    Data(Vec<u8>),
+}
+
+impl std::io::Read for Blob {
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        match self {
+            Blob::File(path) => {
+                let mut file = std::fs::File::open(path)?;
+                file.read(buf)
+            }
+            Blob::Data(data) => {
+                let len = std::cmp::min(data.len(), buf.len());
+                buf[..len].copy_from_slice(&data[..len]);
+                Ok(len)
+            }
+        }
+    }
 }
 
 impl Blob {
-    pub fn from_filepath<P: AsRef<Path>>(path: P) -> Blob {
-        Blob {
-            file_path: PathBuf::from(path.as_ref()),
+    pub fn read(&self) -> Result<Vec<u8>> {
+        match self {
+            Blob::File(path) => {
+                let file = std::fs::File::open(path)
+                    .map_err(|err| StoreError::Unknown(err.to_string()))?;
+                let mut reader = std::io::BufReader::new(file);
+                let mut buffer = Vec::new();
+                reader
+                    .read_to_end(&mut buffer)
+                    .map_err(|err| StoreError::Unknown(err.to_string()))?;
+                Ok(buffer)
+            }
+            Blob::Data(data) => Ok(data.clone()),
         }
     }
 
-    pub fn get_file_path(&self) -> &Path {
-        self.file_path.as_path()
+    pub fn copy_to(&self, path: &Path) -> Result<()> {
+        match self {
+            Blob::File(src) => {
+                std::fs::copy(src, path).map_err(|err| StoreError::Unknown(err.to_string()))?;
+                Ok(())
+            }
+            Blob::Data(data) => {
+                std::fs::write(path, data).map_err(|err| StoreError::Unknown(err.to_string()))?;
+                Ok(())
+            }
+        }
+    }
+
+    pub fn get_file_path(&self) -> Result<PathBuf> {
+        match self {
+            Blob::File(path) => Ok(path.clone()),
+            Blob::Data(_) => Err(StoreError::Unknown("Blob is not a file".to_string())),
+        }
+    }
+}
+
+impl From<&Path> for Blob {
+    fn from(path: &Path) -> Blob {
+        Blob::File(path.into())
+    }
+}
+
+impl From<PathBuf> for Blob {
+    fn from(path: PathBuf) -> Blob {
+        Blob::File(path)
+    }
+}
+
+impl From<Vec<u8>> for Blob {
+    fn from(data: Vec<u8>) -> Blob {
+        Blob::Data(data)
+    }
+}
+
+impl From<String> for Blob {
+    fn from(data: String) -> Blob {
+        Blob::Data(data.into_bytes())
     }
 }
diff --git a/db/src/util.rs b/db/src/util.rs
index 96ee5d3..c7795f3 100644
--- a/db/src/util.rs
+++ b/db/src/util.rs
@@ -34,6 +34,11 @@ impl std::io::Write for LoggerSink {
     }
 }
 
+pub fn hash<T: AsRef<[u8]>>(input: T) -> anyhow::Result {
+    let hash = upend_base::hash::sha256hash(input)?;
+    Ok(hash)
+}
+
 pub fn hash_at_path<P: AsRef<Path>>(path: P) -> anyhow::Result {
     let path = path.as_ref();
     trace!("Hashing {:?}...", path);
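
[Note, not part of the patch] To make the new surface concrete, here is a minimal sketch of how
the two Blob variants behave from a caller's point of view, written as if it sat in a
#[cfg(test)] module inside db/src/stores/mod.rs so that Blob is in scope; the temp-file names
are invented for illustration.

    #[cfg(test)]
    mod blob_sketch {
        use super::Blob;

        #[test]
        fn file_and_data_blobs() {
            // File-backed blob, built via `From<PathBuf>` -- the conversion the CLI call
            // sites now use instead of the removed `Blob::from_filepath`.
            let tmp = std::env::temp_dir().join("blob_sketch.txt");
            std::fs::write(&tmp, b"hello").unwrap();
            let file_blob: Blob = tmp.clone().into();
            assert_eq!(file_blob.read().unwrap(), b"hello".to_vec());
            assert_eq!(file_blob.get_file_path().unwrap(), tmp);

            // In-memory blob via `From<String>`; there is no file behind it, so
            // `get_file_path` returns an error instead of a path.
            let data_blob: Blob = "hello".to_string().into();
            assert_eq!(data_blob.read().unwrap(), b"hello".to_vec());
            assert!(data_blob.get_file_path().is_err());

            // `copy_to` materialises either variant on disk; FsStore::store now relies on
            // this instead of fs::copy-ing a caller-supplied path.
            let copy = std::env::temp_dir().join("blob_sketch_copy.txt");
            data_blob.copy_to(&copy).unwrap();
            assert_eq!(std::fs::read(&copy).unwrap(), b"hello".to_vec());
        }
    }

With Data(Vec<u8>) and the From impls in place, callers that currently go through a temporary
file (such as the ID3 cover-art extraction) can hand bytes to the store directly, which is the
direction the commit message points at for S3- or memory-backed stores.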