refactor(cli): Blob is no longer tied just to filepaths

(this lays the foundation for future backing by e.g. S3, also in-memory blobs)
This commit is contained in:
Tomáš Mládek 2024-06-30 13:30:40 +02:00
parent 1efd45806a
commit 44d76aa8d4
8 changed files with 108 additions and 38 deletions

View file

@ -9,7 +9,6 @@ use upend_base::{
constants::{ATTR_IN, ATTR_KEY, ATTR_LABEL, ATTR_OF},
entry::{Entry, EntryValue, InvariantEntry},
};
use upend_db::stores::Blob;
use upend_db::{
jobs::{JobContainer, JobState},
stores::{fs::FILE_MIME_KEY, UpStore},
@ -46,7 +45,7 @@ impl Extractor for ID3Extractor {
let files = store.retrieve(hash)?;
if let Some(file) = files.first() {
let file_path = file.get_file_path();
let file_path = file.get_file_path()?;
let mut job_handle = job_container.add_job(
None,
&format!(
@ -98,7 +97,7 @@ impl Extractor for ID3Extractor {
file.write_all(&picture.data)?;
let hash = store.store(
connection,
Blob::from_filepath(&tmp_path),
tmp_path.into(),
None,
Some(BlobMode::StoreOnly),
context.clone(),
@ -182,10 +181,6 @@ impl Extractor for ID3Extractor {
.query(format!("(matches @{} (contains \"ID3\") ?)", address).parse()?)?
.is_empty();
if is_extracted {
return Ok(false);
}
Ok(true)
return Ok(!is_extracted);
}
}

View file

@ -48,7 +48,7 @@ impl Extractor for ExifExtractor {
let files = store.retrieve(hash)?;
if let Some(file) = files.first() {
let file_path = file.get_file_path();
let file_path = file.get_file_path()?;
let mut job_handle = job_container.add_job(
None,
&format!(

View file

@ -55,7 +55,7 @@ impl Extractor for MediaExtractor {
let files = store.retrieve(hash)?;
if let Some(file) = files.first() {
let file_path = file.get_file_path();
let file_path = file.get_file_path()?;
let mut job_handle = job_container.add_job(
None,
&format!(

View file

@ -91,7 +91,7 @@ impl PreviewStore {
trace!("Calculating preview for {hash:?}...");
let files = self.store.retrieve(&hash)?;
if let Some(file) = files.first() {
let file_path = file.get_file_path();
let file_path = file.get_file_path()?;
let mut job_handle = job_container.add_job(
None,
&format!("Creating preview for {:?}", file_path.file_name().unwrap()),
@ -102,21 +102,21 @@ impl PreviewStore {
let mime_type: Option<String> = if mime_type.is_some() {
mime_type
} else {
tree_magic_mini::from_filepath(file_path).map(|m| m.into())
tree_magic_mini::from_filepath(&file_path).map(|m| m.into())
};
let preview = match mime_type {
Some(tm) if tm.starts_with("text") => {
TextPath(file_path).get_thumbnail(options)
TextPath(&file_path).get_thumbnail(options)
}
Some(tm) if tm.starts_with("video") || tm == "application/x-matroska" => {
VideoPath(file_path).get_thumbnail(options)
VideoPath(&file_path).get_thumbnail(options)
}
Some(tm) if tm.starts_with("audio") || tm == "application/x-riff" => {
AudioPath(file_path).get_thumbnail(options)
AudioPath(&file_path).get_thumbnail(options)
}
Some(tm) if tm.starts_with("image") => {
ImagePath(file_path).get_thumbnail(options)
ImagePath(&file_path).get_thumbnail(options)
}
Some(unknown) => Err(anyhow!("No capability for {:?} thumbnails.", unknown)),
_ => Err(anyhow!("Unknown file type, or file doesn't exist.")),

View file

@ -38,8 +38,8 @@ use upend_base::hash::{b58_decode, b58_encode, sha256hash};
use upend_base::lang::Query;
use upend_db::hierarchies::{list_roots, resolve_path, UHierPath};
use upend_db::jobs;
use upend_db::stores::UpStore;
use upend_db::stores::UpdateOptions;
use upend_db::stores::{Blob, UpStore};
use upend_db::BlobMode;
use upend_db::OperationContext;
use upend_db::UpEndDatabase;
@ -225,7 +225,7 @@ pub async fn get_raw(
.await?
.map_err(ErrorInternalServerError)?;
if let Some(blob) = blobs.first() {
let file_path = blob.get_file_path();
let file_path = blob.get_file_path().map_err(ErrorInternalServerError)?;
if query.native.is_none() {
return Ok(Either::Left(
@ -253,7 +253,7 @@ pub async fn get_raw(
info!("Opening {:?}...", file_path);
let mut response = HttpResponse::NoContent();
let path = if !file_path.is_executable() || state.config.trust_executables {
file_path
&file_path
} else {
response
.append_header((
@ -317,10 +317,10 @@ pub async fn head_raw(
.await?
.map_err(ErrorInternalServerError)?;
if let Some(blob) = blobs.first() {
let file_path = blob.get_file_path();
let file_path = blob.get_file_path().map_err(ErrorInternalServerError)?;
let mut response = HttpResponse::NoContent();
if let Some(mime_type) = tree_magic_mini::from_filepath(file_path) {
if let Some(mime_type) = tree_magic_mini::from_filepath(&file_path) {
if let Ok(mime) = mime_type.parse::<mime::Mime>() {
return Ok(response.content_type(mime).finish());
}
@ -672,7 +672,7 @@ pub async fn put_blob(
_store
.store(
&connection,
Blob::from_filepath(file.path()),
file.path().into(),
_filename,
options.blob_mode,
OperationContext {

View file

@ -3,7 +3,7 @@ use self::db::files;
use super::{Blob, StoreError, UpStore, UpdateOptions, UpdatePathOutcome};
use crate::hierarchies::{resolve_path, resolve_path_cached, ResolveCache, UHierPath, UNode};
use crate::jobs::{JobContainer, JobHandle};
use crate::util::hash_at_path;
use crate::util::{hash, hash_at_path};
use crate::{
BlobMode, ConnectionOptions, LoggingHandler, OperationContext, UpEndConnection, UpEndDatabase,
UPEND_SUBDIR,
@ -628,15 +628,13 @@ impl FsStore {
/// Converts a database "out file" row into a file-backed blob.
impl From<db::OutFile> for Blob {
    fn from(of: db::OutFile) -> Self {
        // `of.path` is already a `PathBuf`; reuse the `From<PathBuf>` impl.
        of.path.into()
    }
}

/// Converts a database file row into a file-backed blob.
impl From<db::File> for Blob {
    fn from(f: db::File) -> Self {
        // `f.path` is stored as a string in the DB; promote it to a `PathBuf`
        // before delegating to the `From<PathBuf>` impl.
        PathBuf::from(f.path).into()
    }
}
@ -667,8 +665,11 @@ impl UpStore for FsStore {
blob_mode: Option<BlobMode>,
context: OperationContext,
) -> Result<UpMultihash, super::StoreError> {
let file_path = blob.get_file_path();
let hash = hash_at_path(file_path).map_err(|e| StoreError::Unknown(e.to_string()))?;
let hash = hash(
blob.read()
.map_err(|e| StoreError::Unknown(e.to_string()))?,
)
.map_err(|e| StoreError::Unknown(e.to_string()))?;
let existing_files = self.retrieve(&hash)?;
@ -687,7 +688,8 @@ impl UpStore for FsStore {
};
let final_path = self.path.join(final_name);
fs::copy(file_path, &final_path).map_err(|e| StoreError::Unknown(e.to_string()))?;
blob.copy_to(&final_path)
.map_err(|e| StoreError::Unknown(e.to_string()))?;
let upath = if let Some(bm) = blob_mode {
self.path_to_upath(&final_path, bm)
@ -1112,7 +1114,7 @@ mod test {
paths.iter().for_each(|path| {
let upath: UHierPath = path.parse().unwrap();
assert!(
resolve_path(&connection, &upath, false, OperationContext::default()).is_ok(),
resolve_path(connection, &upath, false, OperationContext::default()).is_ok(),
"Failed: {}",
upath
);

View file

@ -1,3 +1,4 @@
use std::io::Read;
use std::path::{Path, PathBuf};
use super::{UpEndConnection, UpEndDatabase};
@ -28,19 +29,86 @@ impl std::error::Error for StoreError {}
type Result<T> = std::result::Result<T, StoreError>;
/// A chunk of binary content handled by a store.
///
/// A blob is backed either by a file on the local filesystem or by bytes held
/// in memory (this is what decouples stores from filepaths, allowing e.g.
/// S3-backed or in-memory blobs later).
pub enum Blob {
    /// Content residing at a local filesystem path.
    File(PathBuf),
    /// Content held entirely in memory.
    Data(Vec<u8>),
}
impl std::io::Read for Blob {
    /// Streams the blob's bytes through the standard `Read` interface.
    ///
    /// The previous implementation never made progress: the `File` arm
    /// reopened the file and read from offset zero on every call, and the
    /// `Data` arm always copied the same leading bytes without consuming
    /// them — so `read_to_end` would loop forever on a non-empty blob.
    /// This version loads a `File` blob into memory on first read (turning
    /// it into a `Data` blob) and drains emitted bytes, so successive calls
    /// advance and eventually return `Ok(0)` (EOF).
    ///
    /// NOTE(review): after the first `read`, a `File` blob becomes `Data`,
    /// so `get_file_path` will no longer succeed on it — confirm no caller
    /// interleaves `Read::read` with `get_file_path`.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        match self {
            Blob::File(path) => {
                // Slurp the whole file once, then delegate to the Data arm.
                let data = std::fs::read(path.as_path())?;
                *self = Blob::Data(data);
                self.read(buf)
            }
            Blob::Data(data) => {
                let len = std::cmp::min(data.len(), buf.len());
                buf[..len].copy_from_slice(&data[..len]);
                // Consume the bytes just handed out so the next call
                // continues where this one left off.
                data.drain(..len);
                Ok(len)
            }
        }
    }
}
impl Blob {
pub fn from_filepath<P: AsRef<Path>>(path: P) -> Blob {
Blob {
file_path: PathBuf::from(path.as_ref()),
pub fn read(&self) -> Result<Vec<u8>> {
match self {
Blob::File(path) => {
let file = std::fs::File::open(path)
.map_err(|err| StoreError::Unknown(err.to_string()))?;
let mut reader = std::io::BufReader::new(file);
let mut buffer = Vec::new();
reader
.read_to_end(&mut buffer)
.map_err(|err| StoreError::Unknown(err.to_string()))?;
Ok(buffer)
}
Blob::Data(data) => Ok(data.clone()),
}
}
pub fn get_file_path(&self) -> &Path {
self.file_path.as_path()
pub fn copy_to(&self, path: &Path) -> Result<()> {
match self {
Blob::File(src) => {
std::fs::copy(src, path).map_err(|err| StoreError::Unknown(err.to_string()))?;
Ok(())
}
Blob::Data(data) => {
std::fs::write(path, data).map_err(|err| StoreError::Unknown(err.to_string()))?;
Ok(())
}
}
}
pub fn get_file_path(&self) -> Result<PathBuf> {
match self {
Blob::File(path) => Ok(path.clone()),
Blob::Data(_) => Err(StoreError::Unknown("Blob is not a file".to_string())),
}
}
}
impl From<&Path> for Blob {
    /// Builds a file-backed blob from a borrowed path by taking an owned copy.
    fn from(path: &Path) -> Blob {
        Blob::File(path.to_path_buf())
    }
}
impl From<PathBuf> for Blob {
fn from(path: PathBuf) -> Blob {
Blob::File(path)
}
}
impl From<Vec<u8>> for Blob {
fn from(data: Vec<u8>) -> Blob {
Blob::Data(data)
}
}
impl From<String> for Blob {
fn from(data: String) -> Blob {
Blob::Data(data.into_bytes())
}
}

View file

@ -34,6 +34,11 @@ impl std::io::Write for LoggerSink {
}
}
/// Computes the SHA-256 multihash of an in-memory byte buffer.
///
/// Thin wrapper over `upend_base::hash::sha256hash`, converting its error
/// into `anyhow::Result` via `?`.
pub fn hash<T: AsRef<[u8]>>(input: T) -> anyhow::Result<UpMultihash> {
    Ok(upend_base::hash::sha256hash(input)?)
}
pub fn hash_at_path<P: AsRef<Path>>(path: P) -> anyhow::Result<UpMultihash> {
let path = path.as_ref();
trace!("Hashing {:?}...", path);