feat(backend): Add API endpoint for available extractors and their statuses
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
This commit is contained in:
parent
baa4c02014
commit
99f15dd584
11 changed files with 252 additions and 58 deletions
|
@ -23,5 +23,9 @@
|
||||||
"use_new_terminal": false,
|
"use_new_terminal": false,
|
||||||
"allow_concurrent_runs": false,
|
"allow_concurrent_runs": false,
|
||||||
"reveal": "always"
|
"reveal": "always"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"label": "cargo clippy",
|
||||||
|
"command": "cargo clippy --fix --allow-dirty --allow-staged"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use serde::Serialize;
|
||||||
|
use shadow_rs::{is_debug, shadow};
|
||||||
use std::env::current_exe;
|
use std::env::current_exe;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
use lazy_static::lazy_static;
|
|
||||||
use shadow_rs::{is_debug, shadow};
|
|
||||||
|
|
||||||
shadow!(build);
|
shadow!(build);
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
|
@ -32,3 +32,27 @@ lazy_static! {
|
||||||
pub fn get_version() -> &'static str {
|
pub fn get_version() -> &'static str {
|
||||||
option_env!("UPEND_VERSION").unwrap_or("unknown")
|
option_env!("UPEND_VERSION").unwrap_or("unknown")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub trait UserDescribable {
|
||||||
|
fn name(&self) -> &'static str;
|
||||||
|
fn description(&self) -> Option<&'static str> {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
fn icon(&self) -> Option<&'static str> {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
fn full_description(&self) -> UserDescription {
|
||||||
|
UserDescription {
|
||||||
|
name: self.name(),
|
||||||
|
description: self.description(),
|
||||||
|
icon: self.icon(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Serializable snapshot of what a `UserDescribable` reports about itself;
/// built by `UserDescribable::full_description`.
#[derive(Debug, Serialize)]
|
||||||
|
pub struct UserDescription {
|
||||||
|
/// Value returned by `UserDescribable::name`.
pub name: &'static str,
|
||||||
|
/// Value returned by `UserDescribable::description`; `None` when not provided.
pub description: Option<&'static str>,
|
||||||
|
/// Value returned by `UserDescribable::icon`; `None` when not provided.
pub icon: Option<&'static str>,
|
||||||
|
}
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
use std::io::Write;
|
use super::{Extractor, ExtractorGetResult, ExtractorStatus};
|
||||||
use std::sync::Arc;
|
use crate::common::UserDescribable;
|
||||||
|
|
||||||
use super::{Extractor, ExtractorGetResult};
|
|
||||||
use anyhow::{anyhow, Result};
|
use anyhow::{anyhow, Result};
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
|
use std::io::Write;
|
||||||
|
use std::sync::Arc;
|
||||||
use upend_base::{
|
use upend_base::{
|
||||||
addressing::Address,
|
addressing::Address,
|
||||||
constants::{ATTR_IN, ATTR_KEY, ATTR_LABEL, ATTR_OF},
|
constants::{ATTR_IN, ATTR_KEY, ATTR_LABEL, ATTR_OF},
|
||||||
|
@ -163,7 +163,11 @@ impl Extractor for ID3Extractor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_needed(&self, address: &Address, connection: &UpEndConnection) -> Result<bool> {
|
fn status_for(
|
||||||
|
&self,
|
||||||
|
address: &Address,
|
||||||
|
connection: &UpEndConnection,
|
||||||
|
) -> Result<super::ExtractorStatus> {
|
||||||
let is_audio = connection.retrieve_object(address)?.iter().any(|e| {
|
let is_audio = connection.retrieve_object(address)?.iter().any(|e| {
|
||||||
if e.attribute == FILE_MIME_KEY {
|
if e.attribute == FILE_MIME_KEY {
|
||||||
if let EntryValue::String(mime) = &e.value {
|
if let EntryValue::String(mime) = &e.value {
|
||||||
|
@ -174,13 +178,23 @@ impl Extractor for ID3Extractor {
|
||||||
});
|
});
|
||||||
|
|
||||||
if !is_audio {
|
if !is_audio {
|
||||||
return Ok(false);
|
return Ok(ExtractorStatus::Unavailable);
|
||||||
}
|
}
|
||||||
|
|
||||||
let is_extracted = !connection
|
let is_extracted = !connection
|
||||||
.query(format!("(matches @{} (contains \"ID3\") ?)", address).parse()?)?
|
.query(format!("(matches @{} (contains \"ID3\") ?)", address).parse()?)?
|
||||||
.is_empty();
|
.is_empty();
|
||||||
|
|
||||||
Ok(!is_extracted)
|
Ok(ExtractorStatus::from_extracted(is_extracted))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl UserDescribable for ID3Extractor {
|
||||||
|
fn name(&self) -> &'static str {
|
||||||
|
"ID3 Extractor"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn description(&self) -> Option<&'static str> {
|
||||||
|
Some("Extracts ID3 tags from audio files (MP3, FLAC)")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
use std::sync::Arc;
|
use super::{Extractor, ExtractorGetResult, ExtractorStatus};
|
||||||
|
use crate::common::UserDescribable;
|
||||||
use super::{Extractor, ExtractorGetResult};
|
|
||||||
use anyhow::{anyhow, Result};
|
use anyhow::{anyhow, Result};
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
|
use std::sync::Arc;
|
||||||
use upend_base::entry::Attribute;
|
use upend_base::entry::Attribute;
|
||||||
use upend_base::{
|
use upend_base::{
|
||||||
addressing::Address,
|
addressing::Address,
|
||||||
|
@ -146,7 +146,11 @@ impl Extractor for ExifExtractor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_needed(&self, address: &Address, connection: &UpEndConnection) -> Result<bool> {
|
fn status_for(
|
||||||
|
&self,
|
||||||
|
address: &Address,
|
||||||
|
connection: &UpEndConnection,
|
||||||
|
) -> Result<ExtractorStatus> {
|
||||||
let is_exif = connection.retrieve_object(address)?.iter().any(|e| {
|
let is_exif = connection.retrieve_object(address)?.iter().any(|e| {
|
||||||
if e.attribute == FILE_MIME_KEY {
|
if e.attribute == FILE_MIME_KEY {
|
||||||
if let EntryValue::String(mime) = &e.value {
|
if let EntryValue::String(mime) = &e.value {
|
||||||
|
@ -157,17 +161,23 @@ impl Extractor for ExifExtractor {
|
||||||
});
|
});
|
||||||
|
|
||||||
if !is_exif {
|
if !is_exif {
|
||||||
return Ok(false);
|
return Ok(ExtractorStatus::Unavailable);
|
||||||
}
|
}
|
||||||
|
|
||||||
let is_extracted = !connection
|
let is_extracted = !connection
|
||||||
.query(format!("(matches @{} (contains \"EXIF\") ?)", address).parse()?)?
|
.query(format!("(matches @{} (contains \"EXIF\") ?)", address).parse()?)?
|
||||||
.is_empty();
|
.is_empty();
|
||||||
|
|
||||||
if is_extracted {
|
Ok(ExtractorStatus::from_extracted(is_extracted))
|
||||||
return Ok(false);
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(true)
|
impl UserDescribable for ExifExtractor {
|
||||||
|
fn name(&self) -> &'static str {
|
||||||
|
"EXIF Metadata Extractor"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn description(&self) -> Option<&'static str> {
|
||||||
|
Some("Extracts EXIF metadata from image files.")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
25
cli/src/extractors/external/monolith.rs
vendored
25
cli/src/extractors/external/monolith.rs
vendored
|
@ -1,5 +1,6 @@
|
||||||
|
use crate::common::UserDescribable;
|
||||||
use crate::extractors::external::{process, ExternalCommand, ExternalCommandError};
|
use crate::extractors::external::{process, ExternalCommand, ExternalCommandError};
|
||||||
use crate::extractors::{Extractor, ExtractorGetResult};
|
use crate::extractors::{Extractor, ExtractorGetResult, ExtractorStatus};
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
|
@ -73,20 +74,24 @@ impl Extractor for MonolithExtractor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_needed(&self, address: &Address, connection: &UpEndConnection) -> Result<bool> {
|
fn status_for(
|
||||||
|
&self,
|
||||||
|
address: &Address,
|
||||||
|
connection: &UpEndConnection,
|
||||||
|
) -> Result<crate::extractors::ExtractorStatus> {
|
||||||
if !matches!(address, Address::Url(_)) {
|
if !matches!(address, Address::Url(_)) {
|
||||||
return Ok(false);
|
return Ok(ExtractorStatus::Unavailable);
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.get_version().is_err() {
|
if self.get_version().is_err() {
|
||||||
return Ok(false);
|
return Ok(ExtractorStatus::Unavailable);
|
||||||
}
|
}
|
||||||
|
|
||||||
let is_extracted = !connection
|
let is_extracted = !connection
|
||||||
.query(format!("(matches @{} (contains \"WM_ARCHIVED\") ?)", address).parse()?)?
|
.query(format!("(matches @{} (contains \"WM_ARCHIVED\") ?)", address).parse()?)?
|
||||||
.is_empty();
|
.is_empty();
|
||||||
|
|
||||||
Ok(!is_extracted)
|
Ok(ExtractorStatus::from_extracted(is_extracted))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -97,6 +102,16 @@ impl ExternalCommand for MonolithExtractor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// User-facing labels for the `monolith`-backed web archiver.
impl UserDescribable for MonolithExtractor {
    /// Display name of this extractor.
    fn name(&self) -> &'static str {
        const NAME: &str = "Web Archiver (monolith)";
        NAME
    }

    /// One-sentence explanation of what this extractor does.
    fn description(&self) -> Option<&'static str> {
        const DESCRIPTION: &str = "Archives webpages using the `monolith` command-line tool, which saves a webpage as a single HTML file with all resources embedded.";
        Some(DESCRIPTION)
    }
}
|
||||||
|
|
||||||
// #[cfg(test)]
|
// #[cfg(test)]
|
||||||
// mod tests {
|
// mod tests {
|
||||||
// use super::*;
|
// use super::*;
|
||||||
|
|
25
cli/src/extractors/external/ytdlp.rs
vendored
25
cli/src/extractors/external/ytdlp.rs
vendored
|
@ -1,5 +1,6 @@
|
||||||
|
use crate::common::UserDescribable;
|
||||||
use crate::extractors::external::{process, ExternalCommand, ExternalCommandError};
|
use crate::extractors::external::{process, ExternalCommand, ExternalCommandError};
|
||||||
use crate::extractors::{Extractor, ExtractorGetResult};
|
use crate::extractors::{Extractor, ExtractorGetResult, ExtractorStatus};
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use std::io::{BufReader, Read};
|
use std::io::{BufReader, Read};
|
||||||
|
@ -208,20 +209,24 @@ impl Extractor for YtDlpExtractor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_needed(&self, address: &Address, connection: &UpEndConnection) -> Result<bool> {
|
fn status_for(
|
||||||
|
&self,
|
||||||
|
address: &Address,
|
||||||
|
connection: &UpEndConnection,
|
||||||
|
) -> Result<crate::extractors::ExtractorStatus> {
|
||||||
if !matches!(address, Address::Url(_)) {
|
if !matches!(address, Address::Url(_)) {
|
||||||
return Ok(false);
|
return Ok(ExtractorStatus::Unavailable);
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.get_version().is_err() {
|
if self.get_version().is_err() {
|
||||||
return Ok(false);
|
return Ok(ExtractorStatus::Unavailable);
|
||||||
}
|
}
|
||||||
|
|
||||||
let is_extracted = !connection
|
let is_extracted = !connection
|
||||||
.query(format!("(matches @{} (contains \"YTDLD\") ?)", address).parse()?)?
|
.query(format!("(matches @{} (contains \"YTDLD\") ?)", address).parse()?)?
|
||||||
.is_empty();
|
.is_empty();
|
||||||
|
|
||||||
Ok(!is_extracted)
|
Ok(ExtractorStatus::from_extracted(is_extracted))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -232,6 +237,16 @@ impl ExternalCommand for YtDlpExtractor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// User-facing labels for the yt-dlp-backed downloader.
impl UserDescribable for YtDlpExtractor {
    /// Display name of this extractor.
    fn name(&self) -> &'static str {
        const NAME: &str = "yt-dlp downloader";
        NAME
    }

    /// One-sentence explanation of what this extractor does.
    fn description(&self) -> Option<&'static str> {
        const DESCRIPTION: &str = "Downloads media from a URL using yt-dlp";
        Some(DESCRIPTION)
    }
}
|
||||||
|
|
||||||
const KNOWN_METADATA: [(&str, &str); 8] = [
|
const KNOWN_METADATA: [(&str, &str); 8] = [
|
||||||
("title", "Title"),
|
("title", "Title"),
|
||||||
("fulltitle", "Full Title"),
|
("fulltitle", "Full Title"),
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
use std::{process::Command, sync::Arc};
|
use super::{Extractor, ExtractorGetResult, ExtractorStatus};
|
||||||
|
use crate::common::UserDescribable;
|
||||||
use super::{Extractor, ExtractorGetResult};
|
|
||||||
use anyhow::{anyhow, Result};
|
use anyhow::{anyhow, Result};
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
|
use std::{process::Command, sync::Arc};
|
||||||
use tracing::{debug, trace};
|
use tracing::{debug, trace};
|
||||||
use upend_base::{
|
use upend_base::{
|
||||||
addressing::Address,
|
addressing::Address,
|
||||||
|
@ -127,7 +127,15 @@ impl Extractor for MediaExtractor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_needed(&self, address: &Address, connection: &UpEndConnection) -> Result<bool> {
|
fn status_for(
|
||||||
|
&self,
|
||||||
|
address: &Address,
|
||||||
|
connection: &UpEndConnection,
|
||||||
|
) -> Result<super::ExtractorStatus> {
|
||||||
|
if !matches!(address, Address::Hash(_)) {
|
||||||
|
return Ok(ExtractorStatus::Unavailable);
|
||||||
|
}
|
||||||
|
|
||||||
let is_media = connection.retrieve_object(address)?.iter().any(|e| {
|
let is_media = connection.retrieve_object(address)?.iter().any(|e| {
|
||||||
if e.attribute == FILE_MIME_KEY {
|
if e.attribute == FILE_MIME_KEY {
|
||||||
if let EntryValue::String(mime) = &e.value {
|
if let EntryValue::String(mime) = &e.value {
|
||||||
|
@ -146,17 +154,23 @@ impl Extractor for MediaExtractor {
|
||||||
});
|
});
|
||||||
|
|
||||||
if !is_media {
|
if !is_media {
|
||||||
return Ok(false);
|
return Ok(ExtractorStatus::Improbable);
|
||||||
}
|
}
|
||||||
|
|
||||||
let is_extracted = !connection
|
let is_extracted = !connection
|
||||||
.query(format!("(matches @{} (contains \"{}\") ?)", address, DURATION_KEY).parse()?)?
|
.query(format!("(matches @{} (contains \"{}\") ?)", address, DURATION_KEY).parse()?)?
|
||||||
.is_empty();
|
.is_empty();
|
||||||
|
|
||||||
if is_extracted {
|
Ok(ExtractorStatus::from_extracted(is_extracted))
|
||||||
return Ok(false);
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(true)
|
impl UserDescribable for MediaExtractor {
|
||||||
|
fn name(&self) -> &'static str {
|
||||||
|
"Generic Media Extractor"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn description(&self) -> Option<&'static str> {
|
||||||
|
Some("Extracts rudimentary metadata (duration) from media files.")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
|
use crate::common::{UserDescribable, UserDescription};
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
|
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
|
||||||
|
use serde::Serialize;
|
||||||
use std::{
|
use std::{
|
||||||
borrow::Borrow,
|
borrow::Borrow,
|
||||||
sync::{Arc, Mutex, RwLock},
|
sync::{Arc, Mutex, RwLock},
|
||||||
|
@ -25,7 +27,7 @@ pub mod media;
|
||||||
#[cfg(feature = "extractors-external")]
|
#[cfg(feature = "extractors-external")]
|
||||||
pub mod external;
|
pub mod external;
|
||||||
|
|
||||||
pub trait Extractor: Send + Sync {
|
pub trait Extractor: Send + Sync + UserDescribable {
|
||||||
fn get(
|
fn get(
|
||||||
&self,
|
&self,
|
||||||
address: &Address,
|
address: &Address,
|
||||||
|
@ -35,13 +37,11 @@ pub trait Extractor: Send + Sync {
|
||||||
context: OperationContext,
|
context: OperationContext,
|
||||||
) -> Result<ExtractorGetResult>;
|
) -> Result<ExtractorGetResult>;
|
||||||
|
|
||||||
fn is_applicable(&self, _address: &Address) -> bool {
|
fn status_for(
|
||||||
true
|
&self,
|
||||||
}
|
_address: &Address,
|
||||||
|
_connection: &UpEndConnection,
|
||||||
fn is_needed(&self, _address: &Address, _connection: &UpEndConnection) -> Result<bool> {
|
) -> Result<ExtractorStatus>;
|
||||||
Ok(true)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn insert_info(
|
fn insert_info(
|
||||||
&self,
|
&self,
|
||||||
|
@ -51,7 +51,7 @@ pub trait Extractor: Send + Sync {
|
||||||
job_container: JobContainer,
|
job_container: JobContainer,
|
||||||
context: OperationContext,
|
context: OperationContext,
|
||||||
) -> Result<ExtractorResult> {
|
) -> Result<ExtractorResult> {
|
||||||
if self.is_needed(address, connection)? {
|
if self.status_for(address, connection)?.is_applicable() {
|
||||||
let ExtractorGetResult {
|
let ExtractorGetResult {
|
||||||
entries,
|
entries,
|
||||||
stored: inserted,
|
stored: inserted,
|
||||||
|
@ -97,6 +97,32 @@ impl From<Vec<Entry>> for ExtractorGetResult {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Outcome of asking an extractor whether it can or should run for a given
/// address (the return value of `Extractor::status_for`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
|
||||||
|
pub enum ExtractorStatus {
|
||||||
|
/// The extractor cannot be used for this address. The visible extractors
/// return it for a wrong address kind, a non-matching MIME type or a failing
/// external-tool version check; `statuses_for` also falls back to it when
/// `status_for` returns an error.
Unavailable,
|
||||||
|
/// Neither needed nor applicable. In the visible code only the media
/// extractor returns it, when its MIME check does not match.
Improbable,
|
||||||
|
/// The extractor applies and its output is not present yet; this is what
/// `from_extracted(false)` yields.
Needed,
|
||||||
|
/// The extractor applies but its output already exists; this is what
/// `from_extracted(true)` yields.
Applicable,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ExtractorStatus {
|
||||||
|
pub fn is_needed(&self) -> bool {
|
||||||
|
matches!(self, ExtractorStatus::Needed)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_applicable(&self) -> bool {
|
||||||
|
matches!(self, ExtractorStatus::Applicable) || self.is_needed()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_extracted(is_extracted: bool) -> Self {
|
||||||
|
if is_extracted {
|
||||||
|
ExtractorStatus::Applicable
|
||||||
|
} else {
|
||||||
|
ExtractorStatus::Needed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct ExtractorManager {
|
pub struct ExtractorManager {
|
||||||
extractors: Vec<(&'static str, Box<dyn Extractor>)>,
|
extractors: Vec<(&'static str, Box<dyn Extractor>)>,
|
||||||
}
|
}
|
||||||
|
@ -148,7 +174,12 @@ impl ExtractorManager {
|
||||||
trace!("Extracting metadata for {address:?}");
|
trace!("Extracting metadata for {address:?}");
|
||||||
|
|
||||||
for (name, extractor) in &self.extractors {
|
for (name, extractor) in &self.extractors {
|
||||||
if extractor.is_applicable(address) {
|
let status = extractor.status_for(address, connection);
|
||||||
|
if status.is_err() {
|
||||||
|
debug!("{name}: {status:?}");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if status.unwrap().is_applicable() {
|
||||||
trace!("Extracting with {name}");
|
trace!("Extracting with {name}");
|
||||||
let extract_result = extractor.insert_info(
|
let extract_result = extractor.insert_info(
|
||||||
address,
|
address,
|
||||||
|
@ -238,4 +269,22 @@ impl ExtractorManager {
|
||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn statuses_for(
|
||||||
|
&self,
|
||||||
|
address: &Address,
|
||||||
|
connection: &UpEndConnection,
|
||||||
|
) -> Vec<(&'static str, UserDescription, ExtractorStatus)> {
|
||||||
|
self.extractors
|
||||||
|
.iter()
|
||||||
|
.map(|(name, extractor)| {
|
||||||
|
let status = extractor.status_for(address, connection);
|
||||||
|
(
|
||||||
|
*name,
|
||||||
|
extractor.full_description(),
|
||||||
|
status.unwrap_or(ExtractorStatus::Unavailable),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
use std::sync::Arc;
|
use super::ExtractorStatus;
|
||||||
|
|
||||||
use super::{Extractor, ExtractorGetResult};
|
use super::{Extractor, ExtractorGetResult};
|
||||||
|
use crate::common::UserDescribable;
|
||||||
use crate::common::REQWEST_CLIENT;
|
use crate::common::REQWEST_CLIENT;
|
||||||
use anyhow::anyhow;
|
use anyhow::anyhow;
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
use std::sync::Arc;
|
||||||
use upend_base::addressing::Address;
|
use upend_base::addressing::Address;
|
||||||
use upend_base::constants::ATTR_LABEL;
|
use upend_base::constants::ATTR_LABEL;
|
||||||
use upend_base::constants::ATTR_OF;
|
use upend_base::constants::ATTR_OF;
|
||||||
|
@ -146,13 +146,33 @@ impl Extractor for WebExtractor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_needed(&self, address: &Address, connection: &UpEndConnection) -> Result<bool> {
|
fn status_for(
|
||||||
Ok(connection
|
&self,
|
||||||
|
address: &Address,
|
||||||
|
connection: &UpEndConnection,
|
||||||
|
) -> Result<ExtractorStatus> {
|
||||||
|
if !matches!(address, Address::Url(_)) {
|
||||||
|
return Ok(ExtractorStatus::Unavailable);
|
||||||
|
}
|
||||||
|
|
||||||
|
let is_extracted = !connection
|
||||||
.query(
|
.query(
|
||||||
format!(r#"(matches @{address} (in "HTML_TITLE" "HTML_DESCRIPTION") ?)"#)
|
format!(r#"(matches @{address} (in "HTML_TITLE" "HTML_DESCRIPTION") ?)"#)
|
||||||
.parse()?,
|
.parse()?,
|
||||||
)?
|
)?
|
||||||
.is_empty())
|
.is_empty();
|
||||||
|
|
||||||
|
Ok(ExtractorStatus::from_extracted(is_extracted))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl UserDescribable for WebExtractor {
|
||||||
|
fn name(&self) -> &'static str {
|
||||||
|
"Web Metadata Extractor"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn description(&self) -> Option<&'static str> {
|
||||||
|
Some("Extracts basic metadata from web pages using OpenGraph and HTML tags.")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -188,7 +208,7 @@ mod test {
|
||||||
let job_container = JobContainer::new();
|
let job_container = JobContainer::new();
|
||||||
|
|
||||||
let address = Address::Url(Url::parse("https://upend.dev").unwrap());
|
let address = Address::Url(Url::parse("https://upend.dev").unwrap());
|
||||||
assert!(WebExtractor.is_needed(&address, &connection)?);
|
assert!(WebExtractor.status_for(&address, &connection)?.is_needed());
|
||||||
|
|
||||||
WebExtractor.insert_info(
|
WebExtractor.insert_info(
|
||||||
&address,
|
&address,
|
||||||
|
@ -198,7 +218,7 @@ mod test {
|
||||||
OperationContext::default(),
|
OperationContext::default(),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
assert!(!WebExtractor.is_needed(&address, &connection)?);
|
assert!(!WebExtractor.status_for(&address, &connection)?.is_needed());
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -870,6 +870,34 @@ pub async fn get_address(
|
||||||
Ok(response.json(format!("{}", address)))
|
Ok(response.json(format!("{}", address)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[get("/api/obj/{address}/extractors")]
|
||||||
|
pub async fn get_extractors(
|
||||||
|
req: HttpRequest,
|
||||||
|
state: web::Data<State>,
|
||||||
|
address: web::Path<Address>,
|
||||||
|
) -> Result<HttpResponse, Error> {
|
||||||
|
check_auth(&req, &state)?;
|
||||||
|
|
||||||
|
let connection = state.upend.connection().map_err(ErrorInternalServerError)?;
|
||||||
|
let statuses = state
|
||||||
|
.extractor_manager
|
||||||
|
.statuses_for(&address.into_inner(), &connection);
|
||||||
|
|
||||||
|
Ok(HttpResponse::Ok().json(
|
||||||
|
statuses
|
||||||
|
.into_iter()
|
||||||
|
.map(|(name, info, status)| {
|
||||||
|
json!({
|
||||||
|
"id": name,
|
||||||
|
"name": info.name,
|
||||||
|
"description": info.description,
|
||||||
|
"status": status,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
#[get("/api/all/attributes")]
|
#[get("/api/all/attributes")]
|
||||||
pub async fn get_all_attributes(
|
pub async fn get_all_attributes(
|
||||||
req: HttpRequest,
|
req: HttpRequest,
|
||||||
|
|
|
@ -55,6 +55,7 @@ where
|
||||||
.service(routes::get_query)
|
.service(routes::get_query)
|
||||||
.service(routes::get_object)
|
.service(routes::get_object)
|
||||||
.service(routes::put_object)
|
.service(routes::put_object)
|
||||||
|
.service(routes::get_extractors)
|
||||||
.service(routes::put_blob)
|
.service(routes::put_blob)
|
||||||
.service(routes::put_object_attribute)
|
.service(routes::put_object_attribute)
|
||||||
.service(routes::delete_object)
|
.service(routes::delete_object)
|
||||||
|
|
Loading…
Reference in a new issue