From a3353cca65234c2ca76ee8bd98add10fda954a0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Ml=C3=A1dek?=
Date: Mon, 1 Jul 2024 22:04:11 +0200
Subject: [PATCH] feat(backend): insert yt-dlp metadata on successful download

---
Notes (ignored by git-am; not part of the commit message):
- yt-dlp is now invoked with --write-info-json. After a successful run the
  first non-".json" file in the temp dir is stored as before, and the first
  ".json" file is parsed; for each listed key that is present, one
  `YTDL_META_<key>` entry is pushed before the existing `YTDLD` entry.
- A missing or unparsable info JSON now fails the whole extraction via `?`.
- NOTE(review): the generic arguments on `collect::<...>` and
  `serde_json::from_str::<...>` were reconstructed from how the values are
  used; confirm against the original commit.
- NOTE(review): if `json_data` is a `serde_json::Value`, `value.to_string()`
  yields JSON text, so string fields would keep their surrounding quotes;
  confirm that is intended.
- NOTE(review): the author e-mail address is missing from the From: header
  above; restore it before using `git am`.

 cli/src/extractors/external/ytdlp.rs | 50 +++++++++++++++++++++++++---
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/cli/src/extractors/external/ytdlp.rs b/cli/src/extractors/external/ytdlp.rs
index 7ad12b0..4fa6775 100644
--- a/cli/src/extractors/external/ytdlp.rs
+++ b/cli/src/extractors/external/ytdlp.rs
@@ -39,6 +39,7 @@ impl Extractor for YtDlpExtractor {
                     "--progress",
                     "--no-call-home",
                     "--no-playlist",
+                    "--write-info-json",
                     url.as_str(),
                 ])
                 .current_dir(temp_dir.path())
@@ -102,14 +103,52 @@ impl Extractor for YtDlpExtractor {
                 ));
             }
 
+            let mut result = vec![];
+
             let files = std::fs::read_dir(temp_dir.path())?.collect::<Result<Vec<_>, _>>()?;
+
             let destination = files
-                .first()
+                .iter()
+                .find(|f| f.path().extension().map(|e| e != "json").unwrap_or(false))
                 .ok_or_else(|| {
                     anyhow::anyhow!("yt-dlp didn't produce any files in {:?}", temp_dir.path())
                 })?
                 .path();
 
+            let json_path = files
+                .iter()
+                .find(|f| f.path().extension().map(|e| e == "json").unwrap_or(false))
+                .ok_or_else(|| {
+                    anyhow::anyhow!(
+                        "yt-dlp didn't produce any json files in {:?}",
+                        temp_dir.path()
+                    )
+                })?
+                .path();
+            let json_text = std::fs::read_to_string(json_path)?;
+            let json_data = serde_json::from_str::<serde_json::Value>(&json_text)?;
+            for key in [
+                "title",
+                "fulltitle",
+                "description",
+                "channel",
+                "uploader",
+                "channel_url",
+                "upload_date",
+                "timestamp",
+            ] {
+                if let Some(value) = json_data.get(key) {
+                    result.push(Entry {
+                        entity: address.clone(),
+                        attribute: format!("YTDL_META_{}", key).parse().unwrap(),
+                        value: EntryValue::guess_from(value.to_string()),
+                        provenance: context.provenance.clone() + "EXTRACTOR yt-dlp",
+                        user: context.user.clone(),
+                        timestamp: chrono::Utc::now().naive_utc(),
+                    });
+                }
+            }
+
             let stored = store.store(
                 connection,
                 destination.clone().into(),
@@ -119,16 +158,19 @@ impl Extractor for YtDlpExtractor {
                 None,
                 context.clone(),
             )?;
-            job_handle.update_progress(100.0)?;
 
-            Ok(vec![Entry {
+            result.push(Entry {
                 entity: address.clone(),
                 attribute: "YTDLD".parse().unwrap(),
                 value: Address::Hash(stored).into(),
                 provenance: context.provenance.clone() + "EXTRACTOR yt-dlp",
                 user: context.user.clone(),
                 timestamp: chrono::Utc::now().naive_utc(),
-            }])
+            });
+
+            job_handle.update_progress(100.0)?;
+
+            Ok(result)
         } else {
             Ok(vec![])
         }