/**
 * scavengetube: periodically searches YouTube for videos whose titles look
 * like raw camera file names (e.g. "DSC_0042", "IMG 1234") and downloads new
 * hits with yt-dlp, remembering downloaded video IDs in a JSON state file.
 */
import parseDuration from "parse-duration";
import humanizeDuration from "humanize-duration";
import { createLogger, format, transports } from "winston";
import {
  InvalidArgumentError,
  Command,
  Option,
} from "@commander-js/extra-typings";
import shell from "shelljs";
import { youtube } from "@googleapis/youtube";
import fs from "fs";
import path from "path";
import { spawnSync } from "child_process";

/** Persistent state written to the state file between search cycles. */
interface State {
  /** IDs of videos that were downloaded successfully. */
  downloaded: string[];
}

/**
 * Validates the API key option value.
 *
 * @throws InvalidArgumentError when the resulting key is empty.
 */
function parseApiKey(value: string): string {
  const apiKey = value || process.env.SCAVENGETUBE_YOUTUBE_KEY;
  if (!apiKey) {
    throw new InvalidArgumentError("API key is required.");
  }
  return apiKey;
}

/**
 * Parses the per-cycle download limit as a positive base-10 integer.
 *
 * @throws InvalidArgumentError when the value is not an integer >= 1.
 */
function parseLimit(value: string): number {
  const result = Number.parseInt(value, 10);
  if (Number.isNaN(result) || result < 1) {
    throw new InvalidArgumentError("Invalid limit.");
  }
  return result;
}

/**
 * Parses a human-readable duration (e.g. "3h", "90m") into milliseconds.
 *
 * @throws InvalidArgumentError when the value cannot be parsed or is not
 *   strictly positive.
 */
function parsePeriod(value: string): number {
  const result = parseDuration(value);
  if (!result || result <= 0) {
    throw new InvalidArgumentError("Invalid duration.");
  }
  return result;
}

/**
 * Validates the output directory. A directory that does not exist yet is
 * accepted because it is created on startup; an existing path must be a
 * writable directory.
 *
 * @throws InvalidArgumentError when the path exists but cannot be used.
 */
function parseOutputDir(directory: string): string {
  if (!fs.existsSync(directory)) {
    return directory;
  }
  if (!fs.statSync(directory).isDirectory()) {
    throw new InvalidArgumentError(`${directory} is not a directory.`);
  }
  try {
    fs.accessSync(directory, fs.constants.W_OK);
  } catch {
    throw new InvalidArgumentError(`Directory ${directory} is not writable.`);
  }
  return directory;
}

/**
 * Validates the state file path. An existing file must be writable; for a
 * file that does not exist yet the parent directory must be writable so the
 * file can be created at the end of the first cycle.
 *
 * @throws InvalidArgumentError when the file could not be written.
 */
function parseStateFile(file: string): string {
  const target = fs.existsSync(file) ? file : path.dirname(path.resolve(file));
  try {
    fs.accessSync(target, fs.constants.W_OK);
  } catch {
    throw new InvalidArgumentError(`File ${file} is not writable.`);
  }
  return file;
}

/**
 * Reads and validates the state file.
 *
 * @throws Error when the file does not contain `{ "downloaded": string[] }`.
 */
function loadState(file: string): State {
  const parsed: unknown = JSON.parse(fs.readFileSync(file, "utf-8"));
  const downloaded: unknown =
    typeof parsed === "object" && parsed !== null && "downloaded" in parsed
      ? parsed.downloaded
      : undefined;
  if (
    !Array.isArray(downloaded) ||
    !downloaded.every((id): id is string => typeof id === "string")
  ) {
    throw new Error(
      `State file ${file} is malformed: expected { "downloaded": string[] }.`
    );
  }
  return { downloaded };
}

/** Returns a uniformly random element of a non-empty array. */
function pick<T>(array: readonly T[]): T {
  if (array.length === 0) {
    throw new RangeError("Cannot pick from an empty array.");
  }
  // The index is always in range because the array is non-empty.
  return array[Math.floor(Math.random() * array.length)] as T;
}

/**
 * Builds a random search query resembling a default camera file name:
 * a prefix, an optional separator and a zero-padded number 0000-9999.
 */
function getSearch(): string {
  const prefix = pick(["DSC", "MOV", "IMG", "100", "MVI"]);
  const separator = pick([" ", "_", ""]);
  const numbers = String(Math.floor(Math.random() * 10000)).padStart(4, "0");
  return `${prefix}${separator}${numbers}`;
}

const program = new Command()
  .name("scavengetube")
  .addOption(
    // .env() lets commander fall back to the environment variable when the
    // flag is absent; a parser callback alone is never invoked in that case.
    new Option(
      "--api-key <key>",
      "YouTube API key (or specify via SCAVENGETUBE_YOUTUBE_KEY environment variable)"
    )
      .env("SCAVENGETUBE_YOUTUBE_KEY")
      .makeOptionMandatory()
      .argParser(parseApiKey)
  )
  .requiredOption("--limit <n>", "Download limit (default: 5)", parseLimit, 5)
  .requiredOption(
    "--period <duration>",
    "Delay between searches (default: 3h)",
    parsePeriod,
    3 * 60 * 60 * 1000
  )
  .requiredOption("--output <dir>", "Output directory", parseOutputDir, "out")
  .requiredOption(
    "--state-file <file>",
    "State file",
    parseStateFile,
    "state.json"
  )
  .option("--log <level>", 'Log level, default "info"', "info");

program.parse();
const options = program.opts();

const logger = createLogger({
  format: format.combine(
    format.timestamp(),
    format.errors({ stack: true }),
    format.splat(),
    format.colorize(),
    format.padLevels(),
    format.printf(({ level, message, timestamp }) => {
      return `${timestamp} | ${level}: ${message}`;
    })
  ),
  transports: [new transports.Console({ level: options.log })],
});

logger.info("Starting scavengetube...");

const ytdlPath = ["yt-dlp", "yt-dl"]
  .map((bin) => shell.which(bin))
  .find(Boolean);
if (!ytdlPath) {
  throw new Error("Couldn't find yt-dlp or yt-dl binary in $PATH.");
}
const ytdlBinary = ytdlPath.toString();
logger.debug("Using %s as yt-dlp binary.", ytdlBinary);

const ytApi = youtube({
  version: "v3",
  auth: options.apiKey,
});

let state: State = { downloaded: [] };
if (fs.existsSync(options.stateFile)) {
  logger.info("Loading state from %s", options.stateFile);
  state = loadState(options.stateFile);
  logger.debug("Loaded state: %j", state);
} else {
  logger.info("No state file found, starting from scratch.");
}

if (!fs.existsSync(options.output)) {
  logger.info("Creating output directory %s", options.output);
  // recursive so that nested output paths such as "data/out" work.
  fs.mkdirSync(options.output, { recursive: true });
}

while (true) {
  const cycleStart = Date.now();
  const search = getSearch();
  logger.info("Searching for %s", search);

  try {
    const response = await ytApi.search.list({
      part: ["snippet"],
      q: search,
      maxResults: 50,
      safeSearch: "none",
      order: "date",
    });

    const videos = (response.data.items || []).map((item) => ({
      id: item.id?.videoId,
      title: item.snippet?.title,
      channel: item.snippet?.channelTitle,
      description: item.snippet?.description,
      publishedAt: item.snippet?.publishedAt,
    }));

    if (videos.length > 0) {
      logger.info("Found %d videos...", videos.length);
      logger.debug("Videos = `%j`", videos);
    } else {
      logger.warn("Got no videos!");
    }

    let downloaded = 0;
    for (const video of videos) {
      if (!video.id) {
        logger.warn("Skipping %s, no video ID?!", video);
        continue;
      }
      if (state.downloaded.includes(video.id)) {
        logger.debug("Skipping %s, already downloaded", video.id);
        continue;
      }

      logger.info(
        "Downloading %s (%s) from %s by %s",
        video.id,
        video.title,
        video.publishedAt,
        video.channel
      );

      // Arguments are passed as an array (no shell), so paths containing
      // spaces, quotes or shell metacharacters are handled safely.
      const args = [
        "--no-playlist",
        "--no-warnings",
        "--write-info-json",
        "--output",
        `${options.output}/${Date.now()}_%(title)s_%(id)s.%(ext)s`,
        `https://www.youtube.com/watch?v=${video.id}`,
      ];
      logger.debug("Executing `%s`", [ytdlBinary, ...args].join(" "));

      // Only stderr is needed; discarding stdout avoids overflowing the
      // capture buffer with progress output.
      const result = spawnSync(ytdlBinary, args, {
        encoding: "utf-8",
        stdio: ["ignore", "ignore", "pipe"],
        maxBuffer: 20 * 1024 * 1024,
      });
      if (result.error || result.status !== 0) {
        logger.error(
          "Failed to download %s: %s",
          video.id,
          result.error?.message ?? result.stderr
        );
        continue;
      }

      state.downloaded.push(video.id);
      downloaded++;
      logger.info("Downloaded %s", video.id);

      if (downloaded >= options.limit) {
        logger.info("Download limit reached, stopping search");
        break;
      }
    }
  } catch (err) {
    // Best effort: a failed cycle is logged and retried after the delay.
    logger.error(err);
  }

  fs.writeFileSync(options.stateFile, JSON.stringify(state));
  logger.debug("Saved state to %s", options.stateFile);

  // The period is measured from the start of the cycle; clamp to zero when
  // the cycle itself took longer than the period.
  const delay = Math.max(0, cycleStart + options.period - Date.now());
  logger.info("Waiting for %s", humanizeDuration(delay));
  await new Promise((resolve) => setTimeout(resolve, delay));
}