From 723008e4510ac4f6293eb87e212df87250e100cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Ml=C3=A1dek?= Date: Tue, 14 May 2019 23:33:48 +0200 Subject: [PATCH] download into subdirectories when multiple hashtags present (fix #2) --- delojza.py | 96 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 49 insertions(+), 47 deletions(-) diff --git a/delojza.py b/delojza.py index 33354a7..29ec8d9 100755 --- a/delojza.py +++ b/delojza.py @@ -42,7 +42,7 @@ class DelojzaBot: def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', acoustid_key=None, tumblr_keys=None, markov=None): self.logger = logging.getLogger("delojza") - self.out_dir = out_dir + self.out_dir = os.path.abspath(out_dir) self.logger.debug('OUT_DIR: ' + out_dir) self.tmp_dir = tmp_dir self.logger.debug('TMP_DIR: ' + tmp_dir) @@ -72,7 +72,7 @@ class DelojzaBot: self.client = pytumblr.TumblrRestClient(*tumblr_keys) self.last_downloaded = [] - self.last_hashtag = None + self.last_hashtags = None @staticmethod def ytdl_can(url): @@ -153,7 +153,7 @@ class DelojzaBot: id3.add(mutagen.id3.TPE1(encoding=3, text=artist)) id3.save() - def download_ytdl(self, urls, subdir, date, message, audio=False, filename=None): + def download_ytdl(self, urls, out_path, date, message, audio=False, filename=None): ydl_opts = { 'noplaylist': True, 'restrictfilenames': True, @@ -169,23 +169,21 @@ class DelojzaBot: filenames = [] with youtube_dl.YoutubeDL(ydl_opts) as ydl: ydl.download(urls) - out_dir = os.path.join(self.out_dir, subdir) for info in [ydl.extract_info(url, download=False) for url in urls]: filename = ydl.prepare_filename(info) globbeds = glob(os.path.splitext(filename)[0] + '.*') for globbed in globbeds: if globbed.endswith("mp3"): self.tag_file(globbed, message, info=info) - self.logger.info("Moving %s to %s..." % (globbed, out_dir)) - dest = shutil.move(globbed, out_dir) + self.logger.info("Moving %s to %s..." % (globbed, out_path)) + dest = shutil.move(globbed, out_path) filenames.append(dest) return filenames - def download_raw(self, urls, subdir, date, message, audio=False, filename=None): + def download_raw(self, urls, out_path, date, message, audio=False, filename=None): filenames = [] for url in urls: - local_filename = os.path.join(self.out_dir, subdir, - "%s__%s" % (datestr(date), filename or url.split('/')[-1])) + local_filename = os.path.join(out_path, "%s__%s" % (datestr(date), filename or url.split('/')[-1])) final_filename = local_filename is_mp3 = local_filename.endswith("mp3") @@ -212,61 +210,61 @@ class DelojzaBot: return filenames @staticmethod - def extract_first_hashtag(message): + def extract_hashtags(message): hashtags = list(map(message.parse_entity, list(filter(lambda e: e.type == 'hashtag', message.entities)))) hashtags += list(map(message.parse_caption_entity, list(filter(lambda e: e.type == 'hashtag', message.caption_entities)))) if len(hashtags) > 0: - hashtag = hashtags[0][1:].upper() - if "PRAS" in hashtag: - hashtag = "PRAS" - return hashtag + hashtags = [hashtag[1:].upper() for hashtag in hashtags] + for i, hashtag in enumerate(hashtags): + if "PRAS" in hashtag: + hashtags[i] = "PRAS" + return hashtags - def get_hashtag(self, message): - hashtag = self.extract_first_hashtag(message) - if hashtag is None: - if self.last_hashtag is not None and self.last_hashtag[0] == message.from_user: - hashtag = self.last_hashtag[1] - self.last_hashtag = None - return hashtag + def get_hashtags(self, message): + hashtags = self.extract_hashtags(message) + if len(hashtags) == 0: + if self.last_hashtags is not None and self.last_hashtags[0] == message.from_user: + hashtags = self.last_hashtags[1] + self.last_hashtags = None + return hashtags def tg_handle_hashtag(self, bot, update): - hashtag = self.extract_first_hashtag(update.message) + hashtags = self.extract_hashtags(update.message) if update.message.reply_to_message: - self.handle_tg_message(update.message.reply_to_message, bot, hashtag) - self.handle_urls(update.message.reply_to_message, hashtag) + self.handle_tg_message(update.message.reply_to_message, bot, hashtags) + self.handle_urls(update.message.reply_to_message, hashtags) else: - self.last_hashtag = (update.message.from_user, hashtag) + self.last_hashtags = (update.message.from_user, hashtags) # noinspection PyBroadException - def handle(self, urls, message, hashtag, download_fn, filename=None): + def handle(self, urls, message, hashtags, download_fn, filename=None): try: - if hashtag is None: + if len(hashtags) == 0: self.logger.info("Ignoring %s due to no hashtag present..." % urls) return - self.logger.info("Downloading %s under '%s'" % (urls, hashtag)) + self.logger.info("Downloading %s under '%s'" % (urls, "/".join(hashtags))) - reply = 'Downloading' - if hashtag: - mkdir_p(os.path.join(self.out_dir, hashtag)) - reply += ' to "' + hashtag + '"' - reply += '...' + out_path = os.path.join(self.out_dir, *hashtags) + mkdir_p(out_path) - audio = any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) + reply = 'Downloading to "{}"...'.format("/".join(hashtags)) + + audio = any([any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) for hashtag in hashtags]) if audio and download_fn != self.download_raw: reply += ' (And also guessing you want to extract the audio)' message.reply_text(reply) - filenames = download_fn(urls, hashtag or '.', message.date, message, audio=audio, filename=filename) - if hashtag == 'TUMBLR' and self.client: + filenames = download_fn(urls, out_path, message.date, message, audio=audio, filename=filename) + if hashtags[0] == 'TUMBLR' and self.client: message.reply_text('(btw, queueing to tumblr)') for filename in filenames: self.client.create_photo('kunsaxan', state="queue", data=filename) - elif hashtag == 'TUMBLR_NOW' and self.client: + elif hashtags[0] == 'TUMBLR_NOW' and self.client: message.reply_text('(btw, ***FIRING TO TUMBLR RIGHT AWAY***)', parse_mode=telegram.ParseMode.MARKDOWN) for filename in filenames: @@ -279,23 +277,23 @@ class DelojzaBot: message.reply_text("Something is FUCKED: %s" % exc_value) def tg_handle_url(self, _, update): - self.handle_urls(update.message, self.get_hashtag(update.message)) + self.handle_urls(update.message, self.get_hashtags(update.message)) - def handle_urls(self, message, hashtag): + def handle_urls(self, message, hashtags): urls = list(map(lambda e: message.parse_entity(e), filter(lambda e: e.type == 'url', message.entities))) ytdl_urls = [url for url in urls if self.ytdl_can(url)] normal_urls = [url for url in urls if not self.ytdl_can(url)] if len(ytdl_urls) > 0: - self.handle(ytdl_urls, message, hashtag, self.download_ytdl) + self.handle(ytdl_urls, message, hashtags, self.download_ytdl) if len(normal_urls) > 0: image_urls = [url for url in normal_urls if "image" in requests.head(url).headers.get("Content-Type", "")] if len(image_urls) > 0: - self.handle(image_urls, message, hashtag, self.download_raw) + self.handle(image_urls, message, hashtags, self.download_raw) # noinspection PyBroadException def tg_handle_rest(self, bot, update): - self.handle_tg_message(update.message, bot, self.get_hashtag(update.message)) + self.handle_tg_message(update.message, bot, self.get_hashtags(update.message)) def handle_tg_message(self, message, bot, hashtag): file, filename, tumblr = None, None, False @@ -395,11 +393,15 @@ class DelojzaBot: for file in self.last_downloaded: update.message.reply_text("Removing \"{}\"!".format(file[len(self.out_dir):])) os.remove(file) - file_parent_dir = os.path.dirname(file) - if len(os.listdir(file_parent_dir)) == 0: - hashtag = os.path.split(file_parent_dir)[1].upper() - update.message.reply_text("Removing tag \"{}\" as it's empty...".format(hashtag)) - os.rmdir(file_parent_dir) + parent_dir = os.path.dirname(file) + while True: + if len(os.listdir(parent_dir)) == 0: + update.message.reply_text("Removing directory \"{}\" as it's empty..." + .format(parent_dir[len(self.out_dir) + 1:])) + os.rmdir(parent_dir) + parent_dir = os.path.dirname(parent_dir) + if parent_dir == self.out_dir: + break self.last_downloaded.clear() else: update.message.reply_text("Nothing to remove!")