From 723008e4510ac4f6293eb87e212df87250e100cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Ml=C3=A1dek?= <t@mldk.cz>
Date: Tue, 14 May 2019 23:33:48 +0200
Subject: [PATCH] download into subdirectories when multiple hashtags present
 (fix #2)

---
 delojza.py | 96 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 49 insertions(+), 47 deletions(-)

diff --git a/delojza.py b/delojza.py
index 33354a7..29ec8d9 100755
--- a/delojza.py
+++ b/delojza.py
@@ -42,7 +42,7 @@ class DelojzaBot:
     def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', acoustid_key=None, tumblr_keys=None, markov=None):
         self.logger = logging.getLogger("delojza")
 
-        self.out_dir = out_dir
+        self.out_dir = os.path.abspath(out_dir)
         self.logger.debug('OUT_DIR: ' + out_dir)
         self.tmp_dir = tmp_dir
         self.logger.debug('TMP_DIR: ' + tmp_dir)
@@ -72,7 +72,7 @@ class DelojzaBot:
             self.client = pytumblr.TumblrRestClient(*tumblr_keys)
 
         self.last_downloaded = []
-        self.last_hashtag = None
+        self.last_hashtags = None
 
     @staticmethod
     def ytdl_can(url):
@@ -153,7 +153,7 @@ class DelojzaBot:
             id3.add(mutagen.id3.TPE1(encoding=3, text=artist))
         id3.save()
 
-    def download_ytdl(self, urls, subdir, date, message, audio=False, filename=None):
+    def download_ytdl(self, urls, out_path, date, message, audio=False, filename=None):
         ydl_opts = {
             'noplaylist': True,
             'restrictfilenames': True,
@@ -169,23 +169,21 @@ class DelojzaBot:
         filenames = []
         with youtube_dl.YoutubeDL(ydl_opts) as ydl:
             ydl.download(urls)
-            out_dir = os.path.join(self.out_dir, subdir)
             for info in [ydl.extract_info(url, download=False) for url in urls]:
                 filename = ydl.prepare_filename(info)
                 globbeds = glob(os.path.splitext(filename)[0] + '.*')
                 for globbed in globbeds:
                     if globbed.endswith("mp3"):
                         self.tag_file(globbed, message, info=info)
-                    self.logger.info("Moving %s to %s..." % (globbed, out_dir))
-                    dest = shutil.move(globbed, out_dir)
+                    self.logger.info("Moving %s to %s..." % (globbed, out_path))
+                    dest = shutil.move(globbed, out_path)
                     filenames.append(dest)
         return filenames
 
-    def download_raw(self, urls, subdir, date, message, audio=False, filename=None):
+    def download_raw(self, urls, out_path, date, message, audio=False, filename=None):
         filenames = []
         for url in urls:
-            local_filename = os.path.join(self.out_dir, subdir,
-                                          "%s__%s" % (datestr(date), filename or url.split('/')[-1]))
+            local_filename = os.path.join(out_path, "%s__%s" % (datestr(date), filename or url.split('/')[-1]))
             final_filename = local_filename
             is_mp3 = local_filename.endswith("mp3")
 
@@ -212,61 +210,61 @@ class DelojzaBot:
         return filenames
 
     @staticmethod
-    def extract_first_hashtag(message):
+    def extract_hashtags(message):  # returns a list of all hashtags from text and caption entities, upper-cased, first char stripped
         hashtags = list(map(message.parse_entity,
                             list(filter(lambda e: e.type == 'hashtag', message.entities))))
         hashtags += list(map(message.parse_caption_entity,
                              list(filter(lambda e: e.type == 'hashtag', message.caption_entities))))
         if len(hashtags) > 0:
-            hashtag = hashtags[0][1:].upper()
-            if "PRAS" in hashtag:
-                hashtag = "PRAS"
-            return hashtag
+            hashtags = [hashtag[1:].upper() for hashtag in hashtags]
+            for i, hashtag in enumerate(hashtags):
+                if "PRAS" in hashtag:  # any tag containing "PRAS" is collapsed to plain "PRAS"
+                    hashtags[i] = "PRAS"
+        return hashtags  # outside the `if`: callers use len() on the result, so always return a list, never None
 
-    def get_hashtag(self, message):
-        hashtag = self.extract_first_hashtag(message)
-        if hashtag is None:
-            if self.last_hashtag is not None and self.last_hashtag[0] == message.from_user:
-                hashtag = self.last_hashtag[1]
-                self.last_hashtag = None
-        return hashtag
+    def get_hashtags(self, message):  # tags of this message, else the ones stored for the same sender, else []
+        hashtags = self.extract_hashtags(message)
+        if not hashtags:  # covers both None and an empty list
+            if self.last_hashtags is not None and self.last_hashtags[0] == message.from_user:
+                hashtags = self.last_hashtags[1]  # tags stored earlier by tg_handle_hashtag for this user
+                self.last_hashtags = None  # consumed: stored tags apply to a single follow-up message
+        return hashtags or []  # never hand None to callers that call len() on the result
 
     def tg_handle_hashtag(self, bot, update):
-        hashtag = self.extract_first_hashtag(update.message)
+        hashtags = self.extract_hashtags(update.message)  # every hashtag of the message, not just the first
 
         if update.message.reply_to_message:
-            self.handle_tg_message(update.message.reply_to_message, bot, hashtag)
-            self.handle_urls(update.message.reply_to_message, hashtag)
+            self.handle_tg_message(update.message.reply_to_message, bot, hashtags)  # the reply target, not the hashtag message, is processed
+            self.handle_urls(update.message.reply_to_message, hashtags)
         else:
-            self.last_hashtag = (update.message.from_user, hashtag)
+            self.last_hashtags = (update.message.from_user, hashtags)  # remembered per sender; get_hashtags() consumes it
 
     # noinspection PyBroadException
-    def handle(self, urls, message, hashtag, download_fn, filename=None):
+    def handle(self, urls, message, hashtags, download_fn, filename=None):
         try:
-            if hashtag is None:
+            if len(hashtags) == 0:
                 self.logger.info("Ignoring %s due to no hashtag present..." % urls)
                 return
 
-            self.logger.info("Downloading %s under '%s'" % (urls, hashtag))
+            self.logger.info("Downloading %s under '%s'" % (urls, "/".join(hashtags)))
 
-            reply = 'Downloading'
-            if hashtag:
-                mkdir_p(os.path.join(self.out_dir, hashtag))
-                reply += ' to "' + hashtag + '"'
-            reply += '...'
+            out_path = os.path.join(self.out_dir, *hashtags)
+            mkdir_p(out_path)
 
-            audio = any([tag in hashtag for tag in ('AUDIO', 'RADIO')])
+            reply = 'Downloading to "{}"...'.format("/".join(hashtags))
+
+            audio = any([any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) for hashtag in hashtags])
             if audio and download_fn != self.download_raw:
                 reply += ' (And also guessing you want to extract the audio)'
 
             message.reply_text(reply)
 
-            filenames = download_fn(urls, hashtag or '.', message.date, message, audio=audio, filename=filename)
-            if hashtag == 'TUMBLR' and self.client:
+            filenames = download_fn(urls, out_path, message.date, message, audio=audio, filename=filename)
+            if hashtags[0] == 'TUMBLR' and self.client:
                 message.reply_text('(btw, queueing to tumblr)')
                 for filename in filenames:
                     self.client.create_photo('kunsaxan', state="queue", data=filename)
-            elif hashtag == 'TUMBLR_NOW' and self.client:
+            elif hashtags[0] == 'TUMBLR_NOW' and self.client:
                 message.reply_text('(btw, ***FIRING TO TUMBLR RIGHT AWAY***)',
                                    parse_mode=telegram.ParseMode.MARKDOWN)
                 for filename in filenames:
@@ -279,23 +277,23 @@ class DelojzaBot:
                 message.reply_text("Something is FUCKED: %s" % exc_value)
 
     def tg_handle_url(self, _, update):
-        self.handle_urls(update.message, self.get_hashtag(update.message))
+        self.handle_urls(update.message, self.get_hashtags(update.message))  # tags from the message itself or the sender's stored ones
 
-    def handle_urls(self, message, hashtag):
+    def handle_urls(self, message, hashtags):  # hashtags: list of tag names as produced by extract_hashtags()/get_hashtags()
         urls = list(map(lambda e: message.parse_entity(e),
                         filter(lambda e: e.type == 'url', message.entities)))
         ytdl_urls = [url for url in urls if self.ytdl_can(url)]
         normal_urls = [url for url in urls if not self.ytdl_can(url)]
         if len(ytdl_urls) > 0:
-            self.handle(ytdl_urls, message, hashtag, self.download_ytdl)
+            self.handle(ytdl_urls, message, hashtags, self.download_ytdl)  # URLs for which ytdl_can() is true
         if len(normal_urls) > 0:
             image_urls = [url for url in normal_urls if "image" in requests.head(url).headers.get("Content-Type", "")]
             if len(image_urls) > 0:
-                self.handle(image_urls, message, hashtag, self.download_raw)
+                self.handle(image_urls, message, hashtags, self.download_raw)  # remaining URLs whose HEAD Content-Type contains "image"
 
     # noinspection PyBroadException
     def tg_handle_rest(self, bot, update):
-        self.handle_tg_message(update.message, bot, self.get_hashtag(update.message))
+        self.handle_tg_message(update.message, bot, self.get_hashtags(update.message))  # NOTE(review): handle_tg_message's parameter is still named `hashtag` but now receives a list
 
     def handle_tg_message(self, message, bot, hashtag):
         file, filename, tumblr = None, None, False
@@ -395,11 +393,15 @@ class DelojzaBot:
             for file in self.last_downloaded:
                 update.message.reply_text("Removing \"{}\"!".format(file[len(self.out_dir):]))
                 os.remove(file)
-                file_parent_dir = os.path.dirname(file)
-                if len(os.listdir(file_parent_dir)) == 0:
-                    hashtag = os.path.split(file_parent_dir)[1].upper()
-                    update.message.reply_text("Removing tag \"{}\" as it's empty...".format(hashtag))
-                    os.rmdir(file_parent_dir)
+                parent_dir = os.path.dirname(file)
+                while True:
+                    if len(os.listdir(parent_dir)) == 0:
+                        update.message.reply_text("Removing directory \"{}\" as it's empty..."
+                                                  .format(parent_dir[len(self.out_dir) + 1:]))
+                        os.rmdir(parent_dir)
+                    parent_dir = os.path.dirname(parent_dir)
+                    if parent_dir == self.out_dir:
+                        break
             self.last_downloaded.clear()
         else:
             update.message.reply_text("Nothing to remove!")