From 671c306ad15b34efc05067c1f16d8ecae0fedeaa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Ml=C3=A1dek?= <t@mldk.cz>
Date: Wed, 1 May 2019 10:54:43 +0200
Subject: [PATCH] add /stats, /orphans, /version; fix order of processing bug

---
 delojza.py | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 82 insertions(+), 2 deletions(-)
diff --git a/delojza.py b/delojza.py
index d566072..b7cf256 100755
--- a/delojza.py
+++ b/delojza.py
@@ -7,7 +7,9 @@ import re
 import shutil
 import sys
 from configparser import ConfigParser
+from datetime import datetime
 from glob import glob
+from operator import itemgetter
 
 import filetype
 import markovify
@@ -18,6 +20,7 @@ import telegram
 import youtube_dl
 from telegram import MessageEntity
 from telegram.ext import Updater, CommandHandler, MessageHandler, Filters
+from youtube_dl.version import __version__ as YTDL_VERSION
 
 
 def mkdir_p(path):
@@ -49,13 +52,17 @@ class DelojzaBot:
 
         dp.add_handler(CommandHandler("start", self.tg_start))
         dp.add_error_handler(self.tg_error)
-        dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), self.tg_handle_hashtag))
-        dp.add_handler(MessageHandler(Filters.text, self.tg_handle_text))
         self.tg_url_handler = MessageHandler(Filters.entity(MessageEntity.URL), self.tg_handle_url)
         dp.add_handler(self.tg_url_handler)
         self.tg_rest_handler = MessageHandler(Filters.photo | Filters.video | Filters.video_note |
                                               Filters.audio | Filters.voice | Filters.document, self.tg_handle_rest)
         dp.add_handler(self.tg_rest_handler)
+        dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), self.tg_handle_hashtag))
+        dp.add_handler(MessageHandler(Filters.text, self.tg_handle_text))
+        dp.add_handler(CommandHandler("stats", self.tg_stats))
+        dp.add_handler(CommandHandler("orphans", self.tg_orphan))
+        dp.add_handler(CommandHandler("orphans_full", self.tg_orphan_full))
+        dp.add_handler(CommandHandler("version", self.tg_version))
 
         if tumblr_keys:
             self.client = pytumblr.TumblrRestClient(*tumblr_keys)
@@ -243,6 +250,79 @@ class DelojzaBot:
         if self.markov:
             self.markov.add_to_corpus(update.message.text)
 
+    def tag_dirs(self):
+        """List the all-uppercase subdirectory names of self.out_dir, i.e. the tag directories."""
+        return [name for name in os.listdir(self.out_dir)
+                if os.path.isdir(os.path.join(self.out_dir, name)) and name.upper() == name]
+
+    def tg_stats(self, _, update):
+        """Reply with per-tag entry counts by media type, then list the tags holding at most one entry."""
+        tag_dirs = self.tag_dirs()
+        reply = "Total number of tags: {}\n\n".format(len(tag_dirs))
+        counts = [(tag, os.listdir(os.path.join(self.out_dir, tag))) for tag in tag_dirs]
+        counts.sort(key=lambda item: (-len(item[1]), item[0]))  # biggest tags first, ties by name
+        for tag, files in counts:
+            if len(files) <= 1:
+                break  # sorted by size, so every remaining tag is an orphan (listed below)
+            abs_paths = [os.path.join(self.out_dir, tag, file) for file in files]
+            abs_files = [path for path in abs_paths if os.path.isfile(path)]
+            audio_cnt = sum(1 for path in abs_files if filetype.audio(path) is not None)
+            video_cnt = sum(1 for path in abs_files if filetype.video(path) is not None)
+            image_cnt = sum(1 for path in abs_files if filetype.image(path) is not None)
+            rest_cnt = len(abs_files) - audio_cnt - video_cnt - image_cnt  # directories go to dir_cnt
+            dir_cnt = len(abs_paths) - len(abs_files)
+            details = ", ".join("{} {}".format(cnt, desc) for cnt, desc in
+                                [(image_cnt, "images"), (video_cnt, "videos"), (audio_cnt, "audios"),
+                                 (rest_cnt, "unknown files"), (dir_cnt, "directories")] if cnt > 0)
+            if len(abs_paths) in (audio_cnt, video_cnt, image_cnt, rest_cnt, dir_cnt):
+                reply += "<b>{}:</b> {}\n".format(tag, details)  # one kind only: total is redundant
+            else:
+                reply += "<b>{}:</b> {} files ({})\n".format(tag, len(files), details)
+        orphans = [tag for tag, files in counts if len(files) <= 1]
+        if orphans:
+            reply += "\nFollowing tags are orphans: " + ", ".join(orphans)
+        update.message.reply_text(reply, parse_mode=telegram.ParseMode.HTML)
+
+    def orphans(self):
+        """Return (tag, entry) pairs, sorted by tag, for tags holding exactly one entry or none."""
+        found = []
+        for tag in self.tag_dirs():
+            entries = os.listdir(os.path.join(self.out_dir, tag))
+            if not entries:
+                found.append((tag, "NO FILE AT ALL..."))
+            elif len(entries) == 1:
+                found.append((tag, entries[0]))
+        return sorted(found, key=itemgetter(0))
+
+    def tg_orphan(self, _, update):
+        """Reply with a comma-separated list of the orphan tag names, or a short note if there are none."""
+        tags = [tag for tag, _entry in self.orphans()]
+        if not tags:
+            update.message.reply_text("Good job, no orphan tags!")
+            return
+        update.message.reply_text("The following tags only contain a single file:\n" + ", ".join(tags))
+
+    def tg_orphan_full(self, _, update):
+        """Reply with one 'tag: entry' line per orphan tag, split into messages of at most 4096 characters."""
+        found = self.orphans()
+        if not found:
+            update.message.reply_text("Good job, no orphan tags!")
+            return
+        chunk = "The following tags only contain a single file:\n"
+        for line in ("{}: {}\n".format(tag, entry) for tag, entry in found):
+            if len(chunk) + len(line) > 4096:
+                update.message.reply_text(chunk)
+                chunk = ""
+            chunk += line
+        if chunk:
+            update.message.reply_text(chunk)
+
+    def tg_version(self, _, update):
+        """Reply with this script file's last-modified time and the imported youtube-dl version."""
+        modified = datetime.fromtimestamp(os.path.getmtime(os.path.realpath(__file__)))
+        text = "delojza modified date: {}\nyoutube-dl version: {}"
+        update.message.reply_text(text.format(modified.strftime('%Y/%m/%d - %H:%M:%S'), YTDL_VERSION))
+
     def tg_start(self, _, update):
         update.message.reply_text(self.markov.make_sentence() if self.markov else "HELLO")