From 671c306ad15b34efc05067c1f16d8ecae0fedeaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Ml=C3=A1dek?= Date: Wed, 1 May 2019 10:54:43 +0200 Subject: [PATCH] add /stats, /orphans, /version; fix order of processing bug --- delojza.py | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 82 insertions(+), 2 deletions(-) diff --git a/delojza.py b/delojza.py index d566072..b7cf256 100755 --- a/delojza.py +++ b/delojza.py @@ -7,7 +7,9 @@ import re import shutil import sys from configparser import ConfigParser +from datetime import datetime from glob import glob +from operator import itemgetter import filetype import markovify @@ -18,6 +20,7 @@ import telegram import youtube_dl from telegram import MessageEntity from telegram.ext import Updater, CommandHandler, MessageHandler, Filters +from youtube_dl.version import __version__ as YTDL_VERSION def mkdir_p(path): @@ -49,13 +52,17 @@ class DelojzaBot: dp.add_handler(CommandHandler("start", self.tg_start)) dp.add_error_handler(self.tg_error) - dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), self.tg_handle_hashtag)) - dp.add_handler(MessageHandler(Filters.text, self.tg_handle_text)) self.tg_url_handler = MessageHandler(Filters.entity(MessageEntity.URL), self.tg_handle_url) dp.add_handler(self.tg_url_handler) self.tg_rest_handler = MessageHandler(Filters.photo | Filters.video | Filters.video_note | Filters.audio | Filters.voice | Filters.document, self.tg_handle_rest) dp.add_handler(self.tg_rest_handler) + dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), self.tg_handle_hashtag)) + dp.add_handler(MessageHandler(Filters.text, self.tg_handle_text)) + dp.add_handler(CommandHandler("stats", self.tg_stats)) + dp.add_handler(CommandHandler("orphans", self.tg_orphan)) + dp.add_handler(CommandHandler("orphans_full", self.tg_orphan_full)) + dp.add_handler(CommandHandler("version", self.tg_version)) if tumblr_keys: self.client = 
pytumblr.TumblrRestClient(*tumblr_keys)
@@ -243,6 +250,103 @@ class DelojzaBot:
         if self.markov:
             self.markov.add_to_corpus(update.message.text)
 
+    def tag_dirs(self):
+        """Return the names of the tag directories found in ``self.out_dir``.
+
+        A tag directory is a direct subdirectory of ``self.out_dir`` whose name
+        is left unchanged by ``str.upper()``.
+        """
+        return [name for name in os.listdir(self.out_dir)
+                if name.upper() == name
+                and os.path.isdir(os.path.join(self.out_dir, name))]
+
+    def tg_stats(self, _, update):
+        """Reply with per-tag content statistics followed by the orphan tags.
+
+        Tags are listed fullest first, ties broken by name. Tags holding at most
+        one entry are not detailed; they are only named in the orphan list.
+        """
+        tag_dirs = self.tag_dirs()
+        reply = "Total number of tags: {}\n\n".format(len(tag_dirs))
+        counts = [(tag, os.listdir(os.path.join(self.out_dir, tag))) for tag in tag_dirs]
+        counts.sort(key=lambda item: (-len(item[1]), item[0]))
+        for tag, files in counts:
+            # The list is sorted by size, so every remaining tag is an orphan
+            # (one entry or none); those are reported separately below.
+            if len(files) <= 1:
+                break
+            abs_paths = [os.path.join(self.out_dir, tag, name) for name in files]
+            abs_files = [path for path in abs_paths if os.path.isfile(path)]
+            audio_cnt = sum(1 for path in abs_files if filetype.audio(path) is not None)
+            video_cnt = sum(1 for path in abs_files if filetype.video(path) is not None)
+            image_cnt = sum(1 for path in abs_files if filetype.image(path) is not None)
+            # Only regular files can be "unknown files"; subdirectories have their own bucket.
+            rest_cnt = len(abs_files) - audio_cnt - video_cnt - image_cnt
+            dir_cnt = len(abs_paths) - len(abs_files)
+            details = ", ".join("{} {}".format(cnt, desc) for cnt, desc in
+                                [(image_cnt, "images"), (video_cnt, "videos"), (audio_cnt, "audios"),
+                                 (rest_cnt, "unknown files"), (dir_cnt, "directories")] if cnt > 0)
+            if len(abs_paths) in (audio_cnt, video_cnt, image_cnt, rest_cnt, dir_cnt):
+                reply += "{}: {}\n".format(tag, details)
+            else:
+                reply += "{}: {} files ({})\n".format(tag, len(files), details)
+        orphans = [tag for tag, files in counts if len(files) <= 1]
+        if orphans:
+            reply += "\nFollowing tags are orphans: " + ", ".join(orphans)
+        update.message.reply_text(reply, parse_mode=telegram.ParseMode.HTML)
+
+    def orphans(self):
+        """Return sorted ``(tag, entry)`` pairs for tags with at most one entry.
+
+        ``entry`` is the tag's only directory entry, or the placeholder
+        ``"NO FILE AT ALL..."`` when the tag directory is empty.
+        """
+        result = []
+        for tag in self.tag_dirs():
+            files = os.listdir(os.path.join(self.out_dir, tag))
+            if len(files) == 1:
+                result.append((tag, files[0]))
+            elif not files:
+                result.append((tag, "NO FILE AT ALL..."))
+        return sorted(result, key=itemgetter(0))
+
+    def tg_orphan(self, _, update):
+        """Reply with the names of all orphan tags, or praise if there are none."""
+        orphans = self.orphans()
+        if not orphans:
+            update.message.reply_text("Good job, no orphan tags!")
+        else:
+            update.message.reply_text("The following tags contain at most a single file:\n" +
+                                      ", ".join(map(itemgetter(0), orphans)))
+
+    def tg_orphan_full(self, _, update):
+        """Reply with every orphan tag and its sole entry, split across messages.
+
+        Lines are packed into replies of at most 4096 characters each, the
+        maximum length of a Telegram text message.
+        """
+        orphans = self.orphans()
+        if not orphans:
+            update.message.reply_text("Good job, no orphan tags!")
+            return
+        tmp_reply = "The following tags contain at most a single file:\n"
+        for tag, entry in orphans:
+            line = "{}: {}\n".format(tag, entry)
+            # Flush the pending chunk before it overflows; never send an empty message.
+            if tmp_reply and len(tmp_reply) + len(line) > 4096:
+                update.message.reply_text(tmp_reply)
+                tmp_reply = ""
+            tmp_reply += line
+        if tmp_reply:
+            update.message.reply_text(tmp_reply)
+
+    def tg_version(self, _, update):
+        """Reply with this file's modification time and the youtube-dl version."""
+        delojza_date = datetime.fromtimestamp(os.path.getmtime(os.path.realpath(__file__))) \
+            .strftime('%Y/%m/%d - %H:%M:%S')
+        update.message.reply_text("delojza modified date: {}\nyoutube-dl version: {}"
+                                  .format(delojza_date, YTDL_VERSION))
+
     def tg_start(self, _, update):
         update.message.reply_text(self.markov.make_sentence() if self.markov else "HELLO")
 