add /stats, /orphans, /version; fix order of processing bug

This commit is contained in:
Tomáš Mládek 2019-05-01 10:54:43 +02:00 committed by Tomáš Mládek
parent 2f658ed9ab
commit 671c306ad1

View file

@ -7,7 +7,9 @@ import re
import shutil import shutil
import sys import sys
from configparser import ConfigParser from configparser import ConfigParser
from datetime import datetime
from glob import glob from glob import glob
from operator import itemgetter
import filetype import filetype
import markovify import markovify
@ -18,6 +20,7 @@ import telegram
import youtube_dl import youtube_dl
from telegram import MessageEntity from telegram import MessageEntity
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters from telegram.ext import Updater, CommandHandler, MessageHandler, Filters
from youtube_dl.version import __version__ as YTDL_VERSION
def mkdir_p(path): def mkdir_p(path):
@ -49,13 +52,17 @@ class DelojzaBot:
dp.add_handler(CommandHandler("start", self.tg_start)) dp.add_handler(CommandHandler("start", self.tg_start))
dp.add_error_handler(self.tg_error) dp.add_error_handler(self.tg_error)
dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), self.tg_handle_hashtag))
dp.add_handler(MessageHandler(Filters.text, self.tg_handle_text))
self.tg_url_handler = MessageHandler(Filters.entity(MessageEntity.URL), self.tg_handle_url) self.tg_url_handler = MessageHandler(Filters.entity(MessageEntity.URL), self.tg_handle_url)
dp.add_handler(self.tg_url_handler) dp.add_handler(self.tg_url_handler)
self.tg_rest_handler = MessageHandler(Filters.photo | Filters.video | Filters.video_note | self.tg_rest_handler = MessageHandler(Filters.photo | Filters.video | Filters.video_note |
Filters.audio | Filters.voice | Filters.document, self.tg_handle_rest) Filters.audio | Filters.voice | Filters.document, self.tg_handle_rest)
dp.add_handler(self.tg_rest_handler) dp.add_handler(self.tg_rest_handler)
dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), self.tg_handle_hashtag))
dp.add_handler(MessageHandler(Filters.text, self.tg_handle_text))
dp.add_handler(CommandHandler("stats", self.tg_stats))
dp.add_handler(CommandHandler("orphans", self.tg_orphan))
dp.add_handler(CommandHandler("orphans_full", self.tg_orphan_full))
dp.add_handler(CommandHandler("version", self.tg_version))
if tumblr_keys: if tumblr_keys:
self.client = pytumblr.TumblrRestClient(*tumblr_keys) self.client = pytumblr.TumblrRestClient(*tumblr_keys)
@ -243,6 +250,79 @@ class DelojzaBot:
if self.markov: if self.markov:
self.markov.add_to_corpus(update.message.text) self.markov.add_to_corpus(update.message.text)
def tag_dirs(self):
    """Return the names of the tag directories directly under ``self.out_dir``.

    An entry qualifies when it is a directory and its name is unchanged by
    ``str.upper()`` (i.e. it contains no lowercase characters). Names come
    back in whatever order ``os.listdir`` yields them.
    """
    found = []
    for entry in os.listdir(self.out_dir):
        # Plain files (and anything else that is not a directory) are never tags.
        if not os.path.isdir(os.path.join(self.out_dir, entry)):
            continue
        if entry.upper() == entry:
            found.append(entry)
    return found
def tg_stats(self, _, update):
    """Reply with a per-tag summary of what is stored under ``self.out_dir``.

    The reply starts with the total number of tag directories, followed by
    one line per tag holding more than one entry (largest first, ties in
    alphabetical order) that breaks the contents down into images, videos,
    audios, unknown files and directories. Tags holding at most one entry
    are listed together at the end as orphans.

    :param _: unused first handler argument.
    :param update: incoming update; the answer is sent through
        ``update.message.reply_text`` using HTML parse mode.
    """
    # Function-scope import so the block does not depend on the file header.
    from html import escape

    tag_dirs = self.tag_dirs()
    reply = "Total number of tags: {}\n\n".format(len(tag_dirs))

    counts = [(tag, os.listdir(os.path.join(self.out_dir, tag))) for tag in tag_dirs]
    # Most populated tags first; equally sized tags stay in alphabetical order.
    counts.sort(key=lambda item: (-len(item[1]), item[0]))

    for tag, entries in counts:
        # The list is sorted by size, so the first tag with at most one entry
        # means only orphans remain. Empty tags must stop the loop as well,
        # otherwise they would get a blank detail line on top of being
        # reported as orphans below.
        if len(entries) <= 1:
            break

        abs_paths = [os.path.join(self.out_dir, tag, entry) for entry in entries]
        abs_files = [path for path in abs_paths if os.path.isfile(path)]

        audio_cnt = sum(1 for path in abs_files if filetype.audio(path) is not None)
        video_cnt = sum(1 for path in abs_files if filetype.video(path) is not None)
        image_cnt = sum(1 for path in abs_files if filetype.image(path) is not None)
        dir_cnt = len(abs_paths) - len(abs_files)
        # Count only regular files here; non-files are already covered by
        # dir_cnt and must not be reported a second time as unknown files.
        rest_cnt = len(abs_files) - audio_cnt - video_cnt - image_cnt

        details = ", ".join("{} {}".format(cnt, desc) for cnt, desc in
                            [(image_cnt, "images"), (video_cnt, "videos"), (audio_cnt, "audios"),
                             (rest_cnt, "unknown files"), (dir_cnt, "directories")] if cnt > 0)

        # The message is sent as HTML, so the tag name has to be escaped.
        safe_tag = escape(tag, quote=False)
        if len(abs_paths) in (audio_cnt, video_cnt, image_cnt, rest_cnt, dir_cnt):
            # Everything is of a single kind: the total would only repeat the detail.
            reply += "<b>{}:</b> {}\n".format(safe_tag, details)
        else:
            reply += "<b>{}:</b> {} files ({})\n".format(safe_tag, len(entries), details)

    orphans = [escape(tag, quote=False) for tag, entries in counts if len(entries) <= 1]
    if orphans:
        reply += "\nFollowing tags are orphans: " + ", ".join(orphans)

    update.message.reply_text(reply, parse_mode=telegram.ParseMode.HTML)
def orphans(self):
    """Collect the tag directories that hold at most one entry.

    Returns a list of ``(tag, entry)`` tuples sorted by tag name, where
    ``entry`` is the name of the tag's only entry, or the placeholder
    ``"NO FILE AT ALL..."`` when the tag directory is empty.
    """
    found = []
    for tag in self.tag_dirs():
        entries = os.listdir(os.path.join(self.out_dir, tag))
        entry_count = len(entries)
        if entry_count == 0:
            found.append((tag, "NO FILE AT ALL..."))
        elif entry_count == 1:
            found.append((tag, entries[0]))
    found.sort(key=lambda pair: pair[0])
    return found
def tg_orphan(self, _, update):
    """Reply with the names of all orphan tags as reported by ``self.orphans()``.

    Sends a congratulation message when there are none, otherwise a single
    message with the comma-separated tag names.
    """
    found = self.orphans()
    if not found:
        update.message.reply_text("Good job, no orphan tags!")
        return

    # Only the tag name (first element of each pair) is shown here.
    tag_names = [pair[0] for pair in found]
    listing = ", ".join(tag_names)
    update.message.reply_text("The following tags only contain a single file:\n" + listing)
def tg_orphan_full(self, _, update):
    """Reply with every orphan tag together with the entry it contains.

    The listing is built from ``self.orphans()``, one ``tag: entry`` line
    per orphan, and is split over several messages so that no single
    message is longer than 4096 characters.
    """
    found = self.orphans()
    if not found:
        update.message.reply_text("Good job, no orphan tags!")
        return

    pending = "The following tags only contain a single file:\n"
    for tag, entry in found:
        row = "{}: {}\n".format(tag, entry)
        if len(pending) + len(row) > 4096:
            # Adding this row would exceed the limit: send what has been
            # gathered so far and start the next message with this row.
            update.message.reply_text(pending)
            pending = row
        else:
            pending += row

    if pending:
        update.message.reply_text(pending)
def tg_version(self, _, update):
    """Reply with this source file's modification time and the youtube-dl version.

    The modification time is read from the resolved path of ``__file__`` and
    formatted as ``YYYY/MM/DD - HH:MM:SS``.
    """
    source_path = os.path.realpath(__file__)
    modified_at = datetime.fromtimestamp(os.path.getmtime(source_path))
    delojza_date = modified_at.strftime('%Y/%m/%d - %H:%M:%S')
    message = "delojza modified date: {}\nyoutube-dl version: {}".format(delojza_date, YTDL_VERSION)
    update.message.reply_text(message)
def tg_start(self, _, update): def tg_start(self, _, update):
update.message.reply_text(self.markov.make_sentence() if self.markov else "HELLO") update.message.reply_text(self.markov.make_sentence() if self.markov else "HELLO")