redirection of specific tags into other folders

neu
Tomáš Mládek 2019-11-07 16:24:30 +01:00
parent c7297f4d27
commit 409ffc4b7e
1 changed file with 44 additions and 30 deletions

View File

@ -87,18 +87,28 @@ class DelojzaDB:
class DelojzaBot:
def __init__(self, tg_api_key, out_dir, tmp_dir=None, db_path=None, protected_password=None,
def __init__(self, tg_api_key, out_dir, redirects=None, tmp_dir=None, db_path=None, protected_password=None,
acoustid_key=None, tumblr_name=None, tumblr_keys=None, markov=None):
self._setup_logging(os.path.dirname(os.path.realpath(__file__)))
self.db = DelojzaDB(db_path or os.path.join(os.path.dirname(os.path.realpath(__file__)), "delojza.db"))
self.out_dir = os.path.abspath(out_dir)
self.out_dir = self.out_dir[:-1] if self.out_dir[-1] == "/" else self.out_dir
self.logger.debug('OUT_DIR: ' + out_dir)
self.tmp_dir = tmp_dir if tmp_dir else tempfile.gettempdir()
self.logger.debug('TMP_DIR: ' + tmp_dir)
self.markov = markov
self.redirects = {}
if redirects is not None:
for hashtag, directory in redirects:
hashtag = hashtag.upper()
directory = directory[:-1] if directory[-1] == "/" else directory
mkdir_p(directory)
self.redirects[hashtag] = directory
self.logger.debug(f"Will redirect hashtag {hashtag} to {directory}")
self.updater = Updater(tg_api_key)
dp = self.updater.dispatcher
@ -252,8 +262,9 @@ class DelojzaBot:
self.logger.info("Tagging {} w/ {} - {} [{}]...".format(filepath, title, artist, source))
self._tag_file(filepath, artist, title)
def _get_percent_filled(self):
output = subprocess.check_output(["df", self.out_dir])
@staticmethod
def _get_percent_filled(directory):
output = subprocess.check_output(["df", directory])
percents_re = re.search(r"[0-9]+%", output.decode('utf-8'))
if not percents_re:
raise RuntimeError
@ -367,8 +378,8 @@ class DelojzaBot:
info_line = info_line[:64]
filename = '{}__{}.txt'.format(datestr(message.date), info_line)
out_path = os.path.join(self.out_dir, *hashtags[1:] or ['TEXT'])
out_dir = self.redirects.get(hashtags[0], self.out_dir)
out_path = os.path.join(out_dir, *hashtags[1:] or ['TEXT'])
file_path = os.path.join(out_path, filename)
mkdir_p(out_path)
@ -382,20 +393,19 @@ class DelojzaBot:
def handle(self, urls, message, hashtags, download_fn, filetitle=None):
self.db.initialize()
try:
if self._get_percent_filled() >= 98:
message.reply("NO! Less than 2% of drive space left :(")
return
except Exception:
message.reply("NO! Couldn't figure out how much space is left???")
return
try:
if len(hashtags) == 0:
self.logger.info("Ignoring %s due to no hashtag present..." % urls)
return False
if any(hashtag in self.db.get_protected_tags() for hashtag in hashtags):
original_hashtags = hashtags
if hashtags[0] in self.redirects:
out_dir = self.redirects[hashtags[0]]
hashtags = hashtags[1:]
else:
out_dir = self.out_dir
if any(hashtag in self.db.get_protected_tags() for hashtag in original_hashtags):
if message.chat.id not in self.db.get_protected_chats():
self.logger.info("Redirecting {} in chat {} due to protected hashtags: {}..."
.format(urls, message.chat.title, hashtags))
@ -403,30 +413,30 @@ class DelojzaBot:
for i in range(len(hashtags)):
current_path = hashtags[:i + 1]
if not os.path.isdir(os.path.join(self.out_dir, *current_path)):
if not os.path.isdir(os.path.join(out_dir, *current_path)):
test_path = current_path
test_path[-1] = "_" + test_path[-1]
if os.path.isdir(os.path.join(self.out_dir, *test_path)):
if os.path.isdir(os.path.join(out_dir, *test_path)):
self.logger.debug(f"Rerouting {current_path[-1]} to _{test_path[-1]}")
hashtags[i] = test_path[-1]
self.last_hashtags[message.chat.id] = None
self.logger.info("Downloading %s under '%s'" % (urls, "/".join(hashtags)))
self.logger.info("Downloading %s into '%s' (%s)" % (urls, "/".join(original_hashtags), out_dir))
out_path = os.path.join(self.out_dir, *hashtags)
out_path = os.path.join(out_dir, *hashtags)
mkdir_p(out_path)
reply = 'Downloading to "{}"...'.format("/".join(hashtags))
reply = 'Downloading to "{}"...'.format("/".join(original_hashtags))
audio = any([any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) for hashtag in hashtags])
audio = any([any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) for hashtag in original_hashtags])
if audio and download_fn != self.download_raw:
reply += ' (And also guessing you want to extract the audio)'
message.reply_text(reply)
filenames = download_fn(urls, out_path, message.date, message, audio=audio, filetitle=filetitle)
cmd_hashtag = hashtags[0]
cmd_hashtag = original_hashtags[0]
tumblr_ids = []
if cmd_hashtag in ('TUMBLR', 'TUMBLR_NOW') and self.tumblr_client:
@ -450,7 +460,7 @@ class DelojzaBot:
self.logger.warning("Did not receive 'id' in tumblr response: \n" + pprint.pformat(response))
message.reply_text('Something weird happened with the tumblrs, check it!')
self.last_downloaded[message.chat.id] = filenames, hashtags, tumblr_ids
self.last_downloaded[message.chat.id] = filenames, original_hashtags, tumblr_ids
return True
except:
exc_type, exc_value, __ = sys.exc_info()
@ -529,7 +539,7 @@ class DelojzaBot:
def _get_tag_dirs(self):
return list(filter(lambda x: x.upper() == x,
filter(lambda directory: os.path.isdir(os.path.join(self.out_dir, directory)),
os.listdir(self.out_dir))))
os.listdir(self.out_dir)))) + list(self.redirects.keys())
def tg_stats(self, _, update):
self._log_msg(update)
@ -539,13 +549,14 @@ class DelojzaBot:
return
tag_dirs = self._get_tag_dirs()
reply = "Total number of tags: {}\n\n".format(len(tag_dirs))
counts = [(directory, os.listdir(os.path.join(self.out_dir, directory))) for directory in tag_dirs]
counts = [(directory, os.listdir(os.path.join(self.out_dir, directory))) for directory in
tag_dirs] # TODO REDIRECTS
counts.sort(key=itemgetter(0))
counts.sort(key=lambda x: len(x[1]), reverse=True)
for directory, files in counts:
if len(files) == 1:
break
abs_paths = [os.path.join(self.out_dir, directory, file) for file in files]
abs_paths = [os.path.join(self.out_dir, directory, file) for file in files] # TODO REDIRECTS
abs_files = list(filter(os.path.isfile, abs_paths))
# mimes = [magic.from_file(path, mime=True).split("/")[0] for path in abs_files]
# mime_counts = [(mime, mimes.count(mime)) for mime in set(mimes)]
@ -611,6 +622,7 @@ class DelojzaBot:
self._log_msg(update)
if self.last_downloaded.get(update.message.chat.id) is not None:
files, hashtags, tumblr_ids = self.last_downloaded[update.message.chat.id]
out_dir = self.redirects.get(hashtags[0], self.out_dir)
mp3s = [filename for filename in files if filename.endswith("mp3")]
if len(mp3s) > 0:
arg_raw = re.sub(r'^/[@\w]+ ?', '', update.message.text).strip()
@ -632,7 +644,7 @@ class DelojzaBot:
self._tag_file(mp3, artist, title)
update.message.reply_text("Tagging \"{}\" as \"{}\" by \"{}\"!"
.format(mp3[len(self.out_dir) + 1:], title, artist))
.format(mp3[len(out_dir) + 1:], title, artist))
else:
update.message.reply_text((self.markov.make_sentence() if self.markov and random() > .7 else "") + "???")
@ -640,18 +652,19 @@ class DelojzaBot:
self._log_msg(update)
if self.last_downloaded.get(update.message.chat.id) is not None:
files, hashtags, tumblr_ids = self.last_downloaded[update.message.chat.id]
out_dir = self.redirects.get(hashtags[0], self.out_dir)
for file in files:
update.message.reply_text("Removing \"{}\"!".format(file[len(self.out_dir) + 1:]))
update.message.reply_text("Removing \"{}\"!".format(file[len(out_dir) + 1:]))
os.remove(file)
parent_dir = os.path.dirname(file)
while True:
if len(os.listdir(parent_dir)) == 0:
update.message.reply_text("Removing directory \"{}\" as it's empty..."
.format(parent_dir[len(self.out_dir) + 1:]))
.format(parent_dir[len(out_dir) + 1:]))
os.rmdir(parent_dir)
parent_dir = os.path.dirname(parent_dir)
if parent_dir == self.out_dir:
if parent_dir == out_dir:
break
parent_dir = os.path.dirname(parent_dir)
if len(tumblr_ids) > 0:
plural = "s (all {} of them)".format(len(tumblr_ids)) if len(tumblr_ids) > 1 else ""
update.message.reply_text("Also deleting tumblr post{}!".format(plural))
@ -786,6 +799,7 @@ if __name__ == '__main__':
delojza = DelojzaBot(config.get('delojza', 'tg_api_key'),
config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")),
tmp_dir=config.get('delojza', 'tmp_dir', fallback=tempfile.gettempdir()),
redirects=config.items('redirects'),
protected_password=config.get('delojza', 'protected_password', fallback=None),
acoustid_key=config.get('delojza', 'acoustid_api_key', fallback=None),
tumblr_name=config.get('tumblr', 'blog_name', fallback=None),