redirection of specific tags into other folders
This commit is contained in:
parent
c7297f4d27
commit
409ffc4b7e
1 changed files with 44 additions and 30 deletions
74
delojza.py
74
delojza.py
|
@ -87,18 +87,28 @@ class DelojzaDB:
|
||||||
|
|
||||||
|
|
||||||
class DelojzaBot:
|
class DelojzaBot:
|
||||||
def __init__(self, tg_api_key, out_dir, tmp_dir=None, db_path=None, protected_password=None,
|
def __init__(self, tg_api_key, out_dir, redirects=None, tmp_dir=None, db_path=None, protected_password=None,
|
||||||
acoustid_key=None, tumblr_name=None, tumblr_keys=None, markov=None):
|
acoustid_key=None, tumblr_name=None, tumblr_keys=None, markov=None):
|
||||||
self._setup_logging(os.path.dirname(os.path.realpath(__file__)))
|
self._setup_logging(os.path.dirname(os.path.realpath(__file__)))
|
||||||
|
|
||||||
self.db = DelojzaDB(db_path or os.path.join(os.path.dirname(os.path.realpath(__file__)), "delojza.db"))
|
self.db = DelojzaDB(db_path or os.path.join(os.path.dirname(os.path.realpath(__file__)), "delojza.db"))
|
||||||
|
|
||||||
self.out_dir = os.path.abspath(out_dir)
|
self.out_dir = os.path.abspath(out_dir)
|
||||||
|
self.out_dir = self.out_dir[:-1] if self.out_dir[-1] == "/" else self.out_dir
|
||||||
self.logger.debug('OUT_DIR: ' + out_dir)
|
self.logger.debug('OUT_DIR: ' + out_dir)
|
||||||
self.tmp_dir = tmp_dir if tmp_dir else tempfile.gettempdir()
|
self.tmp_dir = tmp_dir if tmp_dir else tempfile.gettempdir()
|
||||||
self.logger.debug('TMP_DIR: ' + tmp_dir)
|
self.logger.debug('TMP_DIR: ' + tmp_dir)
|
||||||
self.markov = markov
|
self.markov = markov
|
||||||
|
|
||||||
|
self.redirects = {}
|
||||||
|
if redirects is not None:
|
||||||
|
for hashtag, directory in redirects:
|
||||||
|
hashtag = hashtag.upper()
|
||||||
|
directory = directory[:-1] if directory[-1] == "/" else directory
|
||||||
|
mkdir_p(directory)
|
||||||
|
self.redirects[hashtag] = directory
|
||||||
|
self.logger.debug(f"Will redirect hashtag {hashtag} to {directory}")
|
||||||
|
|
||||||
self.updater = Updater(tg_api_key)
|
self.updater = Updater(tg_api_key)
|
||||||
dp = self.updater.dispatcher
|
dp = self.updater.dispatcher
|
||||||
|
|
||||||
|
@ -252,8 +262,9 @@ class DelojzaBot:
|
||||||
self.logger.info("Tagging {} w/ {} - {} [{}]...".format(filepath, title, artist, source))
|
self.logger.info("Tagging {} w/ {} - {} [{}]...".format(filepath, title, artist, source))
|
||||||
self._tag_file(filepath, artist, title)
|
self._tag_file(filepath, artist, title)
|
||||||
|
|
||||||
def _get_percent_filled(self):
|
@staticmethod
|
||||||
output = subprocess.check_output(["df", self.out_dir])
|
def _get_percent_filled(directory):
|
||||||
|
output = subprocess.check_output(["df", directory])
|
||||||
percents_re = re.search(r"[0-9]+%", output.decode('utf-8'))
|
percents_re = re.search(r"[0-9]+%", output.decode('utf-8'))
|
||||||
if not percents_re:
|
if not percents_re:
|
||||||
raise RuntimeError
|
raise RuntimeError
|
||||||
|
@ -367,8 +378,8 @@ class DelojzaBot:
|
||||||
info_line = info_line[:64]
|
info_line = info_line[:64]
|
||||||
|
|
||||||
filename = '{}__{}.txt'.format(datestr(message.date), info_line)
|
filename = '{}__{}.txt'.format(datestr(message.date), info_line)
|
||||||
|
out_dir = self.redirects.get(hashtags[0], self.out_dir)
|
||||||
out_path = os.path.join(self.out_dir, *hashtags[1:] or ['TEXT'])
|
out_path = os.path.join(out_dir, *hashtags[1:] or ['TEXT'])
|
||||||
file_path = os.path.join(out_path, filename)
|
file_path = os.path.join(out_path, filename)
|
||||||
|
|
||||||
mkdir_p(out_path)
|
mkdir_p(out_path)
|
||||||
|
@ -382,20 +393,19 @@ class DelojzaBot:
|
||||||
def handle(self, urls, message, hashtags, download_fn, filetitle=None):
|
def handle(self, urls, message, hashtags, download_fn, filetitle=None):
|
||||||
self.db.initialize()
|
self.db.initialize()
|
||||||
|
|
||||||
try:
|
|
||||||
if self._get_percent_filled() >= 98:
|
|
||||||
message.reply("NO! Less than 2% of drive space left :(")
|
|
||||||
return
|
|
||||||
except Exception:
|
|
||||||
message.reply("NO! Couldn't figure out how much space is left???")
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if len(hashtags) == 0:
|
if len(hashtags) == 0:
|
||||||
self.logger.info("Ignoring %s due to no hashtag present..." % urls)
|
self.logger.info("Ignoring %s due to no hashtag present..." % urls)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if any(hashtag in self.db.get_protected_tags() for hashtag in hashtags):
|
original_hashtags = hashtags
|
||||||
|
if hashtags[0] in self.redirects:
|
||||||
|
out_dir = self.redirects[hashtags[0]]
|
||||||
|
hashtags = hashtags[1:]
|
||||||
|
else:
|
||||||
|
out_dir = self.out_dir
|
||||||
|
|
||||||
|
if any(hashtag in self.db.get_protected_tags() for hashtag in original_hashtags):
|
||||||
if message.chat.id not in self.db.get_protected_chats():
|
if message.chat.id not in self.db.get_protected_chats():
|
||||||
self.logger.info("Redirecting {} in chat {} due to protected hashtags: {}..."
|
self.logger.info("Redirecting {} in chat {} due to protected hashtags: {}..."
|
||||||
.format(urls, message.chat.title, hashtags))
|
.format(urls, message.chat.title, hashtags))
|
||||||
|
@ -403,30 +413,30 @@ class DelojzaBot:
|
||||||
|
|
||||||
for i in range(len(hashtags)):
|
for i in range(len(hashtags)):
|
||||||
current_path = hashtags[:i + 1]
|
current_path = hashtags[:i + 1]
|
||||||
if not os.path.isdir(os.path.join(self.out_dir, *current_path)):
|
if not os.path.isdir(os.path.join(out_dir, *current_path)):
|
||||||
test_path = current_path
|
test_path = current_path
|
||||||
test_path[-1] = "_" + test_path[-1]
|
test_path[-1] = "_" + test_path[-1]
|
||||||
if os.path.isdir(os.path.join(self.out_dir, *test_path)):
|
if os.path.isdir(os.path.join(out_dir, *test_path)):
|
||||||
self.logger.debug(f"Rerouting {current_path[-1]} to _{test_path[-1]}")
|
self.logger.debug(f"Rerouting {current_path[-1]} to _{test_path[-1]}")
|
||||||
hashtags[i] = test_path[-1]
|
hashtags[i] = test_path[-1]
|
||||||
|
|
||||||
self.last_hashtags[message.chat.id] = None
|
self.last_hashtags[message.chat.id] = None
|
||||||
|
|
||||||
self.logger.info("Downloading %s under '%s'" % (urls, "/".join(hashtags)))
|
self.logger.info("Downloading %s into '%s' (%s)" % (urls, "/".join(original_hashtags), out_dir))
|
||||||
|
|
||||||
out_path = os.path.join(self.out_dir, *hashtags)
|
out_path = os.path.join(out_dir, *hashtags)
|
||||||
mkdir_p(out_path)
|
mkdir_p(out_path)
|
||||||
|
|
||||||
reply = 'Downloading to "{}"...'.format("/".join(hashtags))
|
reply = 'Downloading to "{}"...'.format("/".join(original_hashtags))
|
||||||
|
|
||||||
audio = any([any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) for hashtag in hashtags])
|
audio = any([any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) for hashtag in original_hashtags])
|
||||||
if audio and download_fn != self.download_raw:
|
if audio and download_fn != self.download_raw:
|
||||||
reply += ' (And also guessing you want to extract the audio)'
|
reply += ' (And also guessing you want to extract the audio)'
|
||||||
message.reply_text(reply)
|
message.reply_text(reply)
|
||||||
|
|
||||||
filenames = download_fn(urls, out_path, message.date, message, audio=audio, filetitle=filetitle)
|
filenames = download_fn(urls, out_path, message.date, message, audio=audio, filetitle=filetitle)
|
||||||
|
|
||||||
cmd_hashtag = hashtags[0]
|
cmd_hashtag = original_hashtags[0]
|
||||||
|
|
||||||
tumblr_ids = []
|
tumblr_ids = []
|
||||||
if cmd_hashtag in ('TUMBLR', 'TUMBLR_NOW') and self.tumblr_client:
|
if cmd_hashtag in ('TUMBLR', 'TUMBLR_NOW') and self.tumblr_client:
|
||||||
|
@ -450,7 +460,7 @@ class DelojzaBot:
|
||||||
self.logger.warning("Did not receive 'id' in tumblr response: \n" + pprint.pformat(response))
|
self.logger.warning("Did not receive 'id' in tumblr response: \n" + pprint.pformat(response))
|
||||||
message.reply_text('Something weird happened with the tumblrs, check it!')
|
message.reply_text('Something weird happened with the tumblrs, check it!')
|
||||||
|
|
||||||
self.last_downloaded[message.chat.id] = filenames, hashtags, tumblr_ids
|
self.last_downloaded[message.chat.id] = filenames, original_hashtags, tumblr_ids
|
||||||
return True
|
return True
|
||||||
except:
|
except:
|
||||||
exc_type, exc_value, __ = sys.exc_info()
|
exc_type, exc_value, __ = sys.exc_info()
|
||||||
|
@ -529,7 +539,7 @@ class DelojzaBot:
|
||||||
def _get_tag_dirs(self):
|
def _get_tag_dirs(self):
|
||||||
return list(filter(lambda x: x.upper() == x,
|
return list(filter(lambda x: x.upper() == x,
|
||||||
filter(lambda directory: os.path.isdir(os.path.join(self.out_dir, directory)),
|
filter(lambda directory: os.path.isdir(os.path.join(self.out_dir, directory)),
|
||||||
os.listdir(self.out_dir))))
|
os.listdir(self.out_dir)))) + list(self.redirects.keys())
|
||||||
|
|
||||||
def tg_stats(self, _, update):
|
def tg_stats(self, _, update):
|
||||||
self._log_msg(update)
|
self._log_msg(update)
|
||||||
|
@ -539,13 +549,14 @@ class DelojzaBot:
|
||||||
return
|
return
|
||||||
tag_dirs = self._get_tag_dirs()
|
tag_dirs = self._get_tag_dirs()
|
||||||
reply = "Total number of tags: {}\n\n".format(len(tag_dirs))
|
reply = "Total number of tags: {}\n\n".format(len(tag_dirs))
|
||||||
counts = [(directory, os.listdir(os.path.join(self.out_dir, directory))) for directory in tag_dirs]
|
counts = [(directory, os.listdir(os.path.join(self.out_dir, directory))) for directory in
|
||||||
|
tag_dirs] # TODO REDIRECTS
|
||||||
counts.sort(key=itemgetter(0))
|
counts.sort(key=itemgetter(0))
|
||||||
counts.sort(key=lambda x: len(x[1]), reverse=True)
|
counts.sort(key=lambda x: len(x[1]), reverse=True)
|
||||||
for directory, files in counts:
|
for directory, files in counts:
|
||||||
if len(files) == 1:
|
if len(files) == 1:
|
||||||
break
|
break
|
||||||
abs_paths = [os.path.join(self.out_dir, directory, file) for file in files]
|
abs_paths = [os.path.join(self.out_dir, directory, file) for file in files] # TODO REDIRECTS
|
||||||
abs_files = list(filter(os.path.isfile, abs_paths))
|
abs_files = list(filter(os.path.isfile, abs_paths))
|
||||||
# mimes = [magic.from_file(path, mime=True).split("/")[0] for path in abs_files]
|
# mimes = [magic.from_file(path, mime=True).split("/")[0] for path in abs_files]
|
||||||
# mime_counts = [(mime, mimes.count(mime)) for mime in set(mimes)]
|
# mime_counts = [(mime, mimes.count(mime)) for mime in set(mimes)]
|
||||||
|
@ -611,6 +622,7 @@ class DelojzaBot:
|
||||||
self._log_msg(update)
|
self._log_msg(update)
|
||||||
if self.last_downloaded.get(update.message.chat.id) is not None:
|
if self.last_downloaded.get(update.message.chat.id) is not None:
|
||||||
files, hashtags, tumblr_ids = self.last_downloaded[update.message.chat.id]
|
files, hashtags, tumblr_ids = self.last_downloaded[update.message.chat.id]
|
||||||
|
out_dir = self.redirects.get(hashtags[0], self.out_dir)
|
||||||
mp3s = [filename for filename in files if filename.endswith("mp3")]
|
mp3s = [filename for filename in files if filename.endswith("mp3")]
|
||||||
if len(mp3s) > 0:
|
if len(mp3s) > 0:
|
||||||
arg_raw = re.sub(r'^/[@\w]+ ?', '', update.message.text).strip()
|
arg_raw = re.sub(r'^/[@\w]+ ?', '', update.message.text).strip()
|
||||||
|
@ -632,7 +644,7 @@ class DelojzaBot:
|
||||||
|
|
||||||
self._tag_file(mp3, artist, title)
|
self._tag_file(mp3, artist, title)
|
||||||
update.message.reply_text("Tagging \"{}\" as \"{}\" by \"{}\"!"
|
update.message.reply_text("Tagging \"{}\" as \"{}\" by \"{}\"!"
|
||||||
.format(mp3[len(self.out_dir) + 1:], title, artist))
|
.format(mp3[len(out_dir) + 1:], title, artist))
|
||||||
else:
|
else:
|
||||||
update.message.reply_text((self.markov.make_sentence() if self.markov and random() > .7 else "") + "???")
|
update.message.reply_text((self.markov.make_sentence() if self.markov and random() > .7 else "") + "???")
|
||||||
|
|
||||||
|
@ -640,18 +652,19 @@ class DelojzaBot:
|
||||||
self._log_msg(update)
|
self._log_msg(update)
|
||||||
if self.last_downloaded.get(update.message.chat.id) is not None:
|
if self.last_downloaded.get(update.message.chat.id) is not None:
|
||||||
files, hashtags, tumblr_ids = self.last_downloaded[update.message.chat.id]
|
files, hashtags, tumblr_ids = self.last_downloaded[update.message.chat.id]
|
||||||
|
out_dir = self.redirects.get(hashtags[0], self.out_dir)
|
||||||
for file in files:
|
for file in files:
|
||||||
update.message.reply_text("Removing \"{}\"!".format(file[len(self.out_dir) + 1:]))
|
update.message.reply_text("Removing \"{}\"!".format(file[len(out_dir) + 1:]))
|
||||||
os.remove(file)
|
os.remove(file)
|
||||||
parent_dir = os.path.dirname(file)
|
parent_dir = os.path.dirname(file)
|
||||||
while True:
|
while True:
|
||||||
if len(os.listdir(parent_dir)) == 0:
|
if len(os.listdir(parent_dir)) == 0:
|
||||||
update.message.reply_text("Removing directory \"{}\" as it's empty..."
|
update.message.reply_text("Removing directory \"{}\" as it's empty..."
|
||||||
.format(parent_dir[len(self.out_dir) + 1:]))
|
.format(parent_dir[len(out_dir) + 1:]))
|
||||||
os.rmdir(parent_dir)
|
os.rmdir(parent_dir)
|
||||||
parent_dir = os.path.dirname(parent_dir)
|
if parent_dir == out_dir:
|
||||||
if parent_dir == self.out_dir:
|
|
||||||
break
|
break
|
||||||
|
parent_dir = os.path.dirname(parent_dir)
|
||||||
if len(tumblr_ids) > 0:
|
if len(tumblr_ids) > 0:
|
||||||
plural = "s (all {} of them)".format(len(tumblr_ids)) if len(tumblr_ids) > 1 else ""
|
plural = "s (all {} of them)".format(len(tumblr_ids)) if len(tumblr_ids) > 1 else ""
|
||||||
update.message.reply_text("Also deleting tumblr post{}!".format(plural))
|
update.message.reply_text("Also deleting tumblr post{}!".format(plural))
|
||||||
|
@ -786,6 +799,7 @@ if __name__ == '__main__':
|
||||||
delojza = DelojzaBot(config.get('delojza', 'tg_api_key'),
|
delojza = DelojzaBot(config.get('delojza', 'tg_api_key'),
|
||||||
config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")),
|
config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")),
|
||||||
tmp_dir=config.get('delojza', 'tmp_dir', fallback=tempfile.gettempdir()),
|
tmp_dir=config.get('delojza', 'tmp_dir', fallback=tempfile.gettempdir()),
|
||||||
|
redirects=config.items('redirects'),
|
||||||
protected_password=config.get('delojza', 'protected_password', fallback=None),
|
protected_password=config.get('delojza', 'protected_password', fallback=None),
|
||||||
acoustid_key=config.get('delojza', 'acoustid_api_key', fallback=None),
|
acoustid_key=config.get('delojza', 'acoustid_api_key', fallback=None),
|
||||||
tumblr_name=config.get('tumblr', 'blog_name', fallback=None),
|
tumblr_name=config.get('tumblr', 'blog_name', fallback=None),
|
||||||
|
|
Loading…
Reference in a new issue