redirection of specific tags into other folders
This commit is contained in:
parent
c7297f4d27
commit
409ffc4b7e
1 changed file with 44 additions and 30 deletions
74
delojza.py
74
delojza.py
|
@ -87,18 +87,28 @@ class DelojzaDB:
|
|||
|
||||
|
||||
class DelojzaBot:
|
||||
def __init__(self, tg_api_key, out_dir, tmp_dir=None, db_path=None, protected_password=None,
|
||||
def __init__(self, tg_api_key, out_dir, redirects=None, tmp_dir=None, db_path=None, protected_password=None,
|
||||
acoustid_key=None, tumblr_name=None, tumblr_keys=None, markov=None):
|
||||
self._setup_logging(os.path.dirname(os.path.realpath(__file__)))
|
||||
|
||||
self.db = DelojzaDB(db_path or os.path.join(os.path.dirname(os.path.realpath(__file__)), "delojza.db"))
|
||||
|
||||
self.out_dir = os.path.abspath(out_dir)
|
||||
self.out_dir = self.out_dir[:-1] if self.out_dir[-1] == "/" else self.out_dir
|
||||
self.logger.debug('OUT_DIR: ' + out_dir)
|
||||
self.tmp_dir = tmp_dir if tmp_dir else tempfile.gettempdir()
|
||||
self.logger.debug('TMP_DIR: ' + tmp_dir)
|
||||
self.markov = markov
|
||||
|
||||
self.redirects = {}
|
||||
if redirects is not None:
|
||||
for hashtag, directory in redirects:
|
||||
hashtag = hashtag.upper()
|
||||
directory = directory[:-1] if directory[-1] == "/" else directory
|
||||
mkdir_p(directory)
|
||||
self.redirects[hashtag] = directory
|
||||
self.logger.debug(f"Will redirect hashtag {hashtag} to {directory}")
|
||||
|
||||
self.updater = Updater(tg_api_key)
|
||||
dp = self.updater.dispatcher
|
||||
|
||||
|
@ -252,8 +262,9 @@ class DelojzaBot:
|
|||
self.logger.info("Tagging {} w/ {} - {} [{}]...".format(filepath, title, artist, source))
|
||||
self._tag_file(filepath, artist, title)
|
||||
|
||||
def _get_percent_filled(self):
|
||||
output = subprocess.check_output(["df", self.out_dir])
|
||||
@staticmethod
|
||||
def _get_percent_filled(directory):
|
||||
output = subprocess.check_output(["df", directory])
|
||||
percents_re = re.search(r"[0-9]+%", output.decode('utf-8'))
|
||||
if not percents_re:
|
||||
raise RuntimeError
|
||||
|
@ -367,8 +378,8 @@ class DelojzaBot:
|
|||
info_line = info_line[:64]
|
||||
|
||||
filename = '{}__{}.txt'.format(datestr(message.date), info_line)
|
||||
|
||||
out_path = os.path.join(self.out_dir, *hashtags[1:] or ['TEXT'])
|
||||
out_dir = self.redirects.get(hashtags[0], self.out_dir)
|
||||
out_path = os.path.join(out_dir, *hashtags[1:] or ['TEXT'])
|
||||
file_path = os.path.join(out_path, filename)
|
||||
|
||||
mkdir_p(out_path)
|
||||
|
@ -382,20 +393,19 @@ class DelojzaBot:
|
|||
def handle(self, urls, message, hashtags, download_fn, filetitle=None):
|
||||
self.db.initialize()
|
||||
|
||||
try:
|
||||
if self._get_percent_filled() >= 98:
|
||||
message.reply("NO! Less than 2% of drive space left :(")
|
||||
return
|
||||
except Exception:
|
||||
message.reply("NO! Couldn't figure out how much space is left???")
|
||||
return
|
||||
|
||||
try:
|
||||
if len(hashtags) == 0:
|
||||
self.logger.info("Ignoring %s due to no hashtag present..." % urls)
|
||||
return False
|
||||
|
||||
if any(hashtag in self.db.get_protected_tags() for hashtag in hashtags):
|
||||
original_hashtags = hashtags
|
||||
if hashtags[0] in self.redirects:
|
||||
out_dir = self.redirects[hashtags[0]]
|
||||
hashtags = hashtags[1:]
|
||||
else:
|
||||
out_dir = self.out_dir
|
||||
|
||||
if any(hashtag in self.db.get_protected_tags() for hashtag in original_hashtags):
|
||||
if message.chat.id not in self.db.get_protected_chats():
|
||||
self.logger.info("Redirecting {} in chat {} due to protected hashtags: {}..."
|
||||
.format(urls, message.chat.title, hashtags))
|
||||
|
@ -403,30 +413,30 @@ class DelojzaBot:
|
|||
|
||||
for i in range(len(hashtags)):
|
||||
current_path = hashtags[:i + 1]
|
||||
if not os.path.isdir(os.path.join(self.out_dir, *current_path)):
|
||||
if not os.path.isdir(os.path.join(out_dir, *current_path)):
|
||||
test_path = current_path
|
||||
test_path[-1] = "_" + test_path[-1]
|
||||
if os.path.isdir(os.path.join(self.out_dir, *test_path)):
|
||||
if os.path.isdir(os.path.join(out_dir, *test_path)):
|
||||
self.logger.debug(f"Rerouting {current_path[-1]} to _{test_path[-1]}")
|
||||
hashtags[i] = test_path[-1]
|
||||
|
||||
self.last_hashtags[message.chat.id] = None
|
||||
|
||||
self.logger.info("Downloading %s under '%s'" % (urls, "/".join(hashtags)))
|
||||
self.logger.info("Downloading %s into '%s' (%s)" % (urls, "/".join(original_hashtags), out_dir))
|
||||
|
||||
out_path = os.path.join(self.out_dir, *hashtags)
|
||||
out_path = os.path.join(out_dir, *hashtags)
|
||||
mkdir_p(out_path)
|
||||
|
||||
reply = 'Downloading to "{}"...'.format("/".join(hashtags))
|
||||
reply = 'Downloading to "{}"...'.format("/".join(original_hashtags))
|
||||
|
||||
audio = any([any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) for hashtag in hashtags])
|
||||
audio = any([any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) for hashtag in original_hashtags])
|
||||
if audio and download_fn != self.download_raw:
|
||||
reply += ' (And also guessing you want to extract the audio)'
|
||||
message.reply_text(reply)
|
||||
|
||||
filenames = download_fn(urls, out_path, message.date, message, audio=audio, filetitle=filetitle)
|
||||
|
||||
cmd_hashtag = hashtags[0]
|
||||
cmd_hashtag = original_hashtags[0]
|
||||
|
||||
tumblr_ids = []
|
||||
if cmd_hashtag in ('TUMBLR', 'TUMBLR_NOW') and self.tumblr_client:
|
||||
|
@ -450,7 +460,7 @@ class DelojzaBot:
|
|||
self.logger.warning("Did not receive 'id' in tumblr response: \n" + pprint.pformat(response))
|
||||
message.reply_text('Something weird happened with the tumblrs, check it!')
|
||||
|
||||
self.last_downloaded[message.chat.id] = filenames, hashtags, tumblr_ids
|
||||
self.last_downloaded[message.chat.id] = filenames, original_hashtags, tumblr_ids
|
||||
return True
|
||||
except:
|
||||
exc_type, exc_value, __ = sys.exc_info()
|
||||
|
@ -529,7 +539,7 @@ class DelojzaBot:
|
|||
def _get_tag_dirs(self):
|
||||
return list(filter(lambda x: x.upper() == x,
|
||||
filter(lambda directory: os.path.isdir(os.path.join(self.out_dir, directory)),
|
||||
os.listdir(self.out_dir))))
|
||||
os.listdir(self.out_dir)))) + list(self.redirects.keys())
|
||||
|
||||
def tg_stats(self, _, update):
|
||||
self._log_msg(update)
|
||||
|
@ -539,13 +549,14 @@ class DelojzaBot:
|
|||
return
|
||||
tag_dirs = self._get_tag_dirs()
|
||||
reply = "Total number of tags: {}\n\n".format(len(tag_dirs))
|
||||
counts = [(directory, os.listdir(os.path.join(self.out_dir, directory))) for directory in tag_dirs]
|
||||
counts = [(directory, os.listdir(os.path.join(self.out_dir, directory))) for directory in
|
||||
tag_dirs] # TODO REDIRECTS
|
||||
counts.sort(key=itemgetter(0))
|
||||
counts.sort(key=lambda x: len(x[1]), reverse=True)
|
||||
for directory, files in counts:
|
||||
if len(files) == 1:
|
||||
break
|
||||
abs_paths = [os.path.join(self.out_dir, directory, file) for file in files]
|
||||
abs_paths = [os.path.join(self.out_dir, directory, file) for file in files] # TODO REDIRECTS
|
||||
abs_files = list(filter(os.path.isfile, abs_paths))
|
||||
# mimes = [magic.from_file(path, mime=True).split("/")[0] for path in abs_files]
|
||||
# mime_counts = [(mime, mimes.count(mime)) for mime in set(mimes)]
|
||||
|
@ -611,6 +622,7 @@ class DelojzaBot:
|
|||
self._log_msg(update)
|
||||
if self.last_downloaded.get(update.message.chat.id) is not None:
|
||||
files, hashtags, tumblr_ids = self.last_downloaded[update.message.chat.id]
|
||||
out_dir = self.redirects.get(hashtags[0], self.out_dir)
|
||||
mp3s = [filename for filename in files if filename.endswith("mp3")]
|
||||
if len(mp3s) > 0:
|
||||
arg_raw = re.sub(r'^/[@\w]+ ?', '', update.message.text).strip()
|
||||
|
@ -632,7 +644,7 @@ class DelojzaBot:
|
|||
|
||||
self._tag_file(mp3, artist, title)
|
||||
update.message.reply_text("Tagging \"{}\" as \"{}\" by \"{}\"!"
|
||||
.format(mp3[len(self.out_dir) + 1:], title, artist))
|
||||
.format(mp3[len(out_dir) + 1:], title, artist))
|
||||
else:
|
||||
update.message.reply_text((self.markov.make_sentence() if self.markov and random() > .7 else "") + "???")
|
||||
|
||||
|
@ -640,18 +652,19 @@ class DelojzaBot:
|
|||
self._log_msg(update)
|
||||
if self.last_downloaded.get(update.message.chat.id) is not None:
|
||||
files, hashtags, tumblr_ids = self.last_downloaded[update.message.chat.id]
|
||||
out_dir = self.redirects.get(hashtags[0], self.out_dir)
|
||||
for file in files:
|
||||
update.message.reply_text("Removing \"{}\"!".format(file[len(self.out_dir) + 1:]))
|
||||
update.message.reply_text("Removing \"{}\"!".format(file[len(out_dir) + 1:]))
|
||||
os.remove(file)
|
||||
parent_dir = os.path.dirname(file)
|
||||
while True:
|
||||
if len(os.listdir(parent_dir)) == 0:
|
||||
update.message.reply_text("Removing directory \"{}\" as it's empty..."
|
||||
.format(parent_dir[len(self.out_dir) + 1:]))
|
||||
.format(parent_dir[len(out_dir) + 1:]))
|
||||
os.rmdir(parent_dir)
|
||||
parent_dir = os.path.dirname(parent_dir)
|
||||
if parent_dir == self.out_dir:
|
||||
if parent_dir == out_dir:
|
||||
break
|
||||
parent_dir = os.path.dirname(parent_dir)
|
||||
if len(tumblr_ids) > 0:
|
||||
plural = "s (all {} of them)".format(len(tumblr_ids)) if len(tumblr_ids) > 1 else ""
|
||||
update.message.reply_text("Also deleting tumblr post{}!".format(plural))
|
||||
|
@ -786,6 +799,7 @@ if __name__ == '__main__':
|
|||
delojza = DelojzaBot(config.get('delojza', 'tg_api_key'),
|
||||
config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")),
|
||||
tmp_dir=config.get('delojza', 'tmp_dir', fallback=tempfile.gettempdir()),
|
||||
redirects=config.items('redirects'),
|
||||
protected_password=config.get('delojza', 'protected_password', fallback=None),
|
||||
acoustid_key=config.get('delojza', 'acoustid_api_key', fallback=None),
|
||||
tumblr_name=config.get('tumblr', 'blog_name', fallback=None),
|
||||
|
|
Loading…
Reference in a new issue