diff --git a/delojza.py b/delojza.py index 4f4d0b8..aac2830 100755 --- a/delojza.py +++ b/delojza.py @@ -87,18 +87,28 @@ class DelojzaDB: class DelojzaBot: - def __init__(self, tg_api_key, out_dir, tmp_dir=None, db_path=None, protected_password=None, + def __init__(self, tg_api_key, out_dir, redirects=None, tmp_dir=None, db_path=None, protected_password=None, acoustid_key=None, tumblr_name=None, tumblr_keys=None, markov=None): self._setup_logging(os.path.dirname(os.path.realpath(__file__))) self.db = DelojzaDB(db_path or os.path.join(os.path.dirname(os.path.realpath(__file__)), "delojza.db")) self.out_dir = os.path.abspath(out_dir) + self.out_dir = self.out_dir[:-1] if self.out_dir[-1] == "/" else self.out_dir self.logger.debug('OUT_DIR: ' + out_dir) self.tmp_dir = tmp_dir if tmp_dir else tempfile.gettempdir() self.logger.debug('TMP_DIR: ' + tmp_dir) self.markov = markov + self.redirects = {} + if redirects is not None: + for hashtag, directory in redirects: + hashtag = hashtag.upper() + directory = directory[:-1] if directory[-1] == "/" else directory + mkdir_p(directory) + self.redirects[hashtag] = directory + self.logger.debug(f"Will redirect hashtag {hashtag} to {directory}") + self.updater = Updater(tg_api_key) dp = self.updater.dispatcher @@ -252,8 +262,9 @@ class DelojzaBot: self.logger.info("Tagging {} w/ {} - {} [{}]...".format(filepath, title, artist, source)) self._tag_file(filepath, artist, title) - def _get_percent_filled(self): - output = subprocess.check_output(["df", self.out_dir]) + @staticmethod + def _get_percent_filled(directory): + output = subprocess.check_output(["df", directory]) percents_re = re.search(r"[0-9]+%", output.decode('utf-8')) if not percents_re: raise RuntimeError @@ -367,8 +378,8 @@ class DelojzaBot: info_line = info_line[:64] filename = '{}__{}.txt'.format(datestr(message.date), info_line) - - out_path = os.path.join(self.out_dir, *hashtags[1:] or ['TEXT']) + out_dir = self.redirects.get(hashtags[0], self.out_dir) + out_path = os.path.join(out_dir, *hashtags[1:] or ['TEXT']) file_path = os.path.join(out_path, filename) mkdir_p(out_path) @@ -382,20 +393,19 @@ class DelojzaBot: def handle(self, urls, message, hashtags, download_fn, filetitle=None): self.db.initialize() - try: - if self._get_percent_filled() >= 98: - message.reply("NO! Less than 2% of drive space left :(") - return - except Exception: - message.reply("NO! Couldn't figure out how much space is left???") - return - try: if len(hashtags) == 0: self.logger.info("Ignoring %s due to no hashtag present..." % urls) return False - if any(hashtag in self.db.get_protected_tags() for hashtag in hashtags): + original_hashtags = hashtags + if hashtags[0] in self.redirects: + out_dir = self.redirects[hashtags[0]] + hashtags = hashtags[1:] + else: + out_dir = self.out_dir + + if any(hashtag in self.db.get_protected_tags() for hashtag in original_hashtags): if message.chat.id not in self.db.get_protected_chats(): self.logger.info("Redirecting {} in chat {} due to protected hashtags: {}..." .format(urls, message.chat.title, hashtags)) @@ -403,30 +413,30 @@ class DelojzaBot: for i in range(len(hashtags)): current_path = hashtags[:i + 1] - if not os.path.isdir(os.path.join(self.out_dir, *current_path)): + if not os.path.isdir(os.path.join(out_dir, *current_path)): test_path = current_path test_path[-1] = "_" + test_path[-1] - if os.path.isdir(os.path.join(self.out_dir, *test_path)): + if os.path.isdir(os.path.join(out_dir, *test_path)): self.logger.debug(f"Rerouting {current_path[-1]} to _{test_path[-1]}") hashtags[i] = test_path[-1] self.last_hashtags[message.chat.id] = None - self.logger.info("Downloading %s under '%s'" % (urls, "/".join(hashtags))) + self.logger.info("Downloading %s into '%s' (%s)" % (urls, "/".join(original_hashtags), out_dir)) - out_path = os.path.join(self.out_dir, *hashtags) + out_path = os.path.join(out_dir, *hashtags) mkdir_p(out_path) - reply = 'Downloading to "{}"...'.format("/".join(hashtags)) + reply = 'Downloading to "{}"...'.format("/".join(original_hashtags)) - audio = any([any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) for hashtag in hashtags]) + audio = any([any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) for hashtag in original_hashtags]) if audio and download_fn != self.download_raw: reply += ' (And also guessing you want to extract the audio)' message.reply_text(reply) filenames = download_fn(urls, out_path, message.date, message, audio=audio, filetitle=filetitle) - cmd_hashtag = hashtags[0] + cmd_hashtag = original_hashtags[0] tumblr_ids = [] if cmd_hashtag in ('TUMBLR', 'TUMBLR_NOW') and self.tumblr_client: @@ -450,7 +460,7 @@ class DelojzaBot: self.logger.warning("Did not receive 'id' in tumblr response: \n" + pprint.pformat(response)) message.reply_text('Something weird happened with the tumblrs, check it!') - self.last_downloaded[message.chat.id] = filenames, hashtags, tumblr_ids + self.last_downloaded[message.chat.id] = filenames, original_hashtags, tumblr_ids return True except: exc_type, exc_value, __ = sys.exc_info() @@ -529,7 +539,7 @@ class DelojzaBot: def _get_tag_dirs(self): return list(filter(lambda x: x.upper() == x, filter(lambda directory: os.path.isdir(os.path.join(self.out_dir, directory)), - os.listdir(self.out_dir)))) + os.listdir(self.out_dir)))) + list(self.redirects.keys()) def tg_stats(self, _, update): self._log_msg(update) @@ -539,13 +549,14 @@ class DelojzaBot: return tag_dirs = self._get_tag_dirs() reply = "Total number of tags: {}\n\n".format(len(tag_dirs)) - counts = [(directory, os.listdir(os.path.join(self.out_dir, directory))) for directory in tag_dirs] + counts = [(directory, os.listdir(os.path.join(self.out_dir, directory))) for directory in + tag_dirs] # TODO REDIRECTS counts.sort(key=itemgetter(0)) counts.sort(key=lambda x: len(x[1]), reverse=True) for directory, files in counts: if len(files) == 1: break - abs_paths = [os.path.join(self.out_dir, directory, file) for file in files] + abs_paths = [os.path.join(self.out_dir, directory, file) for file in files] # TODO REDIRECTS abs_files = list(filter(os.path.isfile, abs_paths)) # mimes = [magic.from_file(path, mime=True).split("/")[0] for path in abs_files] # mime_counts = [(mime, mimes.count(mime)) for mime in set(mimes)] @@ -611,6 +622,7 @@ class DelojzaBot: self._log_msg(update) if self.last_downloaded.get(update.message.chat.id) is not None: files, hashtags, tumblr_ids = self.last_downloaded[update.message.chat.id] + out_dir = self.redirects.get(hashtags[0], self.out_dir) mp3s = [filename for filename in files if filename.endswith("mp3")] if len(mp3s) > 0: arg_raw = re.sub(r'^/[@\w]+ ?', '', update.message.text).strip() @@ -632,7 +644,7 @@ class DelojzaBot: self._tag_file(mp3, artist, title) update.message.reply_text("Tagging \"{}\" as \"{}\" by \"{}\"!" - .format(mp3[len(self.out_dir) + 1:], title, artist)) + .format(mp3[len(out_dir) + 1:], title, artist)) else: update.message.reply_text((self.markov.make_sentence() if self.markov and random() > .7 else "") + "???") @@ -640,18 +652,19 @@ class DelojzaBot: self._log_msg(update) if self.last_downloaded.get(update.message.chat.id) is not None: files, hashtags, tumblr_ids = self.last_downloaded[update.message.chat.id] + out_dir = self.redirects.get(hashtags[0], self.out_dir) for file in files: - update.message.reply_text("Removing \"{}\"!".format(file[len(self.out_dir) + 1:])) + update.message.reply_text("Removing \"{}\"!".format(file[len(out_dir) + 1:])) os.remove(file) parent_dir = os.path.dirname(file) while True: if len(os.listdir(parent_dir)) == 0: update.message.reply_text("Removing directory \"{}\" as it's empty..." - .format(parent_dir[len(self.out_dir) + 1:])) + .format(parent_dir[len(out_dir) + 1:])) os.rmdir(parent_dir) - parent_dir = os.path.dirname(parent_dir) - if parent_dir == self.out_dir: + if parent_dir == out_dir: break + parent_dir = os.path.dirname(parent_dir) if len(tumblr_ids) > 0: plural = "s (all {} of them)".format(len(tumblr_ids)) if len(tumblr_ids) > 1 else "" update.message.reply_text("Also deleting tumblr post{}!".format(plural)) @@ -786,6 +799,7 @@ if __name__ == '__main__': delojza = DelojzaBot(config.get('delojza', 'tg_api_key'), config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")), tmp_dir=config.get('delojza', 'tmp_dir', fallback=tempfile.gettempdir()), + redirects=config.items('redirects'), protected_password=config.get('delojza', 'protected_password', fallback=None), acoustid_key=config.get('delojza', 'acoustid_api_key', fallback=None), tumblr_name=config.get('tumblr', 'blog_name', fallback=None),