diff --git a/delojza.py b/delojza.py
index 1055c96..2a139ba 100755
--- a/delojza.py
+++ b/delojza.py
@@ -11,6 +11,7 @@ from datetime import datetime
 from glob import glob
 from operator import itemgetter
 
+import acoustid
 import filetype
 import markovify
 import mutagen.id3
@@ -38,7 +39,7 @@ def datestr(date):
 
 
 class DelojzaBot:
-    def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', tumblr_keys=None, markov=None):
+    def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', acoustid_key=None, tumblr_keys=None, markov=None):
         self.logger = logging.getLogger("delojza")
 
         self.out_dir = out_dir
@@ -65,6 +66,8 @@ class DelojzaBot:
         dp.add_handler(CommandHandler("delete", self.tg_delete))
         dp.add_handler(CommandHandler("version", self.tg_version))
 
+        self.acoustid_key = acoustid_key
+
         if tumblr_keys:
             self.client = pytumblr.TumblrRestClient(*tumblr_keys)
 
@@ -81,31 +84,87 @@ class DelojzaBot:
                 return True
         return False
 
-    @staticmethod
-    def extract_tags(info):
+    def tag_file(self, filepath, message, info=None):
+        """Work out artist and title for `filepath` and write them as ID3 tags.
+
+        Candidates come from `info` ('track'/'artist'); if either is missing
+        and an AcoustID key is configured, from the best AcoustID match scoring
+        above 0.8; then from `info['title']`. SoundCloud extractions always use
+        the uploader as artist. The result is sent via `message.reply_text`.
+        """
+        if info is None:
+            info = {}
+
         title = None
         artist = None
+        source = None
+
         if 'track' in info:
             title = info['track']
         if 'artist' in info:
             artist = info['artist']
-        if title is None and artist is None and '-' in info['title']:
-            split = info['title'].split("-")
+
+        if 'track' in info or 'artist' in info:
+            source = "supplied metadata"
+
+        # Parenthesised on purpose: `and` binds tighter than `or`, and the lookup is pointless without a key.
+        if (title is None or artist is None) and self.acoustid_key:
+            try:
+                self.logger.debug("Requesting AcoustID for {}".format(filepath))
+                results = sorted(acoustid.match(self.acoustid_key, filepath), key=itemgetter(0), reverse=True)
+                if len(results) > 0:
+                    score, rid, aid_title, aid_artist = results[0]
+                    if score > .8:
+                        title = aid_title
+                        artist = aid_artist
+                        source = "AcoustID ({}%)".format(round(score * 100))
+            except acoustid.NoBackendError:
+                self.logger.warning("chromaprint library/tool not found")
+            except acoustid.FingerprintGenerationError:
+                self.logger.warning("fingerprint could not be calculated")
+            except acoustid.WebServiceError as exc:
+                self.logger.warning("web service request failed: {}".format(exc.message))
+
+        if title is None and artist is None and '-' in info.get("title", ""):
+            split = info['title'].split("-", 1)
             artist = split[0]
             title = split[1]
-        if title is None:
-            title = info['title']
-        if 'soundcloud' in info['extractor']:
-            artist = info['uploader']
-        return artist.strip() if artist is not None else None, title.strip() if title is not None else None
+            source = "fallback (artist - title)"
 
-    def download_ytdl(self, urls, subdir, date, message, extract=False, filename=None):
+        if title is None and 'title' in info:
+            title = info['title']
+            source = "full title fallback"
+
+        if 'soundcloud' in info.get("extractor", ""):
+            artist = info['uploader']
+            source = "soundcloud \"fallback\""
+
+        artist = artist.strip() if artist else None
+        title = title.strip() if title else None
+
+        message.reply_text("Tagging as \"{}\" by \"{}\"\nvia {}".format(title, artist, source))
+        self.logger.info("Tagging %s w/ %s - %s [%s]...", filepath, title, artist, source)
+        try:
+            id3 = mutagen.id3.ID3(filepath)
+        except mutagen.id3.ID3NoHeaderError:
+            mutafile = mutagen.File(filepath)
+            mutafile.add_tags()
+            mutafile.save()
+            id3 = mutagen.id3.ID3(filepath)
+        if title:
+            id3.add(mutagen.id3.TIT2(encoding=3, text=title))
+        if artist:
+            id3.add(mutagen.id3.TOPE(encoding=3, text=artist))
+            id3.add(mutagen.id3.TPE1(encoding=3, text=artist))
+        id3.save()
+
+    def download_ytdl(self, urls, subdir, date, message, audio=False, filename=None):
         ydl_opts = {
             'noplaylist': True,
             'restrictfilenames': True,
             'outtmpl': os.path.join(self.tmp_dir, '{}__%(title)s__%(id)s.%(ext)s'.format(datestr(date)))
         }
-        if extract:
+        if audio:
             ydl_opts['format'] = 'bestaudio/best'
             ydl_opts['postprocessors'] = [{
                 'key': 'FFmpegExtractAudio',
@@ -121,44 +180,41 @@ class DelojzaBot:
                 globbeds = glob(os.path.splitext(filename)[0] + '.*')
                 for globbed in globbeds:
                     if globbed.endswith("mp3"):
-                        artist, title = self.extract_tags(info)
-                        message.reply_text("Tagging as \"{}\" by \"{}\"".format(title, artist))
-                        self.logger.info("Tagging %s w/ $s - $s...".format(globbed, title, artist))
-                        try:
-                            id3 = mutagen.id3.ID3(globbed)
-                        except mutagen.id3.ID3NoHeaderError:
-                            mutafile = mutagen.File(globbed)
-                            mutafile.add_tags()
-                            mutafile.save()
-                            id3 = mutagen.id3.ID3(globbed)
-                        id3.add(mutagen.id3.TIT2(encoding=3, text=title))
-                        if artist:
-                            id3.add(mutagen.id3.TOPE(encoding=3, text=artist))
-                            id3.add(mutagen.id3.TPE1(encoding=3, text=artist))
-                        id3.save()
+                        self.tag_file(globbed, message, info=info)
                     self.logger.info("Moving %s to %s..." % (globbed, out_dir))
                     dest = shutil.move(globbed, out_dir)
                     filenames.append(dest)
         return filenames
 
-    def download_raw(self, urls, subdir, date, _, extract=False, filename=None):
+    def download_raw(self, urls, subdir, date, message, audio=False, filename=None):
+        """Download each URL into `subdir` of the output directory; tag MP3 files when `audio` is set."""
         filenames = []
         for url in urls:
             local_filename = os.path.join(self.out_dir, subdir,
                                           "%s__%s" % (datestr(date), filename or url.split('/')[-1]))
+            final_filename = local_filename
+            is_mp3 = local_filename.endswith("mp3")
+
             r = requests.get(url, stream=True)
             with open(local_filename, 'wb') as f:
                 for chunk in r.iter_content(chunk_size=1024):
                     if chunk:
                         f.write(chunk)
+
             if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]):
                 kind = filetype.guess(local_filename)
                 if kind is None:
                     self.logger.error("File has no extension and could not be determined!")
                 else:
                     self.logger.info('Moving file without extension... %s?' % kind.extension)
-                    shutil.move(local_filename, local_filename + '.' + kind.extension)
-            filenames.append(local_filename)
+                    final_filename = shutil.move(local_filename, local_filename + '.' + kind.extension)
+                    is_mp3 = kind.extension == "mp3"
+
+            filenames.append(final_filename)
+
+            if audio and is_mp3:
+                self.tag_file(final_filename, message)
+
         return filenames
 
     @staticmethod
@@ -205,14 +261,14 @@ class DelojzaBot:
                 reply += ' to "' + hashtag + '"'
             reply += '...'
 
-            extract = False
+            audio = False
             if any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) and download_fn != self.download_raw:
-                extract = True
+                audio = True
                 reply += ' (And also guessing you want to extract the audio)'
 
             message.reply_text(reply)
 
-            filenames = download_fn(urls, hashtag or '.', message.date, message, extract=extract, filename=filename)
+            filenames = download_fn(urls, hashtag or '.', message.date, message, audio=audio, filename=filename)
             if hashtag == 'TUMBLR' and self.client:
                 message.reply_text('(btw, queueing to tumblr)')
                 for filename in filenames:
@@ -423,6 +479,7 @@ if __name__ == '__main__':
     delojza = DelojzaBot(config.get('delojza', 'tg_api_key'),
                          config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")),
                          tmp_dir=config.get('delojza', 'tmp_dir', fallback="/var/tmp"),
+                         acoustid_key=config.get('delojza', 'acoustid_api_key', fallback=None),
                          tumblr_keys=(config.get('tumblr', 'consumer_key'),
                                       config.get('tumblr', 'consumer_secret'),
                                       config.get('tumblr', 'oauth_key'),
diff --git a/requirements.in b/requirements.in
index 119de12..89b7c9e 100644
--- a/requirements.in
+++ b/requirements.in
@@ -3,5 +3,6 @@ youtube-dl
 requests
 filetype
 mutagen
-markovify
-pytumblr
\ No newline at end of file
+pyacoustid
+pytumblr
+markovify
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 5afdd63..93dfd7e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,6 +5,7 @@
 #    pip-compile
 #
 asn1crypto==0.24.0        # via cryptography
+audioread==2.1.6          # via pyacoustid
 certifi==2019.3.9         # via python-telegram-bot, requests
 cffi==1.12.3              # via cryptography
 chardet==3.0.4            # via requests
@@ -15,6 +16,7 @@ idna==2.8                 # via requests
 markovify==0.7.1
 mutagen==1.42.0
 oauthlib==3.0.1           # via requests-oauthlib
+pyacoustid==1.1.5
 pycparser==2.19           # via cffi
 python-telegram-bot==11.1.0
 pytumblr==0.0.8