add acoustid (?)

2019-05-02 19:18:35 +02:00 · 2019-05-02 19:18:35 +02:00 · 617f6cc6f3
commit 617f6cc6f3
parent 265447ed76
3 changed files with 84 additions and 34 deletions
--- a/delojza.py
+++ b/delojza.py
@ -11,6 +11,7 @@ from datetime import datetime
 from glob import glob
 from operator import itemgetter

+import acoustid
 import filetype
 import markovify
 import mutagen.id3
@ -38,7 +39,7 @@ def datestr(date):


 class DelojzaBot:
-    def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', tumblr_keys=None, markov=None):
+    def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', acoustid_key=None, tumblr_keys=None, markov=None):
        self.logger = logging.getLogger("delojza")

        self.out_dir = out_dir
@ -65,6 +66,8 @@ class DelojzaBot:
        dp.add_handler(CommandHandler("delete", self.tg_delete))
        dp.add_handler(CommandHandler("version", self.tg_version))

+        self.acoustid_key = acoustid_key
+
        if tumblr_keys:
            self.client = pytumblr.TumblrRestClient(*tumblr_keys)

@ -81,31 +84,78 @@ class DelojzaBot:
                return True
        return False

-    @staticmethod
-    def extract_tags(info):
+    def tag_file(self, filepath, message, info=None):
+        if info is None:
+            info = {}
+
        title = None
        artist = None
+        source = None
+
        if 'track' in info:
            title = info['track']
        if 'artist' in info:
            artist = info['artist']
-        if title is None and artist is None and '-' in info['title']:
+
+        if 'track' in info or 'artist' in info:
+            source = "supplied metadata"
+
+        if (title is None or artist is None) and self.acoustid_key:  # parenthesised: never query without a key
+            try:
+                self.logger.debug("Requesting AcoustID for {}".format(filepath))
+                results = sorted(acoustid.match(self.acoustid_key, filepath), key=itemgetter(0), reverse=True)
+                if len(results) > 0:
+                    score, rid, aid_title, aid_artist = results[0]
+                    if score > .8:
+                        title = aid_title
+                        artist = aid_artist
+                        source = "AcoustID ({}%)".format(round(score * 100))
+            except acoustid.NoBackendError:
+                self.logger.warning("chromaprint library/tool not found")
+            except acoustid.FingerprintGenerationError:
+                self.logger.warning("fingerprint could not be calculated")
+            except acoustid.WebServiceError as exc:
+                self.logger.warning("web service request failed: {}".format(exc.message))
+
+        if title is None and artist is None and '-' in info.get("title", ""):
            split = info['title'].split("-")
            artist = split[0]
            title = split[1]
-        if title is None:
-            title = info['title']
-        if 'soundcloud' in info['extractor']:
-            artist = info['uploader']
-        return artist.strip() if artist is not None else None, title.strip() if title is not None else None
+            source = "fallback (artist - title)"

-    def download_ytdl(self, urls, subdir, date, message, extract=False, filename=None):
+        if title is None and 'title' in info:  # test the dict key, not the (None) local variable
+            title = info['title']
+            source = "full title fallback"
+
+        if 'soundcloud' in info.get("extractor", ""):
+            artist = info['uploader']
+            source = "soundcloud \"fallback\""
+
+        artist = artist.strip() if artist else None
+        title = title.strip() if title else None
+
+        message.reply_text("Tagging as \"{}\" by \"{}\"\nvia {}".format(title, artist, source))
+        self.logger.info("Tagging %s w/ %s - %s [%s]...", filepath, title, artist, source)  # lazy %-args
+        try:
+            id3 = mutagen.id3.ID3(filepath)
+        except mutagen.id3.ID3NoHeaderError:
+            mutafile = mutagen.File(filepath)
+            mutafile.add_tags()
+            mutafile.save()
+            id3 = mutagen.id3.ID3(filepath)
+        id3.add(mutagen.id3.TIT2(encoding=3, text=title))
+        if artist:
+            id3.add(mutagen.id3.TOPE(encoding=3, text=artist))
+            id3.add(mutagen.id3.TPE1(encoding=3, text=artist))
+        id3.save()
+
+    def download_ytdl(self, urls, subdir, date, message, audio=False, filename=None):
        ydl_opts = {
            'noplaylist': True,
            'restrictfilenames': True,
            'outtmpl': os.path.join(self.tmp_dir, '{}__%(title)s__%(id)s.%(ext)s'.format(datestr(date)))
        }
-        if extract:
+        if audio:
            ydl_opts['format'] = 'bestaudio/best'
            ydl_opts['postprocessors'] = [{
                'key': 'FFmpegExtractAudio',
@ -121,44 +171,40 @@ class DelojzaBot:
                globbeds = glob(os.path.splitext(filename)[0] + '.*')
                for globbed in globbeds:
                    if globbed.endswith("mp3"):
-                        artist, title = self.extract_tags(info)
-                        message.reply_text("Tagging as \"{}\" by \"{}\"".format(title, artist))
-                        self.logger.info("Tagging %s w/ $s - $s...".format(globbed, title, artist))
-                        try:
-                            id3 = mutagen.id3.ID3(globbed)
-                        except mutagen.id3.ID3NoHeaderError:
-                            mutafile = mutagen.File(globbed)
-                            mutafile.add_tags()
-                            mutafile.save()
-                            id3 = mutagen.id3.ID3(globbed)
-                        id3.add(mutagen.id3.TIT2(encoding=3, text=title))
-                        if artist:
-                            id3.add(mutagen.id3.TOPE(encoding=3, text=artist))
-                            id3.add(mutagen.id3.TPE1(encoding=3, text=artist))
-                        id3.save()
+                        self.tag_file(globbed, message, info=info)
                    self.logger.info("Moving %s to %s..." % (globbed, out_dir))
                    dest = shutil.move(globbed, out_dir)
                    filenames.append(dest)
        return filenames

-    def download_raw(self, urls, subdir, date, _, extract=False, filename=None):
+    def download_raw(self, urls, subdir, date, message, audio=False, filename=None):
        filenames = []
        for url in urls:
            local_filename = os.path.join(self.out_dir, subdir,
                                          "%s__%s" % (datestr(date), filename or url.split('/')[-1]))
+            final_filename = local_filename
+            is_mp3 = local_filename.endswith("mp3")
+
            r = requests.get(url, stream=True)
            with open(local_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
+
            if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]):
                kind = filetype.guess(local_filename)
                if kind is None:
                    self.logger.error("File has no extension and could not be determined!")
                else:
                    self.logger.info('Moving file without extension... %s?' % kind.extension)
-                    shutil.move(local_filename, local_filename + '.' + kind.extension)
-            filenames.append(local_filename)
+                    final_filename = shutil.move(local_filename, local_filename + '.' + kind.extension)
+                    is_mp3 = kind.extension == "mp3"
+
+            filenames.append(final_filename)
+
+            if audio and is_mp3:
+                self.tag_file(final_filename, message)
+
        return filenames

    @staticmethod
@ -205,14 +251,14 @@ class DelojzaBot:
                reply += ' to "' + hashtag + '"'
            reply += '...'

-            extract = False
+            audio = False
            if any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) and download_fn != self.download_raw:
-                extract = True
+                audio = True
                reply += ' (And also guessing you want to extract the audio)'

            message.reply_text(reply)

-            filenames = download_fn(urls, hashtag or '.', message.date, message, extract=extract, filename=filename)
+            filenames = download_fn(urls, hashtag or '.', message.date, message, audio=audio, filename=filename)
            if hashtag == 'TUMBLR' and self.client:
                message.reply_text('(btw, queueing to tumblr)')
                for filename in filenames:
@ -423,6 +469,7 @@ if __name__ == '__main__':
    delojza = DelojzaBot(config.get('delojza', 'tg_api_key'),
                         config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")),
                         tmp_dir=config.get('delojza', 'tmp_dir', fallback="/var/tmp"),
+                         acoustid_key=config.get('delojza', 'acoustid_api_key', fallback=None),
                         tumblr_keys=(config.get('tumblr', 'consumer_key'),
                                      config.get('tumblr', 'consumer_secret'),
                                      config.get('tumblr', 'oauth_key'),
--- a/requirements.in
+++ b/requirements.in
@ -3,5 +3,6 @@ youtube-dl
 requests
 filetype
 mutagen
-markovify
+pyacoustid
 pytumblr
+markovify
--- a/requirements.txt
+++ b/requirements.txt
@ -5,6 +5,7 @@
 #    pip-compile 
 #
 asn1crypto==0.24.0        # via cryptography
+audioread==2.1.6          # via pyacoustid
 certifi==2019.3.9         # via python-telegram-bot, requests
 cffi==1.12.3              # via cryptography
 chardet==3.0.4            # via requests
@ -15,6 +16,7 @@ idna==2.8                 # via requests
 markovify==0.7.1
 mutagen==1.42.0
 oauthlib==3.0.1           # via requests-oauthlib
+pyacoustid==1.1.5
 pycparser==2.19           # via cffi
 python-telegram-bot==11.1.0
 pytumblr==0.0.8