add acoustid (?)

This commit is contained in:
Tomáš Mládek 2019-05-02 19:18:35 +02:00
parent 265447ed76
commit 617f6cc6f3
3 changed files with 84 additions and 34 deletions

View file

@ -11,6 +11,7 @@ from datetime import datetime
from glob import glob from glob import glob
from operator import itemgetter from operator import itemgetter
import acoustid
import filetype import filetype
import markovify import markovify
import mutagen.id3 import mutagen.id3
@ -38,7 +39,7 @@ def datestr(date):
class DelojzaBot: class DelojzaBot:
def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', tumblr_keys=None, markov=None): def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', acoustid_key=None, tumblr_keys=None, markov=None):
self.logger = logging.getLogger("delojza") self.logger = logging.getLogger("delojza")
self.out_dir = out_dir self.out_dir = out_dir
@ -65,6 +66,8 @@ class DelojzaBot:
dp.add_handler(CommandHandler("delete", self.tg_delete)) dp.add_handler(CommandHandler("delete", self.tg_delete))
dp.add_handler(CommandHandler("version", self.tg_version)) dp.add_handler(CommandHandler("version", self.tg_version))
self.acoustid_key = acoustid_key
if tumblr_keys: if tumblr_keys:
self.client = pytumblr.TumblrRestClient(*tumblr_keys) self.client = pytumblr.TumblrRestClient(*tumblr_keys)
@ -81,31 +84,78 @@ class DelojzaBot:
return True return True
return False return False
def tag_file(self, filepath, message, info=None):
    """Work out an artist/title for the MP3 at *filepath* and write ID3 tags.

    Candidate values are tried in this order, each later step only filling
    in what is still missing:

    1. ``info['track']`` / ``info['artist']`` (metadata supplied by the
       downloader),
    2. an AcoustID fingerprint lookup (only when ``self.acoustid_key`` is
       set and the best match scores above 0.8),
    3. splitting ``info['title']`` on its first ``-`` as "artist - title",
    4. the whole ``info['title']`` as the title,
    5. for SoundCloud extractions, ``info['uploader']`` as the artist.

    :param filepath: path of the audio file to tag.
    :param message: object with a ``reply_text(str)`` method; used to tell
        the user which tags were chosen.
    :param info: optional metadata dict (keys read here: ``track``,
        ``artist``, ``title``, ``extractor``, ``uploader``).
    """
    if info is None:
        info = {}

    title = info.get('track')
    artist = info.get('artist')
    source = None
    if 'track' in info or 'artist' in info:
        source = "supplied metadata"

    # Parenthesised on purpose: without the parentheses `and` binds tighter
    # than `or`, and a missing title triggered a lookup even with no API key.
    if (title is None or artist is None) and self.acoustid_key:
        try:
            self.logger.debug("Requesting AcoustID for %s", filepath)
            results = sorted(acoustid.match(self.acoustid_key, filepath),
                             key=itemgetter(0), reverse=True)
            if results:
                score, _recording_id, aid_title, aid_artist = results[0]
                if score > .8:
                    title = aid_title
                    artist = aid_artist
                    source = "AcoustID ({}%)".format(round(score * 100))
        except acoustid.NoBackendError:
            self.logger.warning("chromaprint library/tool not found")
        except acoustid.FingerprintGenerationError:
            self.logger.warning("fingerprint could not be calculated")
        except acoustid.WebServiceError as exc:
            self.logger.warning("web service request failed: %s", exc.message)

    # `or ""` guards against the key being present but set to None.
    full_title = info.get("title") or ""
    if title is None and artist is None and '-' in full_title:
        # Split on the first dash only, so "Artist - Song - Remix" keeps
        # "Song - Remix" as the title instead of dropping the remainder.
        artist, title = full_title.split("-", 1)
        source = "fallback (artist - title)"

    if title is None and 'title' in info:
        title = info['title']
        source = "full title fallback"

    # NOTE(review): indentation is lost in the rendered diff, so whether this
    # check was nested is not visible; it is applied here only while no
    # artist is known, so it cannot clobber a better match.
    uploader = info.get('uploader')
    if artist is None and uploader and 'soundcloud' in (info.get("extractor") or ""):
        artist = uploader
        source = "soundcloud \"fallback\""

    artist = artist.strip() if artist else None
    title = title.strip() if title else None

    if title is None and artist is None:
        # Nothing usable was found; do not write empty frames to the file.
        self.logger.warning("No tags could be determined for %s", filepath)
        message.reply_text("Could not determine any tags for this file.")
        return

    message.reply_text("Tagging as \"{}\" by \"{}\"\nvia {}".format(title, artist, source))
    self.logger.info("Tagging %s w/ %s - %s [%s]...", filepath, title, artist, source)

    try:
        id3 = mutagen.id3.ID3(filepath)
    except mutagen.id3.ID3NoHeaderError:
        # The file has no ID3 header yet: create an empty tag and reopen.
        mutafile = mutagen.File(filepath)
        mutafile.add_tags()
        mutafile.save()
        id3 = mutagen.id3.ID3(filepath)

    # encoding=3 selects UTF-8 text frames.
    if title:
        id3.add(mutagen.id3.TIT2(encoding=3, text=title))
    if artist:
        id3.add(mutagen.id3.TOPE(encoding=3, text=artist))
        id3.add(mutagen.id3.TPE1(encoding=3, text=artist))
    id3.save()

def download_ytdl(self, urls, subdir, date, message, audio=False, filename=None):
ydl_opts = { ydl_opts = {
'noplaylist': True, 'noplaylist': True,
'restrictfilenames': True, 'restrictfilenames': True,
'outtmpl': os.path.join(self.tmp_dir, '{}__%(title)s__%(id)s.%(ext)s'.format(datestr(date))) 'outtmpl': os.path.join(self.tmp_dir, '{}__%(title)s__%(id)s.%(ext)s'.format(datestr(date)))
} }
if extract: if audio:
ydl_opts['format'] = 'bestaudio/best' ydl_opts['format'] = 'bestaudio/best'
ydl_opts['postprocessors'] = [{ ydl_opts['postprocessors'] = [{
'key': 'FFmpegExtractAudio', 'key': 'FFmpegExtractAudio',
@ -121,44 +171,40 @@ class DelojzaBot:
globbeds = glob(os.path.splitext(filename)[0] + '.*') globbeds = glob(os.path.splitext(filename)[0] + '.*')
for globbed in globbeds: for globbed in globbeds:
if globbed.endswith("mp3"): if globbed.endswith("mp3"):
artist, title = self.extract_tags(info) self.tag_file(globbed, message, info=info)
message.reply_text("Tagging as \"{}\" by \"{}\"".format(title, artist))
self.logger.info("Tagging %s w/ $s - $s...".format(globbed, title, artist))
try:
id3 = mutagen.id3.ID3(globbed)
except mutagen.id3.ID3NoHeaderError:
mutafile = mutagen.File(globbed)
mutafile.add_tags()
mutafile.save()
id3 = mutagen.id3.ID3(globbed)
id3.add(mutagen.id3.TIT2(encoding=3, text=title))
if artist:
id3.add(mutagen.id3.TOPE(encoding=3, text=artist))
id3.add(mutagen.id3.TPE1(encoding=3, text=artist))
id3.save()
self.logger.info("Moving %s to %s..." % (globbed, out_dir)) self.logger.info("Moving %s to %s..." % (globbed, out_dir))
dest = shutil.move(globbed, out_dir) dest = shutil.move(globbed, out_dir)
filenames.append(dest) filenames.append(dest)
return filenames return filenames
def download_raw(self, urls, subdir, date, message, audio=False, filename=None):
    """Download each URL verbatim into ``<out_dir>/<subdir>``.

    Files are named ``<datestr(date)>__<name>``, where ``<name>`` is
    *filename* if given, otherwise the last path segment of the URL. When the
    resulting name has no 3-5 character extension, the type is guessed from
    the file contents and the file is renamed with the guessed extension.

    :param urls: iterable of URLs to fetch.
    :param subdir: sub-directory of ``self.out_dir`` to store files in.
    :param date: passed to ``datestr`` to build the filename prefix.
    :param message: forwarded to ``tag_file`` when tagging happens.
    :param audio: when true, downloaded MP3 files are tagged via ``tag_file``.
    :param filename: optional name overriding the one derived from the URL.
    :return: list of final paths of the downloaded files.
    """
    filenames = []
    for url in urls:
        local_filename = os.path.join(self.out_dir, subdir,
                                      "%s__%s" % (datestr(date), filename or url.split('/')[-1]))
        final_filename = local_filename
        is_mp3 = local_filename.endswith("mp3")

        # Context manager so the streamed connection is released even if
        # writing to disk fails part-way through.
        with requests.get(url, stream=True) as r:
            with open(local_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

        if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]):
            kind = filetype.guess(local_filename)
            if kind is None:
                self.logger.error("File has no extension and could not be determined!")
            else:
                self.logger.info('Moving file without extension... %s?', kind.extension)
                final_filename = shutil.move(local_filename, local_filename + '.' + kind.extension)
                is_mp3 = kind.extension == "mp3"

        filenames.append(final_filename)

        if audio and is_mp3:
            self.tag_file(final_filename, message)

    return filenames
@staticmethod @staticmethod
@ -205,14 +251,14 @@ class DelojzaBot:
reply += ' to "' + hashtag + '"' reply += ' to "' + hashtag + '"'
reply += '...' reply += '...'
extract = False audio = False
if any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) and download_fn != self.download_raw: if any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) and download_fn != self.download_raw:
extract = True audio = True
reply += ' (And also guessing you want to extract the audio)' reply += ' (And also guessing you want to extract the audio)'
message.reply_text(reply) message.reply_text(reply)
filenames = download_fn(urls, hashtag or '.', message.date, message, extract=extract, filename=filename) filenames = download_fn(urls, hashtag or '.', message.date, message, audio=audio, filename=filename)
if hashtag == 'TUMBLR' and self.client: if hashtag == 'TUMBLR' and self.client:
message.reply_text('(btw, queueing to tumblr)') message.reply_text('(btw, queueing to tumblr)')
for filename in filenames: for filename in filenames:
@ -423,6 +469,7 @@ if __name__ == '__main__':
delojza = DelojzaBot(config.get('delojza', 'tg_api_key'), delojza = DelojzaBot(config.get('delojza', 'tg_api_key'),
config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")), config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")),
tmp_dir=config.get('delojza', 'tmp_dir', fallback="/var/tmp"), tmp_dir=config.get('delojza', 'tmp_dir', fallback="/var/tmp"),
acoustid_key=config.get('delojza', 'acoustid_api_key'),
tumblr_keys=(config.get('tumblr', 'consumer_key'), tumblr_keys=(config.get('tumblr', 'consumer_key'),
config.get('tumblr', 'consumer_secret'), config.get('tumblr', 'consumer_secret'),
config.get('tumblr', 'oauth_key'), config.get('tumblr', 'oauth_key'),

View file

@ -3,5 +3,6 @@ youtube-dl
requests requests
filetype filetype
mutagen mutagen
markovify pyacoustid
pytumblr pytumblr
markovify

View file

@ -5,6 +5,7 @@
# pip-compile # pip-compile
# #
asn1crypto==0.24.0 # via cryptography asn1crypto==0.24.0 # via cryptography
audioread==2.1.6 # via pyacoustid
certifi==2019.3.9 # via python-telegram-bot, requests certifi==2019.3.9 # via python-telegram-bot, requests
cffi==1.12.3 # via cryptography cffi==1.12.3 # via cryptography
chardet==3.0.4 # via requests chardet==3.0.4 # via requests
@ -15,6 +16,7 @@ idna==2.8 # via requests
markovify==0.7.1 markovify==0.7.1
mutagen==1.42.0 mutagen==1.42.0
oauthlib==3.0.1 # via requests-oauthlib oauthlib==3.0.1 # via requests-oauthlib
pyacoustid==1.1.5
pycparser==2.19 # via cffi pycparser==2.19 # via cffi
python-telegram-bot==11.1.0 python-telegram-bot==11.1.0
pytumblr==0.0.8 pytumblr==0.0.8