add acoustid (?)
This commit is contained in:
parent
265447ed76
commit
617f6cc6f3
3 changed files with 84 additions and 34 deletions
111
delojza.py
111
delojza.py
|
@ -11,6 +11,7 @@ from datetime import datetime
|
||||||
from glob import glob
|
from glob import glob
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
|
|
||||||
|
import acoustid
|
||||||
import filetype
|
import filetype
|
||||||
import markovify
|
import markovify
|
||||||
import mutagen.id3
|
import mutagen.id3
|
||||||
|
@ -38,7 +39,7 @@ def datestr(date):
|
||||||
|
|
||||||
|
|
||||||
class DelojzaBot:
|
class DelojzaBot:
|
||||||
def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', tumblr_keys=None, markov=None):
|
def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', acoustid_key=None, tumblr_keys=None, markov=None):
|
||||||
self.logger = logging.getLogger("delojza")
|
self.logger = logging.getLogger("delojza")
|
||||||
|
|
||||||
self.out_dir = out_dir
|
self.out_dir = out_dir
|
||||||
|
@ -65,6 +66,8 @@ class DelojzaBot:
|
||||||
dp.add_handler(CommandHandler("delete", self.tg_delete))
|
dp.add_handler(CommandHandler("delete", self.tg_delete))
|
||||||
dp.add_handler(CommandHandler("version", self.tg_version))
|
dp.add_handler(CommandHandler("version", self.tg_version))
|
||||||
|
|
||||||
|
self.acoustid_key = acoustid_key
|
||||||
|
|
||||||
if tumblr_keys:
|
if tumblr_keys:
|
||||||
self.client = pytumblr.TumblrRestClient(*tumblr_keys)
|
self.client = pytumblr.TumblrRestClient(*tumblr_keys)
|
||||||
|
|
||||||
|
@ -81,31 +84,78 @@ class DelojzaBot:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@staticmethod
|
def tag_file(self, filepath, message, info=None):
    """Guess artist/title for an audio file and write them as ID3 tags.

    Candidate sources are tried in this order, later ones only filling in
    what earlier ones left missing:

    1. ``info['track']`` / ``info['artist']`` ("supplied metadata").
    2. An AcoustID lookup of the file, if ``self.acoustid_key`` is set and
       the best match scores above 0.8.
    3. Splitting ``info['title']`` on the first ``-`` into artist and title.
    4. The whole ``info['title']`` as the title.

    Finally, for SoundCloud extractions the uploader is used as the artist.

    :param filepath: path of the file to fingerprint and tag.
    :param message: object whose ``reply_text(str)`` is called to report
        the chosen tags back to the user.
    :param info: optional metadata dict; the keys read are ``track``,
        ``artist``, ``title``, ``extractor`` and ``uploader``
        (presumably a youtube-dl info dict -- confirm against the caller).
    :return: None; the file at ``filepath`` is modified in place.
    """
    if info is None:
        info = {}

    title = info.get('track')
    artist = info.get('artist')
    source = None

    if 'track' in info or 'artist' in info:
        source = "supplied metadata"

    # Parenthesised on purpose: `and` binds tighter than `or`, so without the
    # parentheses a missing title triggered a lookup even with no API key.
    if (title is None or artist is None) and self.acoustid_key:
        try:
            self.logger.debug("Requesting AcoustID for {}".format(filepath))
            # Each match is unpacked below as (score, recording id, title,
            # artist); only the highest-scoring one is of interest.
            best = max(acoustid.match(self.acoustid_key, filepath),
                       key=itemgetter(0), default=None)
            if best is not None:
                score, _rid, aid_title, aid_artist = best
                if score > .8:
                    title = aid_title
                    artist = aid_artist
                    source = "AcoustID ({}%)".format(round(score * 100))
        except acoustid.NoBackendError:
            self.logger.warning("chromaprint library/tool not found")
        except acoustid.FingerprintGenerationError:
            self.logger.warning("fingerprint could not be calculated")
        except acoustid.WebServiceError as exc:
            self.logger.warning("web service request failed: {}".format(exc.message))

    # `or ""` also covers an explicit None value, not just a missing key.
    full_title = info.get("title") or ""

    if title is None and artist is None and '-' in full_title:
        # Split on the first hyphen only, so "Artist - Song - Remix" keeps
        # "Song - Remix" as the title instead of dropping the remainder.
        artist, _, title = full_title.partition("-")
        source = "fallback (artist - title)"

    # The original tested `title in info` (i.e. `None in info`), which is
    # never true, so this fallback could not fire.
    if title is None and full_title:
        title = full_title
        source = "full title fallback"

    # NOTE(review): this overrides any artist found above, including an
    # AcoustID match -- kept as in the original; confirm that is intended.
    if 'soundcloud' in (info.get("extractor") or "") and info.get('uploader'):
        artist = info['uploader']
        source = "soundcloud \"fallback\""

    artist = artist.strip() if artist else None
    title = title.strip() if title else None

    message.reply_text("Tagging as \"{}\" by \"{}\"\nvia {}".format(title, artist, source))
    # The original mixed `%s`/`$s` placeholders with str.format(), so the
    # values were never interpolated; pass them as lazy logging arguments.
    self.logger.info("Tagging %s w/ %s - %s [%s]...", filepath, title, artist, source)

    try:
        id3 = mutagen.id3.ID3(filepath)
    except mutagen.id3.ID3NoHeaderError:
        # No ID3 header yet: create an empty tag, then reopen it.
        mutafile = mutagen.File(filepath)
        mutafile.add_tags()
        mutafile.save()
        id3 = mutagen.id3.ID3(filepath)

    # Only write frames that have a value; a title can still be missing
    # when no metadata was supplied and AcoustID found nothing.
    if title:
        id3.add(mutagen.id3.TIT2(encoding=3, text=title))
    if artist:
        id3.add(mutagen.id3.TOPE(encoding=3, text=artist))
        id3.add(mutagen.id3.TPE1(encoding=3, text=artist))
    id3.save()
|
||||||
|
|
||||||
|
def download_ytdl(self, urls, subdir, date, message, audio=False, filename=None):
|
||||||
ydl_opts = {
|
ydl_opts = {
|
||||||
'noplaylist': True,
|
'noplaylist': True,
|
||||||
'restrictfilenames': True,
|
'restrictfilenames': True,
|
||||||
'outtmpl': os.path.join(self.tmp_dir, '{}__%(title)s__%(id)s.%(ext)s'.format(datestr(date)))
|
'outtmpl': os.path.join(self.tmp_dir, '{}__%(title)s__%(id)s.%(ext)s'.format(datestr(date)))
|
||||||
}
|
}
|
||||||
if extract:
|
if audio:
|
||||||
ydl_opts['format'] = 'bestaudio/best'
|
ydl_opts['format'] = 'bestaudio/best'
|
||||||
ydl_opts['postprocessors'] = [{
|
ydl_opts['postprocessors'] = [{
|
||||||
'key': 'FFmpegExtractAudio',
|
'key': 'FFmpegExtractAudio',
|
||||||
|
@ -121,44 +171,40 @@ class DelojzaBot:
|
||||||
globbeds = glob(os.path.splitext(filename)[0] + '.*')
|
globbeds = glob(os.path.splitext(filename)[0] + '.*')
|
||||||
for globbed in globbeds:
|
for globbed in globbeds:
|
||||||
if globbed.endswith("mp3"):
|
if globbed.endswith("mp3"):
|
||||||
artist, title = self.extract_tags(info)
|
self.tag_file(globbed, message, info=info)
|
||||||
message.reply_text("Tagging as \"{}\" by \"{}\"".format(title, artist))
|
|
||||||
self.logger.info("Tagging %s w/ $s - $s...".format(globbed, title, artist))
|
|
||||||
try:
|
|
||||||
id3 = mutagen.id3.ID3(globbed)
|
|
||||||
except mutagen.id3.ID3NoHeaderError:
|
|
||||||
mutafile = mutagen.File(globbed)
|
|
||||||
mutafile.add_tags()
|
|
||||||
mutafile.save()
|
|
||||||
id3 = mutagen.id3.ID3(globbed)
|
|
||||||
id3.add(mutagen.id3.TIT2(encoding=3, text=title))
|
|
||||||
if artist:
|
|
||||||
id3.add(mutagen.id3.TOPE(encoding=3, text=artist))
|
|
||||||
id3.add(mutagen.id3.TPE1(encoding=3, text=artist))
|
|
||||||
id3.save()
|
|
||||||
self.logger.info("Moving %s to %s..." % (globbed, out_dir))
|
self.logger.info("Moving %s to %s..." % (globbed, out_dir))
|
||||||
dest = shutil.move(globbed, out_dir)
|
dest = shutil.move(globbed, out_dir)
|
||||||
filenames.append(dest)
|
filenames.append(dest)
|
||||||
return filenames
|
return filenames
|
||||||
|
|
||||||
def download_raw(self, urls, subdir, date, _, extract=False, filename=None):
|
def download_raw(self, urls, subdir, date, message, audio=False, filename=None):
    """Download each URL verbatim into ``self.out_dir/subdir``.

    Files are named ``<datestr(date)>__<name>``, where ``<name>`` is
    ``filename`` if given, otherwise the last path segment of the URL.
    If the saved name has no 3-5 character extension, the type is guessed
    from the content and the file is renamed with that extension.

    :param urls: iterable of URLs to fetch.
    :param subdir: sub-directory of ``self.out_dir`` to store files in.
    :param date: value passed to ``datestr`` to build the filename prefix.
    :param message: passed through to ``self.tag_file`` when tagging.
    :param audio: if true, downloaded mp3 files are tagged via
        ``self.tag_file``.
    :param filename: optional name overriding the one derived from the URL.
    :return: list of final paths of the downloaded files.
    """
    filenames = []
    for url in urls:
        local_filename = os.path.join(self.out_dir, subdir,
                                      "%s__%s" % (datestr(date), filename or url.split('/')[-1]))
        final_filename = local_filename
        is_mp3 = local_filename.endswith("mp3")

        # Use the response as a context manager so the streamed connection
        # is released even if writing the file fails part-way through.
        # NOTE(review): the HTTP status is not checked, so an error page
        # would be saved like any other body -- confirm that is acceptable.
        with requests.get(url, stream=True) as r:
            with open(local_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

        if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]):
            kind = filetype.guess(local_filename)
            if kind is None:
                self.logger.error("File has no extension and could not be determined!")
            else:
                self.logger.info('Moving file without extension... %s?' % kind.extension)
                final_filename = shutil.move(local_filename, local_filename + '.' + kind.extension)
                # The name gave no hint, so rely on the detected type.
                is_mp3 = kind.extension == "mp3"

        filenames.append(final_filename)

        if audio and is_mp3:
            self.tag_file(final_filename, message)

    return filenames
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -205,14 +251,14 @@ class DelojzaBot:
|
||||||
reply += ' to "' + hashtag + '"'
|
reply += ' to "' + hashtag + '"'
|
||||||
reply += '...'
|
reply += '...'
|
||||||
|
|
||||||
extract = False
|
audio = False
|
||||||
if any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) and download_fn != self.download_raw:
|
if any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) and download_fn != self.download_raw:
|
||||||
extract = True
|
audio = True
|
||||||
reply += ' (And also guessing you want to extract the audio)'
|
reply += ' (And also guessing you want to extract the audio)'
|
||||||
|
|
||||||
message.reply_text(reply)
|
message.reply_text(reply)
|
||||||
|
|
||||||
filenames = download_fn(urls, hashtag or '.', message.date, message, extract=extract, filename=filename)
|
filenames = download_fn(urls, hashtag or '.', message.date, message, audio=audio, filename=filename)
|
||||||
if hashtag == 'TUMBLR' and self.client:
|
if hashtag == 'TUMBLR' and self.client:
|
||||||
message.reply_text('(btw, queueing to tumblr)')
|
message.reply_text('(btw, queueing to tumblr)')
|
||||||
for filename in filenames:
|
for filename in filenames:
|
||||||
|
@ -423,6 +469,7 @@ if __name__ == '__main__':
|
||||||
delojza = DelojzaBot(config.get('delojza', 'tg_api_key'),
|
delojza = DelojzaBot(config.get('delojza', 'tg_api_key'),
|
||||||
config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")),
|
config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")),
|
||||||
tmp_dir=config.get('delojza', 'tmp_dir', fallback="/var/tmp"),
|
tmp_dir=config.get('delojza', 'tmp_dir', fallback="/var/tmp"),
|
||||||
|
acoustid_key=config.get('delojza', 'acoustid_api_key'),
|
||||||
tumblr_keys=(config.get('tumblr', 'consumer_key'),
|
tumblr_keys=(config.get('tumblr', 'consumer_key'),
|
||||||
config.get('tumblr', 'consumer_secret'),
|
config.get('tumblr', 'consumer_secret'),
|
||||||
config.get('tumblr', 'oauth_key'),
|
config.get('tumblr', 'oauth_key'),
|
||||||
|
|
|
@ -3,5 +3,6 @@ youtube-dl
|
||||||
requests
|
requests
|
||||||
filetype
|
filetype
|
||||||
mutagen
|
mutagen
|
||||||
markovify
|
pyacoustid
|
||||||
pytumblr
|
pytumblr
|
||||||
|
markovify
|
|
@ -5,6 +5,7 @@
|
||||||
# pip-compile
|
# pip-compile
|
||||||
#
|
#
|
||||||
asn1crypto==0.24.0 # via cryptography
|
asn1crypto==0.24.0 # via cryptography
|
||||||
|
audioread==2.1.6 # via pyacoustid
|
||||||
certifi==2019.3.9 # via python-telegram-bot, requests
|
certifi==2019.3.9 # via python-telegram-bot, requests
|
||||||
cffi==1.12.3 # via cryptography
|
cffi==1.12.3 # via cryptography
|
||||||
chardet==3.0.4 # via requests
|
chardet==3.0.4 # via requests
|
||||||
|
@ -15,6 +16,7 @@ idna==2.8 # via requests
|
||||||
markovify==0.7.1
|
markovify==0.7.1
|
||||||
mutagen==1.42.0
|
mutagen==1.42.0
|
||||||
oauthlib==3.0.1 # via requests-oauthlib
|
oauthlib==3.0.1 # via requests-oauthlib
|
||||||
|
pyacoustid==1.1.5
|
||||||
pycparser==2.19 # via cffi
|
pycparser==2.19 # via cffi
|
||||||
python-telegram-bot==11.1.0
|
python-telegram-bot==11.1.0
|
||||||
pytumblr==0.0.8
|
pytumblr==0.0.8
|
||||||
|
|
Loading…
Reference in a new issue