download into subdirectories when multiple hashtags present (fix #2)
parent 3368f27588, commit 723008e451
1 changed file with 49 additions and 47 deletions: delojza.py (+49, -47)
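In short: instead of one hashtag naming one subdirectory, every hashtag on the message now becomes one level of the output path. A minimal sketch of the new path logic in isolation, assuming POSIX paths; the sample tags are made up, and os.makedirs(..., exist_ok=True) stands in for the repo's mkdir_p helper:

import os

def build_out_path(out_dir, hashtags):
    # "#witch #house" -> <out_dir>/WITCH/HOUSE, mirroring os.path.join(self.out_dir, *hashtags)
    out_path = os.path.join(os.path.abspath(out_dir), *hashtags)
    os.makedirs(out_path, exist_ok=True)  # stand-in for the repo's mkdir_p
    return out_path

print(build_out_path("/tmp/delojza", ["WITCH", "HOUSE"]))  # /tmp/delojza/WITCH/HOUSE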
@@ -42,7 +42,7 @@ class DelojzaBot:
     def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', acoustid_key=None, tumblr_keys=None, markov=None):
         self.logger = logging.getLogger("delojza")

-        self.out_dir = out_dir
+        self.out_dir = os.path.abspath(out_dir)
         self.logger.debug('OUT_DIR: ' + out_dir)
         self.tmp_dir = tmp_dir
         self.logger.debug('TMP_DIR: ' + tmp_dir)
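Why the os.path.abspath change matters: the cleanup code further down slices paths with len(self.out_dir) and stops its walk-up when a parent equals self.out_dir, which only lines up if out_dir is stored in one canonical absolute form. A tiny illustration with made-up paths:

import os

out_dir = os.path.abspath("downloads")            # e.g. /home/user/downloads
file = os.path.join(out_dir, "WITCH", "a.mp3")

print(file[len(out_dir):])                        # /WITCH/a.mp3 -- a stable, relative-looking label
print(os.path.dirname(os.path.dirname(file)) == out_dir)  # True: the walk-up loop knows where to stop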
@@ -72,7 +72,7 @@ class DelojzaBot:
             self.client = pytumblr.TumblrRestClient(*tumblr_keys)

         self.last_downloaded = []
-        self.last_hashtag = None
+        self.last_hashtags = None

     @staticmethod
     def ytdl_can(url):
@@ -153,7 +153,7 @@ class DelojzaBot:
             id3.add(mutagen.id3.TPE1(encoding=3, text=artist))
         id3.save()

-    def download_ytdl(self, urls, subdir, date, message, audio=False, filename=None):
+    def download_ytdl(self, urls, out_path, date, message, audio=False, filename=None):
         ydl_opts = {
             'noplaylist': True,
             'restrictfilenames': True,
@@ -169,23 +169,21 @@ class DelojzaBot:
         filenames = []
         with youtube_dl.YoutubeDL(ydl_opts) as ydl:
             ydl.download(urls)
-            out_dir = os.path.join(self.out_dir, subdir)
             for info in [ydl.extract_info(url, download=False) for url in urls]:
                 filename = ydl.prepare_filename(info)
                 globbeds = glob(os.path.splitext(filename)[0] + '.*')
                 for globbed in globbeds:
                     if globbed.endswith("mp3"):
                         self.tag_file(globbed, message, info=info)
-                    self.logger.info("Moving %s to %s..." % (globbed, out_dir))
-                    dest = shutil.move(globbed, out_dir)
+                    self.logger.info("Moving %s to %s..." % (globbed, out_path))
+                    dest = shutil.move(globbed, out_path)
                     filenames.append(dest)
         return filenames

-    def download_raw(self, urls, subdir, date, message, audio=False, filename=None):
+    def download_raw(self, urls, out_path, date, message, audio=False, filename=None):
         filenames = []
         for url in urls:
-            local_filename = os.path.join(self.out_dir, subdir,
-                                          "%s__%s" % (datestr(date), filename or url.split('/')[-1]))
+            local_filename = os.path.join(out_path, "%s__%s" % (datestr(date), filename or url.split('/')[-1]))
             final_filename = local_filename
             is_mp3 = local_filename.endswith("mp3")

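Both download_ytdl and download_raw now receive the already-joined out_path instead of a subdir, so neither re-joins self.out_dir itself. A rough sketch of how download_raw names its target under that path; datestr is the repo's own helper, and the date format used below is only an assumption for illustration:

import os
from datetime import datetime

def datestr(date):
    # assumption: stand-in for the repo's datestr helper, format not taken from the source
    return date.strftime("%Y-%m-%d")

def raw_target(out_path, url, date, filename=None):
    # <out_path>/<date>__<explicit filename, or the last URL segment>
    return os.path.join(out_path, "%s__%s" % (datestr(date), filename or url.split('/')[-1]))

print(raw_target("/tmp/delojza/WITCH", "http://example.com/pic.jpg", datetime(2018, 1, 2)))
# /tmp/delojza/WITCH/2018-01-02__pic.jpg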
@@ -212,61 +210,61 @@ class DelojzaBot:
         return filenames

     @staticmethod
-    def extract_first_hashtag(message):
+    def extract_hashtags(message):
         hashtags = list(map(message.parse_entity,
                             list(filter(lambda e: e.type == 'hashtag', message.entities))))
         hashtags += list(map(message.parse_caption_entity,
                              list(filter(lambda e: e.type == 'hashtag', message.caption_entities))))
         if len(hashtags) > 0:
-            hashtag = hashtags[0][1:].upper()
-            if "PRAS" in hashtag:
-                hashtag = "PRAS"
-            return hashtag
+            hashtags = [hashtag[1:].upper() for hashtag in hashtags]
+            for i, hashtag in enumerate(hashtags):
+                if "PRAS" in hashtag:
+                    hashtags[i] = "PRAS"
+        return hashtags

-    def get_hashtag(self, message):
-        hashtag = self.extract_first_hashtag(message)
-        if hashtag is None:
-            if self.last_hashtag is not None and self.last_hashtag[0] == message.from_user:
-                hashtag = self.last_hashtag[1]
-            self.last_hashtag = None
-        return hashtag
+    def get_hashtags(self, message):
+        hashtags = self.extract_hashtags(message)
+        if len(hashtags) == 0:
+            if self.last_hashtags is not None and self.last_hashtags[0] == message.from_user:
+                hashtags = self.last_hashtags[1]
+            self.last_hashtags = None
+        return hashtags

     def tg_handle_hashtag(self, bot, update):
-        hashtag = self.extract_first_hashtag(update.message)
+        hashtags = self.extract_hashtags(update.message)

         if update.message.reply_to_message:
-            self.handle_tg_message(update.message.reply_to_message, bot, hashtag)
-            self.handle_urls(update.message.reply_to_message, hashtag)
+            self.handle_tg_message(update.message.reply_to_message, bot, hashtags)
+            self.handle_urls(update.message.reply_to_message, hashtags)
         else:
-            self.last_hashtag = (update.message.from_user, hashtag)
+            self.last_hashtags = (update.message.from_user, hashtags)

     # noinspection PyBroadException
-    def handle(self, urls, message, hashtag, download_fn, filename=None):
+    def handle(self, urls, message, hashtags, download_fn, filename=None):
         try:
-            if hashtag is None:
+            if len(hashtags) == 0:
                 self.logger.info("Ignoring %s due to no hashtag present..." % urls)
                 return

-            self.logger.info("Downloading %s under '%s'" % (urls, hashtag))
+            self.logger.info("Downloading %s under '%s'" % (urls, "/".join(hashtags)))

-            reply = 'Downloading'
-            if hashtag:
-                mkdir_p(os.path.join(self.out_dir, hashtag))
-                reply += ' to "' + hashtag + '"'
-            reply += '...'
+            out_path = os.path.join(self.out_dir, *hashtags)
+            mkdir_p(out_path)

-            audio = any([tag in hashtag for tag in ('AUDIO', 'RADIO')])
+            reply = 'Downloading to "{}"...'.format("/".join(hashtags))

+            audio = any([any([tag in hashtag for tag in ('AUDIO', 'RADIO')]) for hashtag in hashtags])
             if audio and download_fn != self.download_raw:
                 reply += ' (And also guessing you want to extract the audio)'

             message.reply_text(reply)

-            filenames = download_fn(urls, hashtag or '.', message.date, message, audio=audio, filename=filename)
-            if hashtag == 'TUMBLR' and self.client:
+            filenames = download_fn(urls, out_path, message.date, message, audio=audio, filename=filename)
+            if hashtags[0] == 'TUMBLR' and self.client:
                 message.reply_text('(btw, queueing to tumblr)')
                 for filename in filenames:
                     self.client.create_photo('kunsaxan', state="queue", data=filename)
-            elif hashtag == 'TUMBLR_NOW' and self.client:
+            elif hashtags[0] == 'TUMBLR_NOW' and self.client:
                 message.reply_text('(btw, ***FIRING TO TUMBLR RIGHT AWAY***)',
                                    parse_mode=telegram.ParseMode.MARKDOWN)
                 for filename in filenames:
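extract_hashtags keeps every hashtag rather than just the first, upper-cases them, and still collapses anything containing "PRAS" into a single PRAS tag; handle() then checks all of them for AUDIO/RADIO. The same normalization on plain strings, without a Telegram message object (the sample tags are made up):

def normalize_hashtags(raw):
    # '#Audio' -> 'AUDIO'; any tag containing 'PRAS' collapses to 'PRAS'
    hashtags = [tag[1:].upper() for tag in raw]
    for i, hashtag in enumerate(hashtags):
        if "PRAS" in hashtag:
            hashtags[i] = "PRAS"
    return hashtags

tags = normalize_hashtags(["#prasopes", "#Radio"])
print(tags)                                                            # ['PRAS', 'RADIO']
print(any(any(t in tag for t in ('AUDIO', 'RADIO')) for tag in tags))  # True -> extract audio

Note that handle() only looks at hashtags[0] for the TUMBLR/TUMBLR_NOW checks, so the first tag keeps its old role as the primary one.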
@@ -279,23 +277,23 @@ class DelojzaBot:
             message.reply_text("Something is FUCKED: %s" % exc_value)

     def tg_handle_url(self, _, update):
-        self.handle_urls(update.message, self.get_hashtag(update.message))
+        self.handle_urls(update.message, self.get_hashtags(update.message))

-    def handle_urls(self, message, hashtag):
+    def handle_urls(self, message, hashtags):
         urls = list(map(lambda e: message.parse_entity(e),
                         filter(lambda e: e.type == 'url', message.entities)))
         ytdl_urls = [url for url in urls if self.ytdl_can(url)]
         normal_urls = [url for url in urls if not self.ytdl_can(url)]
         if len(ytdl_urls) > 0:
-            self.handle(ytdl_urls, message, hashtag, self.download_ytdl)
+            self.handle(ytdl_urls, message, hashtags, self.download_ytdl)
         if len(normal_urls) > 0:
             image_urls = [url for url in normal_urls if "image" in requests.head(url).headers.get("Content-Type", "")]
             if len(image_urls) > 0:
-                self.handle(image_urls, message, hashtag, self.download_raw)
+                self.handle(image_urls, message, hashtags, self.download_raw)

     # noinspection PyBroadException
     def tg_handle_rest(self, bot, update):
-        self.handle_tg_message(update.message, bot, self.get_hashtag(update.message))
+        self.handle_tg_message(update.message, bot, self.get_hashtags(update.message))

     def handle_tg_message(self, message, bot, hashtag):
         file, filename, tumblr = None, None, False
@@ -395,11 +393,15 @@ class DelojzaBot:
             for file in self.last_downloaded:
                 update.message.reply_text("Removing \"{}\"!".format(file[len(self.out_dir):]))
                 os.remove(file)
-                file_parent_dir = os.path.dirname(file)
-                if len(os.listdir(file_parent_dir)) == 0:
-                    hashtag = os.path.split(file_parent_dir)[1].upper()
-                    update.message.reply_text("Removing tag \"{}\" as it's empty...".format(hashtag))
-                    os.rmdir(file_parent_dir)
+                parent_dir = os.path.dirname(file)
+                while True:
+                    if len(os.listdir(parent_dir)) == 0:
+                        update.message.reply_text("Removing directory \"{}\" as it's empty..."
+                                                  .format(parent_dir[len(self.out_dir) + 1:]))
+                        os.rmdir(parent_dir)
+                    parent_dir = os.path.dirname(parent_dir)
+                    if parent_dir == self.out_dir:
+                        break
             self.last_downloaded.clear()
         else:
             update.message.reply_text("Nothing to remove!")
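With nested tag directories, deleting the last file can leave a whole chain of empty folders behind, so the removal handler now walks upward until it reaches out_dir. The same pruning as a standalone sketch, minus the Telegram replies (paths and setup are up to the caller):

import os

def prune_empty_parents(file, out_dir):
    """Remove empty directories above a deleted file, stopping at out_dir itself."""
    parent_dir = os.path.dirname(file)
    while True:
        if len(os.listdir(parent_dir)) == 0:
            print("Removing directory \"%s\" as it's empty..." % parent_dir[len(out_dir) + 1:])
            os.rmdir(parent_dir)
        parent_dir = os.path.dirname(parent_dir)
        if parent_dir == out_dir:
            break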