sanitize filenames

This commit is contained in:
Tomáš Mládek 2019-05-22 15:21:25 +02:00
parent 4059ce6366
commit bfbc225d0e

View file

@ -92,6 +92,12 @@ class DelojzaBot:
return True return True
return False return False
@staticmethod
def sanitize(filepath):
    """Return *filepath* with every unsafe character replaced by ``_``.

    Only word characters (as matched by ``\\w``), dots and hyphens are
    kept; anything else is substituted one-for-one with an underscore,
    so the length of the string never changes.

    ``None`` is passed through untouched so callers can hand over an
    optional name without checking it first.

    NOTE: path separators are not in the allowed set, so a value that
    contains directories is flattened into a single name.
    """
    return None if filepath is None else re.sub(r'[^\w.-]', '_', filepath)
def tag_file(self, filepath, message, info=None): def tag_file(self, filepath, message, info=None):
if info is None: if info is None:
info = {} info = {}
@ -162,7 +168,7 @@ class DelojzaBot:
id3.save() id3.save()
# noinspection PyUnusedLocal # noinspection PyUnusedLocal
def download_ytdl(self, urls, out_path, date, message, audio=False, filename=None): def download_ytdl(self, urls, out_path, date, message, audio=False, filetitle=None):
ydl_opts = { ydl_opts = {
'noplaylist': True, 'noplaylist': True,
'restrictfilenames': True, 'restrictfilenames': True,
@ -179,7 +185,7 @@ class DelojzaBot:
with youtube_dl.YoutubeDL(ydl_opts) as ydl: with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(urls) ydl.download(urls)
for info in [ydl.extract_info(url, download=False) for url in urls]: for info in [ydl.extract_info(url, download=False) for url in urls]:
filename = ydl.prepare_filename(info) filename = self.sanitize(ydl.prepare_filename(info))
globbeds = glob(os.path.splitext(filename)[0] + '.*') globbeds = glob(os.path.splitext(filename)[0] + '.*')
for globbed in globbeds: for globbed in globbeds:
if globbed.endswith("mp3"): if globbed.endswith("mp3"):
@ -189,10 +195,11 @@ class DelojzaBot:
filenames.append(dest) filenames.append(dest)
return filenames return filenames
def download_raw(self, urls, out_path, date, message, audio=False, filename=None): def download_raw(self, urls, out_path, date, message, audio=False, filetitle=None):
filenames = [] filenames = []
for url in urls: for url in urls:
local_filename = os.path.join(out_path, "%s__%s" % (datestr(date), filename or url.split('/')[-1])) local_filename = os.path.join(out_path, "{}__{}".format(datestr(date),
self.sanitize(filetitle or url.split('/')[-1])))
final_filename = local_filename final_filename = local_filename
is_mp3 = local_filename.endswith("mp3") is_mp3 = local_filename.endswith("mp3")
@ -255,7 +262,7 @@ class DelojzaBot:
self.last_hashtags = update.message.from_user, update.message.chat, datetime.now(), hashtags self.last_hashtags = update.message.from_user, update.message.chat, datetime.now(), hashtags
# noinspection PyBroadException # noinspection PyBroadException
def handle(self, urls, message, hashtags, download_fn, filename=None): def handle(self, urls, message, hashtags, download_fn, filetitle=None):
try: try:
if len(hashtags) == 0: if len(hashtags) == 0:
self.logger.info("Ignoring %s due to no hashtag present..." % urls) self.logger.info("Ignoring %s due to no hashtag present..." % urls)
@ -281,7 +288,7 @@ class DelojzaBot:
reply += ' (And also guessing you want to extract the audio)' reply += ' (And also guessing you want to extract the audio)'
message.reply_text(reply) message.reply_text(reply)
filenames = download_fn(urls, out_path, message.date, message, audio=audio, filename=filename) filenames = download_fn(urls, out_path, message.date, message, audio=audio, filetitle=filetitle)
cmd_hashtag = hashtags[0] cmd_hashtag = hashtags[0]
@ -290,8 +297,8 @@ class DelojzaBot:
now = cmd_hashtag == 'TUMBLR_NOW' now = cmd_hashtag == 'TUMBLR_NOW'
reply = '(btw, {})'.format("***FIRING TO TUMBLR RIGHT AWAY***" if now else "queueing to tumblr") reply = '(btw, {})'.format("***FIRING TO TUMBLR RIGHT AWAY***" if now else "queueing to tumblr")
message.reply_text(reply, parse_mode=telegram.ParseMode.MARKDOWN) message.reply_text(reply, parse_mode=telegram.ParseMode.MARKDOWN)
for filename in filenames: for filetitle in filenames:
response = self.tumblr_client.create_photo(self.tumblr_name, data=filename, response = self.tumblr_client.create_photo(self.tumblr_name, data=filetitle,
state="published" if now else "queue") state="published" if now else "queue")
if 'id' in response: if 'id' in response:
tumblr_ids.append(response['id']) tumblr_ids.append(response['id'])
@ -327,15 +334,15 @@ class DelojzaBot:
self.handle_tg_message(update.message, bot, self.get_hashtags(update.message)) self.handle_tg_message(update.message, bot, self.get_hashtags(update.message))
def handle_tg_message(self, message, bot, hashtag): def handle_tg_message(self, message, bot, hashtag):
file, filename, tumblr = None, None, False file, filetitle, tumblr = None, None, False
if len(message.photo) > 0: if len(message.photo) > 0:
photo = max(message.photo, key=lambda p: p.width) photo = max(message.photo, key=lambda p: p.width)
file = photo.file_id file = photo.file_id
elif message.document is not None: elif message.document is not None:
filename = message.document.file_name filetitle = message.document.file_name
file = message.document.file_id file = message.document.file_id
elif message.audio is not None: elif message.audio is not None:
filename = message.audio.title filetitle = message.audio.title
file = message.audio.file_id file = message.audio.file_id
elif message.video is not None: elif message.video is not None:
file = message.video.file_id file = message.video.file_id
@ -346,7 +353,7 @@ class DelojzaBot:
if file is not None: if file is not None:
url = bot.getFile(file).file_path url = bot.getFile(file).file_path
self.handle([url], message, hashtag, self.download_raw, filename=filename) self.handle([url], message, hashtag, self.download_raw, filetitle=filetitle)
def tg_handle_text(self, _, update): def tg_handle_text(self, _, update):
if self.markov: if self.markov: