From bfbc225d0e89a46adb59520a39268a9d84cb5be5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Ml=C3=A1dek?= Date: Wed, 22 May 2019 15:21:25 +0200 Subject: [PATCH] sanitize filenames --- delojza.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/delojza.py b/delojza.py index 457d328..8b0366c 100755 --- a/delojza.py +++ b/delojza.py @@ -92,6 +92,12 @@ class DelojzaBot: return True return False + @staticmethod + def sanitize(filepath): + if filepath is None: + return None + return re.sub(r'[^\w.-]', '_', filepath) + def tag_file(self, filepath, message, info=None): if info is None: info = {} @@ -162,7 +168,7 @@ class DelojzaBot: id3.save() # noinspection PyUnusedLocal - def download_ytdl(self, urls, out_path, date, message, audio=False, filename=None): + def download_ytdl(self, urls, out_path, date, message, audio=False, filetitle=None): ydl_opts = { 'noplaylist': True, 'restrictfilenames': True, @@ -179,7 +185,7 @@ class DelojzaBot: with youtube_dl.YoutubeDL(ydl_opts) as ydl: ydl.download(urls) for info in [ydl.extract_info(url, download=False) for url in urls]: - filename = ydl.prepare_filename(info) + filename = self.sanitize(ydl.prepare_filename(info)) globbeds = glob(os.path.splitext(filename)[0] + '.*') for globbed in globbeds: if globbed.endswith("mp3"): @@ -189,10 +195,11 @@ class DelojzaBot: filenames.append(dest) return filenames - def download_raw(self, urls, out_path, date, message, audio=False, filename=None): + def download_raw(self, urls, out_path, date, message, audio=False, filetitle=None): filenames = [] for url in urls: - local_filename = os.path.join(out_path, "%s__%s" % (datestr(date), filename or url.split('/')[-1])) + local_filename = os.path.join(out_path, "{}__{}".format(datestr(date), + self.sanitize(filetitle or url.split('/')[-1]))) final_filename = local_filename is_mp3 = local_filename.endswith("mp3") @@ -255,7 +262,7 @@ class DelojzaBot: self.last_hashtags = update.message.from_user, update.message.chat, datetime.now(), hashtags # noinspection PyBroadException - def handle(self, urls, message, hashtags, download_fn, filename=None): + def handle(self, urls, message, hashtags, download_fn, filetitle=None): try: if len(hashtags) == 0: self.logger.info("Ignoring %s due to no hashtag present..." % urls) @@ -281,7 +288,7 @@ class DelojzaBot: reply += ' (And also guessing you want to extract the audio)' message.reply_text(reply) - filenames = download_fn(urls, out_path, message.date, message, audio=audio, filename=filename) + filenames = download_fn(urls, out_path, message.date, message, audio=audio, filetitle=filetitle) cmd_hashtag = hashtags[0] @@ -290,8 +297,8 @@ class DelojzaBot: now = cmd_hashtag == 'TUMBLR_NOW' reply = '(btw, {})'.format("***FIRING TO TUMBLR RIGHT AWAY***" if now else "queueing to tumblr") message.reply_text(reply, parse_mode=telegram.ParseMode.MARKDOWN) - for filename in filenames: - response = self.tumblr_client.create_photo(self.tumblr_name, data=filename, + for filetitle in filenames: + response = self.tumblr_client.create_photo(self.tumblr_name, data=filetitle, state="published" if now else "queue") if 'id' in response: tumblr_ids.append(response['id']) @@ -327,15 +334,15 @@ class DelojzaBot: self.handle_tg_message(update.message, bot, self.get_hashtags(update.message)) def handle_tg_message(self, message, bot, hashtag): - file, filename, tumblr = None, None, False + file, filetitle, tumblr = None, None, False if len(message.photo) > 0: photo = max(message.photo, key=lambda p: p.width) file = photo.file_id elif message.document is not None: - filename = message.document.file_name + filetitle = message.document.file_name file = message.document.file_id elif message.audio is not None: - filename = message.audio.title + filetitle = message.audio.title file = message.audio.file_id elif message.video is not None: file = message.video.file_id @@ -346,7 +353,7 @@ class DelojzaBot: if file is not None: url = bot.getFile(file).file_path - self.handle([url], message, hashtag, self.download_raw, filename=filename) + self.handle([url], message, hashtag, self.download_raw, filetitle=filetitle) def tg_handle_text(self, _, update): if self.markov: