diff --git a/.gitignore b/.gitignore
index 0521a6d..0838922 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
-out
+delojza.ini
 downloaded.lst
 delojza.log
 initial.txt
\ No newline at end of file
diff --git a/delojza.ini b/delojza.ini
new file mode 100644
index 0000000..6094130
--- /dev/null
+++ b/delojza.ini
@@ -0,0 +1,8 @@
+[delojza]
+tg_api_key = ***REMOVED***
+
+[tumblr]
+consumer_key = ***REMOVED***
+consumer_secret = ***REMOVED***
+oauth_key = ***REMOVED***
+oauth_secret = ***REMOVED***
\ No newline at end of file
diff --git a/delojza.py b/delojza.py
index d73c85d..ab004f0 100755
--- a/delojza.py
+++ b/delojza.py
@@ -1,10 +1,12 @@
 #!/usr/bin/env python3
+
 import errno
 import logging
 import os
 import re
 import shutil
 import sys
+from configparser import ConfigParser
 from glob import glob
 
 import filetype
@@ -15,249 +17,281 @@ import youtube_dl
 from telegram import MessageEntity
 from telegram.ext import Updater, CommandHandler, MessageHandler, Filters
 
-DIR = os.path.dirname(os.path.realpath(__file__))
-TMP_DIR = '/var/tmp'
-OUT_DIR = DIR + '/out'
-
-logging.basicConfig(level=logging.INFO,
-                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-logger = logging.getLogger("kunsax")
-
-client = pytumblr.TumblrRestClient(
-    '***REMOVED***',
-    '***REMOVED***',
-    '***REMOVED***',
-    '***REMOVED***'
-)
-
-markov = None
-
-
-def add_to_corpus(text):
-    global markov
-    text = text.lower()
-    new_sentence = markovify.NewlineText(text)
-    markov = markovify.combine([markov, new_sentence])
-    with open("initial.txt", 'a') as f:
-        f.write(text + '\n')
-
-
-def datestr(date):
-    return date.strftime("%Y-%m-%d@%H%M")
-
 
 def mkdir_p(path):
     try:
         os.makedirs(path)
-    except OSError as exc:  # Python >2.5
+    except OSError as exc:
         if exc.errno == errno.EEXIST and os.path.isdir(path):
             pass
         else:
             raise
 
 
-def ytdl_has(url):
-    ies = youtube_dl.extractor.gen_extractors()
-    for ie in ies:
-        if ie.suitable(url) and ie.IE_NAME != 'generic' \
-                and '/channel/' not in url:
-            # Site has dedicated extractor
-            return True
-    return False
+def datestr(date):
+    """Format a date as the timestamp prefix used in downloaded file names."""
+    return date.strftime("%Y-%m-%d@%H%M")
 
 
-def download_ydl(urls, subdir, date, extract=False, filename=None):
-    ydl_opts = {
-        'noplaylist': True,
-        'restrictfilenames': True,
-        'outtmpl': TMP_DIR + '/' + datestr(date) + '__%(title)s__%(id)s.%(ext)s'
-    }
-    if extract:
-        ydl_opts['format'] = 'bestaudio'
-        # ydl_opts['postprocessors'] = [{
-        #     'key': 'FFmpegExtractAudio',
-        #     'preferredcodec': 'wav'
-        # }]
-    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-        ydl.download(urls)
-        out_dir = OUT_DIR + '/' + subdir + '/'
-        for filename in map(ydl.prepare_filename, map(ydl.extract_info, urls)):
-            globbeds = glob(os.path.splitext(filename)[0] + '.*')
-            for globbed in globbeds:
-                logger.info("Moving %s to %s..." % (globbed, out_dir))
-                shutil.move(globbed, out_dir)
-    return []
+class DelojzaBot:
+    """Telegram bot that downloads hashtagged links and media into per-hashtag directories."""
+    def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', tumblr_keys=None, markov=None):
+        """Store the configuration, create the updater and register all handlers."""
+        self.logger = logging.getLogger("kunsax")
 
+        self.out_dir = out_dir
+        self.logger.debug('OUT_DIR: ' + out_dir)
+        self.tmp_dir = tmp_dir
+        self.logger.debug('TMP_DIR: ' + tmp_dir)
+        self.markov = markov
 
-def download_raw(urls, subdir, date, extract=False, filename=None):
-    filenames = []
-    for url in urls:
-        local_filename = OUT_DIR + '/' + subdir + '/' + "%s__%s" % (datestr(date), filename or url.split('/')[-1])
-        r = requests.get(url, stream=True)
-        with open(local_filename, 'wb') as f:
-            for chunk in r.iter_content(chunk_size=1024):
-                if chunk:
-                    f.write(chunk)
-        if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]):
-            kind = filetype.guess(local_filename)
-            if kind is None:
-                logger.error("File has no extension and could not be determined!")
-            else:
-                logger.info('Moving file without extension... %s?' % kind.extension)
-                shutil.move(local_filename, local_filename + '.' + kind.extension)
-        filenames.append(local_filename)
-    return filenames
+        self.updater = Updater(tg_api_key)
+        dp = self.updater.dispatcher
 
+        dp.add_handler(CommandHandler("start", self.tg_start))
+        dp.add_error_handler(self.tg_error)
 
-last_hashtag = None
+        dp.add_handler(MessageHandler(Filters.entity(MessageEntity.URL), self.handle_url))
+        dp.add_handler(
+            MessageHandler(
+                Filters.photo | Filters.video | Filters.video_note | Filters.audio | Filters.voice | Filters.document,
+                self.handle_rest))
+        dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), self.handle_hashtag))
+        dp.add_handler(MessageHandler(Filters.text, self.handle_text))
 
+        # Tumblr is optional: always define the attribute so handle() can test it for None.
+        self.client = pytumblr.TumblrRestClient(*tumblr_keys) if tumblr_keys else None
 
-def get_first_hashtag(message):
-    global last_hashtag
-    hashtags = list(map(message.parse_entity,
-                        list(filter(lambda e: e.type == 'hashtag', message.entities))))
-    hashtags += list(map(message.parse_caption_entity,
-                         list(filter(lambda e: e.type == 'hashtag', message.caption_entities))))
-    if len(hashtags) == 0:
-        if last_hashtag is not None and last_hashtag[0] == message.from_user:
-            prehashtag = last_hashtag[1]
-            last_hashtag = None
-        else:
-            return None
-    else:
-        prehashtag = hashtags[0]
-    hashtag = prehashtag[1:].upper()
-    if "PRAS" in hashtag:
-        hashtag = "PRAS"
-    return hashtag
+        self.last_hashtag = None
 
+    @staticmethod
+    def ytdl_can(url):
+        """Return True if youtube-dl has a non-generic extractor for url (channel URLs excluded)."""
+        ies = youtube_dl.extractor.gen_extractors()
+        for ie in ies:
+            if ie.suitable(url) and ie.IE_NAME != 'generic' \
+                    and '/channel/' not in url:
+                # Site has dedicated extractor
+                return True
+        return False
 
-def handle_hashtag(bot, update):
-    global last_hashtag
-    hashtags = list(map(update.message.parse_entity,
-                        list(filter(lambda e: e.type == 'hashtag', update.message.entities))))
-    if len(hashtags) > 0:
-        last_hashtag = (update.message.from_user, hashtags[0])
+    def download_ytdl(self, urls, subdir, date, extract=False, filename=None):
+        """Download urls with youtube-dl into tmp_dir and move the resulting files to out_dir/subdir."""
+        ydl_opts = {
+            'noplaylist': True,
+            'restrictfilenames': True,
+            # The date is a file name prefix, not a directory level.
+            'outtmpl': os.path.join(self.tmp_dir, datestr(date) + '__%(title)s__%(id)s.%(ext)s')
+        }
+        if extract:
+            ydl_opts['format'] = 'bestaudio'
+            # ydl_opts['postprocessors'] = [{
+            #     'key': 'FFmpegExtractAudio',
+            #     'preferredcodec': 'wav'
+            # }]
+        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+            ydl.download(urls)
+            out_dir = os.path.join(self.out_dir, subdir)
+            for filename in map(ydl.prepare_filename, map(ydl.extract_info, urls)):
+                globbeds = glob(os.path.splitext(filename)[0] + '.*')
+                for globbed in globbeds:
+                    self.logger.info("Moving %s to %s..." % (globbed, out_dir))
+                    shutil.move(globbed, out_dir)
+        return []
 
-
-# noinspection PyBroadException
-def handle(urls, message, download, tumblr=False, filename=None):
-    try:
-        hashtag = get_first_hashtag(message)
-        if hashtag is None:
-            logger.info("Ignoring %s due to no hashtag present..." % urls)
-            return
-
-        logger.info("Downloading %s" % urls)
-
-        reply = 'Downloading'
-        if hashtag:
-            mkdir_p(OUT_DIR + '/' + hashtag)
-            reply += ' to "' + hashtag + '"'
-        reply += '...'
-
-        extract = False
-        if hashtag in ('AUDIO', 'RADIO') and download != download_raw:
-            extract = True
-            reply += ' (And also guessing you want to extract the audio)'
-        message.reply_text(reply)
-        filenames = download(urls,
-                             hashtag or '.', message.date,
-                             extract=extract, filename=filename)
-        if hashtag == 'TUMBLR':
-            message.reply_text('(btw, queueing to tumblr)')
-            for filename in filenames:
-                client.create_photo('kunsaxan', state="queue", data=filename)
+    def download_raw(self, urls, subdir, date, extract=False, filename=None):
+        """Fetch each URL into out_dir/subdir under a date-prefixed name; return the local paths."""
+        filenames = []
+        for url in urls:
+            local_filename = os.path.join(self.out_dir, subdir,
+                                          "%s__%s" % (datestr(date), filename or url.split('/')[-1]))
+            r = requests.get(url, stream=True)
+            with open(local_filename, 'wb') as f:
+                for chunk in r.iter_content(chunk_size=1024):
+                    if chunk:
+                        f.write(chunk)
+            if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]):
+                kind = filetype.guess(local_filename)
+                if kind is None:
+                    self.logger.error("File has no extension and could not be determined!")
+                else:
+                    self.logger.info('Moving file without extension... %s?' % kind.extension)
+                    shutil.move(local_filename, local_filename + '.' + kind.extension)
+                    local_filename += '.' + kind.extension
+            filenames.append(local_filename)
         return filenames
-    except:
-        _, exc_value, __ = sys.exc_info()
-        if "Timed out" not in str(exc_value):
-            message.reply_text("Something is FUCKED: %s" % exc_value)
+
+    def get_first_hashtag(self, message):
+        """Return the first hashtag (upper-cased, no '#'), else the sender's remembered one, else None."""
+        hashtags = list(map(message.parse_entity,
+                            list(filter(lambda e: e.type == 'hashtag', message.entities))))
+        hashtags += list(map(message.parse_caption_entity,
+                             list(filter(lambda e: e.type == 'hashtag', message.caption_entities))))
+        if len(hashtags) == 0:
+            if self.last_hashtag is not None and self.last_hashtag[0] == message.from_user:
+                prehashtag = self.last_hashtag[1]
+                self.last_hashtag = None
+            else:
+                return None
+        else:
+            prehashtag = hashtags[0]
+        hashtag = prehashtag[1:].upper()
+        if "PRAS" in hashtag:
+            hashtag = "PRAS"
+        return hashtag
+
+    def handle_hashtag(self, bot, update):
+        """Remember the sender and first hashtag of the message for a later download."""
+        hashtags = list(map(update.message.parse_entity,
+                            list(filter(lambda e: e.type == 'hashtag', update.message.entities))))
+        if len(hashtags) > 0:
+            self.last_hashtag = (update.message.from_user, hashtags[0])
+
+    # noinspection PyBroadException
+    def handle(self, urls, message, download, tumblr=False, filename=None):
+        """Download urls with the given callable into the directory named after the message's hashtag."""
+        try:
+            hashtag = self.get_first_hashtag(message)
+            if hashtag is None:
+                self.logger.info("Ignoring %s due to no hashtag present..." % urls)
+                return
+
+            self.logger.info("Downloading %s under '%s'" % (urls, hashtag))
+
+            reply = 'Downloading'
+            if hashtag:
+                mkdir_p(os.path.join(self.out_dir, hashtag))
+                reply += ' to "' + hashtag + '"'
+            reply += '...'
+
+            extract = False
+            if hashtag in ('AUDIO', 'RADIO') and download != self.download_raw:
+                extract = True
+                reply += ' (And also guessing you want to extract the audio)'
+            message.reply_text(reply)
+            filenames = download(urls,
+                                 hashtag or '.', message.date,
+                                 extract=extract, filename=filename)
+            if hashtag == 'TUMBLR' and self.client is not None:
+                message.reply_text('(btw, queueing to tumblr)')
+                for filename in filenames:
+                    self.client.create_photo('kunsaxan', state="queue", data=filename)
+            return filenames
+        except:
+            _, exc_value, __ = sys.exc_info()
+            if "Timed out" not in str(exc_value):
+                message.reply_text("Something is FUCKED: %s" % exc_value)
+
+    def handle_url(self, bot, update):
+        """Download every URL entity of the message that youtube-dl can handle."""
+        ytdl_urls = list(filter(self.ytdl_can,
+                                map(lambda e: update.message.parse_entity(e),
+                                    filter(lambda e: e.type == 'url',
+                                           update.message.entities))))
+        if len(ytdl_urls) > 0:
+            self.handle(ytdl_urls, update.message, self.download_ytdl)
+
+    # noinspection PyBroadException
+    def handle_rest(self, bot, update):
+        """Download the photo, document, audio, video, video note or voice attached to the message."""
+        file, filename, tumblr = None, None, False
+        if len(update.message.photo) > 0:
+            photo = max(update.message.photo, key=lambda p: p.width)
+            file = photo.file_id
+            tumblr = True
+        elif update.message.document is not None:
+            filename = update.message.document.file_name
+            file = update.message.document.file_id
+        elif update.message.audio is not None:
+            filename = update.message.audio.title
+            file = update.message.audio.file_id
+        elif update.message.video is not None:
+            file = update.message.video.file_id
+        elif update.message.video_note is not None:
+            file = update.message.video_note.file_id
+        elif update.message.voice is not None:
+            file = update.message.voice.file_id
+
+        if file is not None:
+            url = bot.getFile(file).file_path
+            self.handle([url], update.message, self.download_raw, tumblr=tumblr, filename=filename)
+
+    def handle_text(self, bot, update):
+        """Feed the text of the message into the Markov corpus."""
+        self.markov.add_to_corpus(update.message.text)
+
+    def tg_start(self, bot, update):
+        """Reply to the /start command with a generated sentence."""
+        update.message.reply_text(self.markov.make_sentence())
+
+    def tg_error(self, bot, update, error):
+        """Log the error; on a timeout reply with a sentence and retry handle_rest, otherwise report it."""
+        self.logger.error(error)
+        if "Timed out" in str(error):
+            if update is not None:
+                update.message.reply_text(self.markov.make_sentence(tries=100) or "Mmmm, I like it...")
+                self.handle_rest(bot, update)
+        else:
+            if update is not None:
+                update.message.reply_text("Something is fucked: %s" % error)
+
+    def run_idle(self):
+        """Start polling for updates and idle until shutdown."""
+        self.updater.start_polling()
+        self.logger.info("Started Telegram bot...")
+        self.updater.idle()
 
 
-def handle_url(bot, update):
-    ytdl_urls = list(filter(ytdl_has,
-                            map(lambda e: update.message.parse_entity(e),
-                                filter(lambda e: e.type == 'url',
-                                       update.message.entities))))
-    if len(ytdl_urls) > 0:
-        handle(ytdl_urls, update.message, download_ydl)
+class MarkovBlabberer:
+    """Markov-chain sentence generator backed by a newline-separated corpus file."""
+    def __init__(self, filepath):
+        """Build the model from the lower-cased contents of filepath."""
+        self.logger = logging.getLogger('markov')
+        self.filepath = filepath
 
+        with open(filepath) as f:
+            text = f.read()
+        self.markov = markovify.NewlineText(text.lower())
+        self.logger.info("Sentence of the day: %s", self.make_sentence())
 
-# noinspection PyBroadException
-def handle_rest(bot, update):
-    file, filename, tumblr = None, None, False
-    if len(update.message.photo) > 0:
-        photo = max(update.message.photo, key=lambda p: p.width)
-        file = photo.file_id
-        tumblr = True
-    elif update.message.document is not None:
-        filename = update.message.document.file_name
-        file = update.message.document.file_id
-    elif update.message.audio is not None:
-        filename = update.message.audio.title
-        file = update.message.audio.file_id
-    elif update.message.video is not None:
-        file = update.message.video.file_id
-    elif update.message.video_note is not None:
-        file = update.message.video_note.file_id
-    elif update.message.voice is not None:
-        file = update.message.voice.file_id
+    def make_sentence(self, tries=100):
+        """Return a generated sentence, or None if none could be built within the given tries."""
+        return self.markov.make_sentence(tries=tries)
 
-    if file is not None:
-        url = bot.getFile(file).file_path
-        handle([url], update.message, download_raw, tumblr=tumblr, filename=filename)
-
-
-def handle_text(bot, update):
-    add_to_corpus(update.message.text)
-
-
-def start(bot, update):
-    update.message.reply_text(markov.make_sentence())
-
-
-def error(bot, update, error):
-    logger.error(error)
-    if "Timed out" in str(error):
-        if update is not None:
-            update.message.reply_text(markov.make_sentence(tries=100) or "Mmmm, I like it...")
-            handle_rest(bot, update)
-    else:
-        if update is not None:
-            update.message.reply_text("Something is fucked: %s" % error)
-
-
-def main():
-    global markov
-
-    with open("initial.txt") as f:
-        text = f.read()
-    markov = markovify.NewlineText(text.lower())
-    logger.info("Sentence of the day: " + markov.make_sentence())
-
-    updater = Updater("***REMOVED***")
-
-    dp = updater.dispatcher
-
-    dp.add_handler(CommandHandler("start", start))
-
-    dp.add_error_handler(error)
-
-    dp.add_handler(MessageHandler(Filters.entity(MessageEntity.URL), handle_url))
-    dp.add_handler(
-        MessageHandler(
-            Filters.photo | Filters.video | Filters.video_note | Filters.audio | Filters.voice | Filters.document,
-            handle_rest))
-    dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), handle_hashtag))
-    dp.add_handler(MessageHandler(Filters.text, handle_text))
-
-    updater.start_polling()
-
-    logger.info("Started Telegram bot...")
-
-    updater.idle()
+    def add_to_corpus(self, text):
+        """Add the lower-cased text to the model and append it to the corpus file."""
+        text = text.lower()
+        new_sentence = markovify.NewlineText(text)
+        self.markov = markovify.combine([self.markov, new_sentence])
+        with open(self.filepath, 'a') as f:
+            f.write(text + '\n')
 
 
 if __name__ == '__main__':
-    main()
+    logging.basicConfig(level=logging.INFO,
+                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+    _DIR_ = os.path.dirname(os.path.realpath(__file__))
+    CONFIG_PATHS = ['/etc/delojza/delojza.ini',
+                    os.path.expanduser("~/.config/delojza/delojza.ini"),
+                    os.path.join(_DIR_, "delojza.ini")]
+
+    config = ConfigParser()
+    try:
+        CONF_FILE = next(conf_path for conf_path in CONFIG_PATHS if os.path.isfile(conf_path))
+        config.read(CONF_FILE)
+    except StopIteration:
+        logging.error("No config file found, stopping.")
+        sys.exit(-1)
+
+    markov = MarkovBlabberer("initial.txt")
+
+    delojza = DelojzaBot(config.get('delojza', 'tg_api_key'),
+                         config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")),
+                         tmp_dir=config.get('delojza', 'tmp_dir', fallback="/var/tmp"),
+                         tumblr_keys=(config.get('tumblr', 'consumer_key'),
+                                      config.get('tumblr', 'consumer_secret'),
+                                      config.get('tumblr', 'oauth_key'),
+                                      config.get('tumblr', 'oauth_secret')),
+                         markov=markov)
+    delojza.run_idle()
diff --git a/robot.sh b/robot.sh
deleted file mode 100755
index 2d8e617..0000000
--- a/robot.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-cd ${DIR}
-./update.sh &
-source ./.venv/bin/activate
-python3 delojza.py 2>&1 |tee -a delojza.log
diff --git a/update.sh b/update.sh
deleted file mode 100755
index d89e9f4..0000000
--- a/update.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-while :;do
-    NUM=$( grep 'INFO - Downloading' delojza.log|wc -l)
-    echo $NUM
-    curl -s 'https://kunsaxan.sdbs.cz/counter.php?key=delojza7953713b19ef2ea055156c8dc175bf80&count='$NUM
-    sleep 300;
-done