grande refactor -> Delojza now is a class

2019-04-17 18:05:17 +02:00 · 2019-04-17 18:05:17 +02:00 · c249de6e64
commit c249de6e64
parent 4ff94b2af6
5 changed files with 238 additions and 230 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,4 @@
-out
+delojza.ini
 downloaded.lst
 delojza.log
 initial.txt
--- a/delojza.ini
+++ b/delojza.ini
@ -0,0 +1,8 @@
+[delojza]
+tg_api_key = ***REMOVED***
+
+[tumblr]
+consumer_key = ***REMOVED***
+consumer_secret = ***REMOVED***
+oauth_key = ***REMOVED***
+oauth_secret = ***REMOVED***
--- a/delojza.py
+++ b/delojza.py
@ -1,10 +1,12 @@
 #!/usr/bin/env python3
+
 import errno
 import logging
 import os
 import re
 import shutil
 import sys
+from configparser import ConfigParser
 from glob import glob

 import filetype
@ -15,249 +17,260 @@ import youtube_dl
 from telegram import MessageEntity
 from telegram.ext import Updater, CommandHandler, MessageHandler, Filters

-DIR = os.path.dirname(os.path.realpath(__file__))
-TMP_DIR = '/var/tmp'
-OUT_DIR = DIR + '/out'
-
-logging.basicConfig(level=logging.INFO,
-                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-logger = logging.getLogger("kunsax")
-
-client = pytumblr.TumblrRestClient(
-    '***REMOVED***',
-    '***REMOVED***',
-    '***REMOVED***',
-    '***REMOVED***'
-)
-
-markov = None
-
-
-def add_to_corpus(text):
-    global markov
-    text = text.lower()
-    new_sentence = markovify.NewlineText(text)
-    markov = markovify.combine([markov, new_sentence])
-    with open("initial.txt", 'a') as f:
-        f.write(text + '\n')
-
-
-def datestr(date):
-    return date.strftime("%Y-%m-%d@%H%M")
-

 def mkdir_p(path):
    try:
        os.makedirs(path)
-    except OSError as exc:  # Python >2.5
+    except OSError as exc:
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise


-def ytdl_has(url):
-    ies = youtube_dl.extractor.gen_extractors()
-    for ie in ies:
-        if ie.suitable(url) and ie.IE_NAME != 'generic' \
-                and '/channel/' not in url:
-            # Site has dedicated extractor
-            return True
-    return False
+def datestr(date):
+    """Format ``date`` as ``YYYY-MM-DD@HHMM``."""
+    stamp_format = "%Y-%m-%d@%H%M"
+    return date.strftime(stamp_format)


-def download_ydl(urls, subdir, date, extract=False, filename=None):
-    ydl_opts = {
-        'noplaylist': True,
-        'restrictfilenames': True,
-        'outtmpl': TMP_DIR + '/' + datestr(date) + '__%(title)s__%(id)s.%(ext)s'
-    }
-    if extract:
-        ydl_opts['format'] = 'bestaudio'
-        # ydl_opts['postprocessors'] = [{
-        #     'key': 'FFmpegExtractAudio',
-        #     'preferredcodec': 'wav'
-        # }]
-    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-        ydl.download(urls)
-        out_dir = OUT_DIR + '/' + subdir + '/'
-        for filename in map(ydl.prepare_filename, map(ydl.extract_info, urls)):
-            globbeds = glob(os.path.splitext(filename)[0] + '.*')
-            for globbed in globbeds:
-                logger.info("Moving %s to %s..." % (globbed, out_dir))
-                shutil.move(globbed, out_dir)
-    return []
+class DelojzaBot:
+    """Telegram bot that downloads linked or attached media into hashtag-named directories."""
+
+    def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', tumblr_keys=None, markov=None):
+        """Store the configuration and register the Telegram handlers.
+
+        :param tg_api_key: token passed to ``telegram.ext.Updater``.
+        :param out_dir: base directory for downloads.
+        :param tmp_dir: scratch directory used by ``download_ytdl``.
+        :param tumblr_keys: optional sequence unpacked into
+            ``pytumblr.TumblrRestClient``; ``self.client`` is only set when it is truthy.
+        :param markov: optional object providing ``make_sentence`` and ``add_to_corpus``.
+
+        NOTE(review): the handlers are registered in the order URL, media,
+        hashtag, plain text; python-telegram-bot presumably dispatches to the
+        first matching handler, so this order matters - confirm for the
+        installed version.
+        """
+        self.logger = logging.getLogger("kunsax")

+        self.out_dir = out_dir
+        self.logger.debug('OUT_DIR: ' + out_dir)
+        self.tmp_dir = tmp_dir
+        self.logger.debug('TMP_DIR: ' + tmp_dir)
+        self.markov = markov

-def download_raw(urls, subdir, date, extract=False, filename=None):
-    filenames = []
-    for url in urls:
-        local_filename = OUT_DIR + '/' + subdir + '/' + "%s__%s" % (datestr(date), filename or url.split('/')[-1])
-        r = requests.get(url, stream=True)
-        with open(local_filename, 'wb') as f:
-            for chunk in r.iter_content(chunk_size=1024):
-                if chunk:
-                    f.write(chunk)
-        if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]):
-            kind = filetype.guess(local_filename)
-            if kind is None:
-                logger.error("File has no extension and could not be determined!")
-            else:
-                logger.info('Moving file without extension... %s?' % kind.extension)
-                shutil.move(local_filename, local_filename + '.' + kind.extension)
-        filenames.append(local_filename)
-    return filenames
+        self.updater = Updater(tg_api_key)
+        dp = self.updater.dispatcher

+        dp.add_handler(CommandHandler("start", self.tg_start))
+        dp.add_error_handler(self.tg_error)

-last_hashtag = None
+        dp.add_handler(MessageHandler(Filters.entity(MessageEntity.URL), self.handle_url))
+        dp.add_handler(
+            MessageHandler(
+                Filters.photo | Filters.video | Filters.video_note | Filters.audio | Filters.voice | Filters.document,
+                self.handle_rest))
+        dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), self.handle_hashtag))
+        dp.add_handler(MessageHandler(Filters.text, self.handle_text))

+        if tumblr_keys:
+            self.client = pytumblr.TumblrRestClient(*tumblr_keys)

-def get_first_hashtag(message):
-    global last_hashtag
-    hashtags = list(map(message.parse_entity,
-                        list(filter(lambda e: e.type == 'hashtag', message.entities))))
-    hashtags += list(map(message.parse_caption_entity,
-                         list(filter(lambda e: e.type == 'hashtag', message.caption_entities))))
-    if len(hashtags) == 0:
-        if last_hashtag is not None and last_hashtag[0] == message.from_user:
-            prehashtag = last_hashtag[1]
-            last_hashtag = None
-        else:
-            return None
-    else:
-        prehashtag = hashtags[0]
-    hashtag = prehashtag[1:].upper()
-    if "PRAS" in hashtag:
-        hashtag = "PRAS"
-    return hashtag
+        self.last_hashtag = None

+    @staticmethod
+    def ytdl_can(url):
+        """Tell whether youtube-dl has a dedicated (non-generic) extractor for ``url``.
+
+        URLs containing ``/channel/`` are always rejected.
+        """
+        return any(
+            extractor.suitable(url)
+            and extractor.IE_NAME != 'generic'
+            and '/channel/' not in url
+            for extractor in youtube_dl.extractor.gen_extractors()
+        )

-def handle_hashtag(bot, update):
-    global last_hashtag
-    hashtags = list(map(update.message.parse_entity,
-                        list(filter(lambda e: e.type == 'hashtag', update.message.entities))))
-    if len(hashtags) > 0:
-        last_hashtag = (update.message.from_user, hashtags[0])
+    def download_ytdl(self, urls, subdir, date, extract=False, filename=None):
+        """Download ``urls`` with youtube-dl and move the results into ``out_dir/subdir``.
+
+        Files are first written to ``self.tmp_dir`` as
+        ``<datestr(date)>__<title>__<id>.<ext>`` and then moved.
+
+        :param urls: list of URLs to download.
+        :param subdir: directory name below ``self.out_dir`` that receives the files.
+        :param date: datetime used for the filename prefix.
+        :param extract: when true, only the ``bestaudio`` format is requested.
+        :param filename: unused; accepted for signature parity with ``download_raw``.
+        :return: always an empty list.
+        """
+        ydl_opts = {
+            'noplaylist': True,
+            'restrictfilenames': True,
+            # The date stamp is a filename *prefix*. Passing it to os.path.join as
+            # its own component would turn it into a directory and drop it from
+            # the name of the file that ends up in out_dir.
+            'outtmpl': os.path.join(self.tmp_dir, datestr(date) + '__%(title)s__%(id)s.%(ext)s')
+        }
+        if extract:
+            ydl_opts['format'] = 'bestaudio'
+            # ydl_opts['postprocessors'] = [{
+            #     'key': 'FFmpegExtractAudio',
+            #     'preferredcodec': 'wav'
+            # }]
+        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+            ydl.download(urls)
+            out_dir = os.path.join(self.out_dir, subdir)
+            for downloaded in map(ydl.prepare_filename, map(ydl.extract_info, urls)):
+                # Glob on the stem so the file is picked up whatever its extension is.
+                for globbed in glob(os.path.splitext(downloaded)[0] + '.*'):
+                    self.logger.info("Moving %s to %s..." % (globbed, out_dir))
+                    shutil.move(globbed, out_dir)
+        return []

-
-# noinspection PyBroadException
-def handle(urls, message, download, tumblr=False, filename=None):
-    try:
-        hashtag = get_first_hashtag(message)
-        if hashtag is None:
-            logger.info("Ignoring %s due to no hashtag present..." % urls)
-            return
-
-        logger.info("Downloading %s" % urls)
-
-        reply = 'Downloading'
-        if hashtag:
-            mkdir_p(OUT_DIR + '/' + hashtag)
-            reply += ' to "' + hashtag + '"'
-        reply += '...'
-
-        extract = False
-        if hashtag in ('AUDIO', 'RADIO') and download != download_raw:
-            extract = True
-            reply += ' (And also guessing you want to extract the audio)'
-        message.reply_text(reply)
-        filenames = download(urls,
-                             hashtag or '.', message.date,
-                             extract=extract, filename=filename)
-        if hashtag == 'TUMBLR':
-            message.reply_text('(btw, queueing to tumblr)')
-            for filename in filenames:
-                client.create_photo('kunsaxan', state="queue", data=filename)
+    def download_raw(self, urls, subdir, date, extract=False, filename=None):
+        """Fetch each URL over HTTP into ``out_dir/subdir`` and return the saved paths.
+
+        Files are named ``<datestr(date)>__<name>``, where ``<name>`` is
+        ``filename`` if given, otherwise the last path segment of the URL.
+        When the resulting name has no 3-5 character extension, the extension
+        is guessed from the file contents and appended.
+
+        :param urls: list of URLs to fetch.
+        :param subdir: directory name below ``self.out_dir``.
+        :param date: datetime used for the filename prefix.
+        :param extract: unused; accepted for signature parity with ``download_ytdl``.
+        :param filename: optional name overriding the one derived from the URL.
+        :return: list of paths of the files as they exist on disk.
+        :raises requests.HTTPError: when the server answers with an error status.
+        """
+        filenames = []
+        for url in urls:
+            local_filename = os.path.join(self.out_dir, subdir,
+                                          "%s__%s" % (datestr(date), filename or url.split('/')[-1]))
+            r = requests.get(url, stream=True)
+            # Fail loudly instead of saving an HTTP error page as if it were the file.
+            r.raise_for_status()
+            with open(local_filename, 'wb') as f:
+                for chunk in r.iter_content(chunk_size=1024):
+                    if chunk:
+                        f.write(chunk)
+            if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]):
+                kind = filetype.guess(local_filename)
+                if kind is None:
+                    self.logger.error("File has no extension and could not be determined!")
+                else:
+                    self.logger.info('Moving file without extension... %s?' % kind.extension)
+                    renamed = local_filename + '.' + kind.extension
+                    shutil.move(local_filename, renamed)
+                    # Report the path the file actually lives at after the rename.
+                    local_filename = renamed
+            filenames.append(local_filename)
        return filenames
-    except:
-        _, exc_value, __ = sys.exc_info()
-        if "Timed out" not in str(exc_value):
-            message.reply_text("Something is FUCKED: %s" % exc_value)
+
+    def get_first_hashtag(self, message):
+        """Return the upper-cased first hashtag applying to ``message``, without ``#``.
+
+        Hashtags in the message text are considered before those in the caption.
+        When the message has none, the hashtag remembered in ``self.last_hashtag``
+        is used (and forgotten) if it came from the same user; otherwise
+        ``None`` is returned. Any tag containing ``PRAS`` collapses to ``PRAS``.
+        """
+        found = [message.parse_entity(entity)
+                 for entity in message.entities if entity.type == 'hashtag']
+        found.extend(message.parse_caption_entity(entity)
+                     for entity in message.caption_entities if entity.type == 'hashtag')
+        if found:
+            raw_tag = found[0]
+        else:
+            remembered = self.last_hashtag
+            if remembered is None or remembered[0] != message.from_user:
+                return None
+            raw_tag = remembered[1]
+            self.last_hashtag = None
+        tag = raw_tag[1:].upper()
+        return "PRAS" if "PRAS" in tag else tag
+
+    def handle_hashtag(self, bot, update):
+        """Remember the sender and first text hashtag of the incoming message.
+
+        The pair is stored in ``self.last_hashtag``; nothing changes when the
+        message text carries no hashtag entity.
+        """
+        message = update.message
+        tags = [message.parse_entity(entity)
+                for entity in message.entities if entity.type == 'hashtag']
+        if tags:
+            self.last_hashtag = (message.from_user, tags[0])
+
+    def handle(self, urls, message, download, tumblr=False, filename=None):
+        """Download ``urls`` into the directory named after the message's hashtag.
+
+        Messages for which ``get_first_hashtag`` yields nothing are ignored.
+        Progress and failures are reported as replies to ``message``.
+
+        :param urls: list of URLs handed to ``download``.
+        :param message: message that triggered the download; supplies the
+            hashtag, the date and the reply channel.
+        :param download: callable with the signature of ``download_raw`` /
+            ``download_ytdl``.
+        :param tumblr: currently unused.
+        :param filename: optional file name forwarded to ``download``.
+        :return: whatever ``download`` returned, or ``None`` when the message
+            was ignored or the download failed.
+        """
+        try:
+            hashtag = self.get_first_hashtag(message)
+            if hashtag is None:
+                self.logger.info("Ignoring %s due to no hashtag present..." % urls)
+                return None
+
+            self.logger.info("Downloading %s under '%s'" % (urls, hashtag))
+
+            reply = 'Downloading'
+            if hashtag:
+                mkdir_p(os.path.join(self.out_dir, hashtag))
+                reply += ' to "' + hashtag + '"'
+            reply += '...'
+
+            extract = False
+            if hashtag in ('AUDIO', 'RADIO') and download != self.download_raw:
+                extract = True
+                reply += ' (And also guessing you want to extract the audio)'
+            message.reply_text(reply)
+            filenames = download(urls,
+                                 hashtag or '.', message.date,
+                                 extract=extract, filename=filename)
+            if hashtag == 'TUMBLR':
+                # The client attribute only exists when tumblr keys were supplied.
+                client = getattr(self, 'client', None)
+                if client is None:
+                    message.reply_text('(tumblr is not configured, not queueing)')
+                else:
+                    message.reply_text('(btw, queueing to tumblr)')
+                    for queued in filenames:
+                        client.create_photo('kunsaxan', state="queue", data=queued)
+            return filenames
+        except Exception as exc:
+            # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
+            self.logger.exception("Failed to handle %s" % urls)
+            # Timeouts are deliberately not reported back to the chat.
+            if "Timed out" not in str(exc):
+                message.reply_text("Something is FUCKED: %s" % exc)
+            return None
+
+    def handle_url(self, bot, update):
+        """Pass the URLs of a message that ``ytdl_can`` accepts to ``handle``.
+
+        Nothing happens when no URL entity of the message qualifies.
+        """
+        message = update.message
+        candidates = (message.parse_entity(entity)
+                      for entity in message.entities if entity.type == 'url')
+        ytdl_urls = [url for url in candidates if self.ytdl_can(url)]
+        if ytdl_urls:
+            self.handle(ytdl_urls, message, self.download_ytdl)
+
+    def handle_rest(self, bot, update):
+        """Download the attachment of a message through ``handle`` / ``download_raw``.
+
+        Attachment kinds are checked in the order photo, document, audio,
+        video, video note, voice. For photos the widest size is taken and the
+        ``tumblr`` flag is set; documents and audio also supply a file name.
+        """
+        message = update.message
+        file_id, filename, tumblr = None, None, False
+        if len(message.photo) > 0:
+            file_id = max(message.photo, key=lambda size: size.width).file_id
+            tumblr = True
+        elif message.document is not None:
+            filename = message.document.file_name
+            file_id = message.document.file_id
+        elif message.audio is not None:
+            filename = message.audio.title
+            file_id = message.audio.file_id
+        else:
+            # The remaining kinds only contribute a file id; first one present wins.
+            for attachment in (message.video, message.video_note, message.voice):
+                if attachment is not None:
+                    file_id = attachment.file_id
+                    break
+
+        if file_id is None:
+            return
+        url = bot.getFile(file_id).file_path
+        self.handle([url], message, self.download_raw, tumblr=tumblr, filename=filename)
+
+    def handle_text(self, bot, update):
+        """Feed the text of an incoming message into the Markov corpus.
+
+        Does nothing when the bot was built without a Markov model
+        (``markov=None``, which is the constructor default).
+        """
+        if self.markov is None:
+            return
+        self.markov.add_to_corpus(update.message.text)
+
+    def tg_start(self, bot, update):
+        """Answer ``/start`` with a generated sentence.
+
+        Falls back to the same fixed phrase ``tg_error`` uses when there is no
+        Markov model or it produced no sentence.
+        """
+        sentence = self.markov.make_sentence() if self.markov is not None else None
+        update.message.reply_text(sentence or "Mmmm, I like it...")
+
+    def tg_error(self, bot, update, error):
+        """Log a dispatcher error and report it to the chat when possible.
+
+        On errors mentioning ``Timed out`` a generated sentence (or a fixed
+        phrase) is sent and ``handle_rest`` is run on the same update;
+        any other error is echoed back. Nothing is sent when there is no
+        update or the update carries no message.
+        """
+        self.logger.error(error)
+        # An error handler must not raise itself: bail out when there is nobody to reply to.
+        message = getattr(update, 'message', None)
+        if message is None:
+            return
+        if "Timed out" in str(error):
+            sentence = self.markov.make_sentence(tries=100) if self.markov is not None else None
+            message.reply_text(sentence or "Mmmm, I like it...")
+            self.handle_rest(bot, update)
+        else:
+            message.reply_text("Something is fucked: %s" % error)
+
+    def run_idle(self):
+        """Start polling, log that the bot is up, then hand over to ``updater.idle()``."""
+        updater = self.updater
+        updater.start_polling()
+        self.logger.info("Started Telegram bot...")
+        updater.idle()


-def handle_url(bot, update):
-    ytdl_urls = list(filter(ytdl_has,
-                            map(lambda e: update.message.parse_entity(e),
-                                filter(lambda e: e.type == 'url',
-                                       update.message.entities))))
-    if len(ytdl_urls) > 0:
-        handle(ytdl_urls, update.message, download_ydl)
+class MarkovBlabberer:
+    def __init__(self, filepath):
+        """Build a Markov model from the newline-separated corpus at ``filepath``.
+
+        The corpus is lower-cased before the model is built, and one sample
+        sentence is logged.
+
+        :param filepath: path of the corpus file; it is also the file
+            ``add_to_corpus`` appends to.
+        """
+        self.logger = logging.getLogger('markov')
+        self.filepath = filepath

+        with open(filepath) as f:
+            text = f.read()
+        self.markov = markovify.NewlineText(text.lower())
+        # make_sentence() can come back empty-handed (None); %-style logging
+        # arguments cope with that, whereas string concatenation would raise.
+        self.logger.info("Sentence of the day: %s", self.make_sentence())

-# noinspection PyBroadException
-def handle_rest(bot, update):
-    file, filename, tumblr = None, None, False
-    if len(update.message.photo) > 0:
-        photo = max(update.message.photo, key=lambda p: p.width)
-        file = photo.file_id
-        tumblr = True
-    elif update.message.document is not None:
-        filename = update.message.document.file_name
-        file = update.message.document.file_id
-    elif update.message.audio is not None:
-        filename = update.message.audio.title
-        file = update.message.audio.file_id
-    elif update.message.video is not None:
-        file = update.message.video.file_id
-    elif update.message.video_note is not None:
-        file = update.message.video_note.file_id
-    elif update.message.voice is not None:
-        file = update.message.voice.file_id
+    def make_sentence(self, tries=100):
+        """Return a sentence from the underlying model, passing ``tries`` through."""
+        model = self.markov
+        return model.make_sentence(tries=tries)

-    if file is not None:
-        url = bot.getFile(file).file_path
-        handle([url], update.message, download_raw, tumblr=tumblr, filename=filename)
-
-
-def handle_text(bot, update):
-    add_to_corpus(update.message.text)
-
-
-def start(bot, update):
-    update.message.reply_text(markov.make_sentence())
-
-
-def error(bot, update, error):
-    logger.error(error)
-    if "Timed out" in str(error):
-        if update is not None:
-            update.message.reply_text(markov.make_sentence(tries=100) or "Mmmm, I like it...")
-            handle_rest(bot, update)
-    else:
-        if update is not None:
-            update.message.reply_text("Something is fucked: %s" % error)
-
-
-def main():
-    global markov
-
-    with open("initial.txt") as f:
-        text = f.read()
-        markov = markovify.NewlineText(text.lower())
-        logger.info("Sentence of the day: " + markov.make_sentence())
-
-    updater = Updater("***REMOVED***")
-
-    dp = updater.dispatcher
-
-    dp.add_handler(CommandHandler("start", start))
-
-    dp.add_error_handler(error)
-
-    dp.add_handler(MessageHandler(Filters.entity(MessageEntity.URL), handle_url))
-    dp.add_handler(
-        MessageHandler(
-            Filters.photo | Filters.video | Filters.video_note | Filters.audio | Filters.voice | Filters.document,
-            handle_rest))
-    dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), handle_hashtag))
-    dp.add_handler(MessageHandler(Filters.text, handle_text))
-
-    updater.start_polling()
-
-    logger.info("Started Telegram bot...")
-
-    updater.idle()
+    def add_to_corpus(self, text):
+        """Merge ``text`` (lower-cased) into the model and append it to the corpus file."""
+        lowered = text.lower()
+        addition = markovify.NewlineText(lowered)
+        self.markov = markovify.combine([self.markov, addition])
+        with open(self.filepath, 'a') as corpus_file:
+            corpus_file.write(lowered + '\n')


 if __name__ == '__main__':
-    main()
+    logging.basicConfig(level=logging.INFO,
+                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+    _DIR_ = os.path.dirname(os.path.realpath(__file__))
+    # Candidate config locations; the first one that exists is used.
+    # expanduser() replaces os.getenv("HOME"), which returns None (and makes
+    # os.path.join raise) when HOME is not set.
+    CONFIG_PATHS = ['/etc/delojza/delojza.ini',
+                    os.path.join(os.path.expanduser("~"), ".config/delojza/delojza.ini"),
+                    os.path.join(_DIR_, "delojza.ini")]
+
+    config = ConfigParser()
+    try:
+        CONF_FILE = next(conf_path for conf_path in CONFIG_PATHS if os.path.isfile(conf_path))
+        config.read(CONF_FILE)
+    except StopIteration:
+        logging.error("No config file found, stopping.")
+        sys.exit(-1)
+
+    markov = MarkovBlabberer("initial.txt")
+
+    # The bot treats tumblr as optional (tumblr_keys=None), so a config
+    # without a [tumblr] section must not abort start-up.
+    tumblr_keys = None
+    if config.has_section('tumblr'):
+        tumblr_keys = (config.get('tumblr', 'consumer_key'),
+                       config.get('tumblr', 'consumer_secret'),
+                       config.get('tumblr', 'oauth_key'),
+                       config.get('tumblr', 'oauth_secret'))
+
+    delojza = DelojzaBot(config.get('delojza', 'tg_api_key'),
+                         config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")),
+                         tmp_dir=config.get('delojza', 'tmp_dir', fallback="/var/tmp"),
+                         tumblr_keys=tumblr_keys,
+                         # Hand over the model built above; passing None here left
+                         # every text message and /start failing on a missing model.
+                         markov=markov)
+    delojza.run_idle()
--- a/robot.sh
+++ b/robot.sh
@ -1,6 +0,0 @@
-#!/bin/bash
-DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-cd ${DIR}
-./update.sh &
-source ./.venv/bin/activate
-python3 delojza.py 2>&1 |tee -a delojza.log
--- a/update.sh
+++ b/update.sh
@ -1,7 +0,0 @@
-#!/bin/bash
-while :;do
-	NUM=$( grep 'INFO - Downloading' delojza.log|wc -l)
-	echo $NUM
-	curl -s 'https://kunsaxan.sdbs.cz/counter.php?key=delojza7953713b19ef2ea055156c8dc175bf80&count='$NUM
-	sleep 300;
-done