grande refactor -> Delojza now is a class

This commit is contained in:
Tomáš Mládek 2019-04-17 18:05:17 +02:00 committed by Tomáš Mládek
parent 4ff94b2af6
commit c249de6e64
5 changed files with 238 additions and 230 deletions

2
.gitignore vendored
View file

@ -1,4 +1,4 @@
out delojza.ini
downloaded.lst downloaded.lst
delojza.log delojza.log
initial.txt initial.txt

8
delojza.ini Normal file
View file

@ -0,0 +1,8 @@
[delojza]
tg_api_key = ***REMOVED***
[tumblr]
consumer_key = ***REMOVED***
consumer_secret = ***REMOVED***
oauth_key = ***REMOVED***
oauth_secret = ***REMOVED***

View file

@ -1,10 +1,12 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import errno import errno
import logging import logging
import os import os
import re import re
import shutil import shutil
import sys import sys
from configparser import ConfigParser
from glob import glob from glob import glob
import filetype import filetype
@ -15,48 +17,52 @@ import youtube_dl
from telegram import MessageEntity from telegram import MessageEntity
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters from telegram.ext import Updater, CommandHandler, MessageHandler, Filters
DIR = os.path.dirname(os.path.realpath(__file__))
TMP_DIR = '/var/tmp'
OUT_DIR = DIR + '/out'
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("kunsax")
client = pytumblr.TumblrRestClient(
'***REMOVED***',
'***REMOVED***',
'***REMOVED***',
'***REMOVED***'
)
markov = None
def add_to_corpus(text):
    """Fold *text* into the module-level Markov model and persist it.

    The text is lower-cased, merged into the global ``markov`` model, and
    appended as a single line to ``initial.txt``.
    """
    global markov
    lowered = text.lower()
    markov = markovify.combine([markov, markovify.NewlineText(lowered)])
    with open("initial.txt", 'a') as corpus_file:
        corpus_file.write(lowered + '\n')
def datestr(date):
    """Format *date* as ``YYYY-MM-DD@HHMM`` (e.g. ``2019-04-17@1805``)."""
    stamp_format = "%Y-%m-%d@%H%M"
    return date.strftime(stamp_format)
def mkdir_p(path): def mkdir_p(path):
try: try:
os.makedirs(path) os.makedirs(path)
except OSError as exc: # Python >2.5 except OSError as exc:
if exc.errno == errno.EEXIST and os.path.isdir(path): if exc.errno == errno.EEXIST and os.path.isdir(path):
pass pass
else: else:
raise raise
def ytdl_has(url): def datestr(date):
return date.strftime("%Y-%m-%d@%H%M")
class DelojzaBot:
def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', tumblr_keys=None, markov=None):
    """Store the bot's configuration and register its Telegram handlers.

    :param tg_api_key: token handed to the Telegram ``Updater``.
    :param out_dir: stored as ``self.out_dir``.
    :param tmp_dir: stored as ``self.tmp_dir``.
    :param tumblr_keys: optional sequence unpacked into
        ``pytumblr.TumblrRestClient``; when falsy, ``self.client`` is ``None``.
    :param markov: stored as ``self.markov``.
    """
    self.logger = logging.getLogger("kunsax")

    # Lazy %-style arguments: also works when the paths are not plain str.
    self.out_dir = out_dir
    self.logger.debug('OUT_DIR: %s', out_dir)
    self.tmp_dir = tmp_dir
    self.logger.debug('TMP_DIR: %s', tmp_dir)
    self.markov = markov

    self.updater = Updater(tg_api_key)
    dp = self.updater.dispatcher

    # Registration order is kept exactly as before.
    dp.add_handler(CommandHandler("start", self.tg_start))
    dp.add_error_handler(self.tg_error)
    dp.add_handler(MessageHandler(Filters.entity(MessageEntity.URL), self.handle_url))
    dp.add_handler(
        MessageHandler(
            Filters.photo | Filters.video | Filters.video_note | Filters.audio | Filters.voice | Filters.document,
            self.handle_rest))
    dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), self.handle_hashtag))
    dp.add_handler(MessageHandler(Filters.text, self.handle_text))

    # Always define the attribute so it exists even without Tumblr credentials.
    self.client = pytumblr.TumblrRestClient(*tumblr_keys) if tumblr_keys else None

    self.last_hashtag = None
@staticmethod
def ytdl_can(url):
ies = youtube_dl.extractor.gen_extractors() ies = youtube_dl.extractor.gen_extractors()
for ie in ies: for ie in ies:
if ie.suitable(url) and ie.IE_NAME != 'generic' \ if ie.suitable(url) and ie.IE_NAME != 'generic' \
@ -65,12 +71,11 @@ def ytdl_has(url):
return True return True
return False return False
def download_ytdl(self, urls, subdir, date, extract=False, filename=None):
def download_ydl(urls, subdir, date, extract=False, filename=None):
ydl_opts = { ydl_opts = {
'noplaylist': True, 'noplaylist': True,
'restrictfilenames': True, 'restrictfilenames': True,
'outtmpl': TMP_DIR + '/' + datestr(date) + '__%(title)s__%(id)s.%(ext)s' 'outtmpl': os.path.join(self.tmp_dir, datestr(date), '__%(title)s__%(id)s.%(ext)s') # HOW?
} }
if extract: if extract:
ydl_opts['format'] = 'bestaudio' ydl_opts['format'] = 'bestaudio'
@ -80,19 +85,19 @@ def download_ydl(urls, subdir, date, extract=False, filename=None):
# }] # }]
with youtube_dl.YoutubeDL(ydl_opts) as ydl: with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(urls) ydl.download(urls)
out_dir = OUT_DIR + '/' + subdir + '/' out_dir = os.path.join(self.out_dir, subdir)
for filename in map(ydl.prepare_filename, map(ydl.extract_info, urls)): for filename in map(ydl.prepare_filename, map(ydl.extract_info, urls)):
globbeds = glob(os.path.splitext(filename)[0] + '.*') globbeds = glob(os.path.splitext(filename)[0] + '.*')
for globbed in globbeds: for globbed in globbeds:
logger.info("Moving %s to %s..." % (globbed, out_dir)) self.logger.info("Moving %s to %s..." % (globbed, out_dir))
shutil.move(globbed, out_dir) shutil.move(globbed, out_dir)
return [] return []
def download_raw(self, urls, subdir, date, extract=False, filename=None):
def download_raw(urls, subdir, date, extract=False, filename=None):
filenames = [] filenames = []
for url in urls: for url in urls:
local_filename = OUT_DIR + '/' + subdir + '/' + "%s__%s" % (datestr(date), filename or url.split('/')[-1]) local_filename = os.path.join(self.out_dir, subdir,
"%s__%s" % (datestr(date), filename or url.split('/')[-1]))
r = requests.get(url, stream=True) r = requests.get(url, stream=True)
with open(local_filename, 'wb') as f: with open(local_filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024): for chunk in r.iter_content(chunk_size=1024):
@ -101,27 +106,22 @@ def download_raw(urls, subdir, date, extract=False, filename=None):
if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]): if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]):
kind = filetype.guess(local_filename) kind = filetype.guess(local_filename)
if kind is None: if kind is None:
logger.error("File has no extension and could not be determined!") self.logger.error("File has no extension and could not be determined!")
else: else:
logger.info('Moving file without extension... %s?' % kind.extension) self.logger.info('Moving file without extension... %s?' % kind.extension)
shutil.move(local_filename, local_filename + '.' + kind.extension) shutil.move(local_filename, local_filename + '.' + kind.extension)
filenames.append(local_filename) filenames.append(local_filename)
return filenames return filenames
def get_first_hashtag(self, message):
last_hashtag = None
def get_first_hashtag(message):
global last_hashtag
hashtags = list(map(message.parse_entity, hashtags = list(map(message.parse_entity,
list(filter(lambda e: e.type == 'hashtag', message.entities)))) list(filter(lambda e: e.type == 'hashtag', message.entities))))
hashtags += list(map(message.parse_caption_entity, hashtags += list(map(message.parse_caption_entity,
list(filter(lambda e: e.type == 'hashtag', message.caption_entities)))) list(filter(lambda e: e.type == 'hashtag', message.caption_entities))))
if len(hashtags) == 0: if len(hashtags) == 0:
if last_hashtag is not None and last_hashtag[0] == message.from_user: if self.last_hashtag is not None and self.last_hashtag[0] == message.from_user:
prehashtag = last_hashtag[1] prehashtag = self.last_hashtag[1]
last_hashtag = None self.last_hashtag = None
else: else:
return None return None
else: else:
@ -131,33 +131,30 @@ def get_first_hashtag(message):
hashtag = "PRAS" hashtag = "PRAS"
return hashtag return hashtag
def handle_hashtag(self, bot, update):
def handle_hashtag(bot, update):
global last_hashtag
hashtags = list(map(update.message.parse_entity, hashtags = list(map(update.message.parse_entity,
list(filter(lambda e: e.type == 'hashtag', update.message.entities)))) list(filter(lambda e: e.type == 'hashtag', update.message.entities))))
if len(hashtags) > 0: if len(hashtags) > 0:
last_hashtag = (update.message.from_user, hashtags[0]) self.last_hashtag = (update.message.from_user, hashtags[0])
# noinspection PyBroadException
# noinspection PyBroadException def handle(self, urls, message, download, tumblr=False, filename=None):
def handle(urls, message, download, tumblr=False, filename=None):
try: try:
hashtag = get_first_hashtag(message) hashtag = self.get_first_hashtag(message)
if hashtag is None: if hashtag is None:
logger.info("Ignoring %s due to no hashtag present..." % urls) self.logger.info("Ignoring %s due to no hashtag present..." % urls)
return return
logger.info("Downloading %s" % urls) self.logger.info("Downloading %s under '%s'" % (urls, hashtag))
reply = 'Downloading' reply = 'Downloading'
if hashtag: if hashtag:
mkdir_p(OUT_DIR + '/' + hashtag) mkdir_p(os.path.join(self.out_dir, hashtag))
reply += ' to "' + hashtag + '"' reply += ' to "' + hashtag + '"'
reply += '...' reply += '...'
extract = False extract = False
if hashtag in ('AUDIO', 'RADIO') and download != download_raw: if hashtag in ('AUDIO', 'RADIO') and download != self.download_raw:
extract = True extract = True
reply += ' (And also guessing you want to extract the audio)' reply += ' (And also guessing you want to extract the audio)'
message.reply_text(reply) message.reply_text(reply)
@ -167,25 +164,23 @@ def handle(urls, message, download, tumblr=False, filename=None):
if hashtag == 'TUMBLR': if hashtag == 'TUMBLR':
message.reply_text('(btw, queueing to tumblr)') message.reply_text('(btw, queueing to tumblr)')
for filename in filenames: for filename in filenames:
client.create_photo('kunsaxan', state="queue", data=filename) self.client.create_photo('kunsaxan', state="queue", data=filename)
return filenames return filenames
except: except:
_, exc_value, __ = sys.exc_info() _, exc_value, __ = sys.exc_info()
if "Timed out" not in str(exc_value): if "Timed out" not in str(exc_value):
message.reply_text("Something is FUCKED: %s" % exc_value) message.reply_text("Something is FUCKED: %s" % exc_value)
def handle_url(self, bot, update):
def handle_url(bot, update): ytdl_urls = list(filter(self.ytdl_can,
ytdl_urls = list(filter(ytdl_has,
map(lambda e: update.message.parse_entity(e), map(lambda e: update.message.parse_entity(e),
filter(lambda e: e.type == 'url', filter(lambda e: e.type == 'url',
update.message.entities)))) update.message.entities))))
if len(ytdl_urls) > 0: if len(ytdl_urls) > 0:
handle(ytdl_urls, update.message, download_ydl) self.handle(ytdl_urls, update.message, self.download_ytdl)
# noinspection PyBroadException
# noinspection PyBroadException def handle_rest(self, bot, update):
def handle_rest(bot, update):
file, filename, tumblr = None, None, False file, filename, tumblr = None, None, False
if len(update.message.photo) > 0: if len(update.message.photo) > 0:
photo = max(update.message.photo, key=lambda p: p.width) photo = max(update.message.photo, key=lambda p: p.width)
@ -206,58 +201,76 @@ def handle_rest(bot, update):
if file is not None: if file is not None:
url = bot.getFile(file).file_path url = bot.getFile(file).file_path
handle([url], update.message, download_raw, tumblr=tumblr, filename=filename) self.handle([url], update.message, self.download_raw, tumblr=tumblr, filename=filename)
def handle_text(self, bot, update):
self.markov.add_to_corpus(update.message.text)
def handle_text(bot, update): def tg_start(self, bot, update):
add_to_corpus(update.message.text) update.message.reply_text(self.markov.make_sentence())
def tg_error(self, bot, update, error):
def start(bot, update): self.logger.error(error)
update.message.reply_text(markov.make_sentence())
def error(bot, update, error):
logger.error(error)
if "Timed out" in str(error): if "Timed out" in str(error):
if update is not None: if update is not None:
update.message.reply_text(markov.make_sentence(tries=100) or "Mmmm, I like it...") update.message.reply_text(self.markov.make_sentence(tries=100) or "Mmmm, I like it...")
handle_rest(bot, update) self.handle_rest(bot, update)
else: else:
if update is not None: if update is not None:
update.message.reply_text("Something is fucked: %s" % error) update.message.reply_text("Something is fucked: %s" % error)
def run_idle(self):
    """Start polling for updates, log the start-up, then call ``updater.idle()``."""
    updater = self.updater
    updater.start_polling()
    self.logger.info("Started Telegram bot...")
    updater.idle()
def main():
global markov
with open("initial.txt") as f: class MarkovBlabberer:
def __init__(self, filepath):
self.logger = logging.getLogger('markov')
self.filepath = filepath
with open(filepath) as f:
text = f.read() text = f.read()
markov = markovify.NewlineText(text.lower()) self.markov = markovify.NewlineText(text.lower())
logger.info("Sentence of the day: " + markov.make_sentence()) self.logger.info("Sentence of the day: " + self.make_sentence())
updater = Updater("***REMOVED***") def make_sentence(self, tries=100):
return self.markov.make_sentence(tries=tries)
dp = updater.dispatcher def add_to_corpus(self, text):
text = text.lower()
dp.add_handler(CommandHandler("start", start)) new_sentence = markovify.NewlineText(text)
self.markov = markovify.combine([self.markov, new_sentence])
dp.add_error_handler(error) with open(self.filepath, 'a') as f:
f.write(text + '\n')
dp.add_handler(MessageHandler(Filters.entity(MessageEntity.URL), handle_url))
dp.add_handler(
MessageHandler(
Filters.photo | Filters.video | Filters.video_note | Filters.audio | Filters.voice | Filters.document,
handle_rest))
dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), handle_hashtag))
dp.add_handler(MessageHandler(Filters.text, handle_text))
updater.start_polling()
logger.info("Started Telegram bot...")
updater.idle()
if __name__ == '__main__': if __name__ == '__main__':
main() logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
_DIR_ = os.path.dirname(os.path.realpath(__file__))

# Candidate config locations, checked in order; the first existing file wins.
# expanduser("~") replaces os.getenv("HOME"), which returns None when HOME is
# unset and would make os.path.join raise TypeError.
CONFIG_PATHS = ['/etc/delojza/delojza.ini',
                os.path.join(os.path.expanduser("~"), ".config/delojza/delojza.ini"),
                os.path.join(_DIR_, "delojza.ini")]

config = ConfigParser()

try:
    CONF_FILE = next(conf_path for conf_path in CONFIG_PATHS if os.path.isfile(conf_path))
    config.read(CONF_FILE)
except StopIteration:
    logging.error("No config file found, stopping.")
    sys.exit(-1)

markov = MarkovBlabberer("initial.txt")

# Pass the blabberer that was just built; the bot's text and /start handlers
# call methods on it, so handing over None would fail on the first message.
delojza = DelojzaBot(config.get('delojza', 'tg_api_key'),
                     config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")),
                     tmp_dir=config.get('delojza', 'tmp_dir', fallback="/var/tmp"),
                     tumblr_keys=(config.get('tumblr', 'consumer_key'),
                                  config.get('tumblr', 'consumer_secret'),
                                  config.get('tumblr', 'oauth_key'),
                                  config.get('tumblr', 'oauth_secret')),
                     markov=markov)
delojza.run_idle()

View file

@ -1,6 +0,0 @@
#!/bin/bash
# Launch the bot from the script's own directory: start update.sh in the
# background, activate the virtualenv, then run delojza.py with its output
# shown and appended to delojza.log.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Quote the path (it may contain spaces) and stop if the directory cannot be
# entered, so the relative paths below never resolve against the wrong place.
cd "${DIR}" || exit 1
./update.sh &
source ./.venv/bin/activate
python3 delojza.py 2>&1 | tee -a delojza.log

View file

@ -1,7 +0,0 @@
#!/bin/bash
# Every 300 seconds: count the 'INFO - Downloading' lines in delojza.log,
# print the count, and report it to the counter endpoint.
# NOTE(review): the endpoint key is hard-coded in the URL below; consider
# moving it out of the script.
while :; do
    # grep -c prints a bare number; `wc -l` output is space-padded on some
    # platforms, which split the unquoted URL argument.
    NUM=$(grep -c 'INFO - Downloading' delojza.log)
    # grep prints nothing when the log is missing; report 0 as before.
    NUM=${NUM:-0}
    echo "${NUM}"
    curl -s 'https://kunsaxan.sdbs.cz/counter.php?key=delojza7953713b19ef2ea055156c8dc175bf80&count='"${NUM}"
    sleep 300
done