grande refactor -> Delojza now is a class

This commit is contained in:
Tomáš Mládek 2019-04-17 18:05:17 +02:00 committed by Tomáš Mládek
parent 4ff94b2af6
commit c249de6e64
5 changed files with 238 additions and 230 deletions

2
.gitignore vendored
View file

@ -1,4 +1,4 @@
out delojza.ini
downloaded.lst downloaded.lst
delojza.log delojza.log
initial.txt initial.txt

8
delojza.ini Normal file
View file

@ -0,0 +1,8 @@
[delojza]
tg_api_key = ***REMOVED***
[tumblr]
consumer_key = ***REMOVED***
consumer_secret = ***REMOVED***
oauth_key = ***REMOVED***
oauth_secret = ***REMOVED***

View file

@ -1,10 +1,12 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import errno import errno
import logging import logging
import os import os
import re import re
import shutil import shutil
import sys import sys
from configparser import ConfigParser
from glob import glob from glob import glob
import filetype import filetype
@ -15,249 +17,260 @@ import youtube_dl
from telegram import MessageEntity from telegram import MessageEntity
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters from telegram.ext import Updater, CommandHandler, MessageHandler, Filters
DIR = os.path.dirname(os.path.realpath(__file__))
TMP_DIR = '/var/tmp'
OUT_DIR = DIR + '/out'
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("kunsax")
client = pytumblr.TumblrRestClient(
'***REMOVED***',
'***REMOVED***',
'***REMOVED***',
'***REMOVED***'
)
markov = None
def add_to_corpus(text):
global markov
text = text.lower()
new_sentence = markovify.NewlineText(text)
markov = markovify.combine([markov, new_sentence])
with open("initial.txt", 'a') as f:
f.write(text + '\n')
def datestr(date):
return date.strftime("%Y-%m-%d@%H%M")
def mkdir_p(path): def mkdir_p(path):
try: try:
os.makedirs(path) os.makedirs(path)
except OSError as exc: # Python >2.5 except OSError as exc:
if exc.errno == errno.EEXIST and os.path.isdir(path): if exc.errno == errno.EEXIST and os.path.isdir(path):
pass pass
else: else:
raise raise
def ytdl_has(url): def datestr(date):
ies = youtube_dl.extractor.gen_extractors() return date.strftime("%Y-%m-%d@%H%M")
for ie in ies:
if ie.suitable(url) and ie.IE_NAME != 'generic' \
and '/channel/' not in url:
# Site has dedicated extractor
return True
return False
def download_ydl(urls, subdir, date, extract=False, filename=None): class DelojzaBot:
ydl_opts = { def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', tumblr_keys=None, markov=None):
'noplaylist': True, self.logger = logging.getLogger("kunsax")
'restrictfilenames': True,
'outtmpl': TMP_DIR + '/' + datestr(date) + '__%(title)s__%(id)s.%(ext)s'
}
if extract:
ydl_opts['format'] = 'bestaudio'
# ydl_opts['postprocessors'] = [{
# 'key': 'FFmpegExtractAudio',
# 'preferredcodec': 'wav'
# }]
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(urls)
out_dir = OUT_DIR + '/' + subdir + '/'
for filename in map(ydl.prepare_filename, map(ydl.extract_info, urls)):
globbeds = glob(os.path.splitext(filename)[0] + '.*')
for globbed in globbeds:
logger.info("Moving %s to %s..." % (globbed, out_dir))
shutil.move(globbed, out_dir)
return []
self.out_dir = out_dir
self.logger.debug('OUT_DIR: ' + out_dir)
self.tmp_dir = tmp_dir
self.logger.debug('TMP_DIR: ' + tmp_dir)
self.markov = markov
def download_raw(urls, subdir, date, extract=False, filename=None): self.updater = Updater(tg_api_key)
filenames = [] dp = self.updater.dispatcher
for url in urls:
local_filename = OUT_DIR + '/' + subdir + '/' + "%s__%s" % (datestr(date), filename or url.split('/')[-1])
r = requests.get(url, stream=True)
with open(local_filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]):
kind = filetype.guess(local_filename)
if kind is None:
logger.error("File has no extension and could not be determined!")
else:
logger.info('Moving file without extension... %s?' % kind.extension)
shutil.move(local_filename, local_filename + '.' + kind.extension)
filenames.append(local_filename)
return filenames
dp.add_handler(CommandHandler("start", self.tg_start))
dp.add_error_handler(self.tg_error)
last_hashtag = None dp.add_handler(MessageHandler(Filters.entity(MessageEntity.URL), self.handle_url))
dp.add_handler(
MessageHandler(
Filters.photo | Filters.video | Filters.video_note | Filters.audio | Filters.voice | Filters.document,
self.handle_rest))
dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), self.handle_hashtag))
dp.add_handler(MessageHandler(Filters.text, self.handle_text))
if tumblr_keys:
self.client = pytumblr.TumblrRestClient(*tumblr_keys)
def get_first_hashtag(message): self.last_hashtag = None
global last_hashtag
hashtags = list(map(message.parse_entity,
list(filter(lambda e: e.type == 'hashtag', message.entities))))
hashtags += list(map(message.parse_caption_entity,
list(filter(lambda e: e.type == 'hashtag', message.caption_entities))))
if len(hashtags) == 0:
if last_hashtag is not None and last_hashtag[0] == message.from_user:
prehashtag = last_hashtag[1]
last_hashtag = None
else:
return None
else:
prehashtag = hashtags[0]
hashtag = prehashtag[1:].upper()
if "PRAS" in hashtag:
hashtag = "PRAS"
return hashtag
@staticmethod
def ytdl_can(url):
    """Tell whether youtube-dl has a dedicated extractor for ``url``.

    Returns True when at least one extractor other than the 'generic' one
    reports the URL as suitable and the URL contains no '/channel/' part;
    returns False otherwise.
    """
    # The three conditions are checked per extractor, in the same order a
    # plain loop over gen_extractors() would check them.
    return any(
        extractor.suitable(url)
        and extractor.IE_NAME != 'generic'
        and '/channel/' not in url
        for extractor in youtube_dl.extractor.gen_extractors()
    )
def handle_hashtag(bot, update): def download_ytdl(self, urls, subdir, date, extract=False, filename=None):
global last_hashtag ydl_opts = {
hashtags = list(map(update.message.parse_entity, 'noplaylist': True,
list(filter(lambda e: e.type == 'hashtag', update.message.entities)))) 'restrictfilenames': True,
if len(hashtags) > 0: 'outtmpl': os.path.join(self.tmp_dir, datestr(date), '__%(title)s__%(id)s.%(ext)s') # HOW?
last_hashtag = (update.message.from_user, hashtags[0]) }
if extract:
ydl_opts['format'] = 'bestaudio'
# ydl_opts['postprocessors'] = [{
# 'key': 'FFmpegExtractAudio',
# 'preferredcodec': 'wav'
# }]
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(urls)
out_dir = os.path.join(self.out_dir, subdir)
for filename in map(ydl.prepare_filename, map(ydl.extract_info, urls)):
globbeds = glob(os.path.splitext(filename)[0] + '.*')
for globbed in globbeds:
self.logger.info("Moving %s to %s..." % (globbed, out_dir))
shutil.move(globbed, out_dir)
return []
def download_raw(self, urls, subdir, date, extract=False, filename=None):
# noinspection PyBroadException filenames = []
def handle(urls, message, download, tumblr=False, filename=None): for url in urls:
try: local_filename = os.path.join(self.out_dir, subdir,
hashtag = get_first_hashtag(message) "%s__%s" % (datestr(date), filename or url.split('/')[-1]))
if hashtag is None: r = requests.get(url, stream=True)
logger.info("Ignoring %s due to no hashtag present..." % urls) with open(local_filename, 'wb') as f:
return for chunk in r.iter_content(chunk_size=1024):
if chunk:
logger.info("Downloading %s" % urls) f.write(chunk)
if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]):
reply = 'Downloading' kind = filetype.guess(local_filename)
if hashtag: if kind is None:
mkdir_p(OUT_DIR + '/' + hashtag) self.logger.error("File has no extension and could not be determined!")
reply += ' to "' + hashtag + '"' else:
reply += '...' self.logger.info('Moving file without extension... %s?' % kind.extension)
shutil.move(local_filename, local_filename + '.' + kind.extension)
extract = False filenames.append(local_filename)
if hashtag in ('AUDIO', 'RADIO') and download != download_raw:
extract = True
reply += ' (And also guessing you want to extract the audio)'
message.reply_text(reply)
filenames = download(urls,
hashtag or '.', message.date,
extract=extract, filename=filename)
if hashtag == 'TUMBLR':
message.reply_text('(btw, queueing to tumblr)')
for filename in filenames:
client.create_photo('kunsaxan', state="queue", data=filename)
return filenames return filenames
except:
_, exc_value, __ = sys.exc_info() def get_first_hashtag(self, message):
if "Timed out" not in str(exc_value): hashtags = list(map(message.parse_entity,
message.reply_text("Something is FUCKED: %s" % exc_value) list(filter(lambda e: e.type == 'hashtag', message.entities))))
hashtags += list(map(message.parse_caption_entity,
list(filter(lambda e: e.type == 'hashtag', message.caption_entities))))
if len(hashtags) == 0:
if self.last_hashtag is not None and self.last_hashtag[0] == message.from_user:
prehashtag = self.last_hashtag[1]
self.last_hashtag = None
else:
return None
else:
prehashtag = hashtags[0]
hashtag = prehashtag[1:].upper()
if "PRAS" in hashtag:
hashtag = "PRAS"
return hashtag
def handle_hashtag(self, bot, update):
    """Remember the first hashtag of a message together with its sender.

    The pair is stored in ``self.last_hashtag`` as ``(from_user, hashtag)``;
    nothing is stored when the message carries no hashtag entity.
    """
    message = update.message
    found = [message.parse_entity(entity)
             for entity in message.entities
             if entity.type == 'hashtag']
    if found:
        self.last_hashtag = (message.from_user, found[0])
# noinspection PyBroadException
def handle(self, urls, message, download, tumblr=False, filename=None):
    """Download ``urls`` into the directory named after the message's hashtag.

    Args:
        urls: URLs to fetch; passed through to ``download`` unchanged.
        message: Telegram message that triggered the download. Used to look
            up the hashtag, to obtain the date and to reply to the sender.
        download: Callable invoked as
            ``download(urls, subdir, date, extract=..., filename=...)``;
            its return value is treated as a list of stored file names.
        tumblr: Accepted for compatibility with existing callers; this
            method does not read it.
        filename: Optional file name forwarded to ``download``.

    Returns:
        Whatever ``download`` returned, or None when the message has no
        hashtag or when an error occurred (errors are logged and reported
        to the sender instead of being raised).
    """
    try:
        hashtag = self.get_first_hashtag(message)
        if hashtag is None:
            self.logger.info("Ignoring %s due to no hashtag present..." % urls)
            return None

        self.logger.info("Downloading %s under '%s'" % (urls, hashtag))

        reply = 'Downloading'
        if hashtag:
            os.makedirs(os.path.join(self.out_dir, hashtag), exist_ok=True)
            reply += ' to "' + hashtag + '"'
        reply += '...'

        # Audio extraction only makes sense for the youtube-dl style
        # downloaders, never for raw file downloads.
        extract = hashtag in ('AUDIO', 'RADIO') and download != self.download_raw
        if extract:
            reply += ' (And also guessing you want to extract the audio)'
        message.reply_text(reply)

        filenames = download(urls,
                             hashtag or '.', message.date,
                             extract=extract, filename=filename)

        if hashtag == 'TUMBLR':
            # The Tumblr client is only created when keys were supplied, so
            # its absence must not turn a finished download into an error.
            client = getattr(self, 'client', None)
            if client is None:
                self.logger.warning("No Tumblr client configured, not queueing %s" % filenames)
            else:
                message.reply_text('(btw, queueing to tumblr)')
                for stored in filenames:
                    client.create_photo('kunsaxan', state="queue", data=stored)
        return filenames
    except Exception as exc:
        # Best effort by design: a failed download must not take the bot
        # down. KeyboardInterrupt/SystemExit are deliberately not caught.
        self.logger.exception("Handling of %s failed" % urls)
        if "Timed out" not in str(exc):
            message.reply_text("Something is FUCKED: %s" % exc)
        return None
def handle_url(self, bot, update):
    """Pass the URLs of a message that youtube-dl can handle on to ``handle``.

    URL entities are parsed out of the message and filtered through
    ``self.ytdl_can``; if any remain they are downloaded with
    ``self.download_ytdl``. Messages without such URLs are ignored.
    """
    message = update.message
    url_entities = (entity for entity in message.entities if entity.type == 'url')
    ytdl_urls = [url
                 for url in map(message.parse_entity, url_entities)
                 if self.ytdl_can(url)]
    if ytdl_urls:
        self.handle(ytdl_urls, message, self.download_ytdl)
def handle_rest(self, bot, update):
    """Download the attachment of a message through ``download_raw``.

    Exactly one attachment is picked, in this order of precedence: the
    widest photo, document, audio, video, video note, voice. Documents
    contribute their ``file_name`` and audio its ``title`` as the target
    file name; photos additionally set the ``tumblr`` flag passed to
    ``handle``. A message without any of these attachments is ignored.
    """
    message = update.message
    file_id, filename, tumblr = None, None, False

    if len(message.photo) > 0:
        widest = max(message.photo, key=lambda size: size.width)
        file_id, tumblr = widest.file_id, True
    elif message.document is not None:
        filename = message.document.file_name
        file_id = message.document.file_id
    elif message.audio is not None:
        filename = message.audio.title
        file_id = message.audio.file_id
    else:
        # The remaining kinds only differ in the attribute they live under.
        for kind in ('video', 'video_note', 'voice'):
            attachment = getattr(message, kind)
            if attachment is not None:
                file_id = attachment.file_id
                break

    if file_id is None:
        return

    url = bot.getFile(file_id).file_path
    self.handle([url], message, self.download_raw, tumblr=tumblr, filename=filename)
def handle_text(self, bot, update):
    """Add the text of an incoming message to the Markov corpus.

    Does nothing when the bot has no Markov backend (``self.markov`` is
    None), so plain text messages never raise in that configuration.
    """
    if self.markov is None:
        return
    self.markov.add_to_corpus(update.message.text)
def tg_start(self, bot, update):
    """Reply to the sender with a generated sentence.

    Falls back to a fixed phrase when the bot has no Markov backend or the
    backend does not produce a sentence, so the reply text is never empty.
    """
    sentence = self.markov.make_sentence() if self.markov is not None else None
    update.message.reply_text(sentence or "Mmmm, I like it...")
def tg_error(self, bot, update, error):
    """Log a dispatcher error and, if there is an update, tell the sender.

    Args:
        bot: Telegram bot instance; forwarded to ``handle_rest`` on retry.
        update: The update being processed when the error occurred, or None.
        error: The error; only its string form is inspected.

    For errors whose text contains "Timed out" the sender gets a generated
    sentence (or a fixed fallback phrase) and the attachment handling is
    run again. Every other error is reported to the sender verbatim.
    """
    self.logger.error(error)
    if update is None:
        # Nobody to reply to and nothing to retry.
        return

    if "Timed out" in str(error):
        # The Markov backend is optional; never fail inside the error handler.
        sentence = self.markov.make_sentence(tries=100) if self.markov is not None else None
        update.message.reply_text(sentence or "Mmmm, I like it...")
        self.handle_rest(bot, update)
    else:
        update.message.reply_text("Something is fucked: %s" % error)
def run_idle(self):
    """Start polling for updates, log the start, then call the updater's ``idle()``."""
    updater = self.updater
    updater.start_polling()
    self.logger.info("Started Telegram bot...")
    updater.idle()
def handle_url(bot, update): class MarkovBlabberer:
ytdl_urls = list(filter(ytdl_has, def __init__(self, filepath):
map(lambda e: update.message.parse_entity(e), self.logger = logging.getLogger('markov')
filter(lambda e: e.type == 'url', self.filepath = filepath
update.message.entities))))
if len(ytdl_urls) > 0:
handle(ytdl_urls, update.message, download_ydl)
with open(filepath) as f:
text = f.read()
self.markov = markovify.NewlineText(text.lower())
self.logger.info("Sentence of the day: " + self.make_sentence())
# noinspection PyBroadException def make_sentence(self, tries=100):
def handle_rest(bot, update): return self.markov.make_sentence(tries=tries)
file, filename, tumblr = None, None, False
if len(update.message.photo) > 0:
photo = max(update.message.photo, key=lambda p: p.width)
file = photo.file_id
tumblr = True
elif update.message.document is not None:
filename = update.message.document.file_name
file = update.message.document.file_id
elif update.message.audio is not None:
filename = update.message.audio.title
file = update.message.audio.file_id
elif update.message.video is not None:
file = update.message.video.file_id
elif update.message.video_note is not None:
file = update.message.video_note.file_id
elif update.message.voice is not None:
file = update.message.voice.file_id
if file is not None: def add_to_corpus(self, text):
url = bot.getFile(file).file_path text = text.lower()
handle([url], update.message, download_raw, tumblr=tumblr, filename=filename) new_sentence = markovify.NewlineText(text)
self.markov = markovify.combine([self.markov, new_sentence])
with open(self.filepath, 'a') as f:
def handle_text(bot, update): f.write(text + '\n')
add_to_corpus(update.message.text)
def start(bot, update):
update.message.reply_text(markov.make_sentence())
def error(bot, update, error):
logger.error(error)
if "Timed out" in str(error):
if update is not None:
update.message.reply_text(markov.make_sentence(tries=100) or "Mmmm, I like it...")
handle_rest(bot, update)
else:
if update is not None:
update.message.reply_text("Something is fucked: %s" % error)
def main():
global markov
with open("initial.txt") as f:
text = f.read()
markov = markovify.NewlineText(text.lower())
logger.info("Sentence of the day: " + markov.make_sentence())
updater = Updater("***REMOVED***")
dp = updater.dispatcher
dp.add_handler(CommandHandler("start", start))
dp.add_error_handler(error)
dp.add_handler(MessageHandler(Filters.entity(MessageEntity.URL), handle_url))
dp.add_handler(
MessageHandler(
Filters.photo | Filters.video | Filters.video_note | Filters.audio | Filters.voice | Filters.document,
handle_rest))
dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), handle_hashtag))
dp.add_handler(MessageHandler(Filters.text, handle_text))
updater.start_polling()
logger.info("Started Telegram bot...")
updater.idle()
if __name__ == '__main__': if __name__ == '__main__':
main() logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Directory containing this script; used for the bundled config file and
# for the default output directory.
_DIR_ = os.path.dirname(os.path.realpath(__file__))

# Candidate config locations, checked in order; the first existing file wins.
# expanduser() is used instead of os.getenv("HOME") because the latter is
# None when HOME is unset, which would make os.path.join() raise TypeError.
CONFIG_PATHS = ['/etc/delojza/delojza.ini',
                os.path.expanduser("~/.config/delojza/delojza.ini"),
                os.path.join(_DIR_, "delojza.ini")]

CONF_FILE = next((conf_path for conf_path in CONFIG_PATHS if os.path.isfile(conf_path)), None)
if CONF_FILE is None:
    logging.error("No config file found, stopping.")
    sys.exit(-1)

config = ConfigParser()
config.read(CONF_FILE)

# The [tumblr] section is optional: without it the bot is created with
# tumblr_keys=None instead of failing at startup.
if config.has_section('tumblr'):
    tumblr_keys = tuple(config.get('tumblr', key)
                        for key in ('consumer_key', 'consumer_secret',
                                    'oauth_key', 'oauth_secret'))
else:
    tumblr_keys = None

markov = MarkovBlabberer("initial.txt")

delojza = DelojzaBot(config.get('delojza', 'tg_api_key'),
                     config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")),
                     tmp_dir=config.get('delojza', 'tmp_dir', fallback="/var/tmp"),
                     tumblr_keys=tumblr_keys,
                     # Hand over the corpus built above; passing None here left
                     # the text and /start handlers without a Markov backend.
                     markov=markov)
delojza.run_idle()

View file

@ -1,6 +0,0 @@
#!/bin/bash
# Run the delojza bot from the directory this script lives in.

# Resolve the script's own directory so the relative paths below work no
# matter where the script is invoked from.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Quote the path (it may contain spaces) and stop instead of starting the
# bot from the wrong directory if the cd fails.
cd "${DIR}" || exit 1
# Start update.sh in the background alongside the bot.
./update.sh &
source ./.venv/bin/activate
# Show stdout and stderr on the terminal and append both to delojza.log.
python3 delojza.py 2>&1 | tee -a delojza.log

View file

@ -1,7 +0,0 @@
#!/bin/bash
# Every 300 seconds, count the "INFO - Downloading" lines in delojza.log,
# print the count and send it to the remote counter endpoint.
while :; do
    # grep -c counts matching lines itself; fall back to 0 when grep prints
    # nothing (for example when the log file does not exist yet).
    NUM=$(grep -c 'INFO - Downloading' delojza.log)
    NUM=${NUM:-0}
    echo "$NUM"
    curl -s "https://kunsaxan.sdbs.cz/counter.php?key=delojza7953713b19ef2ea055156c8dc175bf80&count=${NUM}"
    sleep 300
done