grande refactor -> Delojza now is a class
This commit is contained in:
parent
4ff94b2af6
commit
c249de6e64
5 changed files with 238 additions and 230 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -1,4 +1,4 @@
|
|||
out
|
||||
delojza.ini
|
||||
downloaded.lst
|
||||
delojza.log
|
||||
initial.txt
|
8
delojza.ini
Normal file
8
delojza.ini
Normal file
|
@ -0,0 +1,8 @@
|
|||
[delojza]
|
||||
tg_api_key = ***REMOVED***
|
||||
|
||||
[tumblr]
|
||||
consumer_key = ***REMOVED***
|
||||
consumer_secret = ***REMOVED***
|
||||
oauth_key = ***REMOVED***
|
||||
oauth_secret = ***REMOVED***
|
445
delojza.py
445
delojza.py
|
@ -1,10 +1,12 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import errno
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
from configparser import ConfigParser
|
||||
from glob import glob
|
||||
|
||||
import filetype
|
||||
|
@ -15,249 +17,260 @@ import youtube_dl
|
|||
from telegram import MessageEntity
|
||||
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters
|
||||
|
||||
DIR = os.path.dirname(os.path.realpath(__file__))
|
||||
TMP_DIR = '/var/tmp'
|
||||
OUT_DIR = DIR + '/out'
|
||||
|
||||
logging.basicConfig(level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
logger = logging.getLogger("kunsax")
|
||||
|
||||
client = pytumblr.TumblrRestClient(
|
||||
'***REMOVED***',
|
||||
'***REMOVED***',
|
||||
'***REMOVED***',
|
||||
'***REMOVED***'
|
||||
)
|
||||
|
||||
markov = None
|
||||
|
||||
|
||||
def add_to_corpus(text):
    """Fold *text* into the global Markov model and persist it.

    The text is lower-cased, combined into the module-level ``markov`` model
    and appended as one line to ``initial.txt``.
    """
    global markov
    lowered = text.lower()
    addition = markovify.NewlineText(lowered)
    markov = markovify.combine([markov, addition])
    with open("initial.txt", 'a') as corpus_file:
        corpus_file.write(lowered + '\n')
|
||||
|
||||
|
||||
def datestr(date):
    """Format *date* as ``YYYY-MM-DD@HHMM`` for use in file names."""
    stamp_format = "%Y-%m-%d@%H%M"
    return date.strftime(stamp_format)
|
||||
|
||||
|
||||
def mkdir_p(path):
    """Create *path* and any missing parent directories, like ``mkdir -p``.

    An already existing directory is not an error. Any other failure,
    including *path* existing as a non-directory, propagates as ``OSError``.
    """
    # exist_ok=True gives the "EEXIST and is a directory -> ignore" behaviour
    # that the hand-written errno check implemented.
    os.makedirs(path, exist_ok=True)
|
||||
|
||||
|
||||
def ytdl_has(url):
    """Return True when youtube_dl has a dedicated extractor for *url*.

    The 'generic' extractor does not count, and URLs containing
    '/channel/' are never accepted.
    """
    return any(
        ie.suitable(url) and ie.IE_NAME != 'generic' and '/channel/' not in url
        for ie in youtube_dl.extractor.gen_extractors()
    )
|
||||
def datestr(date):
    """Format *date* as ``YYYY-MM-DD@HHMM`` for use in file names."""
    stamp_format = "%Y-%m-%d@%H%M"
    return date.strftime(stamp_format)
|
||||
|
||||
|
||||
def download_ydl(urls, subdir, date, extract=False, filename=None):
    """Download *urls* with youtube_dl into TMP_DIR, then move them to OUT_DIR/<subdir>/.

    Temporary files are named "<datestr(date)>__<title>__<id>.<ext>". With
    *extract* set, the 'bestaudio' format is requested. The *filename*
    argument is accepted but never read. Always returns an empty list.
    """
    options = {
        'noplaylist': True,
        'restrictfilenames': True,
        'outtmpl': TMP_DIR + '/' + datestr(date) + '__%(title)s__%(id)s.%(ext)s'
    }
    if extract:
        options['format'] = 'bestaudio'
        # Conversion via the FFmpegExtractAudio postprocessor (wav) is disabled.
    with youtube_dl.YoutubeDL(options) as ydl:
        ydl.download(urls)
    target_dir = OUT_DIR + '/' + subdir + '/'
    for url in urls:
        info = ydl.extract_info(url)
        stem = os.path.splitext(ydl.prepare_filename(info))[0]
        # Move every file sharing the stem, whatever its extension.
        for produced in glob(stem + '.*'):
            logger.info("Moving %s to %s..." % (produced, target_dir))
            shutil.move(produced, target_dir)
    return []
|
||||
class DelojzaBot:
|
||||
def __init__(self, tg_api_key, out_dir, tmp_dir='/var/tmp', tumblr_keys=None, markov=None):
|
||||
self.logger = logging.getLogger("kunsax")
|
||||
|
||||
self.out_dir = out_dir
|
||||
self.logger.debug('OUT_DIR: ' + out_dir)
|
||||
self.tmp_dir = tmp_dir
|
||||
self.logger.debug('TMP_DIR: ' + tmp_dir)
|
||||
self.markov = markov
|
||||
|
||||
def download_raw(urls, subdir, date, extract=False, filename=None):
    """Stream each URL in *urls* into OUT_DIR/<subdir>/ and return the saved paths.

    Files are named "<datestr(date)>__<name>", where <name> is *filename*
    when given and otherwise the last path segment of the URL. A file whose
    name lacks a 3-5 character extension gets one appended based on
    ``filetype.guess``. *extract* is accepted but ignored.
    """
    filenames = []
    for url in urls:
        local_filename = OUT_DIR + '/' + subdir + '/' + "%s__%s" % (datestr(date), filename or url.split('/')[-1])
        r = requests.get(url, stream=True)
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
        if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]):
            kind = filetype.guess(local_filename)
            if kind is None:
                logger.error("File has no extension and could not be determined!")
            else:
                logger.info('Moving file without extension... %s?' % kind.extension)
                renamed = local_filename + '.' + kind.extension
                shutil.move(local_filename, renamed)
                # Report the path the file now lives at; the old name no
                # longer exists after the move.
                local_filename = renamed
        filenames.append(local_filename)
    return filenames
|
||||
self.updater = Updater(tg_api_key)
|
||||
dp = self.updater.dispatcher
|
||||
|
||||
dp.add_handler(CommandHandler("start", self.tg_start))
|
||||
dp.add_error_handler(self.tg_error)
|
||||
|
||||
last_hashtag = None
|
||||
dp.add_handler(MessageHandler(Filters.entity(MessageEntity.URL), self.handle_url))
|
||||
dp.add_handler(
|
||||
MessageHandler(
|
||||
Filters.photo | Filters.video | Filters.video_note | Filters.audio | Filters.voice | Filters.document,
|
||||
self.handle_rest))
|
||||
dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), self.handle_hashtag))
|
||||
dp.add_handler(MessageHandler(Filters.text, self.handle_text))
|
||||
|
||||
if tumblr_keys:
|
||||
self.client = pytumblr.TumblrRestClient(*tumblr_keys)
|
||||
|
||||
def get_first_hashtag(message):
    """Return the normalised first hashtag of *message*, or None.

    Hashtags are taken from the message entities first, then the caption
    entities. With none present, the hashtag remembered in the global
    ``last_hashtag`` is consumed if it came from the same user. The leading
    '#' is dropped, the tag upper-cased, and any tag containing "PRAS"
    collapses to "PRAS".
    """
    global last_hashtag
    found = [message.parse_entity(e) for e in message.entities if e.type == 'hashtag']
    found += [message.parse_caption_entity(e) for e in message.caption_entities if e.type == 'hashtag']
    if found:
        raw_tag = found[0]
    elif last_hashtag is not None and last_hashtag[0] == message.from_user:
        raw_tag = last_hashtag[1]
        last_hashtag = None
    else:
        return None
    tag = raw_tag[1:].upper()
    return "PRAS" if "PRAS" in tag else tag
|
||||
self.last_hashtag = None
|
||||
|
||||
@staticmethod
def ytdl_can(url):
    """Return True when youtube_dl has a dedicated extractor for *url*.

    The 'generic' extractor does not count, and URLs containing
    '/channel/' are never accepted.
    """
    return any(
        ie.suitable(url) and ie.IE_NAME != 'generic' and '/channel/' not in url
        for ie in youtube_dl.extractor.gen_extractors()
    )
|
||||
|
||||
def handle_hashtag(bot, update):
    """Remember the first hashtag of a message in the global ``last_hashtag``.

    The stored value is the pair (sending user, hashtag text). Messages
    without a hashtag entity leave the stored value untouched.
    """
    global last_hashtag
    message = update.message
    found = [message.parse_entity(e) for e in message.entities if e.type == 'hashtag']
    if found:
        last_hashtag = (message.from_user, found[0])
|
||||
def download_ytdl(self, urls, subdir, date, extract=False, filename=None):
    """Download *urls* with youtube_dl into the temp dir, then move them to <out_dir>/<subdir>.

    Temporary files are named "<datestr(date)>__<title>__<id>.<ext>". With
    *extract* set, the 'bestaudio' format is requested. The *filename*
    argument is accepted but never read. Always returns an empty list.
    """
    ydl_opts = {
        'noplaylist': True,
        'restrictfilenames': True,
        # The date stamp is a file-name prefix, so it is concatenated onto the
        # template. Passing it as its own os.path.join component would make it
        # a directory and drop the prefix from the moved files.
        'outtmpl': os.path.join(self.tmp_dir, datestr(date) + '__%(title)s__%(id)s.%(ext)s')
    }
    if extract:
        ydl_opts['format'] = 'bestaudio'
        # Conversion via the FFmpegExtractAudio postprocessor (wav) is disabled.
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(urls)
    out_dir = os.path.join(self.out_dir, subdir)
    for url in urls:
        downloaded = ydl.prepare_filename(ydl.extract_info(url))
        # Move every file sharing the stem, whatever its extension.
        for globbed in glob(os.path.splitext(downloaded)[0] + '.*'):
            self.logger.info("Moving %s to %s..." % (globbed, out_dir))
            shutil.move(globbed, out_dir)
    return []
|
||||
|
||||
|
||||
# noinspection PyBroadException
|
||||
def handle(urls, message, download, tumblr=False, filename=None):
|
||||
try:
|
||||
hashtag = get_first_hashtag(message)
|
||||
if hashtag is None:
|
||||
logger.info("Ignoring %s due to no hashtag present..." % urls)
|
||||
return
|
||||
|
||||
logger.info("Downloading %s" % urls)
|
||||
|
||||
reply = 'Downloading'
|
||||
if hashtag:
|
||||
mkdir_p(OUT_DIR + '/' + hashtag)
|
||||
reply += ' to "' + hashtag + '"'
|
||||
reply += '...'
|
||||
|
||||
extract = False
|
||||
if hashtag in ('AUDIO', 'RADIO') and download != download_raw:
|
||||
extract = True
|
||||
reply += ' (And also guessing you want to extract the audio)'
|
||||
message.reply_text(reply)
|
||||
filenames = download(urls,
|
||||
hashtag or '.', message.date,
|
||||
extract=extract, filename=filename)
|
||||
if hashtag == 'TUMBLR':
|
||||
message.reply_text('(btw, queueing to tumblr)')
|
||||
for filename in filenames:
|
||||
client.create_photo('kunsaxan', state="queue", data=filename)
|
||||
def download_raw(self, urls, subdir, date, extract=False, filename=None):
    """Stream each URL in *urls* into <out_dir>/<subdir>/ and return the saved paths.

    Files are named "<datestr(date)>__<name>", where <name> is *filename*
    when given and otherwise the last path segment of the URL. A file whose
    name lacks a 3-5 character extension gets one appended based on
    ``filetype.guess``. *extract* is accepted but ignored.
    """
    filenames = []
    for url in urls:
        local_filename = os.path.join(self.out_dir, subdir,
                                      "%s__%s" % (datestr(date), filename or url.split('/')[-1]))
        r = requests.get(url, stream=True)
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
        if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]):
            kind = filetype.guess(local_filename)
            if kind is None:
                self.logger.error("File has no extension and could not be determined!")
            else:
                self.logger.info('Moving file without extension... %s?' % kind.extension)
                renamed = local_filename + '.' + kind.extension
                shutil.move(local_filename, renamed)
                # Report the path the file now lives at; the old name no
                # longer exists after the move.
                local_filename = renamed
        filenames.append(local_filename)
    return filenames
|
||||
except:
|
||||
_, exc_value, __ = sys.exc_info()
|
||||
if "Timed out" not in str(exc_value):
|
||||
message.reply_text("Something is FUCKED: %s" % exc_value)
|
||||
|
||||
def get_first_hashtag(self, message):
    """Return the normalised first hashtag of *message*, or None.

    Hashtags are taken from the message entities first, then the caption
    entities. With none present, the hashtag remembered in
    ``self.last_hashtag`` is consumed if it came from the same user. The
    leading '#' is dropped, the tag upper-cased, and any tag containing
    "PRAS" collapses to "PRAS".
    """
    found = [message.parse_entity(e) for e in message.entities if e.type == 'hashtag']
    found += [message.parse_caption_entity(e) for e in message.caption_entities if e.type == 'hashtag']
    if found:
        raw_tag = found[0]
    elif self.last_hashtag is not None and self.last_hashtag[0] == message.from_user:
        raw_tag = self.last_hashtag[1]
        self.last_hashtag = None
    else:
        return None
    tag = raw_tag[1:].upper()
    return "PRAS" if "PRAS" in tag else tag
|
||||
|
||||
def handle_hashtag(self, bot, update):
    """Remember the first hashtag of a message in ``self.last_hashtag``.

    The stored value is the pair (sending user, hashtag text). Messages
    without a hashtag entity leave the stored value untouched.
    """
    message = update.message
    found = [message.parse_entity(e) for e in message.entities if e.type == 'hashtag']
    if found:
        self.last_hashtag = (message.from_user, found[0])
|
||||
|
||||
# noinspection PyBroadException
|
||||
def handle(self, urls, message, download, tumblr=False, filename=None):
    """Download *urls* into the directory named by the message's hashtag.

    Nothing is downloaded when no hashtag can be determined. Otherwise the
    hashtag directory is created under ``self.out_dir``, the user is told
    what is happening, and *download* is called with the URLs. Audio
    extraction is requested for the 'AUDIO'/'RADIO' hashtags unless
    *download* is ``self.download_raw``. Files filed under 'TUMBLR' are
    queued to Tumblr when a client is configured. *tumblr* is accepted but
    not read.

    Returns the list produced by *download*, or None when the message was
    ignored or an error occurred. Errors are logged and, unless they are
    time-outs, reported back to the user.
    """
    try:
        hashtag = self.get_first_hashtag(message)
        if hashtag is None:
            self.logger.info("Ignoring %s due to no hashtag present..." % urls)
            return None

        self.logger.info("Downloading %s under '%s'" % (urls, hashtag))

        reply = 'Downloading'
        if hashtag:
            mkdir_p(os.path.join(self.out_dir, hashtag))
            reply += ' to "' + hashtag + '"'
        reply += '...'

        extract = False
        if hashtag in ('AUDIO', 'RADIO') and download != self.download_raw:
            extract = True
            reply += ' (And also guessing you want to extract the audio)'
        message.reply_text(reply)

        filenames = download(urls,
                             hashtag or '.', message.date,
                             extract=extract, filename=filename)

        if hashtag == 'TUMBLR':
            # The client attribute only exists when Tumblr keys were supplied.
            client = getattr(self, 'client', None)
            if client is None:
                self.logger.warning("Tumblr client not configured, not queueing %s" % filenames)
            else:
                message.reply_text('(btw, queueing to tumblr)')
                for path in filenames:
                    client.create_photo('kunsaxan', state="queue", data=path)
        return filenames
    except Exception as exc:
        # Exception rather than a bare except: SystemExit and
        # KeyboardInterrupt must not be swallowed here.
        self.logger.exception("Handling %s failed" % urls)
        if "Timed out" not in str(exc):
            message.reply_text("Something is FUCKED: %s" % exc)
        return None
|
||||
|
||||
def handle_url(self, bot, update):
    """Download every URL in the message that ``ytdl_can`` accepts.

    The accepted URLs are passed to ``self.handle`` in one call, with
    ``self.download_ytdl`` as the downloader. Nothing happens when no URL
    qualifies.
    """
    candidates = (update.message.parse_entity(entity)
                  for entity in update.message.entities
                  if entity.type == 'url')
    ytdl_urls = [url for url in candidates if self.ytdl_can(url)]
    if ytdl_urls:
        self.handle(ytdl_urls, update.message, self.download_ytdl)
|
||||
|
||||
# noinspection PyBroadException
|
||||
def handle_rest(self, bot, update):
    """Download the attachment of a message via ``self.download_raw``.

    The first attachment found wins, checked in this order: the widest
    photo, document, audio, video, video note, voice. Documents pass their
    file name on, audio passes its title, and photos set the tumblr flag.
    Messages without an attachment are ignored.
    """
    message = update.message
    file_id, filename, tumblr = None, None, False
    if len(message.photo) > 0:
        file_id = max(message.photo, key=lambda p: p.width).file_id
        tumblr = True
    elif message.document is not None:
        filename = message.document.file_name
        file_id = message.document.file_id
    elif message.audio is not None:
        filename = message.audio.title
        file_id = message.audio.file_id
    else:
        # The remaining kinds only contribute a file id.
        for kind in ('video', 'video_note', 'voice'):
            attachment = getattr(message, kind)
            if attachment is not None:
                file_id = attachment.file_id
                break

    if file_id is None:
        return
    url = bot.getFile(file_id).file_path
    self.handle([url], update.message, self.download_raw, tumblr=tumblr, filename=filename)
|
||||
|
||||
def handle_text(self, bot, update):
    """Feed the text of a plain message into the Markov corpus."""
    incoming = update.message.text
    self.markov.add_to_corpus(incoming)
|
||||
|
||||
def tg_start(self, bot, update):
    """Answer the /start command with a generated Markov sentence."""
    sentence = self.markov.make_sentence()
    update.message.reply_text(sentence)
|
||||
|
||||
def tg_error(self, bot, update, error):
    """Log a Telegram error and, when an update is attached, react to it.

    A "Timed out" error is answered with a Markov sentence (or a fixed
    fallback) and the update is retried through ``handle_rest``. Any other
    error is reported back to the chat.
    """
    self.logger.error(error)
    timed_out = "Timed out" in str(error)
    if update is None:
        return
    if timed_out:
        update.message.reply_text(self.markov.make_sentence(tries=100) or "Mmmm, I like it...")
        self.handle_rest(bot, update)
    else:
        update.message.reply_text("Something is fucked: %s" % error)
|
||||
|
||||
def run_idle(self):
    """Start polling Telegram, log the start and block in the updater's idle loop."""
    updater = self.updater
    updater.start_polling()
    self.logger.info("Started Telegram bot...")
    updater.idle()
|
||||
|
||||
|
||||
def handle_url(bot, update):
    """Download every URL in the message that ``ytdl_has`` accepts.

    The accepted URLs are passed to ``handle`` in one call, with
    ``download_ydl`` as the downloader. Nothing happens when no URL
    qualifies.
    """
    candidates = (update.message.parse_entity(entity)
                  for entity in update.message.entities
                  if entity.type == 'url')
    ytdl_urls = [url for url in candidates if ytdl_has(url)]
    if ytdl_urls:
        handle(ytdl_urls, update.message, download_ydl)
|
||||
class MarkovBlabberer:
|
||||
def __init__(self, filepath):
    """Build the Markov model from the newline-separated corpus at *filepath*.

    The corpus is lower-cased before the model is built, and one generated
    sentence is logged.
    """
    self.logger = logging.getLogger('markov')
    self.filepath = filepath

    with open(filepath) as f:
        text = f.read()
    self.markov = markovify.NewlineText(text.lower())
    # make_sentence() can come back as None (tg_error already guards for
    # that), so format it instead of concatenating, which raises TypeError.
    self.logger.info("Sentence of the day: %s", self.make_sentence())
|
||||
|
||||
# noinspection PyBroadException
|
||||
def handle_rest(bot, update):
|
||||
file, filename, tumblr = None, None, False
|
||||
if len(update.message.photo) > 0:
|
||||
photo = max(update.message.photo, key=lambda p: p.width)
|
||||
file = photo.file_id
|
||||
tumblr = True
|
||||
elif update.message.document is not None:
|
||||
filename = update.message.document.file_name
|
||||
file = update.message.document.file_id
|
||||
elif update.message.audio is not None:
|
||||
filename = update.message.audio.title
|
||||
file = update.message.audio.file_id
|
||||
elif update.message.video is not None:
|
||||
file = update.message.video.file_id
|
||||
elif update.message.video_note is not None:
|
||||
file = update.message.video_note.file_id
|
||||
elif update.message.voice is not None:
|
||||
file = update.message.voice.file_id
|
||||
def make_sentence(self, tries=100):
    """Ask the underlying Markov model for a sentence, trying up to *tries* times."""
    model = self.markov
    return model.make_sentence(tries=tries)
|
||||
|
||||
if file is not None:
|
||||
url = bot.getFile(file).file_path
|
||||
handle([url], update.message, download_raw, tumblr=tumblr, filename=filename)
|
||||
|
||||
|
||||
def handle_text(bot, update):
    """Feed the text of a plain message into the Markov corpus."""
    incoming = update.message.text
    add_to_corpus(incoming)
|
||||
|
||||
|
||||
def start(bot, update):
    """Answer the /start command with a sentence from the global Markov model."""
    sentence = markov.make_sentence()
    update.message.reply_text(sentence)
|
||||
|
||||
|
||||
def error(bot, update, error):
    """Log a Telegram error and, when an update is attached, react to it.

    A "Timed out" error is answered with a Markov sentence (or a fixed
    fallback) and the update is retried through ``handle_rest``. Any other
    error is reported back to the chat.
    """
    logger.error(error)
    timed_out = "Timed out" in str(error)
    if update is None:
        return
    if timed_out:
        update.message.reply_text(markov.make_sentence(tries=100) or "Mmmm, I like it...")
        handle_rest(bot, update)
    else:
        update.message.reply_text("Something is fucked: %s" % error)
|
||||
|
||||
|
||||
def main():
    """Build the global Markov model, register the Telegram handlers and poll until stopped."""
    global markov

    with open("initial.txt") as f:
        text = f.read()
    markov = markovify.NewlineText(text.lower())
    # make_sentence() can come back as None (the error handler already guards
    # for that), so format it instead of concatenating, which raises TypeError.
    logger.info("Sentence of the day: %s", markov.make_sentence())

    updater = Updater("***REMOVED***")
    dp = updater.dispatcher

    dp.add_handler(CommandHandler("start", start))
    dp.add_error_handler(error)

    # Handlers are registered in this order: URLs, attachments, hashtags,
    # and plain text last.
    dp.add_handler(MessageHandler(Filters.entity(MessageEntity.URL), handle_url))
    dp.add_handler(
        MessageHandler(
            Filters.photo | Filters.video | Filters.video_note | Filters.audio | Filters.voice | Filters.document,
            handle_rest))
    dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), handle_hashtag))
    dp.add_handler(MessageHandler(Filters.text, handle_text))

    updater.start_polling()
    logger.info("Started Telegram bot...")
    updater.idle()
|
||||
def add_to_corpus(self, text):
    """Fold *text* into the Markov model and append it to the corpus file.

    The text is lower-cased, combined into ``self.markov`` and written as
    one line to ``self.filepath``.
    """
    lowered = text.lower()
    addition = markovify.NewlineText(lowered)
    self.markov = markovify.combine([self.markov, addition])
    with open(self.filepath, 'a') as corpus_file:
        corpus_file.write(lowered + '\n')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
# Start-up sequence: configure logging, load the first config file found,
# build the Markov model and run the bot.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

_DIR_ = os.path.dirname(os.path.realpath(__file__))
# Searched in order; the first existing file wins. expanduser() is used
# because os.getenv("HOME") returns None when HOME is unset, which would make
# os.path.join raise.
CONFIG_PATHS = ['/etc/delojza/delojza.ini',
                os.path.join(os.path.expanduser("~"), ".config/delojza/delojza.ini"),
                os.path.join(_DIR_, "delojza.ini")]

config = ConfigParser()
try:
    CONF_FILE = next(conf_path for conf_path in CONFIG_PATHS if os.path.isfile(conf_path))
except StopIteration:
    logging.error("No config file found, stopping.")
    sys.exit(-1)
config.read(CONF_FILE)

markov = MarkovBlabberer("initial.txt")

delojza = DelojzaBot(config.get('delojza', 'tg_api_key'),
                     config.get('delojza', 'OUT_DIR', fallback=os.path.join(_DIR_, "out")),
                     tmp_dir=config.get('delojza', 'tmp_dir', fallback="/var/tmp"),
                     tumblr_keys=(config.get('tumblr', 'consumer_key'),
                                  config.get('tumblr', 'consumer_secret'),
                                  config.get('tumblr', 'oauth_key'),
                                  config.get('tumblr', 'oauth_secret')),
                     # Hand over the model built above; passing None leaves the
                     # bot without one and its text, /start and error handlers
                     # fail on self.markov.
                     markov=markov)
delojza.run_idle()
|
||||
|
|
6
robot.sh
6
robot.sh
|
@ -1,6 +0,0 @@
|
|||
#!/bin/bash
# Launch the bot from the directory this script lives in: start the counter
# updater in the background, activate the virtualenv, then run the bot with
# its output appended to delojza.log.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Quoted so paths with spaces work; abort rather than run from the wrong place.
cd "${DIR}" || exit 1
./update.sh &
source ./.venv/bin/activate
python3 delojza.py 2>&1 | tee -a delojza.log
|
|
@ -1,7 +0,0 @@
|
|||
#!/bin/bash
# Every 300 seconds, count the "Downloading" lines in delojza.log and report
# the number to the remote counter endpoint.
while :; do
    # grep -c counts matching lines directly; it prints nothing when the log
    # does not exist yet, so fall back to 0.
    NUM=$(grep -c 'INFO - Downloading' delojza.log 2>/dev/null)
    NUM=${NUM:-0}
    echo "$NUM"
    curl -s "https://kunsaxan.sdbs.cz/counter.php?key=delojza7953713b19ef2ea055156c8dc175bf80&count=${NUM}"
    sleep 300
done
|
Loading…
Reference in a new issue