delojza/delojza.py

264 lines
8.2 KiB
Python
Raw Normal View History

2018-01-31 12:32:25 +01:00
#!/usr/bin/env python3
2018-02-02 15:28:49 +01:00
import errno
2018-01-30 13:47:33 +01:00
import logging
import os
2018-01-31 14:34:59 +01:00
import re
2018-01-31 14:11:42 +01:00
import shutil
2018-01-31 12:30:08 +01:00
import sys
2018-01-31 14:23:01 +01:00
from glob import glob
2018-01-30 13:47:33 +01:00
2018-01-31 14:34:59 +01:00
import filetype
2018-10-02 16:12:32 +02:00
import markovify
2018-10-28 14:00:25 +01:00
import pytumblr
2018-01-30 13:47:33 +01:00
import requests
import youtube_dl
from telegram import MessageEntity
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters
# Directory that contains this script.
DIR = os.path.dirname(os.path.realpath(__file__))
# Scratch directory youtube-dl writes into before files are moved (see download_ydl).
TMP_DIR = '/var/tmp'
# Root below which one sub-directory per hashtag is created.
OUT_DIR = DIR + '/out'

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger("kunsax")

# Tumblr REST client; its four arguments appear only as '***REMOVED***'
# placeholders in this copy of the source.
client = pytumblr.TumblrRestClient(
    '***REMOVED***',
    '***REMOVED***',
    '***REMOVED***',
    '***REMOVED***',
)

# Markov model: assigned in main() and replaced by add_to_corpus().
markov = None
2018-10-02 16:12:32 +02:00
def add_to_corpus(text):
    """Merge *text* into the global Markov model and append it to the corpus file.

    The text is lower-cased, wrapped in a ``markovify.NewlineText`` model and
    combined with the current global ``markov`` model. The lower-cased text,
    followed by a newline, is then appended to ``initial.txt``.
    """
    global markov
    lowered = text.lower()
    markov = markovify.combine([markov, markovify.NewlineText(lowered)])
    with open("initial.txt", 'a') as corpus_file:
        corpus_file.write(lowered + '\n')
2018-01-30 13:47:33 +01:00
2018-02-02 15:28:49 +01:00
def datestr(date):
    """Return *date* formatted as ``YYYY-MM-DD@HHMM`` via its ``strftime`` method."""
    stamp_format = "%Y-%m-%d@%H%M"
    return date.strftime(stamp_format)
def mkdir_p(path):
    """Create directory *path* together with any missing parents (``mkdir -p``).

    An already existing directory is not an error.

    :param path: Directory path to create.
    :raises OSError: For any other failure, including *path* existing as
        something that is not a directory.
    """
    os.makedirs(path, exist_ok=True)
2018-01-30 13:47:33 +01:00
def ytdl_has(url):
    """Tell whether youtube-dl has a dedicated (non-generic) extractor for *url*.

    Returns ``True`` as soon as one extractor reports the URL as suitable and
    is not the ``generic`` one; URLs containing ``/channel/`` never qualify.
    """
    return any(
        extractor.suitable(url)
        and extractor.IE_NAME != 'generic'
        and '/channel/' not in url
        for extractor in youtube_dl.extractor.gen_extractors()
    )
2018-04-25 14:21:48 +02:00
def download_ydl(urls, subdir, date, extract=False, filename=None):
    """Download *urls* with youtube-dl and move the results to ``OUT_DIR/subdir``.

    Files are first written to ``TMP_DIR`` under a name built from
    ``datestr(date)``, the title and the id. Every file sharing that name
    stem, whatever its extension, is then moved into the output directory.

    :param urls: URLs handed to youtube-dl.
    :param subdir: Directory name below ``OUT_DIR``.
    :param date: Value accepted by ``datestr``; prefixes the file names.
    :param extract: When true, the ``bestaudio`` format is requested.
    :param filename: Not used; accepted so the signature matches
        ``download_raw``.
    :return: Always an empty list.
    """
    ydl_opts = {
        'noplaylist': True,
        'restrictfilenames': True,
        'outtmpl': TMP_DIR + '/' + datestr(date) + '__%(title)s__%(id)s.%(ext)s',
    }
    if extract:
        ydl_opts['format'] = 'bestaudio'
        # NOTE: converting to wav through an 'FFmpegExtractAudio'
        # post-processor is currently disabled.
    out_dir = OUT_DIR + '/' + subdir + '/'
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(urls)
        for url in urls:
            # The media was already fetched by download() above; only the
            # metadata is needed here to rebuild the output file name.
            info = ydl.extract_info(url, download=False)
            stem = os.path.splitext(ydl.prepare_filename(info))[0]
            for downloaded in glob(stem + '.*'):
                logger.info("Moving %s to %s...", downloaded, out_dir)
                shutil.move(downloaded, out_dir)
    return []
2018-01-30 13:47:33 +01:00
2018-04-25 14:21:48 +02:00
def download_raw(urls, subdir, date, extract=False, filename=None):
    """Stream each URL in *urls* to ``OUT_DIR/subdir`` and return the saved paths.

    The local name is ``<datestr(date)>__<name>``, where ``<name>`` is
    *filename* (reduced to its last path component) or, when that is empty,
    the last ``/``-separated part of the URL. If the resulting name does not
    end in a 3-5 character extension, ``filetype.guess`` is asked for one and
    the file is renamed accordingly.

    :param urls: URLs to fetch.
    :param subdir: Directory name below ``OUT_DIR``.
    :param date: Value accepted by ``datestr``; prefixes the file names.
    :param extract: Not used; accepted so the signature matches
        ``download_ydl``.
    :param filename: Optional name to store the download under.
    :return: List of paths of the files as they exist on disk.
    :raises requests.HTTPError: When the server answers with an error status.
    """
    # Callers in this file pass Telegram document names / audio titles here;
    # keep only the final component so the name cannot leave the target
    # directory.
    safe_name = os.path.basename(filename) if filename else ''
    filenames = []
    for url in urls:
        local_filename = OUT_DIR + '/' + subdir + '/' + "%s__%s" % (
            datestr(date), safe_name or url.split('/')[-1])
        r = requests.get(url, stream=True)
        try:
            # Do not save an HTTP error page as if it were the requested file.
            r.raise_for_status()
            with open(local_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
        finally:
            # A streamed response keeps its connection until closed.
            r.close()
        if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]):
            kind = filetype.guess(local_filename)
            if kind is None:
                logger.error("File has no extension and could not be determined!")
            else:
                logger.info('Moving file without extension... %s?' % kind.extension)
                renamed = local_filename + '.' + kind.extension
                shutil.move(local_filename, renamed)
                # Report the path the file actually has after the rename.
                local_filename = renamed
        filenames.append(local_filename)
    return filenames
2018-01-30 13:47:33 +01:00
2018-04-25 14:30:08 +02:00
# ``(sender, hashtag text)`` remembered by handle_hashtag and consumed by
# get_first_hashtag, or None when nothing is pending.
last_hashtag = None


def get_first_hashtag(message):
    """Return the normalised first hashtag that applies to *message*, or None.

    Hashtag entities of the message text are considered first, then those of
    the caption. When the message carries none, the hashtag remembered in the
    global ``last_hashtag`` is used (and cleared) if it was stored for the
    same ``from_user``. The leading character is dropped and the rest
    upper-cased; any result containing ``PRAS`` collapses to ``"PRAS"``.
    """
    global last_hashtag
    found = [message.parse_entity(entity)
             for entity in message.entities
             if entity.type == 'hashtag']
    found.extend(message.parse_caption_entity(entity)
                 for entity in message.caption_entities
                 if entity.type == 'hashtag')
    if found:
        raw_tag = found[0]
    elif last_hashtag is not None and last_hashtag[0] == message.from_user:
        raw_tag = last_hashtag[1]
        last_hashtag = None
    else:
        return None
    tag = raw_tag[1:].upper()
    return "PRAS" if "PRAS" in tag else tag
2018-02-02 15:28:49 +01:00
2018-04-25 14:30:08 +02:00
def handle_hashtag(bot, update):
    """Remember the first hashtag of the incoming message and who sent it.

    Stores ``(from_user, hashtag_text)`` in the global ``last_hashtag``; the
    global is left untouched when the message has no hashtag entities.
    """
    global last_hashtag
    message = update.message
    tags = [message.parse_entity(entity)
            for entity in message.entities
            if entity.type == 'hashtag']
    if tags:
        last_hashtag = (message.from_user, tags[0])
2018-01-31 12:30:08 +01:00
# noinspection PyBroadException
def handle(urls, message, download, tumblr=False, filename=None):
    """Download *urls* into the directory named after the message's hashtag.

    Nothing is downloaded when ``get_first_hashtag`` yields ``None``.
    Otherwise the target directory is created, the user is told what is
    happening, and *download* is invoked. Files downloaded under the
    ``TUMBLR`` hashtag are additionally queued on Tumblr.

    :param urls: URLs passed through to *download*.
    :param message: Telegram message; used for the hashtag lookup, the date
        and for replying.
    :param download: Callable with the signature of ``download_ydl`` /
        ``download_raw``.
    :param tumblr: Accepted but not consulted by this function.
        NOTE(review): queueing currently depends on the hashtag alone -
        confirm whether this flag was meant to gate it.
    :param filename: Optional file name forwarded to *download*.
    :return: The list returned by *download*, or ``None`` when the message
        was ignored or an error occurred.
    """
    try:
        hashtag = get_first_hashtag(message)
        if hashtag is None:
            logger.info("Ignoring %s due to no hashtag present..." % urls)
            return None

        logger.info("Downloading %s" % urls)

        reply = 'Downloading'
        if hashtag:
            mkdir_p(OUT_DIR + '/' + hashtag)
            reply += ' to "' + hashtag + '"'
        reply += '...'

        extract = False
        if hashtag in ('AUDIO', 'RADIO') and download != download_raw:
            extract = True
            reply += ' (And also guessing you want to extract the audio)'
        message.reply_text(reply)

        filenames = download(urls,
                             hashtag or '.', message.date,
                             extract=extract, filename=filename)
        if hashtag == 'TUMBLR':
            message.reply_text('(btw, queueing to tumblr)')
            for queued in filenames:
                client.create_photo('kunsaxan', state="queue", data=queued)
        return filenames
    except Exception as exc:
        # Deliberately broad: any failure is reported back to the chat, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        logger.exception("Handling %s failed", urls)
        if "Timed out" not in str(exc):
            message.reply_text("Something is FUCKED: %s" % exc)
        return None
2018-04-25 14:21:48 +02:00
def handle_url(bot, update):
    """Pass the message's URLs that youtube-dl supports on to ``handle``.

    URL entities are parsed out of the message and filtered with
    ``ytdl_has``; if any remain they are downloaded via ``download_ydl``.
    """
    message = update.message
    candidates = (message.parse_entity(entity)
                  for entity in message.entities
                  if entity.type == 'url')
    ytdl_urls = [url for url in candidates if ytdl_has(url)]
    if ytdl_urls:
        handle(ytdl_urls, message, download_ydl)
2018-01-31 12:30:08 +01:00
def handle_rest(bot, update):
    """Download the attachment of the incoming message, if it has one.

    A photo takes precedence (the widest size is chosen and the ``tumblr``
    flag is set); otherwise the first present of document, audio, video,
    video note and voice is used. Documents supply ``file_name`` and audio
    supplies ``title`` as the file name. The file path obtained from
    ``bot.getFile`` is handed to ``handle`` with ``download_raw``.
    """
    message = update.message
    file_id, filename, tumblr = None, None, False

    if len(message.photo) > 0:
        widest = max(message.photo, key=lambda p: p.width)
        file_id, tumblr = widest.file_id, True
    else:
        # (message attribute, attribute providing a file name or None),
        # listed in priority order.
        sources = (
            ('document', 'file_name'),
            ('audio', 'title'),
            ('video', None),
            ('video_note', None),
            ('voice', None),
        )
        for attachment_attr, name_attr in sources:
            attachment = getattr(message, attachment_attr)
            if attachment is None:
                continue
            if name_attr is not None:
                filename = getattr(attachment, name_attr)
            file_id = attachment.file_id
            break

    if file_id is not None:
        url = bot.getFile(file_id).file_path
        handle([url], message, download_raw, tumblr=tumblr, filename=filename)
2018-01-30 13:47:33 +01:00
2018-10-02 16:12:32 +02:00
def handle_text(bot, update):
    """Feed the text of the incoming message into the Markov corpus."""
    message_text = update.message.text
    add_to_corpus(message_text)
2018-01-30 13:47:33 +01:00
def start(bot, update):
    """Reply to the ``/start`` command with a generated sentence.

    ``make_sentence`` returns ``None`` when it cannot build a sentence, so
    the same retry count and fallback text as in ``error`` are used instead
    of passing ``None`` to ``reply_text``.
    """
    update.message.reply_text(markov.make_sentence(tries=100) or "Mmmm, I like it...")
2018-01-30 13:47:33 +01:00
def error(bot, update, error):
    """Log a dispatcher error and, when an update is available, answer it.

    For errors whose text contains ``Timed out`` the user gets a generated
    sentence (or a fixed fallback) and ``handle_rest`` is run for the update;
    any other error is reported back verbatim.
    """
    logger.error(error)
    timed_out = "Timed out" in str(error)
    if update is None:
        return
    if not timed_out:
        update.message.reply_text("Something is fucked: %s" % error)
        return
    update.message.reply_text(markov.make_sentence(tries=100) or "Mmmm, I like it...")
    handle_rest(bot, update)
2018-01-30 13:47:33 +01:00
def main():
    """Build the Markov model from ``initial.txt`` and run the Telegram bot.

    Registers the command, error and message handlers on the dispatcher,
    starts polling and blocks in ``updater.idle()``.
    """
    global markov
    with open("initial.txt") as f:
        text = f.read()
    markov = markovify.NewlineText(text.lower())
    # make_sentence() may return None; %s formatting logs that instead of
    # raising TypeError the way string concatenation would.
    logger.info("Sentence of the day: %s", markov.make_sentence())

    updater = Updater("***REMOVED***")

    dp = updater.dispatcher

    dp.add_handler(CommandHandler("start", start))
    dp.add_error_handler(error)

    # Registration order is kept exactly as before: URL, attachments,
    # hashtag, plain text.
    dp.add_handler(MessageHandler(Filters.entity(MessageEntity.URL), handle_url))
    dp.add_handler(
        MessageHandler(
            Filters.photo | Filters.video | Filters.video_note | Filters.audio | Filters.voice | Filters.document,
            handle_rest))
    dp.add_handler(MessageHandler(Filters.entity(MessageEntity.HASHTAG), handle_hashtag))
    dp.add_handler(MessageHandler(Filters.text, handle_text))

    updater.start_polling()
    logger.info("Started Telegram bot...")
    updater.idle()
# Start the bot only when this file is executed as a script.
if __name__ == "__main__":
    main()