delojza/delojza.py

1063 lines
39 KiB
Python
Raw Permalink Normal View History

2018-01-31 12:32:25 +01:00
#!/usr/bin/env python3
2018-01-30 13:47:33 +01:00
import logging
import os
import pprint
2018-01-31 14:34:59 +01:00
import re
2018-01-31 14:11:42 +01:00
import shutil
import sqlite3
import subprocess
2018-01-31 12:30:08 +01:00
import sys
2019-05-03 11:41:54 +02:00
import tempfile
2019-05-23 14:36:12 +02:00
import unicodedata
2019-11-07 16:29:29 +01:00
from configparser import ConfigParser, NoSectionError
from datetime import datetime, timedelta
2018-01-31 14:23:01 +01:00
from glob import glob
from operator import itemgetter
2019-05-29 10:25:34 +02:00
from random import random
2021-09-14 16:03:55 +02:00
from sqlite3.dbapi2 import Connection
from time import sleep
2021-09-14 16:03:55 +02:00
from typing import Any, List, Optional, Tuple, cast
2018-01-30 13:47:33 +01:00
2019-05-02 19:18:35 +02:00
import acoustid
2018-01-31 14:34:59 +01:00
import filetype
2019-05-01 10:50:21 +02:00
import mutagen.id3
2018-10-28 14:00:25 +01:00
import pytumblr
2018-01-30 13:47:33 +01:00
import requests
import telegram
2018-01-30 13:47:33 +01:00
import youtube_dl
2021-09-14 16:03:55 +02:00
from _typeshed import StrPath
from mutagen import File, FileType
from mutagen.easyid3 import EasyID3
2021-09-14 16:03:55 +02:00
from telegram.ext import CommandHandler, MessageHandler, Updater
from telegram.ext.callbackcontext import CallbackContext
from telegram.ext.filters import Filters
from telegram.update import Update
2019-05-29 13:50:44 +02:00
from youtube_dl import DownloadError
from youtube_dl.version import __version__ as YTDL_VERSION
2018-01-30 13:47:33 +01:00
from markov import MarkovBlabberer
from util import datestr, mkdir_p
2018-02-02 15:28:49 +01:00
class DelojzaDB:
    """Small wrapper around the SQLite database of protected tags and chats.

    The connection is opened lazily by :meth:`initialize`; every query method
    raises ``RuntimeError`` when it is called before that.
    """

    def __init__(self, db_path):
        self.db_path = db_path
        # Stays None until initialize() opens the connection.
        self.db: Optional[Connection] = None

    def initialize(self):
        """Open the SQLite connection unless one is already open."""
        if self.db is not None:
            return
        self.db = sqlite3.connect(self.db_path)

    def _connection(self) -> Connection:
        """Return the open connection, or raise if initialize() was never called."""
        if self.db is None:
            raise RuntimeError("Database not initialized!")
        return self.db

    def get_protected_tags(self):
        """Return the names of all tags flagged as protected."""
        cursor = self._connection().execute(
            "SELECT tag FROM tags WHERE protected == 1"
        )
        return [row[0] for row in cursor.fetchall()]

    def get_protected_chats(self):
        """Return the ids of all chats flagged as protected."""
        cursor = self._connection().execute(
            "SELECT id FROM chats WHERE protected == 1"
        )
        return [row[0] for row in cursor.fetchall()]

    def get_chat(self, id):
        """Return the ``(id, protected)`` row for a chat, or None if unknown."""
        cursor = self._connection().execute(
            "SELECT id, protected FROM chats WHERE id == ?", (id,)
        )
        return cursor.fetchone()

    def set_chat_protected(self, id: int, protected: bool):
        """Update the chat's protected flag, inserting the row if it is missing."""
        connection = self._connection()
        if self.get_chat(id):
            statement = "UPDATE chats SET protected = ? WHERE id = ?"
            parameters = (protected, id)
        else:
            statement = "INSERT INTO chats (id, protected) VALUES (?, ?)"
            parameters = (id, protected)
        connection.execute(statement, parameters)
        connection.commit()

    def get_tag(self, tag: str):
        """Return the ``(id, tag, protected)`` row for a tag, or None if unknown."""
        cursor = self._connection().execute(
            "SELECT id, tag, protected FROM tags WHERE tag == ?", (tag,)
        )
        return cursor.fetchone()

    def set_tag_protected(self, tag: str, protected: bool):
        """Update the tag's protected flag, inserting the row if it is missing."""
        connection = self._connection()
        if self.get_tag(tag):
            statement = "UPDATE tags SET protected = ? WHERE tag = ?"
            parameters = (protected, tag)
        else:
            statement = "INSERT INTO tags (tag, protected) VALUES (?, ?)"
            parameters = (tag, protected)
        connection.execute(statement, parameters)
        connection.commit()
class DelojzaBot:
2021-09-14 14:08:01 +02:00
def __init__(
    self,
    tg_api_key: str,
    out_dir: StrPath,
    redirects: Optional[List[Tuple[str, str]]] = None,
    tmp_dir: Optional[StrPath] = None,
    db_path: Optional[StrPath] = None,
    protected_password: Optional[str] = None,
    acoustid_key: Optional[str] = None,
    tumblr_name: Optional[str] = None,
    tumblr_keys: Optional[Tuple[str, str, str, str]] = None,
    markov: Optional[MarkovBlabberer] = None,
):
    """Set up logging, storage locations, Telegram handlers and optional services.

    :param tg_api_key: token passed to the Telegram ``Updater``.
    :param out_dir: root directory that downloads are sorted into.
    :param redirects: ``(hashtag, directory)`` pairs; the hashtag is upper-cased
        and the directory is created if needed.
    :param tmp_dir: scratch directory; defaults to ``tempfile.gettempdir()``.
    :param db_path: SQLite file; defaults to ``delojza.db`` next to this script.
    :param protected_password: password compared against in ``/protect chat``.
    :param acoustid_key: enables AcoustID lookups when set.
    :param tumblr_name: blog name; only used together with ``tumblr_keys``.
    :param tumblr_keys: four credentials passed to ``pytumblr.TumblrRestClient``.
    :param markov: optional sentence generator used for replies.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    self._setup_logging(script_dir)

    self.db = DelojzaDB(db_path or os.path.join(script_dir, "delojza.db"))

    resolved_out = os.path.abspath(out_dir)
    # Drop one trailing slash, but never reduce the root "/" to an empty string.
    if len(resolved_out) > 1 and resolved_out.endswith("/"):
        resolved_out = resolved_out[:-1]
    self.out_dir = resolved_out
    # Log the resolved values, not the raw arguments (tmp_dir may be None).
    self.logger.debug(f"OUT_DIR: {self.out_dir}")

    self.tmp_dir = tmp_dir if tmp_dir else tempfile.gettempdir()
    self.logger.debug(f"TMP_DIR: {self.tmp_dir}")

    self.markov = markov

    self.redirects = {}
    if redirects is not None:
        for hashtag, directory in redirects:
            hashtag = hashtag.upper()
            directory = str(directory)
            if not directory:
                # An empty value cannot be a directory; skip it instead of
                # failing on the trailing-slash check below.
                self.logger.warning(
                    f"Ignoring redirect for hashtag {hashtag}: empty directory"
                )
                continue
            if len(directory) > 1 and directory.endswith("/"):
                directory = directory[:-1]
            mkdir_p(directory)
            self.redirects[hashtag] = directory
            self.logger.debug(f"Will redirect hashtag {hashtag} to {directory}")

    self.updater = Updater(tg_api_key)
    dp = self.updater.dispatcher

    dp.add_handler(CommandHandler("start", self.tg_start))
    dp.add_error_handler(self.tg_error)
    dp.add_handler(CommandHandler("stats", self.tg_stats))
    dp.add_handler(CommandHandler("orphans", self.tg_orphan))
    dp.add_handler(CommandHandler("orphans_full", self.tg_orphan_full))
    dp.add_handler(CommandHandler("retag", self.tg_retag))
    dp.add_handler(CommandHandler("delete", self.tg_delete))
    dp.add_handler(CommandHandler("protect", self.tg_protect))
    dp.add_handler(CommandHandler("version", self.tg_version))
    dp.add_handler(CommandHandler("queue", self.tg_queue))
    # Registered last so the command handlers above are tried first.
    dp.add_handler(MessageHandler(Filters.all, self.tg_handle))

    self.acoustid_key = acoustid_key

    if tumblr_name and tumblr_keys:
        self.tumblr_name: Optional[str] = tumblr_name
        self.tumblr_client = pytumblr.TumblrRestClient(*tumblr_keys)
    else:
        # Always define both attributes so later reads cannot hit AttributeError.
        self.tumblr_name = None
        self.tumblr_client = None

    self.protected_password = protected_password

    # Per-chat memory: last downloaded files and last hashtag-only message.
    self.last_downloaded = {}
    self.last_hashtags = {}
2021-09-14 16:03:55 +02:00
def _setup_logging(self, log_path: StrPath):
    """Attach console (INFO) and file (DEBUG) handlers to the "delojza" logger.

    The log file is ``delojza.log`` inside *log_path*. Because the logger is
    looked up by name and therefore shared, handlers are only attached the
    first time; otherwise every further instance would duplicate each line.
    """
    self.logger = logging.getLogger("delojza")
    self.logger.setLevel(logging.DEBUG)

    if self.logger.handlers:
        # Already configured by an earlier instance.
        return

    formatter = logging.Formatter(
        "%(asctime)s - %(name)s [%(levelname)s] %(message)s"
    )

    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    ch.setFormatter(formatter)

    dfh = logging.FileHandler(os.path.join(log_path, "delojza.log"))
    dfh.setLevel(logging.DEBUG)
    dfh.setFormatter(formatter)

    self.logger.addHandler(ch)
    self.logger.addHandler(dfh)
def _log_msg(self, update):
from_user = update.message.from_user
2021-09-14 14:08:01 +02:00
self.logger.debug(
f"Received from {from_user.username or (from_user.first_name + from_user.last_name)}"
f" ({update.message.chat.id}): " + (update.message.text or "<NONE>")
)
2019-08-28 11:48:03 +02:00
@staticmethod
def ytdl_can(url: str):
    """Return True when youtube-dl has a dedicated extractor for *url*.

    The "generic" extractor does not count, and URLs containing "/channel/"
    are always rejected.
    """
    return any(
        extractor.suitable(url)
        and extractor.IE_NAME != "generic"
        and "/channel/" not in url
        for extractor in youtube_dl.extractor.gen_extractors()
    )
2019-05-23 14:36:12 +02:00
# https://github.com/django/django/blob/master/django/utils/text.py#L393
2019-05-22 15:21:25 +02:00
@staticmethod
2021-09-14 16:03:55 +02:00
def sanitize(text: str):
if text is None:
return ""
text = (
unicodedata.normalize("NFKD", text)
2021-09-14 14:08:01 +02:00
.encode("ascii", "ignore")
.decode("ascii")
)
2021-09-14 16:03:55 +02:00
return re.sub(r"[^\w.()\[\]{}#-]", "_", text)
2019-05-22 15:21:25 +02:00
@staticmethod
def _get_tags(filepath: StrPath):
    """Return ``(artist, title)`` read from the file's ID3 tags.

    Either element is None when that tag is absent; ``(None, None)`` is
    returned when the file has no ID3 header at all.
    """
    try:
        audio = EasyID3(filepath)
    except mutagen.id3.ID3NoHeaderError:
        return None, None

    # Indexing a missing key raises KeyError, so use .get() to make the
    # "tag not present" case yield None as intended.
    artists = audio.get("artist")
    titles = audio.get("title")
    return (
        artists[0] if artists else None,
        titles[0] if titles else None,
    )
2019-05-29 10:25:34 +02:00
@staticmethod
def _tag_file(filepath: StrPath, artist: Optional[str], title: str):
    """Write *title* (and *artist*, when given) into the file's ID3 tags.

    If the file has no ID3 header yet, an empty tag is added and saved first.
    Files that mutagen cannot identify are left untouched.
    """
    try:
        id3 = mutagen.id3.ID3(filepath)
    except mutagen.id3.ID3NoHeaderError:
        mutafile = cast(Optional[FileType], File(filepath))
        # Compare against None explicitly: a recognised file without tags is
        # an empty mapping and would be falsy, which is exactly the case this
        # branch has to handle.
        if mutafile is None:
            return
        mutafile.add_tags()
        mutafile.save()
        id3 = mutagen.id3.ID3(filepath)

    id3.add(mutagen.id3.TIT2(encoding=3, text=title))
    if artist:
        id3.add(mutagen.id3.TOPE(encoding=3, text=artist))
        id3.add(mutagen.id3.TPE1(encoding=3, text=artist))
    id3.save()
def _autotag_file(self, filepath, message, info=None):
2019-05-02 19:18:35 +02:00
if info is None:
info = {}
2019-05-01 12:23:30 +02:00
title = None
artist = None
2019-05-02 19:18:35 +02:00
source = None
best_acoustid_score = 0
2019-05-02 19:18:35 +02:00
if self.acoustid_key:
2019-05-02 19:18:35 +02:00
try:
self.logger.debug("Requesting AcoustID for {}".format(filepath))
2021-09-14 14:08:01 +02:00
results = sorted(
acoustid.match(self.acoustid_key, filepath),
key=itemgetter(0),
reverse=True,
)
2019-05-02 19:18:35 +02:00
if len(results) > 0:
score, rid, aid_title, aid_artist = results[0]
2021-09-14 14:08:01 +02:00
if score > 0.4:
2019-05-02 19:18:35 +02:00
title = aid_title
2021-09-14 16:03:55 +02:00
artist = re.sub(r" *; +", " & ", aid_artist or "")
best_acoustid_score = score
2019-05-02 19:18:35 +02:00
source = "AcoustID ({}%)".format(round(score * 100))
except acoustid.NoBackendError:
self.logger.warning("chromaprint library/tool not found")
except acoustid.FingerprintGenerationError:
self.logger.warning("fingerprint could not be calculated")
except acoustid.WebServiceError as exc:
2021-09-14 14:08:01 +02:00
self.logger.warning(
"web service request failed: {}".format(exc.message)
)
2019-05-02 19:18:35 +02:00
2021-09-14 14:08:01 +02:00
if best_acoustid_score < 0.8:
if "track" in info:
title = info["track"]
if "artist" in info:
artist = info["artist"]
2021-09-14 14:08:01 +02:00
if "track" in info or "artist" in info:
source = "supplied metadata"
2019-05-02 19:18:35 +02:00
2021-09-14 14:08:01 +02:00
if title is None and artist is None and "-" in info.get("title", ""):
split = info["title"].split("-")
artist = split[0]
title = split[1]
source = "fallback (artist - title)"
2019-05-02 19:18:35 +02:00
2021-09-14 14:08:01 +02:00
if title is None and "title" in info:
title = info["title"]
source = "full title fallback"
2021-09-14 14:08:01 +02:00
if "soundcloud" in info.get("extractor", "") and artist is None:
artist = info["uploader"]
source = 'soundcloud "fallback"'
2019-05-01 12:23:30 +02:00
2019-05-02 19:18:35 +02:00
artist = artist.strip() if artist else None
title = title.strip() if title else None
2021-09-14 16:03:55 +02:00
if title is None:
message.reply_text("Tried tagging, found nothing :(")
return
2021-09-14 14:08:01 +02:00
message.reply_text(
'Tagging as "{}" by "{}"\nvia {}'.format(title, artist, source)
)
self.logger.info(
"Tagging {} w/ {} - {} [{}]...".format(filepath, title, artist, source)
)
2019-05-29 10:25:34 +02:00
self._tag_file(filepath, artist, title)
2019-05-02 19:18:35 +02:00
@staticmethod
2021-09-14 16:03:55 +02:00
def _get_percent_filled(directory: str):
output = subprocess.check_output(["df", directory])
2021-09-14 14:08:01 +02:00
percents_re = re.search(r"[0-9]+%", output.decode("utf-8"))
2019-07-25 11:25:35 +02:00
if not percents_re:
raise RuntimeError
return int(percents_re.group(0)[:-1])
# noinspection PyUnusedLocal
def download_ytdl(
    self,
    urls: List[str],
    out_path: StrPath,
    date: datetime,
    message: telegram.Message,
    audio: bool = False,
    filetitle: Optional[str] = None,
):
    """Download *urls* with youtube-dl into the temp dir, then move them to *out_path*.

    A download failing with "403" in its error is retried (after a short
    sleep) for up to four further attempts; any other failure is re-raised.
    With ``audio=True`` the result is extracted to MP3, and MP3 files are
    auto-tagged before being moved. *filetitle* is accepted for signature
    parity with :meth:`download_raw` and is not used.

    :return: list of final file paths.
    """
    name_template = "{}__%(title)s__%(id)s.%(ext)s".format(datestr(date))
    options = {
        "noplaylist": True,
        "restrictfilenames": True,
        "outtmpl": os.path.join(self.tmp_dir, name_template),
    }
    if audio:
        options["format"] = "bestaudio/best"
        options["postprocessors"] = [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "256",
            }
        ]
        options["postprocessor_args"] = ["-ar", "44100"]

    moved_files = []
    with youtube_dl.YoutubeDL(options) as downloader:
        failures = 0
        while True:
            try:
                downloader.download(urls)
            except DownloadError as exc:
                failures += 1
                if "403" not in str(exc) or failures >= 5:
                    raise exc
                self.logger.warning("Received a 403!")
                sleep(1.357)
                if self.markov:
                    message.reply_text(self.markov.make_sentence())
            else:
                break

        # Ask again (without downloading) for the metadata, to learn which
        # file names the download produced.
        infos = [downloader.extract_info(url, download=False) for url in urls]
        for info in infos:
            expected_name = cast(str, downloader.prepare_filename(info))
            stem = os.path.splitext(expected_name)[0]
            # The extension may differ from the expected one after
            # post-processing, so match every file sharing the stem.
            for produced in glob(stem + ".*"):
                if produced.endswith("mp3"):
                    self._autotag_file(produced, message, info=info)
                self.logger.info("Moving %s to %s..." % (produced, out_path))
                moved_files.append(shutil.move(produced, out_path))

    return moved_files
2021-09-14 16:03:55 +02:00
def download_raw(
    self,
    urls: List[str],
    out_path: StrPath,
    date: datetime,
    message: telegram.Message,
    audio: bool = False,
    filetitle: Optional[str] = None,
):
    """Stream each URL straight into *out_path* and return the written paths.

    The file name is the date prefix plus the sanitized *filetitle* (or the
    last URL segment). A file saved without a 3-5 character extension gets
    one appended based on its detected type. With ``audio=True``, MP3 files
    lacking a title tag are auto-tagged.

    :raises requests.HTTPError: when the server answers with an error status;
        the error body is then not written to disk.
    """
    filenames = []
    for url in urls:
        local_filename = os.path.join(
            out_path,
            "{}__{}".format(
                datestr(date), self.sanitize(filetitle or url.split("/")[-1])
            ),
        )
        final_filename = local_filename
        is_mp3 = local_filename.endswith("mp3")

        # The context manager releases the connection even if writing fails.
        with requests.get(url, stream=True) as response:
            # Fail before creating the file rather than saving an error page.
            response.raise_for_status()
            with open(local_filename, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

        if not re.match(r".*\..{3,5}$", os.path.split(local_filename)[-1]):
            kind = filetype.guess(local_filename)
            if kind is None:
                self.logger.error(
                    "File has no extension and could not be determined!"
                )
            else:
                self.logger.info(
                    "Moving file without extension... %s?" % kind.extension
                )
                final_filename = shutil.move(
                    local_filename, local_filename + "." + kind.extension
                )
                is_mp3 = kind.extension == "mp3"

        filenames.append(final_filename)

        if audio and is_mp3:
            try:
                id3 = mutagen.id3.ID3(final_filename)
                untagged = "TIT2" not in id3
            except mutagen.id3.ID3NoHeaderError:
                untagged = True
            if untagged:
                self._autotag_file(final_filename, message)

    return filenames
2018-01-30 13:47:33 +01:00
2019-04-18 17:40:48 +02:00
@staticmethod
def extract_hashtags(message: telegram.Message):
    """Return the hashtags of *message* (text first, then caption).

    Each tag is returned upper-cased without its leading "#"; any tag
    containing "PRAS" is collapsed to exactly "PRAS".
    """
    raw_tags = [
        message.parse_entity(entity)
        for entity in message.entities
        if entity.type == "hashtag"
    ]
    raw_tags.extend(
        message.parse_caption_entity(entity)
        for entity in message.caption_entities
        if entity.type == "hashtag"
    )

    normalized = []
    for raw in raw_tags:
        tag = raw[1:].upper()
        normalized.append("PRAS" if "PRAS" in tag else tag)
    return normalized
2021-09-14 16:03:55 +02:00
def _get_hashtags(self, message: telegram.Message):
    """Return the message's own hashtags, or recently remembered ones.

    When the message carries none, the hashtags last stored for this chat are
    reused if they came from the same user less than an hour ago.
    """
    own_tags = self.extract_hashtags(message)
    if len(own_tags) != 0:
        return own_tags

    remembered = self.last_hashtags.get(message.chat.id)
    if remembered is None:
        return own_tags

    user, ts, last_hashtags = remembered
    if user == message.from_user and ts > datetime.now() - timedelta(hours=1):
        return last_hashtags
    return own_tags
2021-09-14 16:03:55 +02:00
def handle_text(self, message: telegram.Message, hashtags: List[str]):
    """Save the message text to a ``.txt`` file when the first hashtag is TEXT/TXT.

    The file name is built from the message date and up to seven words of
    the text (hashtags removed, sanitized, at most 64 characters). The
    remaining hashtags form the sub-directory, defaulting to ``TEXT``.
    Messages without text (e.g. media with only a caption) are ignored.
    """
    if len(hashtags) == 0 or hashtags[0] not in ("TEXT", "TXT"):
        return

    text = message.text
    if text is None:
        # Nothing to save; re.sub() below would raise on None.
        return

    info_line = self.sanitize(
        "-".join(re.sub(r"#[\w]+", "", text).strip().split()[:7])
    )[:64]
    filename = "{}__{}.txt".format(datestr(message.date), info_line)

    out_dir = self.redirects.get(hashtags[0], self.out_dir)
    subdirs = hashtags[1:] or ["TEXT"]
    out_path = os.path.join(out_dir, *subdirs)
    file_path = os.path.join(out_path, filename)

    mkdir_p(out_path)
    # Explicit UTF-8 so emoji and non-ASCII text do not depend on the locale.
    with open(file_path, "w", encoding="utf-8") as out_file:
        out_file.write(text)

    message.reply_text(
        'Saved text to "{}"...'.format(os.path.join(*subdirs, filename))
    )
# noinspection PyBroadException
def handle(
    self,
    urls: List[str],
    message: telegram.Message,
    hashtags: List[str],
    download_fn: Any,
    filetitle=None,
):
    """Download *urls* into the directory selected by *hashtags*.

    Steps: pick the output root (redirect for the first hashtag, else
    ``out_dir``); prefix ``PUBLIC`` when a protected tag is used from a chat
    that is not protected; reroute to an existing ``_``-prefixed directory
    when the plain one does not exist; call *download_fn*; optionally post
    the files to tumblr for ``TUMBLR``/``TUMBLR_NOW``; remember the result
    for ``/retag`` and ``/delete``.

    :param download_fn: callable with the signature of :meth:`download_raw`.
    :return: True on success, False when there were no hashtags or any step
        failed (the failure is logged and, unless it is a timeout, reported
        to the chat).
    """
    self.db.initialize()

    try:
        if len(hashtags) == 0:
            self.logger.info("Ignoring %s due to no hashtag present..." % urls)
            return False

        # Work on a private copy: the list is modified below and must not
        # leak those changes back into the caller's (possibly shared) list.
        hashtags = list(hashtags)

        # NOTE(review): when no redirect applies this stays the same list as
        # `hashtags`, so later modifications show up in replies and in
        # last_downloaded; kept as-is to preserve user-visible behaviour.
        original_hashtags = hashtags

        if hashtags[0] in self.redirects:
            out_dir = self.redirects[hashtags[0]]
            hashtags = hashtags[1:]
        else:
            out_dir = self.out_dir

        # Query once instead of once per hashtag.
        protected_tags = self.db.get_protected_tags()
        if any(hashtag in protected_tags for hashtag in original_hashtags):
            if message.chat.id not in self.db.get_protected_chats():
                self.logger.info(
                    "Redirecting {} in chat {} due to protected hashtags: {}...".format(
                        urls, message.chat.title, hashtags
                    )
                )
                hashtags.insert(0, "PUBLIC")

        for i in range(len(hashtags)):
            current_path = hashtags[: i + 1]
            if not os.path.isdir(os.path.join(out_dir, *current_path)):
                # Build a separate candidate list so current_path keeps the
                # original name for the log message.
                test_path = current_path[:-1] + ["_" + current_path[-1]]
                if os.path.isdir(os.path.join(out_dir, *test_path)):
                    self.logger.debug(
                        f"Rerouting {current_path[-1]} to {test_path[-1]}"
                    )
                    hashtags[i] = test_path[-1]

        # Remembered hashtags are single-use.
        self.last_hashtags[message.chat.id] = None

        self.logger.info(
            "Downloading %s into '%s' (%s)"
            % (urls, "/".join(original_hashtags), out_dir)
        )
        out_path = os.path.join(out_dir, *hashtags)
        mkdir_p(out_path)

        reply = 'Downloading to "{}"...'.format("/".join(original_hashtags))

        audio = any(
            tag in hashtag
            for hashtag in original_hashtags
            for tag in ("AUDIO", "RADIO")
        )
        if audio and download_fn != self.download_raw:
            reply += " (And also guessing you want to extract the audio)"
        message.reply_text(reply)

        filenames = download_fn(
            urls, out_path, message.date, message, audio=audio, filetitle=filetitle
        )

        cmd_hashtag = original_hashtags[0]

        tumblr_ids = []
        if cmd_hashtag in ("TUMBLR", "TUMBLR_NOW") and self.tumblr_client:
            now = cmd_hashtag == "TUMBLR_NOW"
            reply = "(btw, {})".format(
                "***FIRING TO TUMBLR RIGHT AWAY***" if now else "queueing to tumblr"
            )
            message.reply_text(reply, parse_mode=telegram.ParseMode.MARKDOWN)
            for filename in filenames:
                if filename.endswith(".mp4"):
                    # Videos are converted to GIF before being posted as a photo.
                    try:
                        output_filename = filename[: -len(".mp4")] + ".gif"
                        subprocess.check_output(
                            ["ffmpeg", "-i", filename, output_filename]
                        )
                        filename = output_filename
                    except subprocess.CalledProcessError:
                        message.reply_text(
                            "Conversion to gif failed, sorry! Check log..."
                        )
                        continue
                response = self.tumblr_client.create_photo(
                    self.tumblr_name,
                    data=filename,
                    state="published" if now else "queue",
                )
                if "id" in response:
                    tumblr_ids.append(response["id"])
                else:
                    self.logger.warning(
                        "Did not receive 'id' in tumblr response: \n"
                        + pprint.pformat(response)
                    )
                    message.reply_text(
                        "Something weird happened with the tumblrs, check it!"
                    )

        self.last_downloaded[message.chat.id] = (
            filenames,
            original_hashtags,
            tumblr_ids,
        )
        return True
    except Exception:
        # Catch Exception only, so KeyboardInterrupt/SystemExit still stop the bot.
        exc_type, exc_value, __ = sys.exc_info()
        # Keep the traceback in the log; the chat only gets a one-line summary.
        self.logger.exception("Failed to handle %s", urls)
        if "Timed out" not in str(exc_value):
            message.reply_text(
                "Something is FUCKED: [{}] {}".format(exc_type, exc_value)
            )
        return False
2019-04-18 17:40:48 +02:00
def handle_tg_message(self, message, bot, hashtag):
2019-05-22 15:21:25 +02:00
file, filetitle, tumblr = None, None, False
2019-04-18 17:40:48 +02:00
if len(message.photo) > 0:
photo = max(message.photo, key=lambda p: p.width)
file = photo.file_id
2019-04-18 17:40:48 +02:00
elif message.document is not None:
2019-05-22 15:21:25 +02:00
filetitle = message.document.file_name
2019-04-18 17:40:48 +02:00
file = message.document.file_id
elif message.audio is not None:
2019-05-22 15:21:25 +02:00
filetitle = message.audio.title
2019-04-18 17:40:48 +02:00
file = message.audio.file_id
elif message.video is not None:
file = message.video.file_id
elif message.video_note is not None:
file = message.video_note.file_id
elif message.voice is not None:
file = message.voice.file_id
if file is not None:
url = bot.getFile(file).file_path
2021-09-14 14:08:01 +02:00
return self.handle(
[url], message, hashtag, self.download_raw, filetitle=filetitle
)
else:
return False
2021-09-14 16:03:55 +02:00
def handle_urls(self, message: telegram.Message, hashtags: List[str]):
    """Download every URL found in the message text.

    URLs with a dedicated youtube-dl extractor go through
    :meth:`download_ytdl`. The rest are probed with a HEAD request and
    downloaded raw unless their Content-Type contains "text" (a missing
    header counts as text).

    :return: truthy when at least one of the two download batches succeeded.
    """
    urls = [
        message.parse_entity(entity)
        for entity in message.entities
        if entity.type == "url"
    ]

    # Classify each URL once; ytdl_can() walks the whole extractor list.
    ytdl_urls, normal_urls = [], []
    for url in urls:
        (ytdl_urls if self.ytdl_can(url) else normal_urls).append(url)

    ytdl_res = False
    if len(ytdl_urls) > 0:
        # Pass a copy so one batch cannot affect the hashtags of the other.
        ytdl_res = self.handle(
            ytdl_urls, message, list(hashtags), self.download_ytdl
        )

    raw_res = False
    if len(normal_urls) > 0:
        # requests.head() does not follow redirects unless asked to; without
        # this the Content-Type of the redirect response would be inspected
        # instead of the one of the actual file.
        file_urls = [
            url
            for url in normal_urls
            if "text"
            not in requests.head(url, allow_redirects=True).headers.get(
                "Content-Type", "text"
            )
        ]
        if len(file_urls) > 0:
            raw_res = self.handle(
                file_urls, message, list(hashtags), self.download_raw
            )

    return ytdl_res or raw_res
def tg_handle(self, update: Update, context: CallbackContext):
    """Catch-all message handler.

    With hashtags (own or remembered): try URLs, then an attached file. If
    neither downloaded anything and the message itself carries hashtags,
    either apply them to the replied-to message or remember them for the
    sender's next message. Without hashtags, the text is fed to the markov
    corpus.
    """
    if update.message is None:
        # Edited messages and channel posts arrive with message=None;
        # there is nothing this handler can act on.
        return

    self._log_msg(update)

    hashtags = self._get_hashtags(update.message)
    if hashtags:
        url_res = self.handle_urls(
            update.message, self._get_hashtags(update.message)
        )
        if url_res:
            return

        msg_res = self.handle_tg_message(
            update.message, context.bot, self._get_hashtags(update.message)
        )
        if msg_res:
            return

        # Only hashtags written in this very message count from here on.
        hashtags = self.extract_hashtags(update.message)
        if len(hashtags) > 0:
            self.handle_text(
                update.message.reply_to_message or update.message, hashtags
            )

            if update.message.reply_to_message:
                self.handle_tg_message(
                    update.message.reply_to_message, context.bot, hashtags
                )
                self.handle_urls(update.message.reply_to_message, hashtags)
            else:
                self.last_hashtags[update.message.chat.id] = (
                    update.message.from_user,
                    datetime.now(),
                    hashtags,
                )
    else:
        if self.markov and update.message.text:
            self.markov.add_to_corpus(update.message.text)
def _get_tag_dirs(self):
2021-09-14 14:08:01 +02:00
return (
list(
filter(
lambda x: x.upper() == x,
filter(
lambda directory: os.path.isdir(
os.path.join(self.out_dir, directory)
),
os.listdir(self.out_dir),
),
)
)
+ list(self.redirects.keys())
)
def tg_stats(self, update: Update, context: CallbackContext):
    """Reply with per-tag file statistics; only available in protected chats.

    Tags are listed by descending entry count with a breakdown by file
    extension (sub-directories are counted separately). Tags holding at most
    one entry are listed as orphans at the end.
    """
    self._log_msg(update)
    self.db.initialize()
    if update.message.chat.id not in self.db.get_protected_chats():
        update.message.reply_text(
            (self.markov.make_sentence() + "!")
            if self.markov and random() > 0.7
            else "nope."
        )
        return

    def tag_path(tag):
        # Redirected tags live in their own directory, not under out_dir;
        # listing out_dir/<tag> for them would raise FileNotFoundError.
        return self.redirects.get(tag, os.path.join(self.out_dir, tag))

    tag_dirs = self._get_tag_dirs()
    reply = "Total number of tags: {}\n\n".format(len(tag_dirs))
    counts = [(directory, os.listdir(tag_path(directory))) for directory in tag_dirs]
    # Two stable sorts: alphabetical first, then by size (largest first).
    counts.sort(key=itemgetter(0))
    counts.sort(key=lambda x: len(x[1]), reverse=True)

    for directory, files in counts:
        if len(files) <= 1:
            # Everything from here on is an orphan (one entry or none) and is
            # reported in the orphan list below.
            break
        abs_paths = [os.path.join(tag_path(directory), file) for file in files]
        abs_files = list(filter(os.path.isfile, abs_paths))
        exts = [
            ext[1:]
            for ext in [os.path.splitext(path)[1] for path in abs_files]
            if len(ext) > 0
        ]
        ext_counts = [(ext, exts.count(ext)) for ext in set(exts)]
        dir_cnt = len(abs_paths) - len(abs_files)
        # "directorie" + the "s" appended below reads as "directories".
        type_counts = ext_counts + (
            [("directorie", dir_cnt)] if dir_cnt > 0 else []
        )
        details = ", ".join(
            [
                "{} {}s".format(cnt, mime)
                for mime, cnt in sorted(
                    type_counts, key=itemgetter(1), reverse=True
                )
            ]
        )
        if len(type_counts) == 1:
            reply += "<b>{}:</b> {}\n".format(directory, details)
        else:
            reply += "<b>{}:</b> {} files ({})\n".format(
                directory, len(files), details
            )

    orphans = list(filter(lambda cnt: len(cnt[1]) <= 1, counts))
    if len(orphans) > 0:
        reply += "\nFollowing tags are orphans: " + ", ".join(
            map(itemgetter(0), orphans)
        )

    update.message.reply_text(reply, parse_mode=telegram.ParseMode.HTML)
def _get_orphan_tags(self):
result = []
for directory in self._get_tag_dirs():
files = os.listdir(os.path.join(self.out_dir, directory))
if len(files) == 1:
result.append((directory, files[0]))
if len(files) == 0:
result.append((directory, "NO FILE AT ALL..."))
return sorted(result, key=itemgetter(0))
def tg_orphan(self, update: Update, context: CallbackContext):
    """Reply with the names of orphan tags; only available in protected chats."""
    self._log_msg(update)
    self.db.initialize()
    message = update.message

    if message.chat.id not in self.db.get_protected_chats():
        if self.markov and random() > 0.7:
            refusal = self.markov.make_sentence() + "!"
        else:
            refusal = "nope."
        message.reply_text(refusal)
        return

    orphans = self._get_orphan_tags()
    if len(orphans) == 0:
        message.reply_text("Good job, no orphan tags!")
        return

    tag_names = ", ".join(tag for tag, _ in orphans)
    message.reply_text(
        "The following tags only contain a single file:\n" + tag_names
    )
def tg_orphan_full(self, update: Update, context: CallbackContext):
    """Reply with every orphan tag and its single file; protected chats only.

    The listing is split across several messages so that none exceeds 4096
    characters.

    The signature follows the ``(update, context)`` convention used by all
    other handlers in this class; with the previous ``(_, update)`` order the
    second positional argument (the context) was treated as the update.
    """
    self._log_msg(update)
    self.db.initialize()
    if update.message.chat.id not in self.db.get_protected_chats():
        update.message.reply_text(
            (self.markov.make_sentence() + "!")
            if self.markov and random() > 0.7
            else "nope."
        )
        return

    orphans = self._get_orphan_tags()
    if len(orphans) == 0:
        update.message.reply_text("Good job, no orphan tags!")
        return

    tmp_reply = "The following tags only contain a single file:\n"
    for directory, file in orphans:
        line = "{}: {}\n".format(directory, file)
        if len(tmp_reply + line) > 4096:
            # Flush what has accumulated before it grows past the limit.
            update.message.reply_text(tmp_reply)
            tmp_reply = ""
        tmp_reply += line
    if len(tmp_reply) > 0:
        update.message.reply_text(tmp_reply)
def tg_retag(self, update: Update, context: CallbackContext):
    """Re-tag the MP3s of this chat's last download.

    ``/retag Artist - Title`` sets both, ``/retag Title`` sets only the
    title, and a bare ``/retag`` swaps the artist and title currently stored
    in each file. When there is nothing to retag (no previous download, or
    none of its files is an MP3) the reply is "???".
    """
    self._log_msg(update)

    mp3s = []
    out_dir = self.out_dir
    last = self.last_downloaded.get(update.message.chat.id)
    if last is not None:
        files, hashtags, tumblr_ids = last
        out_dir = self.redirects.get(hashtags[0], self.out_dir)
        mp3s = [filename for filename in files if filename.endswith("mp3")]

    if len(mp3s) == 0:
        update.message.reply_text(
            (self.markov.make_sentence() if self.markov and random() > 0.7 else "")
            + "???"
        )
        return

    # Strip the command itself ("/retag" or "/retag@botname").
    arg_raw = re.sub(r"^/[@\w]+ ?", "", update.message.text).strip()
    artist, title = None, None

    reverse = len(arg_raw) == 0
    if not reverse:
        # Split on the first separator only, so a title containing " - "
        # is kept whole.
        tagline = arg_raw.split(" - ", 1)
        if len(tagline) == 1:
            title = tagline[0].strip()
        else:
            artist = tagline[0].strip()
            title = tagline[1].strip()

    for mp3 in mp3s:
        if reverse:
            orig_artist, orig_title = self._get_tags(mp3)
            title, artist = orig_artist, orig_title

        if not title:
            # Swapping a file that has no artist tag would leave no title to
            # write; skip it instead of handing None to the tagger.
            update.message.reply_text(
                'Cannot retag "{}", there is no title to use!'.format(
                    mp3[len(out_dir) + 1 :]
                )
            )
            continue

        self._tag_file(mp3, artist, cast(str, title))
        update.message.reply_text(
            'Tagging "{}" as "{}" by "{}"!'.format(
                mp3[len(out_dir) + 1 :], title, artist
            )
        )
2019-05-29 10:25:34 +02:00
def tg_delete(self, update: Update, context: CallbackContext):
    """Delete the files (and tumblr posts) of this chat's last download.

    After each file is removed, directories left empty are removed too,
    walking upwards until a non-empty directory or the output root is
    reached. Replies "Nothing to remove!" when no download is remembered.
    """
    self._log_msg(update)

    last = self.last_downloaded.get(update.message.chat.id)
    if last is None:
        update.message.reply_text("Nothing to remove!")
        return

    files, hashtags, tumblr_ids = last
    out_dir = self.redirects.get(hashtags[0], self.out_dir)

    for file in files:
        update.message.reply_text(
            'Removing "{}"!'.format(file[len(out_dir) + 1 :])
        )
        try:
            os.remove(file)
        except FileNotFoundError:
            # Already gone; carry on so the remembered download still gets
            # cleared instead of failing on every later /delete.
            self.logger.warning("File %s was already removed", file)

        parent_dir = os.path.dirname(file)
        while True:
            if not os.path.isdir(parent_dir) or len(os.listdir(parent_dir)) != 0:
                # A non-empty directory cannot have empty ancestors.
                break
            update.message.reply_text(
                'Removing directory "{}" as it\'s empty...'.format(
                    parent_dir[len(out_dir) + 1 :]
                )
            )
            os.rmdir(parent_dir)
            if parent_dir == out_dir:
                break
            next_parent = os.path.dirname(parent_dir)
            if next_parent == parent_dir:
                # Reached the filesystem root without meeting out_dir; stop
                # rather than loop forever.
                break
            parent_dir = next_parent

    if len(tumblr_ids) > 0:
        plural = (
            "s (all {} of them)".format(len(tumblr_ids))
            if len(tumblr_ids) > 1
            else ""
        )
        update.message.reply_text("Also deleting tumblr post{}!".format(plural))
        for tumblr_id in tumblr_ids:
            if self.tumblr_client:
                self.tumblr_client.delete_post(self.tumblr_name, tumblr_id)

    self.last_downloaded[update.message.chat.id] = None
2019-05-02 15:53:49 +02:00
def tg_protect(self, update: Update, context: CallbackContext):
    """Toggle protection of a tag or of the current chat.

    ``/protect tag NAME`` flips the tag's protected flag and is only honoured
    in a chat that is itself protected. ``/protect chat PASSWORD`` flips the
    chat's flag when the password matches. Anything else gets a confused
    reply.
    """
    self._log_msg(update)
    self.db.initialize()
    message = update.message

    def babble_or(fallback, suffix=""):
        # Sometimes answer with a markov sentence instead of the fixed text.
        if self.markov and random() > 0.7:
            return self.markov.make_sentence() + suffix
        return fallback + suffix

    msg_split = message.text.split(" ")
    if len(msg_split) != 3:
        message.reply_text(babble_or("", "???"))
        return

    chat_in_db = self.db.get_chat(message.chat.id)
    _, cmd, argument = msg_split

    if cmd == "tag":
        if not (chat_in_db and chat_in_db[1]):
            message.reply_text(babble_or("hublubl"))
            return
        tag = argument.upper()
        tag_in_db = self.db.get_tag(tag)
        if tag_in_db:
            _, _, protected = tag_in_db
            end_protected = not protected
        else:
            end_protected = True
        self.db.set_tag_protected(tag, end_protected)
        message.reply_text(
            f"got it, will {'NOT ' if not end_protected else ''}protect tag {tag}!"
        )
    elif cmd == "chat":
        if argument != self.protected_password:
            message.reply_text(babble_or("hublubl"))
            return
        if chat_in_db:
            _, protected = chat_in_db
            end_protected = not protected
        else:
            end_protected = True
        self.db.set_chat_protected(message.chat.id, end_protected)
        message.reply_text(
            f"got it, will {'NOT ' if not end_protected else ''}protect this chat!"
        )
    else:
        message.reply_text(babble_or("", "???"))
def tg_queue(self, update: Update, context: CallbackContext):
    """Reply with the number of queued tumblr posts, or "???" without tumblr."""
    if not self.tumblr_client:
        prefix = ""
        if self.markov and random() > 0.7:
            prefix = self.markov.make_sentence()
        update.message.reply_text(prefix + "???")
        return

    blog_info = self.tumblr_client.blog_info(self.tumblr_name)
    queued = blog_info["blog"].get("queue", "???")
    update.message.reply_text("Currently queued tumblr posts: " + str(queued))
2019-10-24 14:07:50 +02:00
# noinspection PyMethodMayBeStatic
def tg_version(self, update: Update, context: CallbackContext):
    """Reply with this script's modification time and the youtube-dl version."""
    self._log_msg(update)
    script_path = os.path.realpath(__file__)
    modified_at = datetime.fromtimestamp(os.path.getmtime(script_path))
    delojza_date = modified_at.strftime("%Y/%m/%d - %H:%M:%S")
    report = "delojza modified date: {}\nyoutube-dl version: {}".format(
        delojza_date, YTDL_VERSION
    )
    update.message.reply_text(report)
def tg_start(self, update: Update, context: CallbackContext):
    """Greet the user with a markov sentence, or "HELLO" without markov."""
    self._log_msg(update)
    if self.markov:
        greeting = self.markov.make_sentence()
    else:
        greeting = "HELLO"
    update.message.reply_text(greeting)
def tg_error(self, update: object, context: CallbackContext):
    """Log a handler error (with traceback) and report it to the chat if possible."""
    # Passing the exception as exc_info keeps its traceback in the log.
    self.logger.error(context.error, exc_info=context.error)
    if isinstance(update, Update) and update.effective_message is not None:
        # effective_message also covers edited messages and channel posts,
        # for which update.message is None.
        update.effective_message.reply_text(
            f"Something is fucked: {context.error}"
        )
2018-01-30 13:47:33 +01:00
def run_idle(self):
    """Start polling Telegram, log that the bot is up, then hand over to ``idle()``."""
    updater = self.updater
    updater.start_polling()
    self.logger.info("Started Telegram bot...")
    updater.idle()
2018-01-30 13:47:33 +01:00
2021-09-14 14:08:01 +02:00
if __name__ == "__main__":
    # Imported here because the top-of-file import only brings in
    # ConfigParser and NoSectionError.
    from configparser import NoOptionError

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    _DIR_ = os.path.dirname(os.path.realpath(__file__))
    # Searched in order; the first existing file wins.
    CONFIG_PATHS = [
        "/etc/delojza/delojza.ini",
        os.path.join(os.getenv("HOME") or "", ".config/delojza/delojza.ini"),
        os.path.join(_DIR_, "delojza.ini"),
    ]

    config = ConfigParser()
    try:
        CONF_FILE = next(
            conf_path for conf_path in CONFIG_PATHS if os.path.isfile(conf_path)
        )
        config.read(CONF_FILE)
    except StopIteration:
        logging.error("No config file found, quitting.")
        sys.exit(-1)

    try:
        markov = MarkovBlabberer("initial.txt")
    except FileNotFoundError:
        logging.warning(
            "Didn't find `initial.txt`, continuing without markov blabbering!"
        )
        markov = None

    try:
        redirects: Optional[List[Tuple[str, str]]] = config.items("redirects")
    except NoSectionError:
        redirects = None

    try:
        tumblr_keys = (
            config.get("tumblr", "consumer_key"),
            config.get("tumblr", "consumer_secret"),
            config.get("tumblr", "oauth_key"),
            config.get("tumblr", "oauth_secret"),
        )
    except (NoSectionError, NoOptionError, KeyError):
        # A [tumblr] section missing one of the keys raises NoOptionError;
        # treat it like a missing section and run without tumblr.
        tumblr_keys = None

    delojza = DelojzaBot(
        config.get("delojza", "tg_api_key"),
        config.get("delojza", "OUT_DIR", fallback=os.path.join(_DIR_, "out")),
        tmp_dir=config.get("delojza", "tmp_dir", fallback=tempfile.gettempdir()),
        redirects=redirects,
        protected_password=config.get("delojza", "protected_password", fallback=None),
        acoustid_key=config.get("delojza", "acoustid_api_key", fallback=None),
        tumblr_name=config.get("tumblr", "blog_name", fallback=None),
        tumblr_keys=tumblr_keys,
        markov=markov,
    )
    delojza.run_idle()