delojza/robot.py

#!/usr/bin/env python3
import errno
import logging
import os
import re
import shutil
import sys
from glob import glob

import filetype
import requests
import youtube_dl
from telegram import MessageEntity
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters

# Directory containing this script; the youtube-dl download archive and the
# output tree are placed next to it.
DIR = os.path.dirname(os.path.realpath(__file__))
# Scratch directory youtube-dl writes into before files are moved to OUT_DIR.
TMP_DIR = '/tmp'
# Root of the finished downloads; per-hashtag subdirectories live below it.
OUT_DIR = f'{DIR}/out'

logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("kunsax")


def datestr(date):
    """Render *date* as a filename-friendly ``YYYY-MM-DD@HHMM`` stamp."""
    stamp_format = "%Y-%m-%d@%H%M"
    return date.strftime(stamp_format)


def mkdir_p(path):
    """Create directory *path* including missing parents, like ``mkdir -p``.

    An already existing directory is not an error.

    Raises:
        OSError: if *path* exists but is not a directory, or if the
            directory cannot be created.
    """
    # exist_ok=True is the standard-library equivalent of the former
    # hand-written "ignore EEXIST if it is a directory" handling.
    os.makedirs(path, exist_ok=True)


def ytdl_has(url):
    """Return True if youtube-dl has a dedicated extractor for *url*.

    The catch-all ``generic`` extractor does not count, and URLs containing
    ``/channel/`` are always rejected.
    """
    # The channel check does not depend on the extractor, so decide it once
    # up front instead of building and scanning the whole extractor list.
    if '/channel/' in url:
        return False
    return any(
        ie.suitable(url) and ie.IE_NAME != 'generic'
        for ie in youtube_dl.extractor.gen_extractors()
    )


def download_ydl(urls, subdir, date, extract=False):
    """Download *urls* with youtube-dl and move the results to ``OUT_DIR/subdir``.

    Args:
        urls: list of URLs handed to youtube-dl.
        subdir: directory name below OUT_DIR that receives the files.
        date: datetime whose ``datestr`` stamp prefixes every filename.
        extract: when true, request the best audio format and run the
            ``FFmpegExtractAudio`` postprocessor with WAV as preferred codec.
    """
    ydl_opts = {
        'noplaylist': True,
        'restrictfilenames': True,
        'download_archive': DIR + '/downloaded.lst',
        'outtmpl': f'{TMP_DIR}/' + datestr(date) + '__%(title)s__%(id)s.%(ext)s'
    }
    if extract:
        ydl_opts['format'] = 'bestaudio'
        ydl_opts['postprocessors'] = [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav'
        }]

    out_dir = f'{OUT_DIR}/{subdir}/'
    # shutil.move() only moves *into* out_dir if it already exists; callers
    # create it solely when a hashtag was given, so make sure it is there.
    os.makedirs(out_dir, exist_ok=True)

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(urls)
        for url in urls:
            # The files are already on disk; only the metadata is needed to
            # reconstruct the output name, so do not start a second download.
            info = ydl.extract_info(url, download=False)
            filename = ydl.prepare_filename(info)
            # Match on the stem: the final extension may differ from the one
            # in the prepared name (e.g. after audio extraction).
            for globbed in glob(os.path.splitext(filename)[0] + '.*'):
                logger.info("Moving %s to %s...", globbed, out_dir)
                shutil.move(globbed, out_dir)


def download_raw(url, subdir, date):
    """Stream *url* into ``OUT_DIR/subdir`` under a date-prefixed name.

    The local name is ``<datestr(date)>__<last path segment of url>``. If that
    name does not end in a 3-5 character extension, the file type is guessed
    from the downloaded content and the matching extension is appended.

    Args:
        url: address of the file to fetch.
        subdir: directory name below OUT_DIR that receives the file.
        date: datetime whose ``datestr`` stamp prefixes the filename.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    remote_name = url.split('/')[-1]
    local_filename = f"{OUT_DIR}/{subdir}/" + "%s__%s" % (datestr(date), remote_name)
    # The target directory is only created by callers when a hashtag is
    # present; make sure open() below cannot fail on a missing directory.
    os.makedirs(os.path.dirname(local_filename), exist_ok=True)

    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True) as r:
        # Fail loudly instead of saving an HTTP error page as the download.
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

    if not re.match(r'.*\..{3,5}$', os.path.split(local_filename)[-1]):
        kind = filetype.guess(local_filename)
        if kind is None:
            logger.error("File has no extension and could not be determined!")
        else:
            logger.info('Moving file without extension... %s?', kind.extension)
            shutil.move(local_filename, local_filename + '.' + kind.extension)


def get_first_hashtag(message):
    """Return the first hashtag of *message* upper-cased without its ``#``.

    Returns None when the message carries no hashtag entity.
    """
    first = next(
        (entity for entity in message.entities if entity.type == 'hashtag'),
        None,
    )
    if first is None:
        return None
    tag_text = message.parse_entity(first)
    # Drop the leading '#' before normalising the case.
    return tag_text[1:].upper()


# noinspection PyBroadException
def handle_url(bot, update):
    """Download every URL in the message that youtube-dl can handle.

    URLs are taken from the message's ``url`` entities and filtered through
    ``ytdl_has``. The first hashtag, if any, selects the output subdirectory;
    the hashtag ``AUDIO`` additionally requests audio extraction. Progress and
    failures are reported back as replies to the message.
    """
    message = update.message
    ytdl_urls = [
        url
        for url in (message.parse_entity(e)
                    for e in message.entities if e.type == 'url')
        if ytdl_has(url)
    ]
    if not ytdl_urls:
        return

    try:
        logger.info("Downloading %s", ytdl_urls)
        hashtag = get_first_hashtag(message)
        extract_audio = hashtag == 'AUDIO'

        reply = 'Downloading'
        if hashtag:
            mkdir_p(f'{OUT_DIR}/{hashtag}')
            reply += f' to "{hashtag}"'
        reply += '...'

        if extract_audio:
            reply += ' (And also guessing you want to extract the audio)'
        message.reply_text(reply)
        download_ydl(ytdl_urls, hashtag or '.', message.date, extract=extract_audio)
    except Exception as exc:
        # Deliberately broad: any failure is reported to the user. Unlike a
        # bare ``except:`` this lets KeyboardInterrupt/SystemExit propagate.
        logger.exception("Downloading %s failed", ytdl_urls)
        message.reply_text("Something is FUCKED: %s, %s" % (type(exc), exc))


# noinspection PyBroadException
def handle_rest(bot, update):
    """Download the photo, document, audio or video attached to the message.

    For photos the widest available size is chosen. The first hashtag, if
    any, selects the output subdirectory. Progress and failures are reported
    back as replies to the message. Messages without a supported attachment
    are ignored.
    """
    message = update.message
    file_id = None
    if message.photo:
        file_id = max(message.photo, key=lambda p: p.width).file_id
    elif message.document is not None:
        file_id = message.document.file_id
    elif message.audio is not None:
        file_id = message.audio.file_id
    elif message.video is not None:
        file_id = message.video.file_id
    if file_id is None:
        return

    try:
        url = bot.getFile(file_id).file_path
        logger.info("Downloading %s", url)
        hashtag = get_first_hashtag(message)
        if hashtag:
            mkdir_p(f'{OUT_DIR}/{hashtag}')
            reply = f'Downloading to "{hashtag}"...'
        else:
            # Built explicitly: the previous one-liner
            # ``'Downloading%s...' % x if hashtag else ''`` parsed as
            # ``(... % x) if hashtag else ''`` and produced an empty reply.
            reply = 'Downloading...'
        message.reply_text(reply)
        download_raw(url, hashtag or '.', message.date)
    except Exception as exc:
        # Deliberately broad: any failure is reported to the user. Unlike a
        # bare ``except:`` this lets KeyboardInterrupt/SystemExit propagate.
        logger.exception("Downloading attachment failed")
        message.reply_text("Something is FUCKED: %s, %s" % (type(exc), exc))


def start(bot, update):
    """Answer the ``/start`` command with a fixed greeting."""
    greeting = 'WOOP WOOP'
    update.message.reply_text(greeting)


def error(bot, update, error):
    """Log a dispatcher error and, when possible, report it in the chat.

    Args:
        bot: the bot instance (unused).
        update: the update being processed when the error occurred; may be
            None.
        error: the error to log and report.
    """
    logger.error(error)
    # Look the message up defensively: update may be None, and an update
    # without a message would otherwise make the error handler itself fail.
    message = getattr(update, 'message', None)
    if message is not None:
        message.reply_text("Something is fucked: %s" % error)


def main():
    """Register all handlers on the dispatcher and start the bot."""
    # NOTE(review): the bot token is a hard-coded literal here.
    updater = Updater("***REMOVED***")
    dispatcher = updater.dispatcher

    dispatcher.add_error_handler(error)

    attachment_filter = (Filters.photo | Filters.video
                         | Filters.audio | Filters.document)
    # Handlers are added in the same order as before: command, URL, attachments.
    for handler in (
        CommandHandler("start", start),
        MessageHandler(Filters.entity(MessageEntity.URL), handle_url),
        MessageHandler(attachment_filter, handle_rest),
    ):
        dispatcher.add_handler(handler)

    updater.start_polling()
    logger.info("Started Telegram bot...")
    updater.idle()


# Start the bot only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()