carch-web-api/run.py

"""
Socket.IO api served through tornado that uses yt-dlp to convert, clip and download media
"""
import socketio
from yt_dlp import YoutubeDL
import json
import asyncio
import tornado
import requests
import os
import random
import uuid
import zipfile
import datetime
import sys
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from moviepy.editor import VideoFileClip
from pygifsicle import optimize
from mutagen.easyid3 import EasyID3
import sentry_sdk
from sentry_sdk import capture_exception

# TODO: auto-reload/reload on webhook using gitpython

# README: functionality is described once per documentation in order to leave as
# little clutter as possible

conf = {}
""" Global configuration variable """

# When the working directory path contains "docs" (docs build) the config lives one level up
confpath = "../.conf.json" if "docs" in str(os.getcwd()) else ".conf.json"
"""Local path to json config"""

# Parse the json configuration once, when the module is loaded
with open(confpath, "r") as f:
    conf = json.load(f)

# Optional bugcatcher (Glitchtip/Sentry), switched on by the "bugcatcher" config flag
if conf["bugcatcher"]:
    sentry_sdk.init(conf["bugcatcherdsn"])

def dlProxies(path="proxies.txt"):
    """
    Download the proxy list from conf["proxyListURL"] and write it to path
    Each remote line is split on ":" and its first four fields are rewritten from
    "a:b:c:d" to "c:d@a:b" (presumably host:port:user:password -> user:password@host:port)
    Blank or malformed lines are skipped
    The file is only written once download and parsing succeeded, so a failed refresh
    keeps the previous list instead of leaving an empty file behind
    Raises requests.HTTPError when the server answers with an error status
    If other people need a more complex method of downloading proxies I recommend
    implementing it and doing a merge request
    """
    r = requests.get(conf["proxyListURL"])
    # Don't parse an error page as if it were a proxy list
    r.raise_for_status()
    rlistfixed = []
    # splitlines copes with \n and \r\n endings and with a missing trailing newline
    for p in r.text.splitlines():
        pl = p.strip().split(":")
        if len(pl) < 4:
            # Blank or malformed line, nothing usable here
            continue
        rlistfixed.append("{0}:{1}@{2}:{3}".format(pl[2], pl[3], pl[0], pl[1]))
    with open(path, "w") as f:
        f.write("\n".join(rlistfixed))
    print("Proxies refreshed!")

# Fetch the proxy list at runtime when a proxy list url is configured and no proxies file exists yet
if conf["proxyListURL"] != False and not os.path.exists("proxies.txt"):
    dlProxies()

def resInit(method, spinnerid):
    """
    Build the skeleton of a response for the client
    method is the name of the event being answered
    spinnerid is the id of the spinner object the ui should remove, None is fine
    The response starts out flagged as an error, handlers clear the flag on success
    """
    return dict(method=method, error=True, spinnerid=spinnerid)

# create a Socket.IO server
# When "mt" is passed on the command line the server is attached to the message queue,
# otherwise it runs without a client manager
_sioOptions = {"cors_allowed_origins": conf["allowedorigins"], "async_mode": "tornado"}
if "mt" in sys.argv:
    mgr = socketio.KombuManager('amqp://yda-rabbit')
    _sioOptions["client_manager"] = mgr
sio = socketio.AsyncServer(**_sioOptions)


@sio.event
async def toMP3(sid, data, loop=0):
    """
    Socketio event, takes the client id, a json payload and a loop count for retries
    Converts link to mp3 file
    data must hold "url" and may hold "spinnerid" and "id3" (a mapping of EasyID3 keys to values)
    The result, or the error text under "details", is emitted to the client as a "done" event
    An OSError triggers a single retry (loop=1) before the error is reported
    """
    # Initialize response, if spinnerid data doesn't exist it will just set it to none
    res = resInit("toMP3", data.get("spinnerid"))
    # Try/catch loop will send error message to client on error
    try:
        # Get video url from data
        url = data["url"]
        if "list" in url:
            raise ValueError("Method is for singular videos")
        # Get information about the video via yt-dlp to make future decisions
        info = getInfo(url)
        # Return an error if the video is longer than the configured maximum video length
        if info["duration"] > conf["maxLength"]:
            raise ValueError("Video is longer than configured maximum length")
        # Get file system safe title for video
        title = makeSafe(info["title"])
        # Download video as MP3 from given url and get the final title of the video
        ftitle = download(url, True, title, "mp3")
        # Tell the client there is no error
        res["error"] = False
        # Give the client the download link
        res["link"] = conf["url"] + "/downloads/" + ftitle + ".mp3"
        # Give the client the initial safe title just for display on the ui
        res["title"] = title
        # If there is id3 metadata apply this metadata to the file
        # .get is used so a payload without an "id3" key doesn't turn a finished download into an error
        id3 = data.get("id3")
        if id3 is not None:
            # We use EasyID3 here as, well, it's easy, if you need to add more fields
            # please read the mutagen documentation for this here:
            # https://mutagen.readthedocs.io/en/latest/user/id3.html
            audio = EasyID3("downloads/" + ftitle + ".mp3")
            for key, value in id3.items():
                if value != "" and value is not None:
                    audio[key] = value
            audio.save()
        # Emit result to client
        await sio.emit("done", res, sid)
    except OSError as e:
        capture_exception(e)
        if loop > 0:
            capture_exception(OSError("Retry unsuccessful"))
            # Get text of error
            res["details"] = str(e)
            await sio.emit("done", res, sid)
        else:
            await toMP3(sid, data, loop=1)
    except Exception as e:
        capture_exception(e)
        # Get text of error
        res["details"] = str(e)
        await sio.emit("done", res, sid)
    
@sio.event
async def playlist(sid, data, loop=0):
    """
    Downloads playlist as a zip of MP3s
    data must hold "url" and may hold "spinnerid"
    The result, or the error text under "details", is emitted to the client as a "done" event
    An OSError triggers a single retry (loop=1) before the error is reported
    """
    res = resInit("playlist", data.get("spinnerid"))
    try:
        purl = data["url"]
        # Get playlist info
        info = getInfo(purl)
        # File system safe playlist title, this is what the ui displays
        safeTitle = makeSafe(info["title"])
        # Create playlist title from the file system safe title and a random uuid
        # The uuid is to prevent two users from accidentally overwriting each other's files (very unlikely due to cleanup but still possible)
        ptitle = safeTitle + str(uuid.uuid4())
        entries = info["entries"]
        # Without entries no zip would be written and the link would point at nothing
        if not entries:
            raise ValueError("Playlist is empty")
        # If the number of entries is larger than the configured maximum playlist length throw an error
        if len(entries) > conf["maxPlaylistLength"]:
            raise ValueError("Playlist is longer than configured maximum length")
        # Check the length of all videos in the playlist, if any are longer than the configured maximum
        # length for playlist videos throw an error
        for v in entries:
            if v["duration"] > conf["maxLengthPlaylistVideo"]:
                raise ValueError("Video in playlist is longer than configured maximum length")
        # Iterate through all videos on the playlist, download each one as an MP3 and then write it to the playlist zip file
        for v in entries:
            #TODO: make generic
            vid = v["id"]
            vurl = "https://www.youtube.com/watch?v=" + vid
            title = makeSafe(v["title"])
            ftitle = download(vurl, True, title, "mp3")
            with zipfile.ZipFile("downloads/" + ptitle + '.zip', 'a') as myzip:
                myzip.write("downloads/" + ftitle + ".mp3")
        res["error"] = False
        res["link"] = conf["url"] + "/downloads/" + ptitle + ".zip"
        # Report the playlist's title, not the title of whichever video was downloaded last
        res["title"] = safeTitle
        await sio.emit("done", res, sid)
    except OSError as e:
        capture_exception(e)
        if loop > 0:
            # Get text of error
            res["details"] = str(e)
            await sio.emit("done", res, sid)
        else:
            await playlist(sid, data, loop=1)
    except Exception as e:
        capture_exception(e)
        res["details"] = str(e)
        await sio.emit("done", res, sid)

@sio.event
async def subtitles(sid, data, loop=0):
    """
    Two step event
    1. Get list of subtitles
    2. Download chosen subtitle file
    data must hold "step" and "url", step 2 also needs "languageCode" and "autoSub"
    Any other step is reported to the client as an error
    The result, or the error text under "details", is emitted to the client as a "done" event
    An OSError triggers a single retry (loop=1) before the error is reported
    """
    res = resInit("subtitles", data.get("spinnerid"))
    try:
        step = int(data["step"])
        url = data["url"]
        if "list" in url:
            raise ValueError("Method is for singular videos")
        # Step 1 of subtitles is to get the list of subtitles available and return them
        if step == 1:
            info = getInfo(url, getSubtitles=True)
            title = makeSafe(info["title"])
            res["error"] = False
            res["title"] = title
            # List of subtitle keys for picking subtitles
            res["select"] = list(info["subtitles"].keys())
            # Step for front end use, the value here doesn't really matter, the variable just has to exist to tell the ui to move to step 2 when the method is called again
            res["step"] = 0
            # Again details doesn't need a value it just needs to exist to let the front end know to populate the details column with a select defined by the list provided by select
            res["details"] = ""
            await sio.emit("done", res, sid)
        # Step 2 of subtitles is to download the subtitles to the server and provide that link to the user
        elif step == 2:
            # Get the selected subtitles by language code
            languageCode = data["languageCode"]
            # Check if the user wants to download autosubs
            autoSub = data["autoSub"]
            info = getInfo(url)
            title = makeSafe(info["title"])
            # Download the subtitles
            # Unfortunately at the moment this requires downloading the lowest quality stream as well, in the future some modification to yt-dlp might be necessary to avoid this
            ftitle = download(url, False, title, "subtitles", languageCode=languageCode, autoSub=autoSub)
            res["error"] = False
            res["link"] = conf["url"] + "/downloads/" + ftitle + "." + languageCode + ".vtt"
            res["title"] = title
            await sio.emit("done", res, sid)
        else:
            # Without this the client would never get an answer for an unknown step
            raise ValueError("Step must be 1 or 2")
    except OSError as e:
        capture_exception(e)
        if loop > 0:
            capture_exception(OSError("Retry unsuccessful"))
            # Get text of error
            res["details"] = str(e)
            await sio.emit("done", res, sid)
        else:
            await subtitles(sid, data, loop=1)
    except Exception as e:
        capture_exception(e)
        res["details"] = str(e)
        await sio.emit("done", res, sid)

@sio.event
async def clip(sid, data, loop=0):
    """
    Event to clip a given stream and return the clip to the user, the user can optionally convert this clip into a gif
    data must hold "url", "timeA" and "timeB" (clip start and end, converted with int())
    data may hold "spinnerid", "directURL", "format_id" and "gif" (the presence of the key is what enables gif output)
    The result, or the error text under "details", is emitted to the client as a "done" event
    An OSError triggers a single retry (loop=1) before the error is reported
    """
    res = resInit("clip", data.get("spinnerid"))
    try:
        url = data["url"]
        if "list" in url:
            raise ValueError("Method is for singular videos")
        info = getInfo(url)
        # directURL defines a video url to download from directly instead of through yt-dlp
        directURL = data.get("directURL", False)
        # Check if user wants to create a gif
        gif = "gif" in data
        # Get the format id the user wants for downloading a given stream from a given video
        format_id = data.get("format_id", False)
        if info["duration"] > conf["maxLength"]:
            raise ValueError("Video is longer than configured maximum length")
        # Get the start and end time for the clip
        timeA = int(data["timeA"])
        timeB = int(data["timeB"])
        # If we're making a gif make sure the clip is not longer than the maximum gif length
        # Please be careful with gif lengths, if you set this too high you may end up with huge gifs hogging the server
        if gif and ((timeB - timeA) > conf["maxGifLength"]):
            raise ValueError("Range is too large for gif")
        title = makeSafe(info["title"])
        # If the directURL is set download directly
        if directURL != False:
            ititle = title + "." + info["ext"]
            downloadDirect(directURL, "downloads/" + ititle)
        # Otherwise download the video through yt-dlp
        # If there's no format id just get the default video
        else:
            if format_id != False:
                ititle = download(url, False, title, "mp4", extension=info["ext"], format_id=format_id)
            else:
                ititle = download(url, False, title, "mp4", extension=info["ext"])
        cuuid = str(uuid.uuid4())
        # The extension is "gif" or "mp4" depending on whether the user wanted a gif
        extension = "gif" if gif else "mp4"
        # Name of the clipped file inside downloads, built once so writing, optimizing and the link always agree
        clipName = title + "." + cuuid + ".clipped." + extension
        if gif:
            # Clip video and then convert it to a gif, closing the source clip afterwards
            with VideoFileClip("downloads/" + ititle) as video:
                video.subclip(timeA, timeB).write_gif("downloads/" + clipName)
            # Optimize the gif that was just written (same path, including the uuid)
            optimize("downloads/" + clipName)
        else:
            # Clip the video and return the mp4 of the clip
            ffmpeg_extract_subclip("downloads/" + ititle, timeA, timeB, targetname="downloads/" + clipName)
        res["error"] = False
        res["link"] = conf["url"] + "/downloads/" + clipName
        res["title"] = title
        await sio.emit("done", res, sid)
    except OSError as e:
        capture_exception(e)
        if loop > 0:
            capture_exception(OSError("Retry unsuccessful"))
            # Get text of error
            res["details"] = str(e)
            await sio.emit("done", res, sid)
        else:
            await clip(sid, data, loop=1)
    except Exception as e:
        capture_exception(e)
        res["details"] = str(e)
        await sio.emit("done", res, sid)

@sio.event
async def combine(sid, data, loop=0):
    """
    Combine audio and video streams
    data must hold "url", "format_id" (video stream) and "format_id_audio" (audio stream), and may hold "spinnerid"
    The result, or the error text under "details", is emitted to the client as a "done" event
    An OSError triggers a single retry of this same event (loop=1) before the error is reported
    """
    res = resInit("combine", data.get("spinnerid"))
    try:
        curl = data["url"]
        # Reject playlists before asking yt-dlp for any information
        if "list" in curl:
            raise ValueError("This method is for a single video")
        # Get video info
        info = getInfo(curl)
        # Create the video title from the file system safe title and a random uuid
        # The uuid is to prevent two users from accidentally overwriting each other's files (very unlikely due to cleanup but still possible)
        ptitle = makeSafe(info["title"]) + str(uuid.uuid4())
        # Check the length of the video, if it's too long throw an error
        if info["duration"] > conf["maxLength"]:
            raise ValueError("Video is longer than configured maximum length")
        title = download(curl, False, ptitle, False, extension="mp4", format_id=data["format_id"], format_id_audio=data["format_id_audio"])
        res["error"] = False
        res["link"] = conf["url"] + "/downloads/" + title
        res["title"] = ptitle
        await sio.emit("done", res, sid)
    except OSError as e:
        capture_exception(e)
        if loop > 0:
            # Get text of error
            res["details"] = str(e)
            await sio.emit("done", res, sid)
        else:
            # Retry this event, not the playlist event
            await combine(sid, data, loop=1)
    except Exception as e:
        capture_exception(e)
        res["details"] = str(e)
        await sio.emit("done", res, sid)

@sio.event
async def getInfoEvent(sid, data):
    """
    Generic event returning everything yt-dlp reports for a single video url
    The response method is taken from data["method"] so the event can serve several ui features
    """
    # Unlike other events the method name comes from the payload, which keeps this event generic
    method = data["method"]
    res = resInit(method, data.get("spinnerid"))
    try:
        url = data["url"]
        if "list" in url:
            raise ValueError("Method is for singular videos")
        info = getInfo(url)
        if method == "streams":
            # Both fields are sent empty for the "streams" method
            res.update(details="", select="")
        res.update(error=False, title=makeSafe(info["title"]), info=info)
        await sio.emit("done", res, sid)
    except Exception as err:
        capture_exception(err)
        res["details"] = str(err)
        await sio.emit("done", res, sid)

@sio.event
async def limits(sid, data):
    """
    Report the configured server limits so the ui can display them
    """
    res = resInit("limits", data.get("spinnerid"))
    try:
        # Config keys exposed to the client, in the order they are sent
        limitNames = (
            "maxLength",
            "maxPlaylistLength",
            "maxGifLength",
            "maxGifResolution",
            "maxLengthPlaylistVideo",
        )
        entries = []
        for name in limitNames:
            entries.append({"limitid": name, "limitvalue": conf[name]})
        res["limits"] = entries
        res["error"] = False
        await sio.emit("done", res, sid)
    except Exception as err:
        capture_exception(err)
        res["details"] = str(err)
        await sio.emit("done", res, sid)

def download(url, isAudio, title, codec, languageCode=None, autoSub=False, extension=False, format_id=False, format_id_audio=False):
    """
    Generic download method
    Downloads url through yt-dlp to downloads/<title>.<random uuid>[.<extension>]
    isAudio extracts the best audio stream and converts it to codec
    codec "subtitles" writes the subtitles for languageCode (automatic ones too when autoSub is set)
    format_id picks a specific stream, format_id_audio is merged with it when given
    Returns the file name inside downloads as passed to yt-dlp (title, uuid and the optional extension)
    Raises ValueError when subtitles are requested without a languageCode
    """
    # Used to avoid filename conflicts
    ukey = str(uuid.uuid4())
    # Construct the name of the output file, this is also what gets returned
    res = title + "." + ukey
    # Add extension to filepath if set
    if extension != False:
        res += "." + extension
    # Set the location/name of the output file
    ydl_opts = {
        'outtmpl': 'downloads/' + res
    }
    # If this is audio setup for getting the best audio with the given codec
    if isAudio:
        ydl_opts['format'] = "bestaudio/best"
        ydl_opts['postprocessors'] = [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': codec,
            'preferredquality': '192',
        }]
    # Otherwise...
    else:
        # Check if there's a format id, if so set the download format to that format id
        if format_id != False:
            ydl_opts['format'] = format_id
            if format_id_audio != False:
                ydl_opts['format'] += "+" + format_id_audio
                print(ydl_opts['format'])
        # Otherwise if we're downloading subtitles...
        elif codec == "subtitles":
            if languageCode is None:
                raise ValueError("A language code is required to download subtitles")
            # Set up to write the subtitles to disk
            ydl_opts["writesubtitles"] = True
            # Languages are selected through the subtitleslangs option of the python api,
            # command line style flags in a string are not understood by YoutubeDL
            ydl_opts["subtitleslangs"] = [languageCode]
            # If the user wants to download auto subtitles enable the automatic ones too
            if autoSub:
                ydl_opts["writeautomaticsub"] = True
            ydl_opts['format'] = "worst"
        # Otherwise just download the best video
        else:
            ydl_opts['format'] = "bestvideo/best"
    # If there is a proxy list url set up, set yt-dlp to use a random proxy
    if conf["proxyListURL"] != False:
        ydl_opts['proxy'] = getProxy()
    # Finally, actually download the file/s
    with YoutubeDL(ydl_opts) as ydl:
        if codec == "subtitles":
            ydl.extract_info(url, download=True)
        else:
            ydl.download([url])
    return res

def downloadDirect(url, filename):
    """
    Stream url straight into filename, going through a random proxy when a proxy list is configured
    """
    requestOptions = {"stream": True}
    if conf["proxyListURL"] != False:
        requestOptions["proxies"] = {'https': 'https://' + getProxy()}
    with requests.get(url, **requestOptions) as response:
        response.raise_for_status()
        # Write the body to disk in 8 KiB chunks
        with open(filename, 'wb') as out:
            for chunk in response.iter_content(chunk_size=8192):
                out.write(chunk)


def getInfo(url, getSubtitles=False):
    """
    Generic method to get sanitized information about the given url, with a random proxy if set up
    getSubtitles is handed to yt-dlp as its writesubtitles option
    Nothing is downloaded, only the information is extracted and returned
    """
    ydl_opts = {
        "writesubtitles": getSubtitles
    }
    if conf["proxyListURL"] != False:
        ydl_opts['proxy'] = getProxy()
    # The options have to be passed to YoutubeDL, otherwise the proxy and subtitle settings are never used
    with YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=False)
        info = ydl.sanitize_info(info)
    return info


def makeSafe(filename):
    """
    Make title file system safe by keeping only letters, digits and spaces and dropping trailing whitespace
    https://stackoverflow.com/questions/7406102/create-sane-safe-filename-from-any-unsafe-string
    """
    def allowed(c):
        return c == ' ' or c.isalpha() or c.isdigit()

    return "".join(filter(allowed, filename)).rstrip()

def getProxy():
    """
    Get random proxy from proxy list
    Reads proxies.txt, ignores blank lines and returns one of the remaining lines at random
    Returns an empty string when the file holds no proxy at all
    """
    with open("proxies.txt", "r") as f:
        # Blank lines (for example a trailing newline) must never be picked as a proxy
        candidates = [line.strip() for line in f if line.strip()]
    if not candidates:
        return ""
    return random.choice(candidates)

async def refreshProxies():
    """
    Refresh proxies every hour
    A failed refresh is reported to the bugcatcher and retried an hour later instead of ending the task
    """
    while True:
        try:
            dlProxies()
        except Exception as e:
            # Keep looping, otherwise a single network error would stop all future refreshes
            capture_exception(e)
        await asyncio.sleep(3600)

async def clean():
    """
    Every hour, remove everything in downloads that was last modified more than two hours (7200 seconds) ago
    An entry that can't be inspected or removed is reported to the bugcatcher and skipped
    """
    while True:
        for f in os.listdir("downloads"):
            path = 'downloads/' + f
            try:
                fmt = datetime.datetime.fromtimestamp(os.path.getmtime(path))
                if (datetime.datetime.now() - fmt).total_seconds() > 7200:
                    os.remove(path)
            except OSError as e:
                # Don't let one entry (a directory, a file that vanished meanwhile) stop the cleanup task
                capture_exception(e)
        print("Cleaned!")
        await asyncio.sleep(3600)

def make_app():
    """
    Build the tornado application: static serving of ./downloads plus the socket.io endpoint
    """
    routes = [
        (r'/downloads/(.*)', tornado.web.StaticFileHandler, {'path': "./downloads"}),
        (r"/socket.io/", socketio.get_tornado_handler(sio)),
    ]
    return tornado.web.Application(routes)

async def main():
    """
    Main method
    Starts the background tasks and the tornado application on port 8888, then waits forever
    """
    background = []
    # The proxy refresh task only exists when proxies are configured
    if conf["proxyListURL"] != False:
        background.append(asyncio.create_task(refreshProxies()))
        # Yield once so the freshly created task gets to start
        await asyncio.sleep(0)
    # The cleaning task always runs
    background.append(asyncio.create_task(clean()))
    await asyncio.sleep(0)
    # Generic tornado setup
    application = make_app()
    application.listen(8888)
    # Block on an event that is never set to keep the loop alive
    await asyncio.Event().wait()

# Run main() on a new asyncio event loop only when this file is executed as a script
if __name__ == "__main__":
    asyncio.run(main())