X-Git-Url: https://code.delx.au/youtube-cgi/blobdiff_plain/7697735a88f01ad5d61f50663e4221e75fafc3ca..8bffa10d220eb18e9394a29d73d054dc395f6379:/youtube.cgi diff --git a/youtube.cgi b/youtube.cgi index c0b5e99..9e14fd1 100755 --- a/youtube.cgi +++ b/youtube.cgi @@ -15,8 +15,7 @@ import urllib.parse import urllib.request -MAX_MEMORY_BYTES = 128 * 1024*1024 -USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1" +USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0" MIMETYPES = { "video/mp4": "mp4", @@ -36,8 +35,11 @@ QUALITIES = { class VideoUnavailable(Exception): pass +class NotYouTube(Exception): + pass + def print_form(url="", msg=""): - script_url = "http://%s%s" % (os.environ["HTTP_HOST"], os.environ["REQUEST_URI"]) + script_url = "https://%s%s" % (os.environ["HTTP_HOST"], os.environ["REQUEST_URI"]) sys.stdout.write("Content-Type: text/html\r\n\r\n") sys.stdout.write(""" @@ -103,6 +105,16 @@ def urlopen(url, offset=None): assert start == offset return res +def validate_url(url): + parsed_url = urllib.parse.urlparse(url) + scheme_ok = parsed_url.scheme == "https" + host_ok = parsed_url.netloc.lstrip("www.") in ["youtube.com", "youtu.be"] + + if scheme_ok and host_ok: + return + else: + raise NotYouTube() + def parse_url(url, parser): f = urlopen(url) parser.feed(f.read().decode("utf-8")) @@ -138,7 +150,7 @@ def extract_js(script): def find_func_name(script): FUNC_NAME = R"([a-zA-Z0-9$]+)" - FUNC_PARAMS = R"(\([a-zA-Z]+\.s\))" + FUNC_PARAMS = R"(\([a-zA-Z,\.]+\.s\))" TERMINATOR = R"[,;\)]" PATTERN = FUNC_NAME + FUNC_PARAMS + TERMINATOR @@ -166,9 +178,9 @@ def decode_signature(js_url, signature): stdout=subprocess.PIPE ) js_decode_script = (""" - var vm = require('vm'); + const vm = require('vm'); - var sandbox = { + const sandbox = { location: { hash: '', href: '', @@ -181,23 +193,40 @@ def decode_signature(js_url, signature): navigator: { userAgent: '' }, + XMLHttpRequest: class XMLHttpRequest {}, + matchMedia: () => ({matches: () => {}, media: ''}), signature: %(signature)s, transformed_signature: null, g: function(){} // this is _yt_player }; sandbox.window = sandbox; - var code_string = %(code)s + ';'; - var exec_string = 'transformed_signature = %(func_name)s(signature);'; + const code_string = %(code)s + ';'; + const exec_string = 'transformed_signature = %(func_name)s("", "MARKER", signature);'; vm.runInNewContext(code_string + exec_string, sandbox); - console.log(sandbox.transformed_signature); + function findSignature(obj) { + if (typeof obj !== 'object') { + return; + } + for (const [key, value] of Object.entries(obj)) { + if (key === 'MARKER') { + return value; + } + const result = findSignature(value); + if (result) { + return result; + } + } + } + console.log(findSignature(sandbox.transformed_signature)); """ % params) p.stdin.write(js_decode_script.encode("utf-8")) p.stdin.close() transformed_signature = p.stdout.read().decode("utf-8").strip() + transformed_signature = urllib.parse.unquote(transformed_signature) if p.wait() != 0: raise Exception("js failed to execute: %d" % p.returncode) @@ -237,7 +266,8 @@ def get_best_video(player_config): signature = None if signature: - video_url = append_to_qs(video_url, {"signature": signature}) + sp = url_data.get("sp", ["signature"])[0] + video_url = append_to_qs(video_url, {sp: signature}) best_url = video_url best_quality = quality @@ -262,7 +292,7 @@ def get_video_url(page): if not video_url: return None, None - filename = sanitize_filename(page.title) + filename = sanitize_filename(player_config["args"]["title"]) filename += "." + extension return video_url, filename @@ -270,13 +300,11 @@ def get_video_url(page): class YouTubeVideoPageParser(html.parser.HTMLParser): def __init__(self): super().__init__() - self.title = None self.unavailable_message = None self.scripts = [] def handle_starttag(self, tag, attrs): attrs = dict(attrs) - self._handle_title(tag, attrs) self._handle_unavailable_message(tag, attrs) self._handle_script(tag, attrs) @@ -286,13 +314,6 @@ class YouTubeVideoPageParser(html.parser.HTMLParser): def _ignore_data(self, _): pass - def _handle_title(self, tag, attrs): - if tag == "title": - self.handle_data = self._handle_title_data - - def _handle_title_data(self, data): - self.title = data.strip() - def _handle_unavailable_message(self, tag, attrs): if attrs.get("id", None) == "unavailable-message": self.handle_data = self._handle_unavailable_message_data @@ -327,11 +348,12 @@ def cgimain(): try: url = args["url"][0] except: - print_form(url="http://www.youtube.com/watch?v=FOOBAR") + print_form(url="https://www.youtube.com/watch?v=FOOBAR") return try: page = YouTubeVideoPageParser() + validate_url(url) parse_url(url, page) video_url, filename = get_video_url(page) video_data = urlopen(video_url) @@ -340,10 +362,15 @@ def cgimain(): url=url, msg="

Sorry, there was an error: %s

" % cgi.escape(e.args[0]) ) + except NotYouTube: + print_form( + url=url, + msg="

Sorry, that does not look like a YouTube page!

" + ) except Exception as e: print_form( url=url, - msg="

Sorry, there was an error. Check your URL?

" + msg="

Sorry, there was an unknown error.

" ) return @@ -395,7 +422,7 @@ def main(): try: url = sys.argv[1] except: - print("Usage: %s http://youtube.com/watch?v=FOOBAR" % sys.argv[0], file=sys.stderr) + print("Usage: %s https://youtube.com/watch?v=FOOBAR" % sys.argv[0], file=sys.stderr) sys.exit(1) page = YouTubeVideoPageParser()