X-Git-Url: https://code.delx.au/youtube-cgi/blobdiff_plain/2ce47440a9dfc3dddf1da04ea9fecb0588e80bc1..88e031fe0052c3be591bf129b778698de4fceba5:/youtube.cgi diff --git a/youtube.cgi b/youtube.cgi index c9937b7..0aadef3 100755 --- a/youtube.cgi +++ b/youtube.cgi @@ -15,7 +15,7 @@ import urllib.parse import urllib.request -USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0" +USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:82.0) Gecko/20100101 Firefox/82.0" MIMETYPES = { "video/mp4": "mp4", @@ -108,7 +108,8 @@ def urlopen(url, offset=None): def validate_url(url): parsed_url = urllib.parse.urlparse(url) scheme_ok = parsed_url.scheme == "https" - host_ok = parsed_url.netloc.lstrip("www.") in ["youtube.com", "youtu.be"] + host = parsed_url.netloc.lstrip("www.").lstrip("m.") + host_ok = host in ["youtube.com", "youtu.be"] if scheme_ok and host_ok: return @@ -130,15 +131,20 @@ def append_to_qs(url, params): return url def get_player_config(scripts): - player_config = None + config_strings = [ + ("ytplayer.config = {", 1, "};", 1), + ("ytcfg.set({\"", 2, "});", 1), + ] + player_config = {} for script in scripts: for line in script.split("\n"): - s = "ytplayer.config = {" - if s in line: - p1 = line.find(s) + len(s) - 1 - p2 = line.find("};", p1) + 1 - if p1 >= 0 and p2 > 0: - return json.loads(line[p1:p2]) + for s1, off1, s2, off2 in config_strings: + if s1 in line: + p1 = line.find(s1) + len(s1) - off1 + p2 = line.find(s2, p1) + off2 + if p1 >= 0 and p2 > 0: + player_config.update(json.loads(line[p1:p2])) + return player_config def extract_js(script): PREFIX = "var _yt_player={};(function(g){var window=this;" @@ -148,7 +154,7 @@ def extract_js(script): return script[len(PREFIX):-len(SUFFIX)] -def find_func_name(script): +def find_cipher_func(script): FUNC_NAME = R"([a-zA-Z0-9$]+)" DECODE_URI_COMPONENT = R"(\(decodeURIComponent)?" FUNC_PARAMS = R"(\([a-zA-Z,\.]+\.s\))" @@ -159,16 +165,33 @@ def find_func_name(script): func_name = match.groups()[0] return func_name -def decode_signature(js_url, signature): +def find_url_func(script): + FUNC_NAME = R"([a-zA-Z0-9$]+)" + PATTERN = R"this\.url\s*=\s*" + FUNC_NAME + R"\s*\(\s*this\s*\)" + + match = re.search(PATTERN, script) + func_name = match.groups()[0] + return func_name + +def decode_cipher_url(js_url, cipher): + cipher = urllib.parse.parse_qs(cipher) + args = [ + cipher["url"][0], + cipher["sp"][0], + cipher["s"][0], + ] + f = urlopen(js_url) script = f.read().decode("utf-8") f.close() - func_name = find_func_name(script) + cipher_func_name = find_cipher_func(script) + url_func_name = find_url_func(script) params = { - "func_name": func_name, - "signature": json.dumps(signature), + "cipher_func_name": cipher_func_name, + "url_func_name": url_func_name, + "args": json.dumps(args), "code": json.dumps(extract_js(script)), } p = subprocess.Popen( @@ -181,47 +204,49 @@ def decode_signature(js_url, signature): js_decode_script = (""" const vm = require('vm'); - const sandbox = { - location: { - hash: '', - href: '', - protocol: 'http:' - }, - history: { - pushState: function(){} - }, - document: {}, - navigator: { - userAgent: '' - }, - XMLHttpRequest: class XMLHttpRequest {}, - matchMedia: () => ({matches: () => {}, media: ''}), - signature: %(signature)s, - transformed_signature: null, - g: function(){} // this is _yt_player + const fakeGlobal = {}; + fakeGlobal.window = fakeGlobal; + fakeGlobal.location = { + hash: '', + host: 'www.youtube.com', + hostname: 'www.youtube.com', + href: 'https://www.youtube.com', + origin: 'https://www.youtube.com', + pathname: '/', + protocol: 'https:' + }; + fakeGlobal.history = { + pushState: function(){} + }; + fakeGlobal.document = { + location: fakeGlobal.location }; - sandbox.window = sandbox; + fakeGlobal.document = {}; + fakeGlobal.navigator = { + userAgent: '' + }; + fakeGlobal.XMLHttpRequest = class XMLHttpRequest {}; + fakeGlobal.matchMedia = () => ({matches: () => {}, media: ''}); + fakeGlobal.result_url = null; + fakeGlobal.g = function(){}; // this is _yt_player const code_string = %(code)s + ';'; - const exec_string = 'transformed_signature = %(func_name)s(signature);'; - vm.runInNewContext(code_string + exec_string, sandbox); + const exec_string = 'result_url = %(url_func_name)s(%(cipher_func_name)s(...%(args)s));'; + vm.runInNewContext(code_string + exec_string, fakeGlobal); - console.log(sandbox.transformed_signature); + console.log(fakeGlobal.result_url); """ % params) p.stdin.write(js_decode_script.encode("utf-8")) p.stdin.close() - transformed_signature = p.stdout.read().decode("utf-8").strip() - transformed_signature = urllib.parse.unquote(transformed_signature) + result_url = p.stdout.read().decode("utf-8").strip() if p.wait() != 0: raise Exception("js failed to execute: %d" % p.returncode) - return transformed_signature + return result_url def get_best_video(player_config): - js_url = player_config["assets"]["js"] - player_args = player_config["args"] player_response = json.loads(player_args["player_response"]) formats = player_response["streamingData"]["formats"] @@ -244,15 +269,9 @@ def get_best_video(player_config): if best_quality is not None and quality < best_quality: continue - if "cipher" in format_data: - cipher = urllib.parse.parse_qs(format_data["cipher"]) - video_url = cipher["url"][0] - if "sig" in cipher: - signature = cipher["sig"][0] - elif "s" in cipher: - signature = decode_signature(js_url, cipher["s"][0]) - sp = cipher.get("sp", ["signature"])[0] - video_url = append_to_qs(video_url, {sp: signature}) + if "signatureCipher" in format_data: + js_url = player_config["PLAYER_JS_URL"] + video_url = decode_cipher_url(js_url, format_data["signatureCipher"]) else: video_url = format_data["url"]