- r = list(urlparse.urlsplit(url))
- qs = urlparse.parse_qs(r[3])
- qs.update(params)
- r[3] = urllib.urlencode(qs, True)
- url = urlparse.urlunsplit(r)
- return url
-
def convert_from_old_itag(player_config):
    """Give every stream in the stream map its own ``url`` entry.

    The value of ``player_config["args"]["url_encoded_fmt_stream_map"]`` is
    parsed as a query string.  For each ``itag`` value, the text following
    the first ``url=`` marker is collected into a fresh ``url`` list, and the
    re-encoded query string is written back to the same key.

    The function mutates *player_config* in place and returns ``None``.
    ``itag`` values without a ``url=`` marker contribute no URL, and a stream
    map without any ``itag`` field is re-encoded unchanged.
    """
    args = player_config["args"]
    url_data = urlparse.parse_qs(args["url_encoded_fmt_stream_map"])

    urls = []
    for itag_url in url_data.get("itag", []):
        # partition() reports whether the marker was present at all, so a
        # value without "url=" is skipped instead of being sliced blindly.
        _, marker, stream_url = itag_url.partition("url=")
        if marker:
            urls.append(stream_url)
    url_data["url"] = urls

    # doseq=True: every element of each value list becomes its own pair.
    args["url_encoded_fmt_stream_map"] = urllib.urlencode(url_data, True)
-
def get_player_config(doc):
    """Find and decode the player configuration embedded in *doc*.

    Every ``//script`` element of *doc* that has text is scanned line by
    line for one of three markers:

    * ``yt.playerConfig =`` or ``ytplayer.config =``: the text between the
      marker and the last ``;`` of the line is decoded as JSON.
    * ``'PLAYER_CONFIG': ``: the rest of the line is decoded as JSON and
      then passed through ``convert_from_old_itag``.

    Returns the decoded object for the first line that matches, or ``None``
    when no line matches.  Raises ``ValueError`` if a matched payload is not
    valid JSON.
    """
    assignment_markers = ("yt.playerConfig =", "ytplayer.config =")
    legacy_marker = "'PLAYER_CONFIG': "

    for script in doc.xpath("//script"):
        if not script.text:
            continue
        for line in script.text.split("\n"):
            for marker in assignment_markers:
                start = line.find(marker)
                end = line.rfind(";")
                if start >= 0 and end > 0:
                    # Slice from the end of the marker itself, not from the
                    # first "=" of the line; json.loads ignores the
                    # whitespace that may follow the "=".
                    return json.loads(line[start + len(marker):end])

            start = line.find(legacy_marker)
            if start >= 0:
                player_config = json.loads(line[start + len(legacy_marker):])
                convert_from_old_itag(player_config)
                return player_config
    return None
-
def extract_function(output, script, func_name):
    """Append the source of *func_name* and of the functions it calls.

    The function text is taken from the ``function <name>(`` declaration in
    *script* up to and including the first ``}`` that follows, so a body
    containing nested braces is cut at its first closing brace.  Every
    identifier that is directly followed by ``(`` and not preceded by ``.``
    is then treated as a dependency and extracted the same way.

    Names without a ``function`` declaration in *script* (for example
    keywords such as ``if``) are skipped, and a function already present in
    *output* is not extracted again, which also stops call cycles.

    *output* is extended in place; nothing is returned.
    """
    declaration = re.search(
        r"function\s+%s\s*\(" % re.escape(func_name), script
    )
    if declaration is None:
        return

    start = declaration.start()
    end = script.find("}", start)
    if end < 0:
        return

    code = script[start:end + 1]
    if code in output:
        return
    output.append(code)

    deps = set(re.findall(r"[^\.]\b([a-zA-Z]+)\(", code))
    deps.discard(func_name)
    # Sorted so the order of the extracted code does not depend on set
    # iteration order.
    for dep in sorted(deps):
        extract_function(output, script, dep)
-
def decode_signature(js_url, s):
    """Run the player script's signature function on *s* and return the result.

    The script at *js_url* is downloaded, the name of the function that is
    called as ``name(x.s);`` is looked up, and that function plus its
    dependencies (see ``extract_function``) are fed to the ``js`` command on
    stdin together with a ``console.log`` call.  The stripped stdout of that
    command is returned.

    Raises ``Exception`` when no such call is found in the script or when
    ``js`` exits with a non-zero status.

    NOTE(review): the downloaded script is used exactly as ``read()`` returns
    it, without decoding -- presumably a Python 2 byte string; confirm before
    running under Python 3.
    """
    response = urlopen(js_url)
    try:
        script = response.read()
    finally:
        # Release the connection even if read() fails.
        response.close()

    match = re.search(r"\b([a-zA-Z]+)\([a-zA-Z]+\.s\);", script)
    if match is None:
        raise Exception("signature function not found in %s" % js_url)
    func_name = match.group(1)

    codes = []
    extract_function(codes, script, func_name)
    # json.dumps produces a quoted and escaped string literal, so quotes or
    # backslashes in the signature cannot break out of the JS string.
    codes.append("console.log(%s(%s));" % (func_name, json.dumps(s)))

    # Argument list instead of a shell command line: no shell is needed to
    # start a single fixed program.
    p = subprocess.Popen(
        ["js"],
        close_fds=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE
    )
    # communicate() writes stdin, closes it and drains stdout in one step.
    stdout, _ = p.communicate("\n".join(codes) + "\n")
    if p.returncode != 0:
        raise Exception("js failed to execute: %d" % p.returncode)

    return stdout.strip()
+ r = list(urllib.parse.urlsplit(url))
+ qs = urllib.parse.parse_qs(r[3])
+ qs.update(params)
+ r[3] = urllib.parse.urlencode(qs, True)
+ url = urllib.parse.urlunsplit(r)
+ return url
+
def get_player_config(scripts):
    """Return the first ``ytplayer.config`` object found in *scripts*.

    Each string in *scripts* is scanned line by line for the assignment
    ``ytplayer.config = {``.  A line qualifies when a ``};`` occurs somewhere
    after the opening brace; the object literal starting at that brace is
    then decoded as JSON.

    Returns the decoded object, or ``None`` when no line qualifies.
    Raises ``ValueError`` if the text at the opening brace is not valid JSON.
    """
    marker = "ytplayer.config = {"
    decoder = json.JSONDecoder()

    for script in scripts:
        for line in script.split("\n"):
            pos = line.find(marker)
            if pos < 0:
                continue
            brace = pos + len(marker) - 1  # index of the opening "{"
            if line.find("};", brace) < 0:
                continue
            # raw_decode stops at the brace that really closes the object,
            # so a "};" inside a string value no longer truncates it.
            config, _ = decoder.raw_decode(line[brace:])
            return config
    return None
+
def extract_js(script):
    """Strip the fixed wrapper from a player script and return its body.

    *script* must start with the ``_yt_player`` wrapper prologue and end
    with the matching epilogue (including the trailing newline); the text
    between the two is returned.

    Raises ``AssertionError`` when either part of the wrapper is missing.
    The check is an explicit ``raise`` so that it still runs under
    ``python -O``, where ``assert`` statements are removed.
    """
    prefix = "var _yt_player={};(function(g){var window=this;"
    suffix = ";})(_yt_player);\n"

    if not script.startswith(prefix):
        raise AssertionError("player script does not start with the expected wrapper")
    if not script.endswith(suffix):
        raise AssertionError("player script does not end with the expected wrapper")

    return script[len(prefix):-len(suffix)]
+
def find_func_name(script):
    """Return the name of the function that is called with a ``.s`` argument.

    The first occurrence in *script* of ``name(<args>.s)`` followed by
    ``,``, ``;`` or ``)`` is located, where ``name`` consists of ASCII
    letters, digits and ``$``, and ``<args>`` of ASCII letters, commas and
    dots.  The ``name`` part is returned.

    Raises ``ValueError`` when *script* contains no such call.
    """
    func_name = r"([a-zA-Z0-9$]+)"
    func_params = r"(\([a-zA-Z,\.]+\.s\))"
    terminator = r"[,;\)]"

    match = re.search(func_name + func_params + terminator, script)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("signature function call not found in player script")
    return match.group(1)
+
def decode_signature(js_url, signature):
    """Transform *signature* with the player script found at *js_url*.

    The script is downloaded and decoded as UTF-8, the signature function
    is located with ``find_func_name`` and the script body is unwrapped with
    ``extract_js``.  A small Node.js program is then piped to ``node``: it
    evaluates the script body inside a ``vm`` sandbox that stubs a few
    browser globals, calls the signature function as
    ``func("", "MARKER", signature)`` and prints the value stored under the
    ``MARKER`` key of the object that call returns.

    Returns that printed value, stripped and URL-unquoted.

    Raises ``Exception`` when ``node`` exits with a non-zero status, and
    ``OSError`` when the ``node`` executable cannot be started.
    """
    # The context manager closes the response even if read/decode fails.
    with urlopen(js_url) as response:
        script = response.read().decode("utf-8")

    func_name = find_func_name(script)

    params = {
        "func_name": func_name,
        # json.dumps yields valid, fully escaped JS string literals.
        "signature": json.dumps(signature),
        "code": json.dumps(extract_js(script)),
    }
    js_decode_script = """
    const vm = require('vm');

    const sandbox = {
        location: {
            hash: '',
            href: '',
            protocol: 'http:'
        },
        history: {
            pushState: function(){}
        },
        document: {},
        navigator: {
            userAgent: ''
        },
        XMLHttpRequest: class XMLHttpRequest {},
        matchMedia: () => ({matches: () => {}, media: ''}),
        signature: %(signature)s,
        transformed_signature: null,
        g: function(){} // this is _yt_player
    };
    sandbox.window = sandbox;

    const code_string = %(code)s + ';';
    const exec_string = 'transformed_signature = %(func_name)s("", "MARKER", signature);';
    vm.runInNewContext(code_string + exec_string, sandbox);

    function findSignature(obj) {
        if (typeof obj !== 'object') {
            return;
        }
        for (const [key, value] of Object.entries(obj)) {
            if (key === 'MARKER') {
                return value;
            }
            const result = findSignature(value);
            if (result) {
                return result;
            }
        }
    }
    console.log(findSignature(sandbox.transformed_signature));
    """ % params

    # Argument list, no shell: only one fixed program is started.  run()
    # feeds stdin, drains stdout and waits for the process in one call.
    completed = subprocess.run(
        ["node"],
        input=js_decode_script.encode("utf-8"),
        stdout=subprocess.PIPE,
    )
    if completed.returncode != 0:
        raise Exception("js failed to execute: %d" % completed.returncode)

    transformed_signature = completed.stdout.decode("utf-8").strip()
    return urllib.parse.unquote(transformed_signature)