def append_to_qs(url, params):
    """Return *url* with *params* merged into its query string.

    Existing query parameters are kept, including those with an empty
    value (``?flag=``).  A key present in *params* replaces every existing
    value for that key.  Values in *params* may be single values or
    sequences; a sequence is encoded as one ``key=value`` pair per element.

    Args:
        url: The URL to extend.
        params: Mapping of parameter names to a value or a sequence of values.

    Returns:
        The rebuilt URL string; scheme, host, path and fragment are unchanged.
    """
    parts = urllib.parse.urlsplit(url)
    # keep_blank_values=True: without it parse_qs discards "key=" entries,
    # so blank parameters would vanish from the rebuilt URL.
    query = urllib.parse.parse_qs(parts.query, keep_blank_values=True)
    query.update(params)
    # doseq=True expands the value lists produced by parse_qs into
    # repeated "key=value" pairs.
    new_query = urllib.parse.urlencode(query, doseq=True)
    return urllib.parse.urlunsplit(parts._replace(query=new_query))
+
def get_player_config(scripts):
    """Return the object assigned to ``ytplayer.config`` in *scripts*.

    Each script is scanned line by line for the text ``ytplayer.config = {``.
    The JSON object that starts at that opening brace is decoded and
    returned.  Decoding stops at the end of the object itself, so a ``};``
    sequence inside a string value does not truncate it.

    Args:
        scripts: Iterable of script source strings.

    Returns:
        The decoded object, or ``None`` when no line yields one.

    Raises:
        ValueError: (``json.JSONDecodeError``) if a line contains the marker
            and a ``};`` terminator but the object literal is not valid JSON.
    """
    marker = "ytplayer.config = {"
    decoder = json.JSONDecoder()
    for script in scripts:
        for line in script.split("\n"):
            pos = line.find(marker)
            if pos < 0:
                continue
            # The marker ends with the opening brace; start decoding there.
            start = pos + len(marker) - 1
            try:
                config, _ = decoder.raw_decode(line, start)
            except ValueError:
                # A literal that is terminated on this line but malformed is
                # an error; one that is not terminated here is skipped and
                # the scan moves on to the next line.
                if "};" in line[start:]:
                    raise
                continue
            return config
    return None
+
def extract_js(script):
    """Return the body of the player script without its wrapper.

    The script must begin with the ``var _yt_player={};(function(g){...``
    header and end with the ``;})(_yt_player);`` trailer followed by a
    newline; both are asserted.  Everything between them is returned.
    """
    head = "var _yt_player={};(function(g){var window=this;"
    tail = ";})(_yt_player);\n"
    assert script.startswith(head)
    assert script.endswith(tail)

    body_start = len(head)
    body_end = len(script) - len(tail)
    return script[body_start:body_end]
+
def find_cipher_func(script):
    """Return the name of the function called on a ``.s`` argument in *script*.

    The first place in *script* where an identifier is followed by an
    argument list ending in ``.s)`` (optionally wrapped in a
    ``decodeURIComponent`` call) and then one of ``,``, ``;`` or ``)`` is
    located, and the identifier is returned.

    Raises:
        AttributeError: if the pattern does not occur in *script*.
    """
    pattern = "".join((
        R"([a-zA-Z0-9$]+)",          # function name (captured, returned)
        R"(\(decodeURIComponent)?",  # optional decodeURIComponent wrapper
        R"(\([a-zA-Z,\.]+\.s\))",    # argument list whose last item ends in .s
        R"[,;\)]",                   # what must follow the call
    ))
    found = re.search(pattern, script)
    return found.groups()[0]
+
def find_url_func(script):
    """Return the name of the function assigned to ``this.url`` in *script*.

    Looks for the first ``this.url = NAME(this)`` statement (whitespace
    around the tokens is allowed) and returns ``NAME``.

    Raises:
        AttributeError: if no such assignment occurs in *script*.
    """
    name_group = R"([a-zA-Z0-9$]+)"
    pattern = "".join((R"this\.url\s*=\s*", name_group, R"\s*\(\s*this\s*\)"))
    found = re.search(pattern, script)
    return found.groups()[0]
+
def decode_cipher_url(js_url, cipher):
    """Compute a URL from *cipher* by running the player script in Node.js.

    The script at *js_url* is downloaded, the names of its cipher and URL
    functions are located with ``find_cipher_func`` and ``find_url_func``,
    and its body (see ``extract_js``) is executed in a Node ``vm`` context
    populated with stub browser globals.  Inside that context the call
    ``url_func(cipher_func(url, sp, s))`` is evaluated and its result is
    printed by Node.

    Args:
        js_url: URL of the player JavaScript file.
        cipher: URL-encoded query string carrying ``url``, ``sp`` and ``s``.

    Returns:
        The text Node printed for the result, with surrounding whitespace
        stripped.

    Raises:
        KeyError: if *cipher* lacks one of ``url``, ``sp`` or ``s``.
        Exception: if Node cannot be started or exits with a non-zero status.
    """
    fields = urllib.parse.parse_qs(cipher)
    args = [
        fields["url"][0],
        fields["sp"][0],
        fields["s"][0],
    ]

    # The context manager closes the connection even if read/decode raises.
    with urlopen(js_url) as response:
        script = response.read().decode("utf-8")

    params = {
        "cipher_func_name": find_cipher_func(script),
        "url_func_name": find_url_func(script),
        "args": json.dumps(args),
        "code": json.dumps(extract_js(script)),
    }

    # The arguments are handed to the sandbox as data (fakeGlobal.cipher_args)
    # rather than being spliced into the evaluated source string: values
    # containing quotes, backslashes or newlines would otherwise corrupt, or
    # inject into, the code that is executed.
    # NOTE(review): the template assigns fakeGlobal.document twice and the
    # second assignment (an empty object) is the one that takes effect; kept
    # as found - confirm which one is intended.
    js_decode_script = """
    const vm = require('vm');

    const fakeGlobal = {};
    fakeGlobal.window = fakeGlobal;
    fakeGlobal.location = {
        hash: '',
        host: 'www.youtube.com',
        hostname: 'www.youtube.com',
        href: 'https://www.youtube.com',
        origin: 'https://www.youtube.com',
        pathname: '/',
        protocol: 'https:'
    };
    fakeGlobal.history = {
        pushState: function(){}
    };
    fakeGlobal.document = {
        location: fakeGlobal.location
    };
    fakeGlobal.document = {};
    fakeGlobal.navigator = {
        userAgent: ''
    };
    fakeGlobal.XMLHttpRequest = class XMLHttpRequest {};
    fakeGlobal.matchMedia = () => ({matches: () => {}, media: ''});
    fakeGlobal.result_url = null;
    fakeGlobal.cipher_args = %(args)s;
    fakeGlobal.g = function(){}; // this is _yt_player

    const code_string = %(code)s + ';';
    const exec_string = 'result_url = %(url_func_name)s(%(cipher_func_name)s(...cipher_args));';
    vm.runInNewContext(code_string + exec_string, fakeGlobal);

    console.log(fakeGlobal.result_url);
    """ % params

    try:
        # An argv list (no shell) runs the node binary directly.
        node = subprocess.Popen(
            ["node"],
            close_fds=True,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
        )
    except OSError as err:
        # Without a shell a missing binary surfaces as OSError; report it
        # through the same exception class used for a failing script.
        raise Exception("js failed to execute: %s" % err) from err

    # communicate() feeds stdin, drains stdout and waits for exit in one
    # step, so neither pipe can fill up and stall the other side.
    output, _ = node.communicate(js_decode_script.encode("utf-8"))
    if node.returncode != 0:
        raise Exception("js failed to execute: %d" % node.returncode)

    return output.decode("utf-8").strip()