- r = list(urlparse.urlsplit(url))
- qs = urlparse.parse_qs(r[3])
- qs.update(params)
- r[3] = urllib.urlencode(qs, True)
- url = urlparse.urlunsplit(r)
- return url
-
# NOTE(review): this is the removed ('-') side of a diff and indentation was
# lost in this view; the nesting described here is inferred from syntax only
# -- TODO confirm against the original file.
#
# Rewrites player_config["args"]["url_encoded_fmt_stream_map"] in place:
# the current value is parsed as a query string, a fresh "url" list is built
# from the "itag" values, and the mapping is re-encoded (second positional
# argument True == doseq) and stored back. The function has no return
# statement.
-def convert_from_old_itag(player_config):
- url_data = urlparse.parse_qs(player_config["args"]["url_encoded_fmt_stream_map"])
# Any "url" values that parse_qs produced are discarded and rebuilt below.
- url_data["url"] = []
# Raises KeyError when the parsed query string has no "itag" field.
- for itag_url in url_data["itag"]:
# NOTE(review): str.find() returns -1 when "url=" is absent, in which case
# the slice below starts at index 3 instead of failing -- verify that every
# "itag" value really contains "url=".
- pos = itag_url.find("url=")
# Keep everything after the first "url=" marker (4 == len("url=")).
- url_data["url"].append(itag_url[pos+4:])
# Presumably executed once after the loop -- TODO confirm nesting.
- player_config["args"]["url_encoded_fmt_stream_map"] = urllib.urlencode(url_data, True)
-
# NOTE(review): this is the removed ('-') side of a diff and indentation was
# lost in this view; the nesting described here is inferred from syntax only
# -- TODO confirm against the original file.
#
# Searches the text of every element returned by doc.xpath("//script"),
# line by line, for an embedded player configuration and returns it parsed
# from JSON. Two markers are recognised:
#   * "yt.playerConfig ="   -> JSON between the first "=" and the last ";"
#   * "'PLAYER_CONFIG': "   -> JSON after the first ":", post-processed by
#                              convert_from_old_itag()
# json.loads raises ValueError if the extracted text is not valid JSON.
-def get_player_config(doc):
- player_config = None
- for script in doc.xpath("//script"):
# Elements with no (or empty) text are skipped.
- if not script.text:
- continue
- for line in script.text.split("\n"):
- if "yt.playerConfig =" in line:
# NOTE(review): find("=") locates the first "=" anywhere in the line, not
# necessarily the one belonging to the marker.
- p1 = line.find("=")
- p2 = line.rfind(";")
- if p1 >= 0 and p2 > 0:
- return json.loads(line[p1+1:p2])
- if "'PLAYER_CONFIG': " in line:
# NOTE(review): find(":") locates the first ":" anywhere in the line.
- p1 = line.find(":")
- if p1 >= 0:
- player_config = json.loads(line[p1+1:])
# Mutates player_config in place (see convert_from_old_itag).
- convert_from_old_itag(player_config)
# NOTE(review): with indentation lost it is unclear whether this return sits
# inside the branch above or at function level (where it would yield None
# when no marker was found) -- TODO confirm.
- return player_config
+ r = list(urllib.parse.urlsplit(url))
+ qs = urllib.parse.parse_qs(r[3])
+ qs.update(params)
+ r[3] = urllib.parse.urlencode(qs, True)
+ url = urllib.parse.urlunsplit(r)
+ return url
+
def get_player_config(scripts):
    """Collect the player configuration embedded in inline scripts.

    Each line of each script is searched for the known prefixes
    (``ytplayer.config = {`` and ``ytcfg.set({"``). The JSON object that
    starts at the prefix's opening brace is parsed and merged into one
    dict; keys from later matches override earlier ones.

    The end of the object is determined by the JSON parser itself rather
    than by searching for the terminator text, so a ``};`` or ``});``
    occurring inside a JSON string value no longer truncates the object.

    Args:
        scripts: iterable of script source strings.

    Returns:
        dict: the merged configuration; empty when nothing matched.

    Raises:
        ValueError: (``json.JSONDecodeError``) when a line contains both a
            prefix and its terminator but the text at the prefix is not
            valid JSON.
    """
    # (prefix, number of trailing prefix chars that belong to the JSON,
    #  terminator, number of leading terminator chars that belong to the JSON)
    config_strings = [
        ("ytplayer.config = {", 1, "};", 1),
        ("ytcfg.set({\"", 2, "});", 1),
    ]
    decoder = json.JSONDecoder()
    player_config = {}
    for script in scripts:
        for line in script.split("\n"):
            for s1, off1, s2, off2 in config_strings:
                prefix_pos = line.find(s1)
                if prefix_pos < 0:
                    continue
                # Step back onto the opening brace that is part of the prefix.
                start = prefix_pos + len(s1) - off1
                # Lines lacking the terminator are ignored, e.g. when the
                # object continues on a following line.
                if line.find(s2, start) < 0:
                    continue
                obj, end = decoder.raw_decode(line, start)
                # Only accept the object when the remainder of the
                # terminator directly follows its closing brace.
                if line.startswith(s2[off2:], end):
                    player_config.update(obj)
    return player_config
+
def extract_js(script):
    """Return the body of the player script without its wrapper.

    The script is expected to be wrapped in a fixed prefix
    (``var _yt_player={};(function(g){var window=this;``) and suffix
    (``;})(_yt_player);`` followed by a newline); both are stripped.

    Args:
        script: full text of the player script.

    Returns:
        str: the text between the prefix and the suffix.

    Raises:
        ValueError: if the script does not start with the prefix or does
            not end with the suffix. Explicit checks are used instead of
            ``assert`` so validation still happens under ``python -O``.
    """
    PREFIX = "var _yt_player={};(function(g){var window=this;"
    SUFFIX = ";})(_yt_player);\n"
    if not script.startswith(PREFIX):
        raise ValueError("player script does not start with the expected wrapper prefix")
    if not script.endswith(SUFFIX):
        raise ValueError("player script does not end with the expected wrapper suffix")

    return script[len(PREFIX):-len(SUFFIX)]
+
def find_cipher_func(script):
    """Return the name of the function that is called with a ``.s`` argument.

    Searches ``script`` for the first call of the form
    ``NAME(<args ending in .s>)`` -- optionally with the argument wrapped in
    ``decodeURIComponent(`` -- followed by ``,``, ``;`` or ``)``.

    Args:
        script: JavaScript source text to search.

    Returns:
        str: the matched function name.

    Raises:
        ValueError: if no such call is present (previously this surfaced as
            an ``AttributeError`` on ``None``).
    """
    FUNC_NAME = r"([a-zA-Z0-9$]+)"
    DECODE_URI_COMPONENT = r"(\(decodeURIComponent)?"
    FUNC_PARAMS = r"(\([a-zA-Z,\.]+\.s\))"
    TERMINATOR = r"[,;\)]"
    PATTERN = FUNC_NAME + DECODE_URI_COMPONENT + FUNC_PARAMS + TERMINATOR

    match = re.search(PATTERN, script)
    if match is None:
        raise ValueError("cipher function call not found in player script")
    return match.group(1)
+
def find_url_func(script):
    """Return the name of the function assigned to ``this.url``.

    Searches ``script`` for the first statement of the form
    ``this.url = NAME(this)`` (whitespace around ``=``, and inside the
    parentheses, is tolerated).

    Args:
        script: JavaScript source text to search.

    Returns:
        str: the matched function name.

    Raises:
        ValueError: if no such assignment is present (previously this
            surfaced as an ``AttributeError`` on ``None``).
    """
    FUNC_NAME = r"([a-zA-Z0-9$]+)"
    PATTERN = r"this\.url\s*=\s*" + FUNC_NAME + r"\s*\(\s*this\s*\)"

    match = re.search(PATTERN, script)
    if match is None:
        raise ValueError("url function assignment not found in player script")
    return match.group(1)
+
def decode_cipher_url(js_url, cipher):
    """Resolve a ciphered stream URL by running the player script in Node.js.

    The player script is downloaded from ``js_url``; the names of its cipher
    and URL-building functions are located with ``find_cipher_func`` and
    ``find_url_func``; the unwrapped script is then executed inside a
    ``vm`` sandbox in a ``node`` child process, which prints the result of
    ``url_func(cipher_func(url, sp, s))``.

    Args:
        js_url: address of the player script, passed to ``urlopen``.
        cipher: query string carrying the ``url``, ``sp`` and ``s`` fields.

    Returns:
        str: whatever the child process printed on stdout, stripped.

    Raises:
        KeyError: if ``cipher`` lacks one of ``url``, ``sp`` or ``s``.
        Exception: if the ``node`` process exits with a non-zero status.
    """
    cipher = urllib.parse.parse_qs(cipher)
    args = [
        cipher["url"][0],
        cipher["sp"][0],
        cipher["s"][0],
    ]

    f = urlopen(js_url)
    try:
        script = f.read().decode("utf-8")
    finally:
        # Release the connection even when reading or decoding fails.
        f.close()

    cipher_func_name = find_cipher_func(script)
    url_func_name = find_url_func(script)

    # The statement is built here and JSON-encoded as a whole, so quotes or
    # backslashes in the remote-supplied args cannot break out of the
    # JavaScript string literal that carries it.
    exec_string = "result_url = %s(%s(...%s));" % (
        url_func_name,
        cipher_func_name,
        json.dumps(args),
    )
    params = {
        "code": json.dumps(extract_js(script)),
        "exec": json.dumps(exec_string),
    }

    # NOTE(review): fakeGlobal.document is assigned twice below, so the
    # second (empty) object replaces the one carrying `location` -- confirm
    # which of the two is intended.
    js_decode_script = ("""
        const vm = require('vm');

        const fakeGlobal = {};
        fakeGlobal.window = fakeGlobal;
        fakeGlobal.location = {
            hash: '',
            host: 'www.youtube.com',
            hostname: 'www.youtube.com',
            href: 'https://www.youtube.com',
            origin: 'https://www.youtube.com',
            pathname: '/',
            protocol: 'https:'
        };
        fakeGlobal.history = {
            pushState: function(){}
        };
        fakeGlobal.document = {
            location: fakeGlobal.location
        };
        fakeGlobal.document = {};
        fakeGlobal.navigator = {
            userAgent: ''
        };
        fakeGlobal.XMLHttpRequest = class XMLHttpRequest {};
        fakeGlobal.matchMedia = () => ({matches: () => {}, media: ''});
        fakeGlobal.result_url = null;
        fakeGlobal.g = function(){}; // this is _yt_player

        const code_string = %(code)s + ';';
        const exec_string = %(exec)s;
        vm.runInNewContext(code_string + exec_string, fakeGlobal);

        console.log(fakeGlobal.result_url);
    """ % params)

    # No shell is needed to start node; the script is fed through stdin.
    p = subprocess.Popen(
        ["node"],
        close_fds=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )
    # communicate() writes stdin, closes it, drains stdout and waits.
    stdout_data, _ = p.communicate(js_decode_script.encode("utf-8"))
    if p.returncode != 0:
        raise Exception("js failed to execute: %d" % p.returncode)

    return stdout_data.decode("utf-8").strip()