From 82fdab23ffe472c92e879cbb4325919e2aa7ea4d Mon Sep 17 00:00:00 2001
From: James Bunton
Date: Thu, 17 Jul 2014 20:36:35 +1000
Subject: [PATCH] better extract_js for decoding signatures

---
 youtube.cgi | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/youtube.cgi b/youtube.cgi
index 11d2538..d6273ba 100755
--- a/youtube.cgi
+++ b/youtube.cgi
@@ -35,6 +35,14 @@ QUALITIES = {
     "small": 1,
 }
 
+JS_BROWSER_STUB = """
+var window={};
+var document={};
+window.location={};
+var navigator={};
+"""
+
+
 class VideoUnavailable(Exception):
     pass
 
@@ -131,16 +139,13 @@ def get_player_config(doc):
         if p1 >= 0 and p2 > 0:
             return json.loads(line[p1:p2])
 
-def extract_function(output, script, func_name):
-    p1 = script.find("function " + func_name + "(")
-    p2 = script.find("}", p1)
-    code = script[p1:p2+1]
-    output.append(code)
-    deps = re.findall(R"[^\.][= ]([\$0-9a-zA-Z]+)\(", code)
-    deps = set(deps)
-    deps.remove(func_name)
-    for dep in deps:
-        extract_function(output, script, dep)
+def extract_js(script):
+    PREFIX = "(function(){"
+    SUFFIX = "})();\n"
+    assert script.startswith(PREFIX)
+    assert script.endswith(SUFFIX)
+
+    return script[len(PREFIX):-len(SUFFIX)]
 
 def find_func_name(script):
     FUNC_NAME = R"([a-zA-Z0-9$]+)"
@@ -155,9 +160,6 @@ def decode_signature(js_url, s):
     script = urlopen(js_url).read()
     func_name = find_func_name(script)
 
-    codes = []
-    extract_function(codes, script, func_name)
-
     p = subprocess.Popen(
         "js",
         shell=True,
@@ -165,9 +167,10 @@ def decode_signature(js_url, s):
         stdin=subprocess.PIPE,
         stdout=subprocess.PIPE
     )
-    for code in codes:
-        p.stdin.write(code + "\n")
-    p.stdin.write("console.log(%s('%s'));\n" % (func_name, s))
+    w = p.stdin.write
+    w(JS_BROWSER_STUB)
+    w(extract_js(script))
+    w("console.log(%s('%s'));\n" % (func_name, s))
     p.stdin.close()
 
     signature = p.stdout.read().strip()
-- 
2.39.2