better extract_js for decoding signatures
author James Bunton <jamesbunton@delx.net.au>
Thu, 17 Jul 2014 10:36:35 +0000 (20:36 +1000)
committer James Bunton <jamesbunton@delx.net.au>
Thu, 17 Jul 2014 10:36:35 +0000 (20:36 +1000)
youtube.cgi

index 11d2538..d6273ba 100755 (executable)
@@ -35,6 +35,14 @@ QUALITIES = {
     "small": 1,
 }
 
+JS_BROWSER_STUB = """
+var window={};
+var document={};
+window.location={};
+var navigator={};
+"""
+
+
 
 class VideoUnavailable(Exception):
     pass
@@ -131,16 +139,13 @@ def get_player_config(doc):
                 if p1 >= 0 and p2 > 0:
                     return json.loads(line[p1:p2])
 
-def extract_function(output, script, func_name):
-    p1 = script.find("function " + func_name + "(")
-    p2 = script.find("}", p1)
-    code = script[p1:p2+1]
-    output.append(code)
-    deps = re.findall(R"[^\.][= ]([\$0-9a-zA-Z]+)\(", code)
-    deps = set(deps)
-    deps.remove(func_name)
-    for dep in deps:
-        extract_function(output, script, dep)
+def extract_js(script):
+    PREFIX = "(function(){"
+    SUFFIX = "})();\n"
+    assert script.startswith(PREFIX)
+    assert script.endswith(SUFFIX)
+
+    return script[len(PREFIX):-len(SUFFIX)]
 
 def find_func_name(script):
     FUNC_NAME = R"([a-zA-Z0-9$]+)"
@@ -155,9 +160,6 @@ def decode_signature(js_url, s):
     script = urlopen(js_url).read()
     func_name = find_func_name(script)
 
-    codes = []
-    extract_function(codes, script, func_name)
-
     p = subprocess.Popen(
         "js",
         shell=True,
@@ -165,9 +167,10 @@ def decode_signature(js_url, s):
         stdin=subprocess.PIPE,
         stdout=subprocess.PIPE
     )
-    for code in codes:
-        p.stdin.write(code + "\n")
-    p.stdin.write("console.log(%s('%s'));\n" % (func_name, s))
+    w = p.stdin.write
+    w(JS_BROWSER_STUB)
+    w(extract_js(script))
+    w("console.log(%s('%s'));\n" % (func_name, s))
     p.stdin.close()
 
     signature = p.stdout.read().strip()