X-Git-Url: https://code.delx.au/youtube-cgi/blobdiff_plain/47a28dce9c9d3fa5d8c328cd33987f3419c54865..65a17baddbcccc5cd4d4234cdfde5803e60a0899:/youtube.cgi

diff --git a/youtube.cgi b/youtube.cgi
index 2546a14..56b8937 100755
--- a/youtube.cgi
+++ b/youtube.cgi
@@ -1,16 +1,18 @@
-#!/usr/bin/env python
+#!/usr/bin/python2
+
+from __future__ import division
 
 import cookielib
 import cgi
-import itertools
 import json
-from lxml.html import document_fromstring, tostring
+from lxml import html
 import os
 import re
 import resource
 import shutil
 import subprocess
 import sys
+import time
 import urllib
 import urllib2
 import urlparse
@@ -26,6 +28,8 @@ MIMETYPES = {
 }
 
 QUALITIES = {
+	"hd1080": 5,
+	"hd720": 4,
 	"large": 3,
 	"medium": 2,
 	"small": 1,
@@ -37,8 +41,8 @@ class VideoUnavailable(Exception):
 
 def print_form(url="", msg=""):
 	script_url = "http://%s%s" % (os.environ["HTTP_HOST"], os.environ["REQUEST_URI"])
-	print "Content-Type: application/xhtml+xml\r\n\r\n"
-	print """
+	sys.stdout.write("Content-Type: application/xhtml+xml\r\n\r\n")
+	sys.stdout.write("""
@@ -68,24 +72,40 @@ def print_form(url="", msg=""):
 				browser's bookmarks menu to download the video straight away.
 			</p>
 		</body>
 	</html>
-""".replace("{0}", msg).replace("{1}", url).replace("{2}", script_url)
+""".replace("{0}", msg).replace("{1}", url).replace("{2}", script_url))
 
 cookiejar = cookielib.CookieJar()
 urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
 referrer = ""
 
-def urlopen(url):
+def urlopen(url, offset=None):
+	if url.startswith("//"):
+		url = "http:" + url
+
 	global referrer
 	req = urllib2.Request(url)
 	if referrer:
 		req.add_header("Referer", referrer)
 	referrer = url
+
 	req.add_header("User-Agent", USER_AGENT)
-	return urlopener.open(req)
+
+	if offset:
+		req.add_header("Range", "bytes=%d-" % offset)
+
+	res = urlopener.open(req)
+
+	content_range = res.info().getheader("Content-Range")
+	if content_range:
+		tokens = content_range.split()
+		assert tokens[0] == "bytes"
+		start = int(tokens[1].split("-")[0])
+		assert start == offset
+	return res
 
 def parse_url(url):
 	f = urlopen(url)
-	doc = document_fromstring(f.read())
+	doc = html.parse(f, html.HTMLParser(encoding="utf-8", recover=True))
 	f.close()
 	return doc
 
@@ -97,58 +117,105 @@ def append_to_qs(url, params):
 	url = urlparse.urlunsplit(r)
 	return url
 
-def convert_from_old_itag(player_config):
-	url_data = urlparse.parse_qs(player_config["args"]["url_encoded_fmt_stream_map"])
-	url_data["url"] = []
-	for itag_url in url_data["itag"]:
-		pos = itag_url.find("url=")
-		url_data["url"].append(itag_url[pos+4:])
-	player_config["args"]["url_encoded_fmt_stream_map"] = urllib.urlencode(url_data, True)
-
 def get_player_config(doc):
 	player_config = None
 	for script in doc.xpath("//script"):
 		if not script.text:
 			continue
 		for line in script.text.split("\n"):
-			if "yt.playerConfig =" in line:
-				p1 = line.find("=")
-				p2 = line.rfind(";")
+			s = "ytplayer.config = {"
+			if s in line:
+				p1 = line.find(s) + len(s) - 1
+				p2 = line.find("};", p1) + 1
 				if p1 >= 0 and p2 > 0:
-					return json.loads(line[p1+1:p2])
-			if "'PLAYER_CONFIG': " in line:
-				p1 = line.find(":")
-				if p1 >= 0:
-					player_config = json.loads(line[p1+1:])
-					convert_from_old_itag(player_config)
-					return player_config
+					return json.loads(line[p1:p2])
 
-def get_best_video(player_config):
-	url_data = urlparse.parse_qs(player_config["args"]["url_encoded_fmt_stream_map"])
-	url_data = itertools.izip_longest(
-		url_data["url"],
-		url_data["type"],
-		url_data["quality"],
-		url_data.get("sig", []),
+def extract_function(output, script, func_name):
+	p1 = script.find("function " + func_name + "(")
+	p2 = script.find("}", p1)
+	code = script[p1:p2+1]
+	output.append(code)
+	deps = re.findall(R"[^\.][= ]([\$0-9a-zA-Z]+)\(", code)
+	deps = set(deps)
+	deps.remove(func_name)
+	for dep in deps:
+		extract_function(output, script, dep)
+
+def decode_signature(js_url, s):
+	script = urlopen(js_url).read()
+	func_name = re.search(R"\b([a-zA-Z]+)\([a-zA-Z]+\.s\);", script).groups()[0]
+
+	codes = []
+	extract_function(codes, script, func_name)
+
+	p = subprocess.Popen(
+		"js",
+		shell=True,
+		close_fds=True,
+		stdin=subprocess.PIPE,
+		stdout=subprocess.PIPE
 	)
+	for code in codes:
+		p.stdin.write(code + "\n")
+	p.stdin.write("console.log(%s('%s'));\n" % (func_name, s))
+	p.stdin.close()
+
+	signature = p.stdout.read().strip()
+	if p.wait() != 0:
+		raise Exception("js failed to execute: %d" % p.returncode)
+
+	return signature
+
+def get_best_video(player_config):
+	url_data_list = player_config["args"]["url_encoded_fmt_stream_map"].split(",")
+	js_url = player_config["assets"]["js"]
+
 	best_url = None
 	best_quality = None
 	best_extension = None
-	for video_url, mimetype, quality, signature in url_data:
-		mimetype = mimetype.split(";")[0]
+	for url_data in url_data_list:
+		url_data = urlparse.parse_qs(url_data)
+		mimetype = url_data["type"][0].split(";")[0]
+		quality = url_data["quality"][0]
+
+		if url_data.has_key("stereo3d"):
+			continue
+		if quality not in QUALITIES:
+			continue
 		if mimetype not in MIMETYPES:
 			continue
-		extension = "." + MIMETYPES[mimetype]
-		quality = QUALITIES.get(quality.split(",")[0], -1)
-		if best_quality is None or quality > best_quality:
-			if signature:
-				video_url = append_to_qs(video_url, {"signature": signature})
-			best_url = video_url
-			best_quality = quality
-			best_extension = extension
+
+		extension = MIMETYPES[mimetype]
+		quality = QUALITIES.get(quality, -1)
+
+		if best_quality is not None and quality < best_quality:
+			continue
+
+		video_url = url_data["url"][0]
+		if "sig" in url_data:
+			signature = url_data["sig"][0]
+		elif "s" in url_data:
+			signature = decode_signature(js_url, url_data["s"][0])
+		else:
+			signature = None
+
+		if signature:
+			video_url = append_to_qs(video_url, {"signature": signature})
+
+		best_url = video_url
+		best_quality = quality
+		best_extension = extension
 
 	return best_url, best_extension
 
+def sanitize_filename(filename):
+	return (
+		re.sub("\s+", " ", filename.strip())
+		.replace("\\", "-")
+		.replace("/", "-")
+		.replace("\0", " ")
+	)
+
 def get_video_url(doc):
 	unavailable = doc.xpath("//div[@id='unavailable-message']/text()")
 	if unavailable:
@@ -163,13 +230,20 @@ def get_video_url(doc):
 		return None, None
 
 	title = doc.xpath("/html/head/title/text()")[0]
-	title = re.sub("\s+", " ", title.strip())
-	valid_chars = frozenset("-_.() abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
-	filename = "".join(c for c in title.encode("ascii", "ignore") if c in valid_chars)
-	filename += extension
+	filename = sanitize_filename(title)
+	filename += "." + extension
 
 	return video_url, filename
 
+def write_video(filename, video_data):
+	httpinfo = video_data.info()
+	encoded_filename = urllib.quote(filename.encode("utf-8"))
+	sys.stdout.write("Content-Disposition: attachment; filename*=UTF-8''%s\r\n" % encoded_filename)
+	sys.stdout.write("Content-Length: %s\r\n" % httpinfo.getheader("Content-Length"))
+	sys.stdout.write("\r\n")
+	shutil.copyfileobj(video_data, sys.stdout)
+	video_data.close()
+
 def cgimain():
 	args = cgi.parse()
 	try:
@@ -181,13 +255,8 @@ def cgimain():
 	try:
 		doc = parse_url(url)
 		video_url, filename = get_video_url(doc)
-		data = urlopen(video_url)
-		httpinfo = data.info()
-		sys.stdout.write("Content-Disposition: attachment; filename=\"%s\"\r\n" % filename)
-		sys.stdout.write("Content-Length: %s\r\n" % httpinfo.getheader("Content-Length"))
-		sys.stdout.write("\r\n")
-		shutil.copyfileobj(data, sys.stdout)
-		data.close()
+		video_data = urlopen(video_url)
+		write_video(filename, video_data)
 	except VideoUnavailable, e:
 		print_form(
 			url=url,
@@ -200,25 +269,105 @@ def cgimain():
 		)
 		return
 
+def pp_size(size):
+	suffixes = ["", "KiB", "MiB", "GiB"]
+	for i, suffix in enumerate(suffixes):
+		if size < 1024:
+			break
+		size /= 1024
+	return "%.2f %s" % (size, suffix)
+
+def copy_with_progress(content_length, infile, outfile):
+	def print_status():
+		rate = 0
+		if now != last_ts:
+			rate = last_bytes_read / (now - last_ts)
+		sys.stdout.write("\33[2K\r")
+		sys.stdout.write("%s / %s (%s/sec)" % (
+			pp_size(bytes_read),
+			pp_size(content_length),
+			pp_size(rate),
+		))
+		sys.stdout.flush()
+
+	last_ts = 0
+	last_bytes_read = 0
+	bytes_read = 0
+	while True:
+		now = time.time()
+		if now - last_ts > 0.5:
+			print_status()
+			last_ts = now
+			last_bytes_read = 0
+
+		buf = infile.read(32768)
+		if not buf:
+			break
+		outfile.write(buf)
+		last_bytes_read += len(buf)
+		bytes_read += len(buf)
+
+	# Newline at the end
+	print_status()
+	print
+
 def main():
 	try:
 		url = sys.argv[1]
 	except:
 		print >>sys.stderr, "Usage: %s http://youtube.com/watch?v=FOOBAR" % sys.argv[0]
 		sys.exit(1)
+
 	doc = parse_url(url)
 	video_url, filename = get_video_url(doc)
-	data = urlopen(video_url)
-	outfile = open(filename, "w")
-	shutil.copyfileobj(data, outfile)
-	data.close()
+	print "Downloading", filename.encode("utf-8")
+
+	outfile = open(filename, "a")
+	offset = outfile.tell()
+	if offset > 0:
+		print "Resuming download from", pp_size(offset)
+	total_size = None
+
+	while True:
+		try:
+			video_data = urlopen(video_url, offset)
+		except urllib2.HTTPError, e:
+			if e.code == 416:
+				print "File is complete!"
+				break
+			else:
+				raise
+
+		content_length = int(video_data.info().getheader("Content-Length"))
+		if total_size is None:
+			total_size = content_length
+
+		try:
+			copy_with_progress(content_length, video_data, outfile)
+		except IOError, e:
+			print
+
+		video_data.close()
+		if outfile.tell() != total_size:
+			old_offset = offset
+			offset = outfile.tell()
+			if old_offset == offset:
+				time.sleep(1)
+			print "Restarting download from", pp_size(offset)
+		else:
+			break
+
 	outfile.close()
 
 if __name__ == "__main__":
-	resource.setrlimit(resource.RLIMIT_AS, (MAX_MEMORY_BYTES, MAX_MEMORY_BYTES))
+###	resource.setrlimit(resource.RLIMIT_AS, (MAX_MEMORY_BYTES, MAX_MEMORY_BYTES))
 	if os.environ.has_key("SCRIPT_NAME"):
 		cgimain()
 	else:
-		main()
+		try:
+			main()
+		except KeyboardInterrupt:
+			print "\nExiting..."
+			sys.exit(1)