code.delx.au - youtube-cgi/blobdiff - youtube.cgi
fixed parsing of video url data
[youtube-cgi] / youtube.cgi
index 293db8ab74d013e7306048bdd48c6eb56aca50af..a54fa911845109b04e39612ce2ff0716b3070f3b 100755 (executable)
@@ -1,8 +1,9 @@
 #!/usr/bin/env python
 
+from __future__ import division
+
 import cookielib
 import cgi
-import itertools
 import json
 from lxml import html
 import os
@@ -11,6 +12,7 @@ import resource
 import shutil
 import subprocess
 import sys
+import time
 import urllib
 import urllib2
 import urlparse
@@ -26,6 +28,8 @@ MIMETYPES = {
 }
 
 QUALITIES = {
+       "hd1080": 5,
+       "hd720": 4,
        "large": 3,
        "medium": 2,
        "small": 1,
@@ -74,14 +78,27 @@ cookiejar = cookielib.CookieJar()
 urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
 referrer = ""
 
-def urlopen(url):
+def urlopen(url, offset=None):
+       """Open url through the shared cookie-aware opener and return the response.
+
+       The previously opened URL is sent as the Referer and is then replaced
+       by url.  When offset is a non-zero byte position, a
+       "Range: bytes=<offset>-" header asks the server to resume from there,
+       and the response has to confirm that start position.
+
+       url -- address to fetch
+       offset -- byte position to resume from; None or 0 requests everything
+
+       Raises IOError when a ranged request is answered without a
+       Content-Range header, i.e. the server sent the resource from byte 0;
+       appending that to a partial file would corrupt it.  Errors raised by
+       the opener (such as urllib2.HTTPError) propagate unchanged.
+       """
        global referrer
        req = urllib2.Request(url)
        if referrer:
                req.add_header("Referer", referrer)
        referrer = url
+
        req.add_header("User-Agent", USER_AGENT)
-       return urlopener.open(req)
+
+       if offset:
+               req.add_header("Range", "bytes=%d-" % offset)
+
+       res = urlopener.open(req)
+
+       content_range = res.info().getheader("Content-Range")
+       if content_range:
+               # Header looks like "bytes <start>-<end>/<total>".
+               tokens = content_range.split()
+               assert tokens[0] == "bytes"
+               start = int(tokens[1].split("-")[0])
+               # Without a requested offset the body must begin at byte 0.
+               assert start == (offset or 0)
+       elif offset:
+               # The Range header was ignored: refuse the full body instead of
+               # letting the caller append it after already-downloaded data.
+               res.close()
+               raise IOError("Server ignored Range request for offset %d" % offset)
+       return res
 
 def parse_url(url):
        f = urlopen(url)
@@ -124,22 +141,26 @@ def get_player_config(doc):
                                        return player_config
 
 def get_best_video(player_config):
-       url_data = urlparse.parse_qs(player_config["args"]["url_encoded_fmt_stream_map"])
-       url_data = itertools.izip_longest(
-               url_data["url"],
-               url_data["type"],
-               url_data["quality"],
-               url_data.get("sig", []),
-       )
+       url_data_list = player_config["args"]["url_encoded_fmt_stream_map"].split(",")
+
        best_url = None
        best_quality = None
        best_extension = None
-       for video_url, mimetype, quality, signature in url_data:
-               mimetype = mimetype.split(";")[0]
+       for url_data in url_data_list:
+               url_data = urlparse.parse_qs(url_data)
+               video_url = url_data["url"][0]
+               mimetype = url_data["type"][0].split(";")[0]
+               quality = url_data["quality"][0]
+               signature = url_data["sig"][0]
+
+               if quality not in QUALITIES:
+                       continue
                if mimetype not in MIMETYPES:
                        continue
+
                extension = MIMETYPES[mimetype]
-               quality = QUALITIES.get(quality.split(",")[0], -1)
+               quality = QUALITIES.get(quality, -1)
+
                if best_quality is None or quality > best_quality:
                        if signature:
                                video_url = append_to_qs(video_url, {"signature": signature})
@@ -210,18 +231,94 @@ def cgimain():
                )
                return
 
+def pp_size(size):
+       """Format a byte count (or bytes-per-second rate) for display.
+
+       The value is divided by 1024 until it drops below 1024 or the largest
+       known unit (GiB) is reached, then rendered with two decimals, a space
+       and the unit suffix.  Values below 1024 get an empty suffix, so the
+       result ends with the space (e.g. "512.00 ").
+
+       size -- number of bytes (int or float)
+       Returns the formatted string, e.g. "1.50 KiB".
+       """
+       suffixes = ["", "KiB", "MiB", "GiB"]
+       # Only scale between units that exist: dividing once more after the last
+       # suffix would report 1 TiB as "1.00 GiB" instead of "1024.00 GiB".
+       for suffix in suffixes[:-1]:
+               if size < 1024:
+                       break
+               # Float divisor keeps the fraction whatever the division mode is.
+               size /= 1024.0
+       else:
+               suffix = suffixes[-1]
+       return "%.2f %s" % (size, suffix)
+
+def copy_with_progress(content_length, infile, outfile):
+       """Copy infile to outfile while drawing a one-line progress display.
+
+       Data is read in 32768-byte chunks until a read returns nothing.  Before
+       each read, if more than 0.5 seconds have passed since the previous
+       redraw, the terminal line is rewritten on stdout as
+       "<copied> / <content_length> (<rate>/sec)", where the rate covers the
+       bytes copied since that previous redraw.  One last redraw and a newline
+       are emitted once the input is exhausted.
+
+       content_length -- expected byte count; only used for the display
+       infile -- object providing read(n)
+       outfile -- object providing write(data)
+       """
+       def print_status():
+               # Uses the enclosing function's counters as they are at call time.
+               if now == window_start:
+                       # No time has passed since the last redraw: avoid dividing by zero.
+                       rate = 0
+               else:
+                       rate = window_bytes / (now - window_start)
+               # "\33[2K" erases the current terminal line, "\r" returns to column 0.
+               sys.stdout.write("\33[2K\r")
+               status = "%s / %s (%s/sec)" % (
+                       pp_size(copied),
+                       pp_size(content_length),
+                       pp_size(rate),
+               )
+               sys.stdout.write(status)
+               sys.stdout.flush()
+
+       copied = 0
+       window_bytes = 0
+       window_start = 0
+       while True:
+               now = time.time()
+               if now - window_start > 0.5:
+                       print_status()
+                       window_bytes = 0
+                       window_start = now
+
+               chunk = infile.read(32768)
+               if not chunk:
+                       break
+               outfile.write(chunk)
+               chunk_len = len(chunk)
+               copied += chunk_len
+               window_bytes += chunk_len
+
+       # Show the final totals, then move off the progress line.
+       print_status()
+       print
+
 def main():
        try:
                url = sys.argv[1]
        except:
                print >>sys.stderr, "Usage: %s http://youtube.com/watch?v=FOOBAR" % sys.argv[0]
                sys.exit(1)
+
        doc = parse_url(url)
        video_url, filename = get_video_url(doc)
-       data = urlopen(video_url)
-       outfile = open(filename, "w")
-       shutil.copyfileobj(data, outfile)
-       data.close()
+       print "Downloading", filename.encode("utf-8")
+
+       outfile = open(filename, "a")
+       offset = outfile.tell()
+       if offset > 0:
+               print "Resuming download from", pp_size(offset)
+       total_size = None
+
+       while True:
+               try:
+                       video_data = urlopen(video_url, offset)
+               except urllib2.HTTPError, e:
+                       if e.code == 416:
+                               print "File is complete!"
+                               break
+                       else:
+                               raise
+
+               content_length = int(video_data.info().getheader("Content-Length"))
+               if total_size is None:
+                       total_size = content_length
+
+               try:
+                       copy_with_progress(content_length, video_data, outfile)
+               except IOError, e:
+                       print
+
+               video_data.close()
+               if outfile.tell() != total_size:
+                       old_offset = offset
+                       offset = outfile.tell()
+                       if old_offset == offset:
+                               time.sleep(1)
+                       print "Restarting download from", pp_size(offset)
+               else:
+                       break
+
        outfile.close()
 
 
@@ -230,5 +327,9 @@ if __name__ == "__main__":
        if os.environ.has_key("SCRIPT_NAME"):
                cgimain()
        else:
-               main()
+               try:
+                       main()
+               except KeyboardInterrupt:
+                       print "\nExiting..."
+                       sys.exit(1)