code.delx.au - youtube-cgi/commitdiff
auto-retry with HTTP ranged gets for command line grabber
author James Bunton <jamesbunton@delx.net.au>
Fri, 12 Oct 2012 13:38:31 +0000 (00:38 +1100)
committer James Bunton <jamesbunton@delx.net.au>
Fri, 12 Oct 2012 13:38:31 +0000 (00:38 +1100)
youtube.cgi

index 283e69233b5efc4860482bdaa9f39a4720a6fed4..5e612d4084dd1f22ccc5d884dffefdd0ef2ec7f5 100755 (executable)
@@ -79,14 +79,27 @@ cookiejar = cookielib.CookieJar()
 urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
 referrer = ""
 
-def urlopen(url):
+def urlopen(url, offset=None):
        global referrer
        req = urllib2.Request(url)
        if referrer:
                req.add_header("Referer", referrer)
        referrer = url
+
        req.add_header("User-Agent", USER_AGENT)
-       return urlopener.open(req)
+
+       if offset:
+               req.add_header("Range", "bytes=%d-" % offset)
+
+       res = urlopener.open(req)
+
+       content_range = res.info().getheader("Content-Range")
+       if content_range:
+               tokens = content_range.split()
+               assert tokens[0] == "bytes"
+               start = int(tokens[1].split("-")[0])
+               assert start == offset
+       return res
 
 def parse_url(url):
        f = urlopen(url)
@@ -215,20 +228,20 @@ def cgimain():
                )
                return
 
-def copy_with_progress(total_size, infile, outfile):
-       def pp_size(size):
-               suffixes = ["", "KiB", "MiB", "GiB"]
-               for i, suffix in enumerate(suffixes):
-                       if size < 1024:
-                               break
-                       size /= 1024
-               return "%.2f %s" % (size, suffix)
+def pp_size(size):
+       suffixes = ["", "KiB", "MiB", "GiB"]
+       for i, suffix in enumerate(suffixes):
+               if size < 1024:
+                       break
+               size /= 1024
+       return "%.2f %s" % (size, suffix)
 
+def copy_with_progress(content_length, infile, outfile):
        def print_status():
                sys.stdout.write("\33[2K\r")
                sys.stdout.write("%s / %s (%s/sec)" % (
                        pp_size(bytes_read),
-                       pp_size(total_size),
+                       pp_size(content_length),
                        pp_size(bytes_read / (now - start_ts)),
                ))
                sys.stdout.flush()
@@ -258,17 +271,45 @@ def main():
        except:
                print >>sys.stderr, "Usage: %s http://youtube.com/watch?v=FOOBAR" % sys.argv[0]
                sys.exit(1)
+
        doc = parse_url(url)
        video_url, filename = get_video_url(doc)
-       video_data = urlopen(video_url)
-       if os.path.isfile(filename):
-               print >>sys.stderr, "Error! File exists:", filename
-               sys.exit(1)
-       outfile = open(filename, "w")
-       total_size = int(video_data.info().getheader("Content-Length"))
        print "Downloading", filename.encode("utf-8")
-       copy_with_progress(total_size, video_data, outfile)
-       video_data.close()
+
+       outfile = open(filename, "a")
+       offset = outfile.tell()
+       if offset > 0:
+               print "Resuming download from", pp_size(offset)
+       total_size = None
+
+       while True:
+               try:
+                       video_data = urlopen(video_url, offset)
+               except urllib2.HTTPError, e:
+                       if e.code == 416:
+                               print "File is complete!"
+                               break
+                       else:
+                               raise
+
+               content_length = int(video_data.info().getheader("Content-Length"))
+               if total_size is None:
+                       total_size = content_length
+
+               try:
+                       copy_with_progress(content_length, video_data, outfile)
+               except IOError:
+                       print
+
+               video_data.close()
+               if outfile.tell() != total_size:
+                       old_offset = offset
+                       offset = outfile.tell()
+                       if old_offset == offset:
+                               time.sleep(1)
+                       print "Restarting download from", pp_size(offset)
+                       continue
+
        outfile.close()