]>
code.delx.au - youtube-cgi/blob - youtube.cgi
3 from __future__
import division
22 MAX_MEMORY_BYTES
= 128 * 1024*1024
23 USER_AGENT
= "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1"
40 class VideoUnavailable(Exception):
43 def print_form(url
="", msg
=""):
44 script_url
= "http://%s%s" % (os
.environ
["HTTP_HOST"], os
.environ
["REQUEST_URI"])
45 sys
.stdout
.write("Content-Type: application/xhtml+xml\r\n\r\n")
47 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
48 <html xmlns="http://www.w3.org/1999/xhtml">
50 <title>delx.net.au - YouTube Scraper</title>
51 <link rel="stylesheet" type="text/css" href="/style.css"/>
52 <style type="text/css">
62 <h1>delx.net.au - YouTube Scraper</h1>
64 <form action="" method="get">
65 <p>This page will let you easily download YouTube videos to watch offline. It
66 will automatically grab the highest quality version.</p>
67 <div><input type="text" name="url" value="{1}"/></div>
68 <div><input type="submit" value="Download!"/></div>
70 <p>Tip! Use this bookmarklet: <a href="javascript:(function(){window.location='{2}?url='+escape(location);})()">YouTube Download</a>
71 to easily download videos. Right-click the link and add it to bookmarks,
72 then when you're looking at a YouTube page select that bookmark from your
73 browser's bookmarks menu to download the video straight away.</p>
76 """.replace("{0}", msg
).replace("{1}", url
).replace("{2}", script_url
))
78 cookiejar
= cookielib
.CookieJar()
79 urlopener
= urllib2
.build_opener(urllib2
.HTTPCookieProcessor(cookiejar
))
82 def urlopen(url
, offset
=None):
84 req
= urllib2
.Request(url
)
86 req
.add_header("Referer", referrer
)
89 req
.add_header("User-Agent", USER_AGENT
)
92 req
.add_header("Range", "bytes=%d-" % offset
)
94 res
= urlopener
.open(req
)
96 content_range
= res
.info().getheader("Content-Range")
98 tokens
= content_range
.split()
99 assert tokens
[0] == "bytes"
100 start
= int(tokens
[1].split("-")[0])
101 assert start
== offset
106 doc
= html
.parse(f
, html
.HTMLParser(encoding
="utf-8", recover
=True))
110 def append_to_qs(url
, params
):
111 r
= list(urlparse
.urlsplit(url
))
112 qs
= urlparse
.parse_qs(r
[3])
114 r
[3] = urllib
.urlencode(qs
, True)
115 url
= urlparse
.urlunsplit(r
)
118 def convert_from_old_itag(player_config
):
119 url_data
= urlparse
.parse_qs(player_config
["args"]["url_encoded_fmt_stream_map"])
121 for itag_url
in url_data
["itag"]:
122 pos
= itag_url
.find("url=")
123 url_data
["url"].append(itag_url
[pos
+4:])
124 player_config
["args"]["url_encoded_fmt_stream_map"] = urllib
.urlencode(url_data
, True)
126 def get_player_config(doc
):
128 for script
in doc
.xpath("//script"):
131 for line
in script
.text
.split("\n"):
132 if "yt.playerConfig =" in line
:
135 if p1
>= 0 and p2
> 0:
136 return json
.loads(line
[p1
+1:p2
])
137 if "'PLAYER_CONFIG': " in line
:
140 player_config
= json
.loads(line
[p1
+1:])
141 convert_from_old_itag(player_config
)
144 def get_best_video(player_config
):
145 url_data
= urlparse
.parse_qs(player_config
["args"]["url_encoded_fmt_stream_map"])
146 url_data
= itertools
.izip_longest(
150 url_data
.get("sig", []),
154 best_extension
= None
155 for video_url
, mimetype
, quality
, signature
in url_data
:
156 mimetype
= mimetype
.split(";")[0]
157 if mimetype
not in MIMETYPES
:
159 extension
= MIMETYPES
[mimetype
]
160 quality
= QUALITIES
.get(quality
.split(",")[0], -1)
161 if best_quality
is None or quality
> best_quality
:
163 video_url
= append_to_qs(video_url
, {"signature": signature
})
165 best_quality
= quality
166 best_extension
= extension
168 return best_url
, best_extension
170 def sanitize_filename(filename
):
172 re
.sub("\s+", " ", filename
.strip())
178 def get_video_url(doc
):
179 unavailable
= doc
.xpath("//div[@id='unavailable-message']/text()")
181 raise VideoUnavailable(unavailable
[0].strip())
183 player_config
= get_player_config(doc
)
184 if not player_config
:
185 raise VideoUnavailable("Could not find video URL")
187 video_url
, extension
= get_best_video(player_config
)
191 title
= doc
.xpath("/html/head/title/text()")[0]
192 filename
= sanitize_filename(title
)
193 filename
+= "." + extension
195 return video_url
, filename
def write_video(filename, video_data):
    """Stream a fetched video to stdout as a CGI file-download response.

    filename   -- text name offered to the browser; it is UTF-8 encoded and
                  percent-quoted into a ``filename*=UTF-8''...`` parameter of
                  the Content-Disposition header.
    video_data -- open response object providing ``info()`` (the upstream
                  headers) and file-like reads, as returned by ``urlopen``.

    Returns nothing; the headers and the body are written to ``sys.stdout``.
    """
    httpinfo = video_data.info()
    encoded_filename = urllib.quote(filename.encode("utf-8"))
    sys.stdout.write("Content-Disposition: attachment; filename*=UTF-8''%s\r\n" % encoded_filename)

    # The upstream response may carry no Content-Length; getheader() then
    # returns None, and formatting that would emit the bogus header
    # "Content-Length: None". Only forward the header when it exists.
    content_length = httpinfo.getheader("Content-Length")
    if content_length is not None:
        sys.stdout.write("Content-Length: %s\r\n" % content_length)

    # A blank line terminates the header section; the body follows.
    sys.stdout.write("\r\n")
    shutil.copyfileobj(video_data, sys.stdout)
211 print_form(url
="http://www.youtube.com/watch?v=FOOBAR")
216 video_url
, filename
= get_video_url(doc
)
217 video_data
= urlopen(video_url
)
218 write_video(filename
, video_data
)
219 except VideoUnavailable
, e
:
222 msg
="<p class='error'>Sorry, there was an error: %s</p>" % cgi
.escape(e
.message
)
227 msg
="<p class='error'>Sorry, there was an error. Check your URL?</p>"
232 suffixes
= ["", "KiB", "MiB", "GiB"]
233 for i
, suffix
in enumerate(suffixes
):
237 return "%.2f %s" % (size
, suffix
)
239 def copy_with_progress(content_length
, infile
, outfile
):
241 sys
.stdout
.write("\33[2K\r")
242 sys
.stdout
.write("%s / %s (%s/sec)" % (
244 pp_size(content_length
),
245 pp_size(bytes_read
/ (now
- start_ts
)),
249 start_ts
= time
.time()
254 if now
- last_ts
> 0.5:
258 buf
= infile
.read(32768)
262 bytes_read
+= len(buf
)
272 print >>sys
.stderr
, "Usage: %s http://youtube.com/watch?v=FOOBAR" % sys
.argv
[0]
276 video_url
, filename
= get_video_url(doc
)
277 print "Downloading", filename
.encode("utf-8")
279 outfile
= open(filename
, "a")
280 offset
= outfile
.tell()
282 print "Resuming download from", pp_size(offset
)
287 video_data
= urlopen(video_url
, offset
)
288 except urllib2
.HTTPError
, e
:
290 print "File is complete!"
295 content_length
= int(video_data
.info().getheader("Content-Length"))
296 if total_size
is None:
297 total_size
= content_length
300 copy_with_progress(content_length
, video_data
, outfile
)
305 if outfile
.tell() != total_size
:
307 offset
= outfile
.tell()
308 if old_offset
== offset
:
310 print "Restarting download from", pp_size(offset
)
317 if __name__
== "__main__":
318 resource
.setrlimit(resource
.RLIMIT_AS
, (MAX_MEMORY_BYTES
, MAX_MEMORY_BYTES
))
319 if os
.environ
.has_key("SCRIPT_NAME"):
324 except KeyboardInterrupt: