]> code.delx.au - youtube-cgi/blob - youtube.cgi
more convenient startup of node.js
[youtube-cgi] / youtube.cgi
1 #!/usr/bin/env python
2
3 from __future__ import division
4
5 import cookielib
6 import cgi
7 import json
8 from lxml import html
9 import os
10 import re
11 import resource
12 import shutil
13 import subprocess
14 import sys
15 import time
16 import urllib
17 import urllib2
18 import urlparse
19
20
# Address-space ceiling (128 MiB) referenced by the currently commented-out
# resource.setrlimit() call in the script entry point.
MAX_MEMORY_BYTES = 128 * 1024 * 1024

# Browser identity sent with every outgoing HTTP request.
USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1"

# Stream MIME type -> file extension; streams of any other type are skipped.
MIMETYPES = dict([
    ("video/mp4", "mp4"),
    ("video/x-flv", "flv"),
    ("video/3gpp", "3gp"),
])

# Stream quality label -> rank (higher is better); unknown labels are skipped.
QUALITIES = dict(zip(
    ("hd1080", "hd720", "large", "medium", "small"),
    (5, 4, 3, 2, 1),
))
37
38
class VideoUnavailable(Exception):
    """Raised when no downloadable video can be located for a page.

    The exception message is user-facing: the CGI front end echoes it back
    in the error paragraph of the form.
    """
41
def _escape_markup(text):
    """Escape *text* for XHTML character data or a double-quoted attribute."""
    return (
        text.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace('"', "&quot;")
    )


def print_form(url="", msg=""):
    """Write the CGI response containing the download form to stdout.

    url -- value pre-filled in the text box. It is escaped here, so callers
           pass the raw URL.
    msg -- trusted XHTML fragment inserted above the form (callers are
           responsible for escaping any untrusted text inside it).

    Reads HTTP_HOST and REQUEST_URI from the environment to build the
    bookmarklet link; raises KeyError if either is missing.
    """
    # REQUEST_URI includes the query string; keep only the path so the
    # bookmarklet does not end up as "...?url=X?url=".
    script_path = os.environ["REQUEST_URI"].split("?", 1)[0]
    script_url = "http://%s%s" % (os.environ["HTTP_HOST"], script_path)
    # The URL sits inside a single-quoted JavaScript string in the href, so
    # neutralise the characters that could terminate or escape that string.
    script_url = script_url.replace("\\", "%5C").replace("'", "%27")

    template = """
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>delx.net.au - YouTube Scraper</title>
<link rel="stylesheet" type="text/css" href="/style.css"/>
<style type="text/css">
input[type="text"] {
width: 100%;
}
.error {
color: red;
}
</style>
</head>
<body>
<h1>delx.net.au - YouTube Scraper</h1>
{0}
<form action="" method="get">
<p>This page will let you easily download YouTube videos to watch offline. It
will automatically grab the highest quality version.</p>
<div><input type="text" name="url" value="{1}"/></div>
<div><input type="submit" value="Download!"/></div>
</form>
<p>Tip! Use this bookmarklet: <a href="javascript:(function(){window.location='{2}?url='+escape(location);})()">YouTube Download</a>
to easily download videos. Right-click the link and add it to bookmarks,
then when you're looking at a YouTube page select that bookmark from your
browser's bookmarks menu to download the video straight away.</p>
</body>
</html>
"""
    # The page is served as application/xhtml+xml, so a raw "&" or quote in
    # a reflected value would make the document ill-formed (or inject markup).
    values = {
        "{0}": msg,
        "{1}": _escape_markup(url),
        "{2}": _escape_markup(script_url),
    }

    sys.stdout.write("Content-Type: application/xhtml+xml\r\n\r\n")
    # Single-pass substitution: placeholders that happen to occur inside an
    # inserted value are not expanded a second time.
    sys.stdout.write(
        re.sub(r"\{[012]\}", lambda match: values[match.group(0)], template)
    )
76
# Module-wide HTTP state shared by every request made through urlopen():
# the URL of the previous request (sent as the next Referer header) ...
referrer = ""
# ... and a cookie jar wired into the opener so cookies persist across calls.
cookiejar = cookielib.CookieJar()
urlopener = urllib2.build_opener(
    urllib2.HTTPCookieProcessor(cookiejar)
)
80
def urlopen(url, offset=None):
    """Open *url* with the shared opener and return the response object.

    The request carries the configured User-Agent, a Referer header set to
    the previously requested URL (tracked in the module-level ``referrer``),
    and, when *offset* is a non-zero byte position, a ``Range`` header asking
    for the data from that position onward.

    Raises IOError when a ranged request is not answered with a matching
    ``Content-Range``; errors raised by the opener itself propagate unchanged.
    """
    global referrer
    req = urllib2.Request(url)
    if referrer:
        req.add_header("Referer", referrer)
    referrer = url

    req.add_header("User-Agent", USER_AGENT)

    if offset:
        req.add_header("Range", "bytes=%d-" % offset)

    res = urlopener.open(req)

    content_range = res.info().getheader("Content-Range")
    if content_range:
        # Expected shape: "bytes <start>-<end>/<total>".
        try:
            unit, byte_range = content_range.split()[:2]
            start = int(byte_range.split("-")[0])
        except ValueError:
            unit, start = None, None
        # Explicit check instead of assert: asserts vanish under "python -O".
        if unit != "bytes" or start != (offset or 0):
            res.close()
            raise IOError(
                "Unexpected Content-Range %r for offset %r"
                % (content_range, offset)
            )
    elif offset:
        # The server ignored the Range header and is sending the whole body;
        # appending that to a partial file would corrupt it.
        res.close()
        raise IOError("Server did not honour the Range request")
    return res
102
def parse_url(url):
    """Fetch *url* and return it parsed as an lxml HTML document.

    The page is decoded as UTF-8 with parser recovery enabled.
    """
    f = urlopen(url)
    try:
        return html.parse(f, html.HTMLParser(encoding="utf-8", recover=True))
    finally:
        # Release the connection even when parsing raises.
        f.close()
108
def append_to_qs(url, params):
    """Return *url* with *params* merged into its query string.

    params -- mapping of parameter name to value (or list of values); a name
              already present in the URL is overridden.

    The query string is re-encoded, so parameter order is not guaranteed to
    be preserved.
    """
    parts = list(urlparse.urlsplit(url))
    # keep_blank_values: without it, existing parameters such as "x=" would
    # silently disappear from the rebuilt URL.
    query = urlparse.parse_qs(parts[3], keep_blank_values=True)
    query.update(params)
    parts[3] = urllib.urlencode(query, True)
    return urlparse.urlunsplit(parts)
116
def convert_from_old_itag(player_config):
    """Rewrite an old-style stream map in place so it carries "url" fields.

    In the old layout each "itag" value embeds the stream address after a
    "url=" marker.  The text following that marker is collected into a new
    "url" list and the map is re-encoded back into *player_config*.
    """
    args = player_config["args"]
    fields = urlparse.parse_qs(args["url_encoded_fmt_stream_map"])
    marker = "url="
    fields["url"] = [
        entry[entry.find(marker) + len(marker):]
        for entry in fields["itag"]
    ]
    args["url_encoded_fmt_stream_map"] = urllib.urlencode(fields, True)
124
def get_player_config(doc):
    """Extract the player configuration embedded in a page's inline scripts.

    Every line of every <script> element is checked for one of three known
    assignments ("yt.playerConfig =", "ytplayer.config =" or the older
    "'PLAYER_CONFIG': " entry).  The JSON that follows the first match is
    decoded and returned; old-style configs are first normalised with
    convert_from_old_itag().

    Returns the decoded dict, or None when no script carries a config.
    Raises ValueError if the matched text is not valid JSON.
    """
    for script in doc.xpath("//script"):
        if not script.text:
            continue
        for line in script.text.split("\n"):
            # Assignment forms: the JSON runs from the marker to the last ";".
            # Slicing from the end of the marker (rather than a hard-coded
            # offset or the first "=" in the line) tolerates other statements
            # earlier on the line and a missing space after "=".
            for marker in ("yt.playerConfig =", "ytplayer.config ="):
                start = line.find(marker)
                if start < 0:
                    continue
                start += len(marker)
                end = line.rfind(";")
                if end > start:
                    return json.loads(line[start:end])

            # Old object-literal form: the JSON runs to the end of the line.
            marker = "'PLAYER_CONFIG': "
            start = line.find(marker)
            if start >= 0:
                player_config = json.loads(line[start + len(marker):])
                convert_from_old_itag(player_config)
                return player_config
    return None
147
def decode_signature(js_url, s):
    """Decode the scrambled signature *s* using the player's own JavaScript.

    The player script at *js_url* is downloaded, the function it applies to
    ``<obj>.s`` is located by pattern and cut out (up to its first closing
    brace), and that function is run on *s* by the external ``js``
    interpreter.  Whatever the interpreter prints is returned, stripped.

    Raises Exception when the function cannot be located or when ``js``
    exits with a non-zero status.
    """
    res = urlopen(js_url)
    try:
        script = res.read()
    finally:
        res.close()

    match = re.search(R"\b([a-z]+)\([a-z]+\.s\);", script)
    if match is None:
        raise Exception("Could not find the signature function call")
    func_name = match.group(1)

    p1 = script.find("function " + func_name)
    p2 = script.find("}", p1) if p1 >= 0 else -1
    if p2 < 0:
        raise Exception("Could not find the body of function %s" % func_name)
    func_block = script[p1:p2+1]

    # json.dumps yields a properly quoted and escaped string literal, so the
    # scraped value cannot break out of the string and run as code.
    program = "%s\nconsole.log(%s(%s));\n" % (
        func_block, func_name, json.dumps(s),
    )

    # Argument list instead of shell=True: no shell is needed to start "js".
    p = subprocess.Popen(
        ["js"],
        close_fds=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )
    # communicate() feeds stdin and drains stdout together, avoiding the
    # pipe deadlock that sequential write/read can hit.
    output, _ = p.communicate(program)
    if p.returncode != 0:
        raise Exception("js failed to execute: %d" % p.returncode)

    return output.strip()
171
def get_best_video(player_config):
    """Pick the highest-quality supported stream from *player_config*.

    Each comma-separated entry of ``args.url_encoded_fmt_stream_map`` is a
    query string describing one stream.  Entries whose quality is not in
    QUALITIES or whose MIME type is not in MIMETYPES are ignored; among the
    rest the highest rank wins, a later entry beating an earlier one of
    equal rank.

    Returns (url, extension) with the stream signature appended to the URL,
    or (None, None) when no entry qualifies.
    """
    url_data_list = player_config["args"]["url_encoded_fmt_stream_map"].split(",")

    best_data = None
    best_quality = None
    best_extension = None
    for raw_entry in url_data_list:
        url_data = urlparse.parse_qs(raw_entry)
        # Entries lacking either field simply fail the membership tests.
        mimetype = url_data.get("type", [""])[0].split(";")[0]
        quality = url_data.get("quality", [""])[0]

        if quality not in QUALITIES or mimetype not in MIMETYPES:
            continue

        rank = QUALITIES[quality]
        if best_quality is not None and rank < best_quality:
            continue

        best_data = url_data
        best_quality = rank
        best_extension = MIMETYPES[mimetype]

    if best_data is None:
        return None, None

    # Resolve the signature only for the winner: decoding one means fetching
    # the player script and spawning an interpreter.
    video_url = best_data["url"][0]
    if "sig" in best_data:
        signature = best_data["sig"][0]
    else:
        js_url = player_config["assets"]["js"]
        signature = decode_signature(js_url, best_data["s"][0])
    video_url = append_to_qs(video_url, {"signature": signature})

    return video_url, best_extension
207
def sanitize_filename(filename):
    """Return *filename* made safe to use as a single path component.

    Surrounding whitespace is removed, inner whitespace runs collapse to one
    space, path separators ("/" and "\\") become "-" and NUL becomes a space.
    """
    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    cleaned = re.sub(r"\s+", " ", filename.strip())
    for unsafe, replacement in (("\\", "-"), ("/", "-"), ("\0", " ")):
        cleaned = cleaned.replace(unsafe, replacement)
    return cleaned
215
def get_video_url(doc):
    """Work out the download URL and target filename for a parsed page.

    Returns (video_url, filename), where filename is the sanitised page
    title plus the stream's extension, or (None, None) when the player
    config lists no supported stream.

    Raises VideoUnavailable when the page shows an "unavailable" message or
    carries no player configuration.
    """
    unavailable = doc.xpath("//div[@id='unavailable-message']/text()")
    if unavailable:
        raise VideoUnavailable(unavailable[0].strip())

    player_config = get_player_config(doc)
    if not player_config:
        raise VideoUnavailable("Could not find video URL")

    video_url, extension = get_best_video(player_config)
    if not video_url:
        return None, None

    # A page without a <title> (or with a blank one) must not abort the
    # download or produce a bare ".ext" name; fall back to a generic name.
    titles = doc.xpath("/html/head/title/text()")
    title = titles[0] if titles else ""
    filename = sanitize_filename(title) or "video"
    filename += "." + extension

    return video_url, filename
234
def write_video(filename, video_data):
    """Stream *video_data* to stdout as a CGI file-download response.

    filename   -- name offered to the browser, sent UTF-8 percent-encoded in
                  the Content-Disposition header.
    video_data -- open HTTP response; its Content-Length is forwarded when
                  present, and it is always closed before returning.
    """
    try:
        httpinfo = video_data.info()
        encoded_filename = urllib.quote(filename.encode("utf-8"))
        sys.stdout.write("Content-Disposition: attachment; filename*=UTF-8''%s\r\n" % encoded_filename)
        content_length = httpinfo.getheader("Content-Length")
        # Upstream may omit the length; "Content-Length: None" would be an
        # invalid header, so leave it out in that case.
        if content_length is not None:
            sys.stdout.write("Content-Length: %s\r\n" % content_length)
        sys.stdout.write("\r\n")
        shutil.copyfileobj(video_data, sys.stdout)
    finally:
        video_data.close()
243
def cgimain():
    """CGI entry point: serve the form, or stream the requested video.

    Without a ``url`` query parameter the empty form is shown.  Otherwise the
    page is fetched and the best stream is sent as a download; any failure
    re-displays the form with an error message.
    """
    args = cgi.parse()
    try:
        url = args["url"][0]
    except (KeyError, IndexError):
        print_form(url="http://www.youtube.com/watch?v=FOOBAR")
        return

    try:
        doc = parse_url(url)
        video_url, filename = get_video_url(doc)
        if video_url is None:
            # get_video_url() signals "no supported stream" with (None, None).
            raise VideoUnavailable("No downloadable format was found")
        video_data = urlopen(video_url)
        write_video(filename, video_data)
    except VideoUnavailable as e:
        # e.args[0] rather than the deprecated e.message attribute.
        reason = e.args[0] if e.args else ""
        print_form(
            url=url,
            msg="<p class='error'>Sorry, there was an error: %s</p>" % cgi.escape(reason)
        )
    except Exception as e:
        # Leave a trace on stderr so the failure is not swallowed silently;
        # the visitor only sees the generic message.
        sys.stderr.write("youtube.cgi: %r\n" % (e,))
        print_form(
            url=url,
            msg="<p class='error'>Sorry, there was an error. Check your URL?</p>"
        )
    return
268
def pp_size(size):
    """Format a byte count (or byte rate) with a binary-unit suffix.

    The value is scaled by 1024 until it drops below 1024 or the largest
    unit (GiB) is reached, then rendered with two decimals.  Plain byte
    counts get an empty suffix, e.g. "512.00 ".
    """
    suffixes = ["", "KiB", "MiB", "GiB"]
    index = 0
    # Stop dividing at the last suffix so huge values stay correctly
    # labelled (1024**4 bytes is "1024.00 GiB", not "1.00 GiB").
    while size >= 1024 and index < len(suffixes) - 1:
        size /= 1024.0
        index += 1
    return "%.2f %s" % (size, suffixes[index])
276
def copy_with_progress(content_length, infile, outfile):
    """Copy *infile* to *outfile* in 32 KiB chunks with a live status line.

    The status line on stdout shows bytes copied so far, *content_length*
    and the transfer rate over the current reporting window.  It is redrawn
    at most every half second, once more when the input is exhausted, and
    then terminated with a newline.
    """
    def show_status(timestamp, window_start, window_bytes, copied):
        # Rate covers only the bytes seen since the previous redraw.
        if timestamp != window_start:
            speed = window_bytes / (timestamp - window_start)
        else:
            speed = 0
        # ANSI "erase line" followed by carriage return: redraw in place.
        sys.stdout.write("\33[2K\r")
        sys.stdout.write("%s / %s (%s/sec)" % (
            pp_size(copied),
            pp_size(content_length),
            pp_size(speed),
        ))
        sys.stdout.flush()

    window_start = 0
    window_bytes = 0
    copied = 0
    while True:
        now = time.time()
        if now - window_start > 0.5:
            show_status(now, window_start, window_bytes, copied)
            window_start = now
            window_bytes = 0

        chunk = infile.read(32768)
        if not chunk:
            break
        outfile.write(chunk)
        chunk_size = len(chunk)
        window_bytes += chunk_size
        copied += chunk_size

    # Final totals, then move off the status line.
    show_status(now, window_start, window_bytes, copied)
    sys.stdout.write("\n")
310
def main():
    """Command-line entry point: download the video named in sys.argv[1].

    The file is written to the current directory under the sanitised page
    title.  An existing partial file is resumed from its current size, and
    an interrupted transfer is retried from the last byte written until the
    file is complete.
    """
    try:
        url = sys.argv[1]
    except IndexError:
        sys.stderr.write("Usage: %s http://youtube.com/watch?v=FOOBAR\n" % sys.argv[0])
        sys.exit(1)

    doc = parse_url(url)
    video_url, filename = get_video_url(doc)
    if video_url is None:
        # get_video_url() signals "no supported stream" with (None, None).
        sys.stderr.write("No downloadable format was found\n")
        sys.exit(1)
    print("Downloading %s" % filename.encode("utf-8"))

    # Binary append: the payload is raw video data, and an existing partial
    # download is continued rather than overwritten.
    outfile = open(filename, "ab")
    try:
        # Make the position explicit; the initial position of an
        # append-mode file is not something to rely on.
        outfile.seek(0, os.SEEK_END)
        offset = outfile.tell()
        if offset > 0:
            print("Resuming download from %s" % pp_size(offset))
        total_size = None

        while True:
            try:
                video_data = urlopen(video_url, offset)
            except urllib2.HTTPError as e:
                # 416: the requested range starts at or past the end of the
                # resource, i.e. there is nothing left to fetch.
                if e.code == 416:
                    print("File is complete!")
                    break
                raise

            try:
                content_length = int(video_data.info().getheader("Content-Length"))
                if total_size is None:
                    # A ranged response reports only the remaining bytes, so
                    # add what is already on disk to get the full size.
                    total_size = offset + content_length

                try:
                    copy_with_progress(content_length, video_data, outfile)
                except IOError:
                    # Transfer broke mid-stream: finish the status line and
                    # fall through to the retry logic below.
                    print("")
            finally:
                video_data.close()

            if outfile.tell() == total_size:
                break

            old_offset = offset
            offset = outfile.tell()
            if old_offset == offset:
                # No progress at all on this attempt; back off briefly.
                time.sleep(1)
            print("Restarting download from %s" % pp_size(offset))
    finally:
        outfile.close()
358
359
if __name__ == "__main__":
    # The address-space limit is currently disabled:
    ### resource.setrlimit(resource.RLIMIT_AS, (MAX_MEMORY_BYTES, MAX_MEMORY_BYTES))
    # A web server running this as CGI sets SCRIPT_NAME; otherwise the
    # script was started from a shell.  "in" replaces the deprecated
    # dict.has_key().
    if "SCRIPT_NAME" in os.environ:
        cgimain()
    else:
        try:
            main()
        except KeyboardInterrupt:
            print("\nExiting...")
            sys.exit(1)
370