]> code.delx.au - youtube-cgi/blob - youtube.cgi
show decimal places in copy progress
[youtube-cgi] / youtube.cgi
1 #!/usr/bin/env python
2
3 from __future__ import division
4
5 import cookielib
6 import cgi
7 import itertools
8 import json
9 from lxml import html
10 import os
11 import re
12 import resource
13 import shutil
14 import subprocess
15 import sys
16 import time
17 import urllib
18 import urllib2
19 import urlparse
20
21
# Address-space limit applied to the process at start-up (128 MiB).
MAX_MEMORY_BYTES = 128 * 1024 * 1024

# Browser identification sent with every outgoing HTTP request.
USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1"

# Supported stream MIME types mapped to the file extension used when saving.
MIMETYPES = dict([
    ("video/mp4", "mp4"),
    ("video/x-flv", "flv"),
    ("video/3gpp", "3gp"),
])

# Quality labels mapped to a rank; a larger rank means a better stream.
QUALITIES = dict(
    (label, rank)
    for rank, label in enumerate(
        ("small", "medium", "large", "hd720", "hd1080"), 1
    )
)
38
39
class VideoUnavailable(Exception):
    """Raised when a page offers no downloadable video.

    The exception argument is a human-readable reason.
    """
42
def print_form(url="", msg=""):
    """Write the download form to stdout as a complete CGI response.

    url -- text pre-filled into the URL input box.  It is treated as
           untrusted and XML-escaped before being placed in the attribute.
    msg -- XHTML fragment inserted verbatim below the page heading.  The
           caller is responsible for escaping any user data inside it.

    HTTP_HOST and REQUEST_URI are read from the environment to build the
    bookmarklet target; a KeyError is raised if either is missing.
    """
    from xml.sax.saxutils import escape

    def attr_escape(text):
        # Escape for use inside a double-quoted XML attribute value.
        return escape(text, {'"': "&quot;"})

    # REQUEST_URI includes the query string; drop it so the bookmarklet
    # does not end up with two "?url=" parts.
    script_path = os.environ["REQUEST_URI"].split("?", 1)[0]
    script_url = "http://%s%s" % (os.environ["HTTP_HOST"], script_path)
    # The script URL sits inside a single-quoted JavaScript string, so a
    # literal quote has to be percent-encoded to stay inside that string.
    script_url = script_url.replace("'", "%27")

    fields = {
        "{0}": msg,
        "{1}": attr_escape(url),
        "{2}": attr_escape(script_url),
    }
    template = """
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>delx.net.au - YouTube Scraper</title>
<link rel="stylesheet" type="text/css" href="/style.css"/>
<style type="text/css">
input[type="text"] {
width: 100%;
}
.error {
color: red;
}
</style>
</head>
<body>
<h1>delx.net.au - YouTube Scraper</h1>
{0}
<form action="" method="get">
<p>This page will let you easily download YouTube videos to watch offline. It
will automatically grab the highest quality version.</p>
<div><input type="text" name="url" value="{1}"/></div>
<div><input type="submit" value="Download!"/></div>
</form>
<p>Tip! Use this bookmarklet: <a href="javascript:(function(){window.location='{2}?url='+escape(location);})()">YouTube Download</a>
to easily download videos. Right-click the link and add it to bookmarks,
then when you're looking at a YouTube page select that bookmark from your
browser's bookmarks menu to download the video straight away.</p>
</body>
</html>
"""
    sys.stdout.write("Content-Type: application/xhtml+xml\r\n\r\n")
    # Substitute in a single pass so placeholder-like text inside an
    # already-substituted value is never expanded a second time.
    sys.stdout.write(
        re.sub(r"\{[012]\}", lambda match: fields[match.group(0)], template)
    )
77
# Shared HTTP state: one cookie jar and opener for the whole process, plus
# the most recently requested URL, which is sent as the next Referer.
cookiejar = cookielib.CookieJar()
urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
referrer = ""


def urlopen(url):
    """Open *url* through the shared cookie-aware opener.

    The request carries the configured User-Agent and, when an earlier
    request was made, that request's URL as the Referer header.  The
    module-level ``referrer`` is updated to *url* before the request is
    sent.  Returns whatever the opener's ``open`` call returns.
    """
    global referrer
    request = urllib2.Request(url, headers={"User-Agent": USER_AGENT})
    previous, referrer = referrer, url
    if previous:
        request.add_header("Referer", previous)
    return urlopener.open(request)
90
def parse_url(url):
    """Fetch *url* and return it parsed as an lxml HTML document.

    The page is decoded as UTF-8 with parser recovery enabled.  The HTTP
    response is closed even when parsing raises.
    """
    f = urlopen(url)
    try:
        return html.parse(f, html.HTMLParser(encoding="utf-8", recover=True))
    finally:
        f.close()
96
def append_to_qs(url, params):
    """Return *url* with *params* merged into its query string.

    Existing query parameters are parsed, updated with *params* (so a key
    present in both takes the value from *params*) and re-encoded; every
    other component of the URL is left as it was.
    """
    parts = urlparse.urlsplit(url)
    query = urlparse.parse_qs(parts.query)
    query.update(params)
    rebuilt = parts._replace(query=urllib.urlencode(query, True))
    return urlparse.urlunsplit(rebuilt)
104
def convert_from_old_itag(player_config):
    """Rewrite the stream map in *player_config* so it has "url" entries.

    Each "itag" value of the parsed stream map contributes one "url" value:
    the text following the first "url=" inside that itag value.  Any "url"
    values already present are replaced.  The re-encoded map is stored
    back into ``player_config["args"]``; nothing is returned.
    """
    args = player_config["args"]
    fields = urlparse.parse_qs(args["url_encoded_fmt_stream_map"])
    marker = "url="
    fields["url"] = [
        entry[entry.find(marker) + len(marker):]
        for entry in fields["itag"]
    ]
    args["url_encoded_fmt_stream_map"] = urllib.urlencode(fields, True)
112
def get_player_config(doc):
    """Extract the player configuration embedded in the page's scripts.

    Every line of every <script> element is scanned.  A line containing
    "yt.playerConfig =" wins immediately: the JSON between its first "="
    and last ";" is decoded and returned.  Lines containing
    "'PLAYER_CONFIG': " are decoded from after the first ":" and passed
    through convert_from_old_itag(); the last such config found is
    returned when no "yt.playerConfig =" line exists.  Returns None when
    neither form is present.
    """
    fallback = None
    for script in doc.xpath("//script"):
        source = script.text
        if not source:
            continue
        for line in source.split("\n"):
            if "yt.playerConfig =" in line:
                start = line.find("=")
                end = line.rfind(";")
                if start >= 0 and end > 0:
                    return json.loads(line[start + 1:end])
            if "'PLAYER_CONFIG': " in line:
                colon = line.find(":")
                if colon >= 0:
                    fallback = json.loads(line[colon + 1:])
                    convert_from_old_itag(fallback)
    return fallback
130
def get_best_video(player_config):
    """Pick the highest-quality supported stream from *player_config*.

    The "url_encoded_fmt_stream_map" argument is parsed as one flat query
    string, so the n-th "url", "type", "quality" and "sig" values are
    taken to describe the n-th stream.  Streams whose MIME type is not in
    MIMETYPES are ignored; quality labels missing from QUALITIES rank
    below every known one.  When a signature is present it is added to the
    chosen URL as the "signature" query parameter.

    Returns a ``(url, extension)`` tuple, or ``(None, None)`` when no
    usable stream exists.
    """
    url_data = urlparse.parse_qs(player_config["args"]["url_encoded_fmt_stream_map"])
    # izip_longest pads the shorter lists with None, so every field below
    # may be missing for a given stream.
    streams = itertools.izip_longest(
        url_data.get("url", []),
        url_data.get("type", []),
        url_data.get("quality", []),
        url_data.get("sig", []),
    )
    best_url = None
    best_quality = None
    best_extension = None
    for video_url, mimetype, quality, signature in streams:
        # A stream without a URL or a type cannot be downloaded or named.
        if not video_url or not mimetype:
            continue
        # The type may carry parameters after ";" (only the part before
        # the first ";" is matched against MIMETYPES).
        extension = MIMETYPES.get(mimetype.split(";")[0])
        if extension is None:
            continue
        if quality:
            rank = QUALITIES.get(quality.split(",")[0], -1)
        else:
            rank = -1
        if best_quality is None or rank > best_quality:
            if signature:
                video_url = append_to_qs(video_url, {"signature": signature})
            best_url = video_url
            best_quality = rank
            best_extension = extension

    return best_url, best_extension
156
def sanitize_filename(filename):
    """Return *filename* made safe to use as a single path component.

    NUL bytes become spaces and both slash characters become "-"; after
    that, every run of whitespace collapses to one space and leading and
    trailing whitespace is removed.
    """
    # Replace NUL before normalising whitespace so the spaces it
    # introduces are collapsed and stripped like any others.
    cleaned = (
        filename
        .replace("\0", " ")
        .replace("\\", "-")
        .replace("/", "-")
    )
    return re.sub(r"\s+", " ", cleaned).strip()
164
def get_video_url(doc):
    """Work out the download URL and target filename for a parsed page.

    Raises VideoUnavailable when the page contains an
    "unavailable-message" notice or when no player configuration can be
    found.  Returns ``(None, None)`` when the configuration lists no
    usable stream; otherwise returns ``(video_url, filename)``, where the
    filename is the sanitized page title plus the stream's extension.
    """
    notices = doc.xpath("//div[@id='unavailable-message']/text()")
    if notices:
        raise VideoUnavailable(notices[0].strip())

    config = get_player_config(doc)
    if not config:
        raise VideoUnavailable("Could not find video URL")

    best_url, ext = get_best_video(config)
    if not best_url:
        return None, None

    page_title = doc.xpath("/html/head/title/text()")[0]
    return best_url, sanitize_filename(page_title) + "." + ext
183
def write_video(filename, video_data):
    """Stream *video_data* to stdout as a CGI file-download response.

    filename   -- suggested download name; it is UTF-8 encoded and
                  percent-quoted into the Content-Disposition header.
    video_data -- open response object providing ``info()``, ``read()``
                  and ``close()``.

    A Content-Length header is forwarded only when the upstream response
    supplied one.  *video_data* is closed even if writing fails.
    """
    try:
        content_length = video_data.info().getheader("Content-Length")
        encoded_filename = urllib.quote(filename.encode("utf-8"))
        sys.stdout.write("Content-Disposition: attachment; filename*=UTF-8''%s\r\n" % encoded_filename)
        # Without this check a missing upstream header would be sent to
        # the client as the literal text "None".
        if content_length is not None:
            sys.stdout.write("Content-Length: %s\r\n" % content_length)
        sys.stdout.write("\r\n")
        shutil.copyfileobj(video_data, sys.stdout)
    finally:
        video_data.close()
192
def _is_http_url(url):
    """Return True when *url* uses the http or https scheme."""
    return urlparse.urlsplit(url).scheme in ("http", "https")


def cgimain():
    """Handle one CGI request.

    Without a "url" query parameter the empty form is shown.  Otherwise
    the page at that URL is fetched and its best video is streamed back
    as a download.  Failures are reported by re-rendering the form with
    an error message; VideoUnavailable errors include their reason.
    """
    args = cgi.parse()
    try:
        url = args["url"][0]
    except (KeyError, IndexError):
        print_form(url="http://www.youtube.com/watch?v=FOOBAR")
        return

    try:
        # Both the submitted URL and the stream URL scraped from the page
        # come from outside.  urllib2's default opener also handles
        # file:// and ftp://, so anything but http(s) is refused before
        # it is opened.
        if not _is_http_url(url):
            raise ValueError("unsupported URL scheme")
        doc = parse_url(url)
        video_url, filename = get_video_url(doc)
        if not _is_http_url(video_url):
            raise ValueError("unsupported video URL scheme")
        video_data = urlopen(video_url)
        write_video(filename, video_data)
    except VideoUnavailable as e:
        reason = e.args[0] if e.args else ""
        print_form(
            url=url,
            msg="<p class='error'>Sorry, there was an error: %s</p>" % cgi.escape(reason)
        )
    except Exception:
        # Top-level boundary: any other failure becomes a generic message.
        print_form(
            url=url,
            msg="<p class='error'>Sorry, there was an error. Check your URL?</p>"
        )
217
def copy_with_progress(total_size, infile, outfile):
    """Copy *infile* to *outfile* while drawing a progress line on stdout.

    total_size -- expected number of bytes; used only for display.
    infile     -- object with ``read(size)``; copying stops at the first
                  empty read.
    outfile    -- object with ``write(data)``.

    The progress line is redrawn at most every half second, once more
    when the copy completes, and is then terminated with a newline.
    """
    def pp_size(size):
        # Scale into the largest listed unit without dividing past it,
        # so sizes of 1024 GiB and above are still reported in GiB.
        suffixes = ["", "KiB", "MiB", "GiB"]
        for suffix in suffixes[:-1]:
            if size < 1024:
                break
            size /= 1024.0
        else:
            suffix = suffixes[-1]
        return "%.2f %s" % (size, suffix)

    def show_progress(now):
        elapsed = now - start_ts
        # No time may have passed yet (first update, coarse clock).
        rate = bytes_read / elapsed if elapsed > 0 else 0
        sys.stdout.write("\33[2K\r")
        sys.stdout.write("%s / %s (%s/sec)" % (
            pp_size(bytes_read),
            pp_size(total_size),
            pp_size(rate),
        ))
        sys.stdout.flush()

    start_ts = time.time()
    last_ts = 0
    bytes_read = 0
    while True:
        now = time.time()
        if now - last_ts > 0.5:
            last_ts = now
            show_progress(now)

        buf = infile.read(32768)
        if not buf:
            break
        outfile.write(buf)
        bytes_read += len(buf)

    # Show the final totals and leave the cursor on a fresh line.
    show_progress(time.time())
    sys.stdout.write("\n")
    sys.stdout.flush()
247
def main():
    """Command-line entry point: download the video at ``sys.argv[1]``.

    The video is saved in the current directory under a name derived
    from the page title.  Exits with status 1 when no URL argument is
    given or when the page lists no usable stream.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s http://youtube.com/watch?v=FOOBAR\n" % sys.argv[0])
        sys.exit(1)
    url = sys.argv[1]

    doc = parse_url(url)
    video_url, filename = get_video_url(doc)
    if not video_url:
        sys.stderr.write("Could not find a downloadable video\n")
        sys.exit(1)

    video_data = urlopen(video_url)
    try:
        # The length is only used for the progress display; fall back to
        # 0 when the server does not report one.
        content_length = video_data.info().getheader("Content-Length")
        total_size = int(content_length) if content_length else 0
        print("Downloading %s" % filename.encode("utf-8"))
        # Binary mode: the payload is video data, not text.
        with open(filename, "wb") as outfile:
            copy_with_progress(total_size, video_data, outfile)
    finally:
        video_data.close()
263
264
if __name__ == "__main__":
    # Cap the address space so a runaway parse or download cannot exhaust
    # the host's memory.
    resource.setrlimit(resource.RLIMIT_AS, (MAX_MEMORY_BYTES, MAX_MEMORY_BYTES))
    # SCRIPT_NAME in the environment means we were started as a CGI
    # script; otherwise behave as a command-line tool.
    if "SCRIPT_NAME" in os.environ:
        cgimain()
    else:
        try:
            main()
        except KeyboardInterrupt:
            print("\nExiting...")
            sys.exit(1)
275