]>
code.delx.au - youtube-cgi/blob - youtube.cgi
3 from __future__
import division
# 128 MiB, expressed in bytes. The only visible use is the commented-out
# resource.setrlimit(RLIMIT_AS, ...) call in the __main__ section.
MAX_MEMORY_BYTES = 128 * 1024*1024
# Browser-like identification string; urlopen() sends it as the
# "User-Agent" header on the requests it builds.
USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1"
39 class VideoUnavailable(Exception):
42 def print_form(url
="", msg
=""):
43 script_url
= "http://%s%s" % (os
.environ
["HTTP_HOST"], os
.environ
["REQUEST_URI"])
44 sys
.stdout
.write("Content-Type: application/xhtml+xml\r\n\r\n")
46 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
47 <html xmlns="http://www.w3.org/1999/xhtml">
49 <title>delx.net.au - YouTube Scraper</title>
50 <link rel="stylesheet" type="text/css" href="/style.css"/>
51 <style type="text/css">
61 <h1>delx.net.au - YouTube Scraper</h1>
63 <form action="" method="get">
64 <p>This page will let you easily download YouTube videos to watch offline. It
65 will automatically grab the highest quality version.</p>
66 <div><input type="text" name="url" value="{1}"/></div>
67 <div><input type="submit" value="Download!"/></div>
69 <p>Tip! Use this bookmarklet: <a href="javascript:(function(){window.location='{2}?url='+escape(location);})()">YouTube Download</a>
70 to easily download videos. Right-click the link and add it to bookmarks,
71 then when you're looking at a YouTube page select that bookmark from your
72 browser's bookmarks menu to download the video straight away.</p>
75 """.replace("{0}", msg
).replace("{1}", url
).replace("{2}", script_url
))
# Module-level cookie store shared by every request made through urlopener.
cookiejar = cookielib.CookieJar()
# Shared opener with cookie handling attached to cookiejar; urlopen() issues
# its requests through this object (urlopener.open(req)).
urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
81 def urlopen(url
, offset
=None):
82 if url
.startswith("//"):
84 if not url
.startswith("http://") and not url
.startswith("https://"):
85 url
= "https://www.youtube.com" + url
88 req
= urllib2
.Request(url
)
92 req
.add_header("Referer", referrer
)
94 req
.add_header("User-Agent", USER_AGENT
)
97 req
.add_header("Range", "bytes=%d-" % offset
)
99 res
= urlopener
.open(req
)
101 content_range
= res
.info().getheader("Content-Range")
103 tokens
= content_range
.split()
104 assert tokens
[0] == "bytes"
105 start
= int(tokens
[1].split("-")[0])
106 assert start
== offset
111 doc
= html
.parse(f
, html
.HTMLParser(encoding
="utf-8", recover
=True))
115 def append_to_qs(url
, params
):
116 r
= list(urlparse
.urlsplit(url
))
117 qs
= urlparse
.parse_qs(r
[3])
119 r
[3] = urllib
.urlencode(qs
, True)
120 url
= urlparse
.urlunsplit(r
)
123 def get_player_config(doc
):
125 for script
in doc
.xpath("//script"):
128 for line
in script
.text
.split("\n"):
129 s
= "ytplayer.config = {"
131 p1
= line
.find(s
) + len(s
) - 1
132 p2
= line
.find("};", p1
) + 1
133 if p1
>= 0 and p2
> 0:
134 return json
.loads(line
[p1
:p2
])
def extract_js(script):
    """Return the body of a player script with its fixed wrapper removed.

    The script text is expected to begin with PREFIX and end with SUFFIX
    (both defined below, including SUFFIX's trailing newline). Everything
    between the two is returned unchanged.

    script: the full JavaScript source as a string.

    Raises AssertionError if the script does not start with PREFIX or does
    not end with SUFFIX. The exception type is the one the previous
    assert-based checks produced, so existing handlers keep working.
    """
    PREFIX = "var _yt_player={};(function(g){var window=this;"
    SUFFIX = ";})(_yt_player);\n"

    # Explicit raises instead of assert statements: asserts are removed when
    # Python runs with -O, which would let a script of unexpected shape be
    # sliced blindly and passed on as garbage.
    if not script.startswith(PREFIX):
        raise AssertionError(
            "player script does not start with the expected prefix")
    if not script.endswith(SUFFIX):
        raise AssertionError(
            "player script does not end with the expected suffix")

    return script[len(PREFIX):-len(SUFFIX)]
144 def find_func_name(script
):
145 FUNC_NAME
= R
"([a-zA-Z0-9$]+)"
146 FUNC_PARAMS
= R
"(\([a-zA-Z]+\.s\))"
147 TERMINATOR
= R
"[,;\)]"
148 PATTERN
= FUNC_NAME
+ FUNC_PARAMS
+ TERMINATOR
150 match
= re
.search(PATTERN
, script
)
151 func_name
= match
.groups()[0]
154 def decode_signature(js_url
, signature
):
155 script
= urlopen(js_url
).read()
156 func_name
= find_func_name(script
)
159 "func_name": func_name
,
160 "signature": json
.dumps(signature
),
161 "code": json
.dumps(extract_js(script
)),
163 p
= subprocess
.Popen(
167 stdin
=subprocess
.PIPE
,
168 stdout
=subprocess
.PIPE
170 js_decode_script
= ("""
171 var vm = require('vm');
180 pushState: function(){}
186 signature: %(signature)s,
187 transformed_signature: null,
188 g: function(){} // this is _yt_player
190 sandbox.window = sandbox;
192 var code_string = %(code)s + ';';
193 var exec_string = 'transformed_signature = %(func_name)s(signature);';
194 vm.runInNewContext(code_string + exec_string, sandbox);
196 console.log(sandbox.transformed_signature);
199 p
.stdin
.write(js_decode_script
)
202 transformed_signature
= p
.stdout
.read().strip()
204 raise Exception("js failed to execute: %d" % p
.returncode
)
206 return transformed_signature
208 def get_best_video(player_config
):
209 url_data_list
= player_config
["args"]["url_encoded_fmt_stream_map"].split(",")
210 js_url
= player_config
["assets"]["js"]
214 best_extension
= None
215 for url_data
in url_data_list
:
216 url_data
= urlparse
.parse_qs(url_data
)
217 mimetype
= url_data
["type"][0].split(";")[0]
218 quality
= url_data
["quality"][0]
220 if url_data
.has_key("stereo3d"):
222 if quality
not in QUALITIES
:
224 if mimetype
not in MIMETYPES
:
227 extension
= MIMETYPES
[mimetype
]
228 quality
= QUALITIES
.get(quality
, -1)
230 if best_quality
is not None and quality
< best_quality
:
233 video_url
= url_data
["url"][0]
234 if "sig" in url_data
:
235 signature
= url_data
["sig"][0]
236 elif "s" in url_data
:
237 signature
= decode_signature(js_url
, url_data
["s"][0])
242 video_url
= append_to_qs(video_url
, {"signature": signature
})
245 best_quality
= quality
246 best_extension
= extension
248 return best_url
, best_extension
250 def sanitize_filename(filename
):
252 re
.sub("\s+", " ", filename
.strip())
258 def get_video_url(doc
):
259 unavailable
= doc
.xpath("//div[@id='unavailable-message']/text()")
261 raise VideoUnavailable(unavailable
[0].strip())
263 player_config
= get_player_config(doc
)
264 if not player_config
:
265 raise VideoUnavailable("Could not find video URL")
267 video_url
, extension
= get_best_video(player_config
)
271 title
= doc
.xpath("/html/head/title/text()")[0]
272 filename
= sanitize_filename(title
)
273 filename
+= "." + extension
275 return video_url
, filename
def write_video(filename, video_data):
    """Write a download response for the video to stdout.

    Emits a Content-Disposition header carrying the file name, forwards the
    upstream Content-Length when there is one, ends the header section with
    a blank line and then copies the body of video_data to stdout.

    filename: name to offer for the download; it is UTF-8 encoded and
        percent-quoted into the filename*=UTF-8''... parameter.
    video_data: response object providing info() (whose result offers
        getheader()) and file-like reads, e.g. what urlopen() returns.
    """
    httpinfo = video_data.info()
    encoded_filename = urllib.quote(filename.encode("utf-8"))
    sys.stdout.write(
        "Content-Disposition: attachment; filename*=UTF-8''%s\r\n"
        % encoded_filename)

    # getheader() yields None when the upstream response has no
    # Content-Length; formatting that would send the literal header
    # "Content-Length: None", so the header is left out instead.
    content_length = httpinfo.getheader("Content-Length")
    if content_length is not None:
        sys.stdout.write("Content-Length: %s\r\n" % content_length)

    sys.stdout.write("\r\n")
    shutil.copyfileobj(video_data, sys.stdout)
291 print_form(url
="http://www.youtube.com/watch?v=FOOBAR")
296 video_url
, filename
= get_video_url(doc
)
297 video_data
= urlopen(video_url
)
298 write_video(filename
, video_data
)
299 except VideoUnavailable
, e
:
302 msg
="<p class='error'>Sorry, there was an error: %s</p>" % cgi
.escape(e
.message
)
307 msg
="<p class='error'>Sorry, there was an error. Check your URL?</p>"
312 suffixes
= ["", "KiB", "MiB", "GiB"]
313 for i
, suffix
in enumerate(suffixes
):
317 return "%.2f %s" % (size
, suffix
)
319 def copy_with_progress(content_length
, infile
, outfile
):
323 rate
= last_bytes_read
/ (now
- last_ts
)
324 sys
.stdout
.write("\33[2K\r")
325 sys
.stdout
.write("%s / %s (%s/sec)" % (
327 pp_size(content_length
),
337 if now
- last_ts
> 0.5:
342 buf
= infile
.read(32768)
346 last_bytes_read
+= len(buf
)
347 bytes_read
+= len(buf
)
357 print >>sys
.stderr
, "Usage: %s http://youtube.com/watch?v=FOOBAR" % sys
.argv
[0]
361 video_url
, filename
= get_video_url(doc
)
362 print "Downloading", filename
.encode("utf-8")
364 outfile
= open(filename
, "a")
365 offset
= outfile
.tell()
367 print "Resuming download from", pp_size(offset
)
372 video_data
= urlopen(video_url
, offset
)
373 except urllib2
.HTTPError
, e
:
375 print "File is complete!"
380 content_length
= int(video_data
.info().getheader("Content-Length"))
381 if total_size
is None:
382 total_size
= content_length
385 copy_with_progress(content_length
, video_data
, outfile
)
390 if outfile
.tell() != total_size
:
392 offset
= outfile
.tell()
393 if old_offset
== offset
:
395 print "Restarting download from", pp_size(offset
)
402 if __name__
== "__main__":
403 ### resource.setrlimit(resource.RLIMIT_AS, (MAX_MEMORY_BYTES, MAX_MEMORY_BYTES))
404 if os
.environ
.has_key("SCRIPT_NAME"):
409 except KeyboardInterrupt: