]>
code.delx.au - youtube-cgi/blob - youtube.cgi
18 USER_AGENT
= "Mozilla/5.0 (X11; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0"
35 class VideoUnavailable(Exception):
38 class NotYouTube(Exception):
41 def print_form(url
="", msg
=""):
42 script_url
= "https://%s%s" % (os
.environ
["HTTP_HOST"], os
.environ
["REQUEST_URI"])
43 sys
.stdout
.write("Content-Type: text/html\r\n\r\n")
48 <title>delx.net.au - YouTube Scraper</title>
49 <link rel="stylesheet" type="text/css" href="/style.css">
50 <style type="text/css">
60 <h1>delx.net.au - YouTube Scraper</h1>
62 <form action="" method="get">
63 <p>This page will let you easily download YouTube videos to watch offline. It
64 will automatically grab the highest quality version.</p>
65 <div><input type="text" name="url" value="{1}"/></div>
66 <div><input type="submit" value="Download!"/></div>
68 <p>Tip! Use this bookmarklet: <a href="javascript:(function(){window.location='{2}?url='+escape(location);})()">YouTube Download</a>
69 to easily download videos. Right-click the link and add it to bookmarks,
70 then when you're looking at a YouTube page select that bookmark from your
71 browser's bookmarks menu to download the video straight away.</p>
74 """.replace("{0}", msg
).replace("{1}", url
).replace("{2}", script_url
))
76 cookiejar
= http
.cookiejar
.CookieJar()
77 urlopener
= urllib
.request
.build_opener(urllib
.request
.HTTPCookieProcessor(cookiejar
))
80 def urlopen(url
, offset
=None):
81 if url
.startswith("//"):
83 if not url
.startswith("http://") and not url
.startswith("https://"):
84 url
= "https://www.youtube.com" + url
87 req
= urllib
.request
.Request(url
)
91 req
.add_header("Referer", referrer
)
93 req
.add_header("User-Agent", USER_AGENT
)
96 req
.add_header("Range", "bytes=%d-" % offset
)
98 res
= urlopener
.open(req
)
100 content_range
= res
.getheader("Content-Range")
102 tokens
= content_range
.split()
103 assert tokens
[0] == "bytes"
104 start
= int(tokens
[1].split("-")[0])
105 assert start
== offset
def validate_url(url):
    """Check that `url` is an https URL on youtube.com or youtu.be.

    The network location may carry a single optional "www." prefix.

    Returns None when the URL is acceptable.

    Raises:
        NotYouTube: when the scheme is not https or the host is not one of
            the accepted YouTube hosts.

    NOTE(review): the tail of this function was not visible in the reviewed
    text; raising NotYouTube on failure is inferred from the NotYouTube
    exception class and the "does not look like a YouTube page" error
    handler elsewhere in this file -- confirm against the original.
    """
    parsed_url = urllib.parse.urlparse(url)
    scheme_ok = parsed_url.scheme == "https"

    # str.lstrip("www.") removes any leading run of the characters 'w' and
    # '.', not the literal prefix, so hosts such as "wyoutube.com" or
    # ".youtube.com" used to be accepted. Strip exactly one "www." prefix.
    host = parsed_url.netloc
    if host.startswith("www."):
        host = host[len("www."):]
    host_ok = host in ("youtube.com", "youtu.be")

    if scheme_ok and host_ok:
        return
    else:
        raise NotYouTube()
118 def parse_url(url
, parser
):
120 parser
.feed(f
.read().decode("utf-8"))
124 def append_to_qs(url
, params
):
125 r
= list(urllib
.parse
.urlsplit(url
))
126 qs
= urllib
.parse
.parse_qs(r
[3])
128 r
[3] = urllib
.parse
.urlencode(qs
, True)
129 url
= urllib
.parse
.urlunsplit(r
)
132 def get_player_config(scripts
):
134 for script
in scripts
:
135 for line
in script
.split("\n"):
136 s
= "ytplayer.config = {"
138 p1
= line
.find(s
) + len(s
) - 1
139 p2
= line
.find("};", p1
) + 1
140 if p1
>= 0 and p2
> 0:
141 return json
.loads(line
[p1
:p2
])
def extract_js(script):
    """Return the body of the player script with its wrapper removed.

    The script is expected to start with PREFIX and end with SUFFIX (an
    immediately-invoked function wrapped around `_yt_player`); the text
    between the two is returned unchanged.

    Args:
        script: full text of the player JavaScript file.

    Returns:
        The script text with PREFIX and SUFFIX stripped.

    Raises:
        ValueError: if `script` does not start with PREFIX or does not end
            with SUFFIX.
    """
    PREFIX = "var _yt_player={};(function(g){var window=this;"
    SUFFIX = ";})(_yt_player);\n"

    # Explicit checks instead of `assert`: assertions are removed when Python
    # runs with -O, which would let an unexpected script layout be sliced
    # blindly, and they carry no message explaining what went wrong.
    if not script.startswith(PREFIX):
        raise ValueError("player script does not start with the expected wrapper prefix")
    if not script.endswith(SUFFIX):
        raise ValueError("player script does not end with the expected wrapper suffix")

    return script[len(PREFIX):-len(SUFFIX)]
def find_cipher_func(script):
    """Find the name of the cipher function called in the player script.

    Searches `script` for the first call of the form ``NAME(x.s)`` or
    ``NAME(decodeURIComponent(x.s)`` that is followed by ``,``, ``;`` or
    ``)``, and returns NAME.

    Args:
        script: text of the player JavaScript.

    Returns:
        The matched function name as a string.

    Raises:
        ValueError: if no such call is present in `script` (previously this
            surfaced as an AttributeError on None).
    """
    FUNC_NAME = R"([a-zA-Z0-9$]+)"
    DECODE_URI_COMPONENT = R"(\(decodeURIComponent)?"
    FUNC_PARAMS = R"(\([a-zA-Z,\.]+\.s\))"
    TERMINATOR = R"[,;\)]"
    PATTERN = FUNC_NAME + DECODE_URI_COMPONENT + FUNC_PARAMS + TERMINATOR

    match = re.search(PATTERN, script)
    if match is None:
        # re.search returns None when nothing matches; fail with a message
        # that says which lookup broke instead of crashing on None.groups().
        raise ValueError("could not find the cipher function in the player script")

    return match.group(1)
def find_url_func(script):
    """Find the name of the function assigned to ``this.url`` in the player script.

    Searches `script` for the first statement of the form
    ``this.url = NAME(this)`` (optional whitespace allowed around ``=`` and
    inside the call) and returns NAME.

    Args:
        script: text of the player JavaScript.

    Returns:
        The matched function name as a string.

    Raises:
        ValueError: if no such assignment is present in `script` (previously
            this surfaced as an AttributeError on None).
    """
    FUNC_NAME = R"([a-zA-Z0-9$]+)"
    PATTERN = R"this\.url\s*=\s*" + FUNC_NAME + R"\s*\(\s*this\s*\)"

    match = re.search(PATTERN, script)
    if match is None:
        # re.search returns None when nothing matches; fail with a message
        # that says which lookup broke instead of crashing on None.groups().
        raise ValueError("could not find the url function in the player script")

    return match.group(1)
170 def decode_cipher_url(js_url
, cipher
):
171 cipher
= urllib
.parse
.parse_qs(cipher
)
179 script
= f
.read().decode("utf-8")
182 cipher_func_name
= find_cipher_func(script
)
183 url_func_name
= find_url_func(script
)
186 "cipher_func_name": cipher_func_name
,
187 "url_func_name": url_func_name
,
188 "args": json
.dumps(args
),
189 "code": json
.dumps(extract_js(script
)),
191 p
= subprocess
.Popen(
195 stdin
=subprocess
.PIPE
,
196 stdout
=subprocess
.PIPE
198 js_decode_script
= ("""
199 const vm = require('vm');
201 const fakeGlobal = {};
202 fakeGlobal.window = fakeGlobal;
203 fakeGlobal.location = {
205 host: 'www.youtube.com',
206 hostname: 'www.youtube.com',
207 href: 'https://www.youtube.com',
208 origin: 'https://www.youtube.com',
212 fakeGlobal.history = {
213 pushState: function(){}
215 fakeGlobal.document = {
216 location: fakeGlobal.location
218 fakeGlobal.document = {};
219 fakeGlobal.navigator = {
222 fakeGlobal.XMLHttpRequest = class XMLHttpRequest {};
223 fakeGlobal.matchMedia = () => ({matches: () => {}, media: ''});
224 fakeGlobal.result_url = null;
225 fakeGlobal.g = function(){}; // this is _yt_player
227 const code_string = %(code)s + ';';
228 const exec_string = 'result_url = %(url_func_name)s(%(cipher_func_name)s(...%(args)s));';
229 vm.runInNewContext(code_string + exec_string, fakeGlobal);
231 console.log(fakeGlobal.result_url);
234 p
.stdin
.write(js_decode_script
.encode("utf-8"))
237 result_url
= p
.stdout
.read().decode("utf-8").strip()
239 raise Exception("js failed to execute: %d" % p
.returncode
)
243 def get_best_video(player_config
):
244 js_url
= player_config
["assets"]["js"]
246 player_args
= player_config
["args"]
247 player_response
= json
.loads(player_args
["player_response"])
248 formats
= player_response
["streamingData"]["formats"]
252 best_extension
= None
253 for format_data
in formats
:
254 mimetype
= format_data
["mimeType"].split(";")[0]
255 quality
= format_data
["quality"]
257 if quality
not in QUALITIES
:
259 if mimetype
not in MIMETYPES
:
262 extension
= MIMETYPES
[mimetype
]
263 quality
= QUALITIES
.get(quality
, -1)
265 if best_quality
is not None and quality
< best_quality
:
268 if "signatureCipher" in format_data
:
269 video_url
= decode_cipher_url(js_url
, format_data
["signatureCipher"])
271 video_url
= format_data
["url"]
274 best_quality
= quality
275 best_extension
= extension
277 return best_url
, best_extension
279 def sanitize_filename(filename
):
281 re
.sub("\s+", " ", filename
.strip())
287 def get_video_url(page
):
288 player_config
= get_player_config(page
.scripts
)
289 if not player_config
:
290 raise VideoUnavailable(page
.unavailable_message
or "Could not find video URL")
292 video_url
, extension
= get_best_video(player_config
)
296 title
= player_config
["args"].get("title", None)
298 title
= json
.loads(player_config
["args"]["player_response"])["videoDetails"]["title"]
300 title
= "Unknown title"
302 filename
= sanitize_filename(title
) + "." + extension
304 return video_url
, filename
306 class YouTubeVideoPageParser(html
.parser
.HTMLParser
):
309 self
.unavailable_message
= None
312 def handle_starttag(self
, tag
, attrs
):
314 self
._handle
_unavailable
_message
(tag
, attrs
)
315 self
._handle
_script
(tag
, attrs
)
317 def handle_endtag(self
, tag
):
318 self
.handle_data
= self
._ignore
_data
320 def _ignore_data(self
, _
):
323 def _handle_unavailable_message(self
, tag
, attrs
):
324 if attrs
.get("id", None) == "unavailable-message":
325 self
.handle_data
= self
._handle
_unavailable
_message
_data
327 def _handle_unavailable_message_data(self
, data
):
328 self
.unavailable_message
= data
.strip()
330 def _handle_script(self
, tag
, attrs
):
332 self
.handle_data
= self
._handle
_script
_data
334 def _handle_script_data(self
, data
):
336 self
.scripts
.append(data
)
338 def write_video(filename
, video_data
):
339 quoted_filename
= urllib
.parse
.quote(filename
.encode("utf-8"))
340 sys
.stdout
.buffer.write(
341 b
"Content-Disposition: attachment; filename*=UTF-8''{0}\r\n"
342 .replace(b
"{0}", quoted_filename
.encode("utf-8"))
344 sys
.stdout
.buffer.write(
345 b
"Content-Length: {0}\r\n"
346 .replace(b
"{0}", video_data
.getheader("Content-Length").encode("utf-8"))
348 sys
.stdout
.buffer.write(b
"\r\n")
349 shutil
.copyfileobj(video_data
, sys
.stdout
.buffer)
357 print_form(url
="https://www.youtube.com/watch?v=FOOBAR")
361 page
= YouTubeVideoPageParser()
364 video_url
, filename
= get_video_url(page
)
365 video_data
= urlopen(video_url
)
366 except VideoUnavailable
as e
:
369 msg
="<p class='error'>Sorry, there was an error: %s</p>" % cgi
.escape(e
.args
[0])
374 msg
="<p class='error'>Sorry, that does not look like a YouTube page!</p>"
376 except Exception as e
:
379 msg
="<p class='error'>Sorry, there was an unknown error.</p>"
383 write_video(filename
, video_data
)
386 suffixes
= ["", "KiB", "MiB", "GiB"]
387 for i
, suffix
in enumerate(suffixes
):
391 return "%.2f %s" % (size
, suffix
)
393 def copy_with_progress(content_length
, infile
, outfile
):
397 rate
= last_bytes_read
/ (now
- last_ts
)
398 sys
.stdout
.write("\33[2K\r")
399 sys
.stdout
.write("%s / %s (%s/sec)" % (
401 pp_size(content_length
),
411 if now
- last_ts
> 0.5:
416 buf
= infile
.read(32768)
420 last_bytes_read
+= len(buf
)
421 bytes_read
+= len(buf
)
431 print("Usage: %s https://youtube.com/watch?v=FOOBAR" % sys
.argv
[0], file=sys
.stderr
)
434 page
= YouTubeVideoPageParser()
436 video_url
, filename
= get_video_url(page
)
437 print("Downloading", filename
)
439 outfile
= open(filename
, "ab")
440 offset
= outfile
.tell()
442 print("Resuming download from", pp_size(offset
))
447 video_data
= urlopen(video_url
, offset
)
448 except urllib
.error
.HTTPError
as e
:
450 print("File is complete!")
455 content_length
= int(video_data
.getheader("Content-Length"))
456 if total_size
is None:
457 total_size
= content_length
460 copy_with_progress(content_length
, video_data
, outfile
)
465 if outfile
.tell() != total_size
:
467 offset
= outfile
.tell()
468 if old_offset
== offset
:
470 print("Restarting download from", pp_size(offset
))
477 if __name__
== "__main__":
478 if "SCRIPT_NAME" in os
.environ
:
483 except KeyboardInterrupt:
484 print("\nExiting...")