import urllib.request
-MAX_MEMORY_BYTES = 128 * 1024*1024
-USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1"
+USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:67.0) Gecko/20100101 Firefox/67.0"
MIMETYPES = {
"video/mp4": "mp4",
class VideoUnavailable(Exception):
pass
+class NotYouTube(Exception):
+ """Raised by validate_url() when a URL is not an https YouTube link."""
+ pass
+
def print_form(url="", msg=""):
- script_url = "http://%s%s" % (os.environ["HTTP_HOST"], os.environ["REQUEST_URI"])
+ script_url = "https://%s%s" % (os.environ["HTTP_HOST"], os.environ["REQUEST_URI"])
sys.stdout.write("Content-Type: text/html\r\n\r\n")
sys.stdout.write("""
<!DOCTYPE html>
assert start == offset
return res
+def validate_url(url):
+    """Raise NotYouTube unless url is an https link to a YouTube host.
+
+    Only the scheme and the host are checked; the path and query string are
+    not inspected. Returns None when the URL is acceptable.
+    """
+    # Exact-match allowlist. str.lstrip("www.") removes any leading run of
+    # the characters "w" and "." rather than the literal prefix, so hosts
+    # such as "wyoutube.com" or "w.youtu.be" used to pass the check.
+    allowed_hosts = (
+        "youtube.com",
+        "www.youtube.com",
+        "youtu.be",
+        "www.youtu.be",
+    )
+    parsed_url = urllib.parse.urlparse(url)
+    # .hostname is lower-cased and excludes any userinfo or port; it is None
+    # when the URL has no network location, which fails the membership test.
+    if parsed_url.scheme != "https" or parsed_url.hostname not in allowed_hosts:
+        raise NotYouTube()
+
def parse_url(url, parser):
f = urlopen(url)
parser.feed(f.read().decode("utf-8"))
def find_func_name(script):
FUNC_NAME = R"([a-zA-Z0-9$]+)"
- FUNC_PARAMS = R"(\([a-zA-Z]+\.s\))"
+ FUNC_PARAMS = R"(\([a-zA-Z,\.]+\.s\))"
TERMINATOR = R"[,;\)]"
PATTERN = FUNC_NAME + FUNC_PARAMS + TERMINATOR
stdout=subprocess.PIPE
)
js_decode_script = ("""
- var vm = require('vm');
+ const vm = require('vm');
- var sandbox = {
+ const sandbox = {
location: {
hash: '',
href: '',
navigator: {
userAgent: ''
},
+ XMLHttpRequest: class XMLHttpRequest {},
+ matchMedia: () => ({matches: () => {}, media: ''}),
signature: %(signature)s,
transformed_signature: null,
g: function(){} // this is _yt_player
};
sandbox.window = sandbox;
- var code_string = %(code)s + ';';
- var exec_string = 'transformed_signature = %(func_name)s(signature);';
+ const code_string = %(code)s + ';';
+ const exec_string = 'transformed_signature = %(func_name)s("", "MARKER", signature);';
vm.runInNewContext(code_string + exec_string, sandbox);
- console.log(sandbox.transformed_signature);
+    // Depth-first search of obj for a property named 'MARKER'. Returns that
+    // property's value, or undefined when no such property is reachable.
+    function findSignature(obj, seen = new Set()) {
+        // typeof null is 'object' and Object.entries(null) throws, so null
+        // must be rejected explicitly. `seen` keeps cyclic references from
+        // recursing forever.
+        if (obj === null || typeof obj !== 'object' || seen.has(obj)) {
+            return;
+        }
+        seen.add(obj);
+        for (const [key, value] of Object.entries(obj)) {
+            if (key === 'MARKER') {
+                return value;
+            }
+            const result = findSignature(value, seen);
+            if (result) {
+                return result;
+            }
+        }
+    }
+ console.log(findSignature(sandbox.transformed_signature));
""" % params)
p.stdin.write(js_decode_script.encode("utf-8"))
p.stdin.close()
transformed_signature = p.stdout.read().decode("utf-8").strip()
+ transformed_signature = urllib.parse.unquote(transformed_signature)
if p.wait() != 0:
raise Exception("js failed to execute: %d" % p.returncode)
signature = None
if signature:
- video_url = append_to_qs(video_url, {"signature": signature})
+ sp = url_data.get("sp", ["signature"])[0]
+ video_url = append_to_qs(video_url, {sp: signature})
best_url = video_url
best_quality = quality
if not video_url:
return None, None
- filename = sanitize_filename(page.title)
- filename += "." + extension
+ title = player_config["args"].get("title", None)
+ if not title:
+ title = json.loads(player_config["args"]["player_response"])["videoDetails"]["title"]
+ if not title:
+ title = "Unknown title"
+
+ filename = sanitize_filename(title) + "." + extension
return video_url, filename
class YouTubeVideoPageParser(html.parser.HTMLParser):
def __init__(self):
super().__init__()
- self.title = None
self.unavailable_message = None
self.scripts = []
def handle_starttag(self, tag, attrs):
attrs = dict(attrs)
- self._handle_title(tag, attrs)
self._handle_unavailable_message(tag, attrs)
self._handle_script(tag, attrs)
def _ignore_data(self, _):
pass
- def _handle_title(self, tag, attrs):
- if tag == "title":
- self.handle_data = self._handle_title_data
-
- def _handle_title_data(self, data):
- self.title = data.strip()
-
def _handle_unavailable_message(self, tag, attrs):
if attrs.get("id", None) == "unavailable-message":
self.handle_data = self._handle_unavailable_message_data
try:
url = args["url"][0]
except:
- print_form(url="http://www.youtube.com/watch?v=FOOBAR")
+ print_form(url="https://www.youtube.com/watch?v=FOOBAR")
return
try:
page = YouTubeVideoPageParser()
+ validate_url(url)
parse_url(url, page)
video_url, filename = get_video_url(page)
video_data = urlopen(video_url)
url=url,
msg="<p class='error'>Sorry, there was an error: %s</p>" % cgi.escape(e.args[0])
)
+ except NotYouTube:
+ print_form(
+ url=url,
+ msg="<p class='error'>Sorry, that does not look like a YouTube page!</p>"
+ )
except Exception as e:
print_form(
url=url,
- msg="<p class='error'>Sorry, there was an error. Check your URL?</p>"
+ msg="<p class='error'>Sorry, there was an unknown error.</p>"
)
return
try:
url = sys.argv[1]
except:
- print("Usage: %s http://youtube.com/watch?v=FOOBAR" % sys.argv[0], file=sys.stderr)
+ print("Usage: %s https://youtube.com/watch?v=FOOBAR" % sys.argv[0], file=sys.stderr)
sys.exit(1)
page = YouTubeVideoPageParser()