From b5b02738d7fc2ffc88aa20cdffc8bbcea0217115 Mon Sep 17 00:00:00 2001 From: James Bunton Date: Sat, 5 Dec 2020 23:59:28 +1100 Subject: [PATCH] Google seems to care about user agents now... --- youtube.cgi | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/youtube.cgi b/youtube.cgi index 0aadef3..212ceae 100755 --- a/youtube.cgi +++ b/youtube.cgi @@ -15,7 +15,8 @@ import urllib.parse import urllib.request -USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:82.0) Gecko/20100101 Firefox/82.0" +MOZILLA_RELEASE_URL = "https://www.mozilla.org/en-US/firefox/releases/" +USER_AGENT_TEMPLATE = "Mozilla/5.0 (X11; Linux x86_64; rv:83.0) Gecko/20100101 Firefox/%s" MIMETYPES = { "video/mp4": "mp4", @@ -76,8 +77,17 @@ def print_form(url="", msg=""): cookiejar = http.cookiejar.CookieJar() urlopener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookiejar)) referrer = "" +user_agent = None def urlopen(url, offset=None): + global user_agent + if not user_agent: + page = MozillaReleasesPageParser() + with urllib.request.urlopen(MOZILLA_RELEASE_URL) as f: + page.feed(f.read().decode("utf-8")) + page.close() + user_agent = USER_AGENT_TEMPLATE % page.latest_release + if url.startswith("//"): url = "https:" + url if not url.startswith("http://") and not url.startswith("https://"): @@ -90,7 +100,7 @@ def urlopen(url, offset=None): else: req.add_header("Referer", referrer) - req.add_header("User-Agent", USER_AGENT) + req.add_header("User-Agent", user_agent) if offset: req.add_header("Range", "bytes=%d-" % offset) @@ -116,7 +126,7 @@ def validate_url(url): else: raise NotYouTube() -def parse_url(url, parser): +def load_parse_url(url, parser): f = urlopen(url) parser.feed(f.read().decode("utf-8")) parser.close() @@ -229,6 +239,7 @@ def decode_cipher_url(js_url, cipher): fakeGlobal.matchMedia = () => ({matches: () => {}, media: ''}); fakeGlobal.result_url = null; fakeGlobal.g = function(){}; // this is _yt_player + fakeGlobal.TimeRanges = function(){}; const code_string = %(code)s + ';'; const exec_string = 'result_url = %(url_func_name)s(%(cipher_func_name)s(...%(args)s));'; @@ -340,6 +351,16 @@ class YouTubeVideoPageParser(html.parser.HTMLParser): if data: self.scripts.append(data) +class MozillaReleasesPageParser(html.parser.HTMLParser): + def __init__(self): + super().__init__() + self.latest_release = "1.0" + + def handle_starttag(self, tag, attrs): + attrs = dict(attrs) + if attrs.get("data-latest-firefox", None): + self.latest_release = attrs.get("data-latest-firefox", None) + def write_video(filename, video_data): quoted_filename = urllib.parse.quote(filename.encode("utf-8")) sys.stdout.buffer.write( @@ -365,7 +386,9 @@ def cgimain(): try: page = YouTubeVideoPageParser() validate_url(url) - parse_url(url, page) + with urlopen(url) as f: + page.feed(f.read().decode("utf-8")) + page.close() video_url, filename = get_video_url(page) video_data = urlopen(video_url) except VideoUnavailable as e: @@ -437,7 +460,9 @@ def main(): sys.exit(1) page = YouTubeVideoPageParser() - parse_url(url, page) + with urlopen(url) as f: + page.feed(f.read().decode("utf-8")) + page.close() video_url, filename = get_video_url(page) print("Downloading", filename) -- 2.39.2