From: James Bunton
Date: Tue, 21 Feb 2017 21:50:07 +0000 (+1100)
Subject: Switch to requests to remove custom caching code
X-Git-Url: https://code.delx.au/webdl/commitdiff_plain/53da2736a1f1d2bd442932ec8e970fd327a785e0

Switch to requests to remove custom caching code
---
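Notes (kept below the "---" so they stay out of the commit message):
the old urlopen() removed below hand-rolled an on-disk cache, keying
entries by the MD5 of the URL and expiring them by file mtime.
requests-cache replaces all of that with a single install_cache() call
at import time; every request made through the requests API is then
transparently stored in a sqlite file and replayed until it expires.
A minimal sketch of the mechanism, using an illustrative cache name
and URL rather than the repo's real ones:

    import requests
    import requests_cache

    # After this call, responses are written to demo_cache.sqlite and
    # replayed from there for up to an hour.
    requests_cache.install_cache("demo_cache", backend="sqlite", expire_after=3600)

    session = requests.Session()  # sessions created after install_cache() are patched too
    first = session.get("https://example.com/api/data.json")   # hits the network
    second = session.get("https://example.com/api/data.json")  # served from sqlite
    print(second.from_cache)  # True; requests-cache flags replayed responses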
diff --git a/brightcove.py b/brightcove.py
index ad345f5..7331526 100644
--- a/brightcove.py
+++ b/brightcove.py
@@ -38,7 +38,7 @@ class BrightcoveVideoNode(Node):
             "video_id": self.video_id,
         })
 
-        doc = grab_json(desc_url, 3600)
+        doc = grab_json(desc_url)
         video_url = doc and doc["HLSURL"]
         if not video_url:
             return
@@ -54,7 +54,7 @@ class BrightcoveVideoNode(Node):
             "video_id": self.video_id,
         })
 
-        doc = grab_json(desc_url, 3600)
+        doc = grab_json(desc_url)
         video_url = doc and doc["hdsManifestUrl"]
         if not video_url:
             return
@@ -84,7 +84,7 @@ class BrightcoveRootNode(Node):
             url = self.get_all_videos_url(page_number)
             page_number += 1
 
-            page = grab_json(url, 3600)
+            page = grab_json(url)
             items = page["items"]
             if len(items) == 0:
                 break
diff --git a/common.py b/common.py
index 22cee10..43c0e86 100644
--- a/common.py
+++ b/common.py
@@ -1,17 +1,17 @@
 import hashlib
-import http.cookiejar
 import json
 import logging
 import lxml.etree
 import lxml.html
 import os
 import re
+import requests
+import requests_cache
 import shutil
 import signal
 import subprocess
 import time
 import urllib.parse
-import urllib.request
 
 
 try:
@@ -26,12 +26,12 @@ logging.basicConfig(
     level = logging.INFO if os.environ.get("DEBUG", None) is None else logging.DEBUG,
 )
 
-CACHE_DIR = os.path.join(
+CACHE_FILE = os.path.join(
     os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache")),
-    "webdl"
+    "webdl",
+    "requests_cache"
 )
-
-USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:21.0) Gecko/20100101 Firefox/21.0"
+requests_cache.install_cache(CACHE_FILE, backend='sqlite', expire_after=3600)
 
 
 class Node(object):
@@ -83,83 +83,36 @@ def ensure_scheme(url):
         parts[0] = "http"
     return urllib.parse.urlunparse(parts)
 
-cookiejar = http.cookiejar.CookieJar()
-urlopener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookiejar))
-def _urlopen(url, referrer=None):
-    url = ensure_scheme(url)
-    req = urllib.request.Request(url)
-    req.add_header("User-Agent", USER_AGENT)
-    if referrer:
-        req.add_header("Referer", referrer)
-    return urlopener.open(req)
-
-def urlopen(url, max_age):
-    logging.debug("urlopen(%r, %r)", url, max_age)
-
-    if not os.path.isdir(CACHE_DIR):
-        os.makedirs(CACHE_DIR)
-
-    if max_age <= 0:
-        return _urlopen(url)
-
-    filename = hashlib.md5(url.encode("utf-8")).hexdigest()
-    filename = os.path.join(CACHE_DIR, filename)
-    if os.path.exists(filename):
-        file_age = int(time.time()) - os.path.getmtime(filename)
-        if file_age < max_age:
-            logging.debug("loading from cache: %s", filename)
-            return open(filename, "rb")
-
-    logging.debug("downloading: %s -> %s", url, filename)
-    src = _urlopen(url)
-    dst = open(filename, "wb")
-    try:
-        shutil.copyfileobj(src, dst)
-    except Exception as e:
-        try:
-            os.unlink(filename)
-        except OSError:
-            pass
-        raise e
-    src.close()
-    dst.close()
-
-    return open(filename, "rb")
-
-def grab_text(url, max_age):
-    f = urlopen(url, max_age)
-    text = f.read().decode("utf-8")
-    f.close()
-    return text
-
-def grab_html(url, max_age):
-    f = urlopen(url, max_age)
-    doc = lxml.html.parse(f, lxml.html.HTMLParser(encoding="utf-8", recover=True))
-    f.close()
+http_session = requests.Session()
+http_session.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:21.0) Gecko/20100101 Firefox/21.0"
+
+def grab_text(url):
+    logging.debug("grab_text(%r)", url)
+    request = http_session.prepare_request(requests.Request("GET", url))
+    response = http_session.send(request)
+    return response.text
+
+def grab_html(url):
+    logging.debug("grab_html(%r)", url)
+    request = http_session.prepare_request(requests.Request("GET", url))
+    response = http_session.send(request, stream=True)
+    doc = lxml.html.parse(response.raw, lxml.html.HTMLParser(encoding="utf-8", recover=True))
+    response.close()
     return doc
 
-def grab_xml(url, max_age):
-    f = urlopen(url, max_age)
-    doc = lxml.etree.parse(f, lxml.etree.XMLParser(encoding="utf-8", recover=True))
-    f.close()
+def grab_xml(url):
+    logging.debug("grab_xml(%r)", url)
+    request = http_session.prepare_request(requests.Request("GET", url))
+    response = http_session.send(request, stream=True)
+    doc = lxml.etree.parse(response.raw, lxml.etree.XMLParser(encoding="utf-8", recover=True))
+    response.close()
     return doc
 
-def grab_json(url, max_age, skip_assignment=False, skip_function=False):
-    f = urlopen(url, max_age)
-    text = f.read().decode("utf-8")
-
-    if skip_assignment:
-        pos = text.find("=")
-        text = text[pos+1:]
-
-    elif skip_function:
-        pos = text.find("(")
-        rpos = text.rfind(")")
-        text = text[pos+1:rpos]
-
-    doc = json.loads(text)
-    f.close()
-    return doc
+def grab_json(url):
+    logging.debug("grab_json(%r)", url)
+    request = http_session.prepare_request(requests.Request("GET", url))
+    response = http_session.send(request)
+    return response.json()
 
 def exec_subprocess(cmd):
     logging.debug("Executing: %s", cmd)
diff --git a/iview.py b/iview.py
index b23af38..cd203c2 100644
--- a/iview.py
+++ b/iview.py
@@ -1,4 +1,5 @@
 from common import grab_json, grab_xml, Node, download_hls
+import requests_cache
 import urllib.parse
 
 API_URL = "http://iview.abc.net.au/api"
@@ -24,7 +25,8 @@ class IviewEpisodeNode(Node):
         raise Exception("Missing hls-high program stream for " + self.video_key)
 
     def get_auth_details(self):
-        auth_doc = grab_xml(AUTH_URL, 0)
+        with requests_cache.disabled():
+            auth_doc = grab_xml(AUTH_URL)
         NS = {
             "auth": "http://www.abc.net.au/iView/Services/iViewHandshaker",
         }
@@ -40,7 +42,7 @@ class IviewEpisodeNode(Node):
         return video_url
 
     def download(self):
-        info = grab_json(API_URL + "/programs/" + self.video_key, 3600)
+        info = grab_json(API_URL + "/programs/" + self.video_key)
         video_url = self.find_hls_url(info["playlist"])
         token, token_hostname= self.get_auth_details()
         video_url = self.add_auth_token_to_url(video_url, token, token_hostname)
@@ -67,7 +69,7 @@ class IviewIndexNode(Node):
             IviewEpisodeNode(episode_title, series_node, video_key)
 
     def fill_children(self):
-        info = grab_json(self.url, 3600)
+        info = grab_json(self.url)
         for index_list in info["index"]:
             for ep_info in index_list["episodes"]:
                 self.add_episode(ep_info)
@@ -86,7 +88,7 @@ class IviewFlatNode(Node):
             IviewEpisodeNode(episode_title, self, video_key)
 
     def fill_children(self):
-        info = grab_json(self.url, 3600)
+        info = grab_json(self.url)
         for ep_info in info:
             self.add_episode(ep_info)
 
diff --git a/requirements.txt b/requirements.txt
index ae9d294..6bd4f7b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
 livestreamer
 pycrypto
 lxml
+requests
+requests-cache
diff --git a/sbs.py b/sbs.py
index 568ef60..2c77cc6 100644
--- a/sbs.py
+++ b/sbs.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+import requests_cache
 from common import grab_html, grab_json, grab_xml, download_hls, Node, append_to_qs
 
 import json
@@ -20,11 +21,13 @@ class SbsVideoNode(Node):
         self.can_download = True
 
     def download(self):
-        doc = grab_html(VIDEO_URL % self.video_id, 0)
+        with requests_cache.disabled():
+            doc = grab_html(VIDEO_URL % self.video_id)
         player_params = self.get_player_params(doc)
 
         release_url = player_params["releaseUrls"]["html"]
-        doc = grab_xml(release_url, 0)
+        with requests_cache.disabled():
+            doc = grab_xml(release_url if not release_url.startswith("//") else "https:" + release_url)
         video = doc.xpath("//smil:video", namespaces=NS)[0]
         video_url = video.attrib["src"]
         if not video_url:
@@ -71,7 +74,7 @@ class SbsRootNode(SbsNavNode):
         amount = 500
         while True:
             url = append_to_qs(FULL_VIDEO_LIST, {"range": "%s-%s" % (offset, offset+amount)})
-            data = grab_json(url, 3600)
+            data = grab_json(url)
             entries = data["entries"]
             if len(entries) == 0:
                 break
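
Note on the requests_cache.disabled() blocks above: the iview auth
handshake and the SBS player/release URLs replace the old max_age=0
(never cache) calls, since serving them stale would hand back dead
tokens. requests_cache.disabled() is a context manager that suspends
the globally installed cache for the duration of the block and restores
it on exit. A minimal sketch, again with an illustrative URL rather
than the repo's real endpoints:

    import requests
    import requests_cache

    requests_cache.install_cache("demo_cache", expire_after=3600)

    def fetch_auth_token():
        # Tokens are single-use: a cached copy would be rejected, so
        # bypass the sqlite cache for this one request only.
        with requests_cache.disabled():
            return requests.get("https://example.com/auth/token").text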