code.delx.au - webdl/commitdiff
Improved caching
authorJames Bunton <jamesbunton@delx.net.au>
Mon, 19 Dec 2011 09:28:38 +0000 (20:28 +1100)
committerJames Bunton <jamesbunton@delx.net.au>
Mon, 19 Dec 2011 09:28:38 +0000 (20:28 +1100)
autograbber.py
common.py
grabber.py
iview.py
sbs.py

index 1c6ef0f0b44c42783c259f99d9c071bd3dadfbd2..9433d4ad2b1e61e442822a2f69dfd2eed9870e5d 100755 (executable)
@@ -36,11 +36,7 @@ def match(download_list, node, pattern):
 
 
 def main():
-       print "Loading episode data...",
-       sys.stdout.flush()
        node = load_root_node()
-       print "done"
-
        download_list = DownloadList("downloaded_auto.txt")
 
        for search in sys.argv[1:]:
index f286333dbccbbfe8807b601e6456e96904ffde84..6b6489657b1432dde32d2166e2603a96e97d3cb5 100644 (file)
--- a/common.py
+++ b/common.py
@@ -2,7 +2,10 @@
 
 from lxml import etree
 import json
-import md5
+try:
+       import hashlib
+except ImportError:
+       import md5 as hashlib
 import os
 import shutil
 import signal
@@ -34,27 +37,35 @@ class Node(object):
 def load_root_node():
        root_node = Node("Root")
 
+       print "Loading iView episode data...",
+       sys.stdout.flush()
        import iview
        iview_node = Node("ABC iView", root_node)
        iview.fill_nodes(iview_node)
+       print "done"
 
+       print "Loading SBS episode data...",
+       sys.stdout.flush()
        import sbs
        sbs_node = Node("SBS", root_node)
        sbs.fill_nodes(sbs_node)
+       print "done"
 
        return root_node
 
 
-def urlopen(url):
-       try:
-               os.mkdir(CACHE_DIR)
-       except OSError:
-               pass
+def urlopen(url, max_age):
+       if not os.path.isdir(CACHE_DIR):
+               os.makedirs(CACHE_DIR)
+
+       if max_age <= 0:
+               return urllib.urlopen(url)
 
-       filename = md5.new(url).hexdigest()
+       filename = hashlib.md5(url).hexdigest()
        filename = os.path.join(CACHE_DIR, filename)
        if os.path.exists(filename):
-               if int(time.time()) - os.path.getmtime(filename) < 24*3600:
+               file_age = int(time.time()) - os.path.getmtime(filename)
+               if file_age < max_age:
                        return open(filename)
 
        src = urllib.urlopen(url)
@@ -65,14 +76,14 @@ def urlopen(url):
 
        return open(filename)
 
-def grab_xml(url):
-       f = urlopen(url)
+def grab_xml(url, max_age):
+       f = urlopen(url, max_age)
        doc = etree.parse(f)
        f.close()
        return doc
 
-def grab_json(url):
-       f = urlopen(url)
+def grab_json(url, max_age):
+       f = urlopen(url, max_age)
        doc = json.load(f)
        f.close()
        return doc
index b4b8310ae4827298020e96de013dd3a11d3fe7e1..4216220a3af89e20f3372fb87a5c5995c83c57fd 100755 (executable)
@@ -27,10 +27,8 @@ def choose(options, allow_multi):
                        pass
 
 def main():
-       print "Loading episode data...",
-       sys.stdout.flush()
        node = load_root_node()
-       print "done"
+
        while True:
                options = {}
                will_download = True
index 93dbc12ae9884874abfc879c19dff365727aa4ea..4319543d6edfa6834242f0451e689a986ebc63c9 100644 (file)
--- a/iview.py
+++ b/iview.py
@@ -2,6 +2,7 @@
 # vim:ts=4:sts=4:sw=4:noet
 
 from common import grab_xml, grab_json, download_rtmp, Node
+from datetime import datetime
 
 BASE_URL = "http://www.abc.net.au/iview/"
 CONFIG_URL = BASE_URL + "xml/config.xml"
@@ -17,7 +18,7 @@ class IviewNode(Node):
                self.can_download = True
        
        def download(self):
-               auth_doc = grab_xml(PARAMS["auth"])
+               auth_doc = grab_xml(PARAMS["auth"], 0)
                vbase = auth_doc.xpath("//auth:server/text()", namespaces=NS)[0]
                token = auth_doc.xpath("//auth:token/text()", namespaces=NS)[0]
                vbase += "?auth=" + token
@@ -28,11 +29,11 @@ class IviewNode(Node):
        
 
 def fill_nodes(root_node):
-       config_doc = grab_xml(CONFIG_URL)
+       config_doc = grab_xml(CONFIG_URL, 24*3600)
        global PARAMS
        PARAMS = dict((p.attrib["name"], p.attrib["value"]) for p in config_doc.xpath("/config/param"))
 
-       categories_doc = grab_xml(BASE_URL + PARAMS["categories"])
+       categories_doc = grab_xml(BASE_URL + PARAMS["categories"], 24*3600)
        categories_map = {}
        for category in categories_doc.xpath("//category[@genre='true']"):
                cid = category.attrib["id"]
@@ -41,17 +42,32 @@ def fill_nodes(root_node):
                categories_map[cid] = category_node
 
        # Create a duplicate of each series within each category that it appears
-       series_list_doc = grab_json(PARAMS["api"] + "seriesIndex")
+       series_list_doc = grab_json(PARAMS["api"] + "seriesIndex", 3600)
+       now = datetime.now()
        for series in series_list_doc:
                categories = series["e"].split()
                sid = series["a"]
+               max_age = None
+               for episode in series["f"]:
+                       air_date = datetime.strptime(episode["f"], "%Y-%m-%d %H:%M:%S")
+                       diff = now - air_date
+                       diff = 24*3600*diff.days + diff.seconds
+                       if max_age is None or diff < max_age:
+                               max_age = diff
+
+               if max_age is None:
+                       continue
+
                series_title = series["b"].replace("&amp;", "&")
                series_nodes = []
                for cid in categories:
                        category_node = categories_map.get(cid, None)
                        if category_node:
                                series_nodes.append(Node(series_title, category_node))
-               series_doc = grab_json(PARAMS["api"] + "series=" + sid)[0]
+               if not series_nodes:
+                       continue
+
+               series_doc = grab_json(PARAMS["api"] + "series=" + sid, max_age)[0]
                for episode in series_doc["f"]:
                        vpath = episode["n"]
                        episode_title = episode["b"].strip()
diff --git a/sbs.py b/sbs.py
index 40da84e032a62224d2863304782fcb6cf44e5fcc..ff20efc82df0593cfb846ef72abc45583cc8a917 100644 (file)
--- a/sbs.py
+++ b/sbs.py
@@ -6,8 +6,8 @@ from common import grab_xml as _grab_xml, download_rtmp, download_urllib, Node
 
 BASE_URL = "http://player.sbs.com.au"
 
-def grab_xml(path):
-       return _grab_xml(BASE_URL + path)
+def grab_xml(path, max_age):
+       return _grab_xml(BASE_URL + path, max_age)
 
 class SbsNode(Node):
        def __init__(self, title, parent, video_desc_url):
@@ -16,7 +16,7 @@ class SbsNode(Node):
                self.can_download = True
 
        def download(self):
-               video = grab_xml(self.video_desc_url)
+               video = grab_xml(self.video_desc_url, 0)
                vbase = video.xpath("//meta/@base")[0]
                bestrate = 0
                bestvpath = None
@@ -33,10 +33,10 @@ class SbsNode(Node):
 
 
 def fill_nodes(root_node):
-       settings = grab_xml("/playerassets/programs/config/standalone_settings.xml")
+       settings = grab_xml("/playerassets/programs/config/standalone_settings.xml", 24*3600)
        menu_url = settings.xpath("/settings/setting[@name='menuURL']/@value")[0]
 
-       root_menu = grab_xml(menu_url)
+       root_menu = grab_xml(menu_url, 3600)
        seen_category_titles = set()
        for menu in root_menu.xpath("//menu"):
                try:
@@ -52,7 +52,7 @@ def fill_nodes(root_node):
                                        i += 1
                        seen_category_titles.add(category_title)
                        category_node = Node(category_title, root_node)
-                       playlist = grab_xml(playlist_url)
+                       playlist = grab_xml(playlist_url, 3600)
                        for video_desc in playlist.xpath("//video"):
                                video_desc_url = video_desc.xpath("@src")[0]
                                video_title = video_desc.xpath("title/text()")[0].strip()