From 6de079c2ee77a4dcf990155be6634fa91e35286c Mon Sep 17 00:00:00 2001 From: James Bunton Date: Fri, 13 Apr 2012 10:46:35 +1000 Subject: [PATCH] Lazy loading plus support for SBS HTTP videos --- autograbber.py | 2 +- common.py | 37 +++++++++++++++++++++-------------- grabber.py | 2 +- iview.py | 53 +++++++++++++++++++++++++------------------------- sbs.py | 46 +++++++++++++++++++++++++++++-------------- 5 files changed, 81 insertions(+), 59 deletions(-) diff --git a/autograbber.py b/autograbber.py index 663b95a..4ee9555 100755 --- a/autograbber.py +++ b/autograbber.py @@ -39,7 +39,7 @@ def match(download_list, node, pattern, count=0): print "No match found for pattern:", "/".join(pattern) return p = pattern[count] - for child in node.children: + for child in node.get_children(): if fnmatch.fnmatch(child.title, p): match(download_list, child, pattern, count+1) diff --git a/common.py b/common.py index 4741444..26f8346 100644 --- a/common.py +++ b/common.py @@ -30,6 +30,9 @@ class Node(object): self.children = [] self.can_download = False + def get_children(self): + return self.children + def download(self): raise NotImplemented @@ -37,19 +40,11 @@ class Node(object): def load_root_node(): root_node = Node("Root") - print "Loading iView episode data...", - sys.stdout.flush() import iview - iview_node = Node("ABC iView", root_node) - iview.fill_nodes(iview_node) - print "done" + iview.fill_nodes(root_node) - print "Loading SBS episode data...", - sys.stdout.flush() import sbs - sbs_node = Node("SBS", root_node) - sbs.fill_nodes(sbs_node) - print "done" + sbs.fill_nodes(root_node) return root_node @@ -61,6 +56,7 @@ def sanify_filename(filename): def urlopen(url, max_age): +### print url if not os.path.isdir(CACHE_DIR): os.makedirs(CACHE_DIR) @@ -112,7 +108,6 @@ def download_rtmp(filename, vbase, vpath, hash_url=None): "-r", vbase, "-y", vpath, ] - print cmd if hash_url is not None: cmd += ["--swfVfy", hash_url] try: @@ -137,16 +132,28 @@ def download_rtmp(filename, 
vbase, vpath, hash_url=None): def download_urllib(filename, url): filename = sanify_filename(filename) - print "Downloading: %s -> %s" % (url, filename) + print "Downloading: %s" % filename try: src = urllib.urlopen(url) dst = open(filename, "w") - shutil.copyfileobj(src, dst) + while True: + buf = src.read(1024*1024) + if not buf: + break + dst.write(buf) + sys.stdout.write(".") + sys.stdout.flush() return True except KeyboardInterrupt: print "\nCancelled", url finally: - src.close() - dst.close() + try: + src.close() + except: + pass + try: + dst.close() + except: + pass return False diff --git a/grabber.py b/grabber.py index 2687c1d..9fa546f 100755 --- a/grabber.py +++ b/grabber.py @@ -32,7 +32,7 @@ def main(): while True: options = {} will_download = True - for n in node.children: + for n in node.get_children(): options[n.title] = n if not n.can_download: will_download = False diff --git a/iview.py b/iview.py index b48e5e2..f861160 100644 --- a/iview.py +++ b/iview.py @@ -16,7 +16,7 @@ class IviewNode(Node): Node.__init__(self, title, parent) self.vpath = vpath self.can_download = True - + def download(self): auth_doc = grab_xml(PARAMS["auth"], 0) vbase = auth_doc.xpath("//auth:server/text()", namespaces=NS)[0] @@ -26,9 +26,32 @@ class IviewNode(Node): vpath = ext + ":" + vpath filename = self.title + "." 
+ ext return download_rtmp(filename, vbase, vpath, HASH_URL) - + + +class IviewSeries(Node): + def __init__(self, series_title, series_id, parent): + Node.__init__(self, series_title, parent) + self.series_title = series_title + self.series_id = series_id + + def get_children(self): + if self.children: + return self.children + print "DOWNLOADING SERIES" + series_doc = grab_json(PARAMS["api"] + "series=" + self.series_id, 3600)[0] + for episode in series_doc["f"]: + vpath = episode["n"] + episode_title = episode["b"].strip() + if self.series_title != episode_title: + episode_title = self.series_title + " " + episode_title + IviewNode(episode_title, self, vpath) + return self.children + + def fill_nodes(root_node): + root_node = Node("ABC iView", root_node) + config_doc = grab_xml(CONFIG_URL, 24*3600) global PARAMS PARAMS = dict((p.attrib["name"], p.attrib["value"]) for p in config_doc.xpath("/config/param")) @@ -43,37 +66,13 @@ def fill_nodes(root_node): # Create a duplicate of each series within each category that it appears series_list_doc = grab_json(PARAMS["api"] + "seriesIndex", 3600) - now = datetime.now() for series in series_list_doc: categories = series["e"].split() sid = series["a"] - max_age = None - for episode in series["f"]: - air_date = datetime.strptime(episode["f"], "%Y-%m-%d %H:%M:%S") - diff = now - air_date - diff = 24*3600*diff.days + diff.seconds - if max_age is None or diff < max_age: - max_age = diff - - if max_age is None: - continue series_title = series["b"].replace("&amp;", "&") - series_nodes = [] for cid in categories: category_node = categories_map.get(cid, None) if category_node: - series_nodes.append(Node(series_title, category_node)) - if not series_nodes: - continue - - series_doc = grab_json(PARAMS["api"] + "series=" + sid, max_age)[0] - for episode in series_doc["f"]: - vpath = episode["n"] - episode_title = episode["b"].strip() - if series_title != episode_title: - episode_title = series_title + " " + episode_title - for series_node 
in series_nodes: - IviewNode(episode_title, series_node, vpath) - + IviewSeries(series_title, sid, category_node) diff --git a/sbs.py b/sbs.py index b95a67b..d4570ca 100644 --- a/sbs.py +++ b/sbs.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # vim:ts=4:sts=4:sw=4:noet -from common import grab_json, grab_xml, download_rtmp, Node +from common import grab_json, grab_xml, download_rtmp, download_urllib, Node import collections @@ -41,12 +41,19 @@ class SbsNode(Node): best_url = d["plfile$url"] doc = grab_xml(best_url, 3600) - vbase = doc.xpath("//smil:meta/@base", namespaces=NS)[0] - vpath = doc.xpath("//smil:video/@src", namespaces=NS)[0] - ext = vpath.rsplit(".", 1)[1] - filename = self.title + "." + ext - - return download_rtmp(filename, vbase, vpath) + if doc.xpath("//smil:meta/@base", namespaces=NS): + vbase = doc.xpath("//smil:meta/@base", namespaces=NS)[0] + vpath = doc.xpath("//smil:video/@src", namespaces=NS)[0] + ext = vpath.rsplit(".", 1)[1] + filename = self.title + "." + ext + return download_rtmp(filename, vbase, vpath) + else: + from lxml import etree + url = doc.xpath("//smil:video/@src", namespaces=NS)[0] + ext = url.rsplit(".", 1)[1] + filename = self.title + "." 
+ ext + url += "?v=2.5.14&fp=MAC%2011,1,102,55&r=FLQDD&g=YNANAXRIYFYO" + return download_urllib(filename, url) def fill_entry(get_catnode, entry): title = entry["title"] @@ -77,17 +84,26 @@ def fill_section(get_catnode, section): fill_entry(get_catnode, entry) index += doc["itemsPerPage"] -def fill_nodes(root_node): - catnodes = {} - def get_catnode(name): +class SbsRoot(Node): + def __init__(self, title, parent=None): + Node.__init__(self, title, parent) + self.catnodes = {} + + def get_catnode(self, name): try: - return catnodes[name] + return self.catnodes[name] except KeyError: - n = Node(name, root_node) - catnodes[name] = n + n = Node(name, self) + self.catnodes[name] = n return n - for section in SECTIONS: - fill_section(get_catnode, section) + def get_children(self): + if self.children: + return self.children + for section in SECTIONS: + fill_section(self.get_catnode, section) + return self.children +def fill_nodes(root_node): + SbsRoot("SBS", root_node) -- 2.39.2