code.delx.au - webdl/blob - plus7.py

   1 #!/usr/bin/env python
   2 # vim:ts=4:sts=4:sw=4:noet
   3
   4 import json
   5 from lxml.cssselect import CSSSelector
   6
   7 from common import grab_html, download_rtmp, Node
   8
   9 METADATA = "http://cosmos.bcst.yahoo.com/rest/v2/pops;id=%d;lmsoverride=1;element=stream;bw=1200"
  10 BASE = "http://au.tv.yahoo.com"
  11 BROWSE = BASE + "/plus7/browse/"
  12 HASH_URL = "http://d.yimg.com/nl/australia/au-tv/player.swf"
  13 HASH_URL = "http://d.yimg.com/m/up/ypp/au/player.swf"
  14
  15 class Plus7Node(Node):
  16         def __init__(self, title, parent, url):
  17                 Node.__init__(self, title, parent)
  18                 self.url = url
  19                 self.can_download = True
  20
  21         def get_vid(self):
  22                 doc = grab_html(self.url, 3600)
  23                 for script in doc.xpath("//script"):
  24                         if not script.text:
  25                                 continue
  26                         for line in script.text.split("\n"):
  27                                 if line.find("vid : ") <= 0:
  28                                         continue
  29                                 vid = line[line.find("'")+1 : line.rfind("'")]
  30                                 vid = int(vid)
  31                                 return vid
  32                 raise Exception("Could not find vid on page " + self.url)
  33
  34         def download(self):
  35                 vid = self.get_vid()
  36                 doc = grab_html(METADATA % vid, 0)
  37                 content = doc.xpath("//content")[0]
  38                 vbase = content.attrib["url"]
  39                 vpath = content.attrib["path"]
  40                 filename = self.title + ".flv"
  41                 return download_rtmp(filename, vbase, vpath, HASH_URL)
  42
  43
  44 class Plus7Series(Node):
  45         def __init__(self, title, parent, url):
  46                 Node.__init__(self, title, parent)
  47                 self.url = url
  48
  49         def fill_children(self):
  50                 doc = grab_html(self.url, 3600)
  51                 for item in CSSSelector("#related-episodes div.itemdetails")(doc):
  52                         title = CSSSelector("span.title")(item)[0].text
  53                         subtitle = CSSSelector("span.subtitle")(item)[0].xpath("string()")
  54                         if subtitle and subtitle.strip():
  55                                 title += " - " + subtitle.strip().replace("  ", " ")
  56                         url = CSSSelector("a")(item)[0].attrib["href"]
  57                         Plus7Node(title, self, BASE + url)
  58
  59 class Plus7Root(Node):
  60         def __init__(self, parent=None):
  61                 Node.__init__(self, "Yahoo Plus7", parent)
  62
  63         def fill_children(self):
  64                 doc = grab_html(BROWSE, 3600)
  65                 shows = []
  66                 for script in doc.xpath("//script"):
  67                         if not script.text or not script.text.startswith("var shows = "):
  68                                 continue
  69                         shows = script.text[12:]
  70                         shows = shows.rstrip("; \n")
  71                         shows = json.loads(shows)
  72                 for show in shows:
  73                         Plus7Series(show["title"], self, show["url"])
  74
  75 def fill_nodes(root_node):
  76         Plus7Root(root_node)
  77