]> code.delx.au - webdl/blob - plus7.py
Improve Plus7 titles
[webdl] / plus7.py
1 #!/usr/bin/env python
2 # vim:ts=4:sts=4:sw=4:noet
3
4 import json
5 from lxml.cssselect import CSSSelector
6
7 from common import grab_html, download_rtmp, Node
8
# Stream metadata endpoint; "%d" is filled in with the numeric video id.
METADATA = "http://cosmos.bcst.yahoo.com/rest/v2/pops;id=%d;lmsoverride=1;element=stream;bw=1200"
# Site root, prepended to the relative episode links found on series pages.
BASE = "http://au.tv.yahoo.com"
# Listing page that embeds the "var shows = ..." catalogue.
BROWSE = BASE + "/plus7/browse/"
# Player SWF URL handed to download_rtmp() as its last argument.
# An earlier location was http://d.yimg.com/nl/australia/au-tv/player.swf;
# it used to be assigned here and then immediately overwritten, so only the
# effective value is kept.
HASH_URL = "http://d.yimg.com/m/up/ypp/au/player.swf"
14
def extract_and_remove(tokens, key):
    """Pull the integer that follows ``key`` out of a list of tokens.

    Tokens are lower-cased before being compared with ``key``, so ``key``
    must be given in lower case.  Only the first occurrence is used.

    Returns a ``(value, remaining)`` tuple: ``value`` is the token after
    ``key`` converted with ``int()``, and ``remaining`` is a new list with
    both the key token and its value removed.  ``tokens`` is not modified.

    Raises ValueError if ``key`` is not present, if it is the last token,
    or if the token after it is not an integer.
    """
    lowered = [token.lower() for token in tokens]
    pos = lowered.index(key)  # ValueError when the key is absent

    # A key in final position has nothing after it.  Report that as a
    # ValueError (instead of letting the index lookup raise IndexError) so
    # every "no usable number" case surfaces as the same exception type.
    if pos + 1 >= len(tokens):
        raise ValueError("no value after %r" % (key,))

    value = int(tokens[pos + 1])
    remaining = tokens[:pos] + tokens[pos + 2:]

    return value, remaining
23
24
def demangle_title(title):
    """Move the "Series N Episode M" markers of a title into an "NxMM" tag.

    Example:
        "Postman Pat - Postman Pat and the Runaway Kite Series 1 Episode 1"
    becomes
        "Postman Pat - 1x01 - Postman Pat and the Runaway Kite"

    The tag is placed right after the first stand-alone "-" token, or at the
    start of the title when there is none.  A title that lacks either
    marker, or whose marker is not followed by a number, is returned
    unchanged.
    """
    tokens = title.split()

    try:
        season, tokens = extract_and_remove(tokens, "series")
        episode, tokens = extract_and_remove(tokens, "episode")
    except (ValueError, IndexError):
        # ValueError: marker missing or not followed by an integer.
        # IndexError: marker was the very last word of the title.
        return title

    try:
        i = tokens.index("-") + 1
    except ValueError:
        i = 0

    # Insert the tag and its separator as separate tokens so that the join
    # below produces single spaces, and skip the separator when nothing
    # follows the tag so the result does not end in a dangling " -".
    tag = ["%sx%s" % (season, str(episode).zfill(2))]
    if i < len(tokens):
        tag.append("-")
    tokens[i:i] = tag

    return " ".join(tokens)
42
class Plus7Node(Node):
    """A single downloadable Plus7 episode."""

    def __init__(self, title, parent, url):
        # The raw page title is rewritten by demangle_title() before it is
        # handed to the Node base class.
        Node.__init__(self, demangle_title(title), parent)
        self.url = url
        self.can_download = True

    def get_vid(self):
        """Return the integer video id found in the episode page's scripts.

        Scans every <script> element line by line for the text "vid : " and
        converts whatever sits between the first and last single quote on
        that line to an int.

        Raises Exception when no script line contains "vid : ", and
        ValueError when the quoted text is not an integer.
        """
        # NOTE(review): the second grab_html argument is presumably a cache
        # lifetime in seconds -- confirm against common.grab_html.
        doc = grab_html(self.url, 3600)
        for script in doc.xpath("//script"):
            if not script.text:
                continue
            for line in script.text.split("\n"):
                # Membership test rather than find() <= 0, which wrongly
                # skipped a line that starts with "vid : " (index 0).
                if "vid : " not in line:
                    continue
                vid = line[line.find("'") + 1:line.rfind("'")]
                return int(vid)
        raise Exception("Could not find vid on page " + self.url)

    def download(self):
        """Look up the stream location for this episode and download it.

        Returns whatever download_rtmp() returns.  Raises Exception when the
        metadata response contains no <content> element.
        """
        vid = self.get_vid()
        doc = grab_html(METADATA % vid, 0)
        matches = doc.xpath("//content")
        if not matches:
            # Fail with a readable message instead of a bare IndexError.
            raise Exception("Could not find stream metadata for vid %d" % vid)
        content = matches[0]
        vbase = content.attrib["url"]
        vpath = content.attrib["path"]
        # self.title is not set in this class; presumably Node.__init__
        # stores it -- confirm against common.Node.
        filename = self.title + ".flv"
        return download_rtmp(filename, vbase, vpath, HASH_URL)
70
71
class Plus7Series(Node):
    """A Plus7 show; its children are the episodes listed on its page."""

    def __init__(self, title, parent, url):
        Node.__init__(self, title, parent)
        self.url = url

    def fill_children(self):
        """Create one Plus7Node per entry in the page's related-episodes list.

        Each episode title is the "span.title" text, followed by " - " and
        the "span.subtitle" text when a non-blank subtitle is present.
        """
        # Compile the selectors once instead of once per episode.
        select_items = CSSSelector("#related-episodes div.itemdetails")
        select_title = CSSSelector("span.title")
        select_subtitle = CSSSelector("span.subtitle")
        select_link = CSSSelector("a")

        # NOTE(review): the second grab_html argument is presumably a cache
        # lifetime in seconds -- confirm against common.grab_html.
        doc = grab_html(self.url, 3600)
        for item in select_items(doc):
            # .text is None for an element with no leading text; fall back to
            # an empty string so the concatenation below cannot fail.
            title = select_title(item)[0].text or ""
            subtitle = select_subtitle(item)[0].xpath("string()")
            # Trim the subtitle and collapse every run of whitespace inside
            # it to a single space.
            subtitle = " ".join(subtitle.split())
            if subtitle:
                if title:
                    title += " - " + subtitle
                else:
                    title = subtitle
            url = select_link(item)[0].attrib["href"]
            Plus7Node(title, self, BASE + url)
86
class Plus7Root(Node):
    """Top-level "Yahoo Plus7" node; its children are the available shows."""

    def __init__(self, parent=None):
        Node.__init__(self, "Yahoo Plus7", parent)

    def fill_children(self):
        """Create a Plus7Series for every show listed on the browse page.

        The catalogue is the JSON value assigned in a <script> whose text
        starts with "var shows = ".  If several scripts match, the last one
        is used; if none match, no children are created.
        """
        prefix = "var shows = "
        page = grab_html(BROWSE, 3600)
        catalogue = []
        for tag in page.xpath("//script"):
            source = tag.text
            if source and source.startswith(prefix):
                # Drop the assignment prefix and the trailing ";" / blanks /
                # newlines, leaving only the JSON literal.
                payload = source[len(prefix):].rstrip("; \n")
                catalogue = json.loads(payload)
        for entry in catalogue:
            Plus7Series(entry["title"], self, entry["url"])
102
def fill_nodes(root_node):
    """Create the Plus7 root node with ``root_node`` as its parent.

    The new node is not returned; presumably Node.__init__ registers it
    with the parent -- confirm against common.Node.
    """
    Plus7Root(parent=root_node)
105