code.delx.au - webdl/blob - plus7.py
Fixed Plus7 downloader hyphens
[webdl] / plus7.py
1 #!/usr/bin/env python
2 # vim:ts=4:sts=4:sw=4:noet
3
4 import json
5 from lxml.cssselect import CSSSelector
6
7 from common import grab_html, download_rtmp, Node
8
# Stream-metadata endpoint; the %d placeholder is filled with the integer
# video id scraped from an episode page.
METADATA = "http://cosmos.bcst.yahoo.com/rest/v2/pops;id=%d;lmsoverride=1;element=stream;bw=1200"
# Site root; prepended to the episode hrefs found on a series page.
BASE = "http://au.tv.yahoo.com"
# Listing page whose inline "var shows = ..." script is parsed for shows.
BROWSE = BASE + "/plus7/browse/"
# Player SWF URL handed to download_rtmp.  An earlier assignment of
# "http://d.yimg.com/nl/australia/au-tv/player.swf" was overwritten on the
# very next line, so only the effective value is kept.
HASH_URL = "http://d.yimg.com/m/up/ypp/au/player.swf"
14
def extract_and_remove(tokens, key):
    """Pull the integer that follows ``key`` out of a list of tokens.

    Tokens are lower-cased before being compared with ``key``, so ``key``
    should be given in lower case.  The first matching token and the token
    right after it are dropped from the result.

    Returns a ``(value, remaining_tokens)`` tuple.  The list passed in is
    left untouched; ``remaining_tokens`` is a new list.

    Raises ValueError if ``key`` is absent, if nothing follows it, or if
    the token that follows it is not an integer.
    """
    lowered = [token.lower() for token in tokens]
    pos = lowered.index(key)  # raises ValueError when the key is absent

    # A trailing key has no value to read.  Report that as ValueError, like
    # every other "cannot extract" case, instead of leaking an IndexError
    # that callers catching ValueError would not handle.
    if pos + 1 >= len(tokens):
        raise ValueError("no value follows %r" % (key,))

    value = int(tokens[pos + 1])
    return value, tokens[:pos] + tokens[pos + 2:]
23
24
def demangle_title(title, subtitle):
    """Build a display title, folding "Series N Episode M" into "NxMM".

    ``title`` and ``subtitle`` are split on whitespace and joined with a
    "-" token (the subtitle part is skipped when ``subtitle`` is empty).
    If both a "series" and an "episode" number can be extracted, those
    tokens are removed and a season/episode tag such as "2x05" is inserted
    at the position where the subtitle started, followed by "-" when more
    tokens come after it.  For example ``("Show", "Series 2 Episode 5 Name")``
    becomes ``"Show - 2x05 - Name"``.

    When either number cannot be extracted the tokens are returned
    unchanged, joined by single spaces.
    """
    tokens = title.split()
    insert_pos = len(tokens)
    if subtitle:
        insert_pos += 1
        tokens += ["-"] + subtitle.split()

    try:
        # Work on a separate list so a failure on "episode" does not leave
        # us with a title that has already lost its "Series N" words.
        season, remaining = extract_and_remove(tokens, "series")
        episode, remaining = extract_and_remove(remaining, "episode")
    except (ValueError, IndexError):
        # Missing keyword, non-numeric value, or keyword with nothing
        # after it: keep the title as it was given.
        return " ".join(tokens)

    if insert_pos < len(remaining):
        remaining.insert(insert_pos, "-")
    remaining.insert(insert_pos, "%sx%s" % (season, str(episode).zfill(2)))
    return " ".join(remaining)
42
class Plus7Node(Node):
    """Downloadable node representing one episode page."""

    def __init__(self, title, parent, url):
        """Store the episode page ``url`` and mark the node as downloadable."""
        Node.__init__(self, title, parent)
        self.url = url
        self.can_download = True

    def get_vid(self):
        """Return the integer video id embedded in the episode page.

        The text of every <script> element is scanned line by line.  On the
        first line containing ``vid : `` the text between the first and the
        last single quote is converted to int and returned.

        Raises Exception if no such line exists, and ValueError if the
        quoted text is not an integer.
        """
        doc = grab_html(self.url, 3600)
        for script in doc.xpath("//script"):
            if not script.text:
                continue
            for line in script.text.split("\n"):
                # Membership test instead of ``find(...) <= 0``: the old
                # comparison also skipped a line that starts with the marker.
                if "vid : " not in line:
                    continue
                quoted = line[line.find("'") + 1:line.rfind("'")]
                return int(quoted)
        raise Exception("Could not find vid on page " + self.url)

    def download(self):
        """Fetch the stream metadata for this episode and start the download.

        Reads the ``url`` and ``path`` attributes of the first <content>
        element in the metadata document and passes them, together with a
        "<title>.flv" file name and HASH_URL, to download_rtmp, whose
        result is returned.
        """
        vid = self.get_vid()
        doc = grab_html(METADATA % vid, 0)
        content = doc.xpath("//content")[0]
        vbase = content.attrib["url"]
        vpath = content.attrib["path"]
        # self.title is presumably set by Node.__init__ -- confirm in common.
        filename = self.title + ".flv"
        return download_rtmp(filename, vbase, vpath, HASH_URL)
70
71
class Plus7Series(Node):
    """Node for one show; its children are built from the show's page."""

    def __init__(self, title, parent, url):
        """Remember the series page ``url`` for later use by fill_children."""
        Node.__init__(self, title, parent)
        self.url = url

    def fill_children(self):
        """Create a Plus7Node for every episode listed on the series page.

        Each ``#related-episodes div.itemdetails`` element supplies a title
        (``span.title`` text), a subtitle (``span.subtitle`` string value)
        and a link (first ``a`` href, prefixed with BASE).
        """
        # Compile each selector once rather than once per listed episode.
        select_items = CSSSelector("#related-episodes div.itemdetails")
        select_title = CSSSelector("span.title")
        select_subtitle = CSSSelector("span.subtitle")
        select_link = CSSSelector("a")

        doc = grab_html(self.url, 3600)
        for item in select_items(doc):
            title = select_title(item)[0].text
            subtitle = select_subtitle(item)[0].xpath("string()")
            title = demangle_title(title, subtitle)
            href = select_link(item)[0].attrib["href"]
            # The new node is not kept here; presumably Node.__init__
            # registers it with its parent -- confirm in common.Node.
            Plus7Node(title, self, BASE + href)
85
class Plus7Root(Node):
    """Top-level "Yahoo Plus7" node whose children are the available shows."""

    def __init__(self, parent=None):
        """Create the root node under ``parent`` with a fixed title."""
        Node.__init__(self, "Yahoo Plus7", parent)

    def fill_children(self):
        """Create a Plus7Series for every show listed on the browse page.

        The show list is read from an inline script whose text starts with
        ``var shows = ``; the remainder, minus trailing ";", spaces and
        newlines, is parsed as JSON.  If several scripts match, the last
        one wins; if none match, no children are created.
        """
        marker = "var shows = "
        page = grab_html(BROWSE, 3600)
        catalogue = []
        for script in page.xpath("//script"):
            text = script.text
            if text and text.startswith(marker):
                payload = text[len(marker):].rstrip("; \n")
                catalogue = json.loads(payload)
        for entry in catalogue:
            Plus7Series(entry["title"], self, entry["url"])
101
def fill_nodes(root_node):
    """Construct a Plus7Root with ``root_node`` as its parent."""
    Plus7Root(parent=root_node)
104