code.delx.au - webdl/blob - plus7.py
Improved error message for SBS
[webdl] / plus7.py
1 #!/usr/bin/env python
2 # vim:ts=4:sts=4:sw=4:noet
3
4 import json
5 import random
6 import string
7 import urllib
8 from lxml.cssselect import CSSSelector
9
10 from common import grab_html, grab_json, download_rtmp, Node
11
# Site root and the page that lists every Plus7 show.
BASE = "http://au.tv.yahoo.com"
BROWSE = "%s/plus7/browse/" % BASE

# YQL endpoint for stream metadata.  The two %s slots in 'q' are filled in
# by Plus7Node.download with the video id and a random session string.
METADATA_BASE = "http://video.query.yahoo.com/v1/public/yql?"
METADATA_QUERY = {
    'q': (
        'SELECT streams,status FROM yahoo.media.video.streams '
        'WHERE id="%s" AND format="mp4,flv" AND protocol="rtmp,http" '
        'AND plrs="%s" AND offnetwork="false" AND site="autv_plus7" '
        'AND lang="en-AU" AND region="AU" AND override="none";'
    ),
    'callback': 'jsonp_callback',
    'env': 'prod',
    'format': 'json',
}

# Player SWF location handed to download_rtmp.
HASH_URL = "http://d.yimg.com/m/up/ypp/au/player.swf"
24
25
def extract_and_remove(tokens, key):
    """Pull the integer that follows a keyword out of a token list.

    The keyword is compared case-insensitively against each token.  The
    first matching token and the token right after it are left out of the
    returned list.

    Args:
        tokens: list of string tokens; it is not modified.
        key: lowercase keyword to look for, e.g. "series".

    Returns:
        A (value, remaining) tuple: value is the int parsed from the token
        after the keyword, remaining is a new list without those two tokens.

    Raises:
        ValueError: the keyword is absent, is the last token, or is not
            followed by something int() accepts.
    """
    lowertokens = [x.lower() for x in tokens]
    pos = lowertokens.index(key)

    # A trailing keyword would otherwise escape as IndexError, which callers
    # treating ValueError as the "nothing to extract" signal do not handle.
    if pos + 1 >= len(tokens):
        raise ValueError("no value after %r" % key)

    value = int(tokens[pos + 1])
    return value, tokens[:pos] + tokens[pos + 2:]
34
35
def demangle_title(title, subtitle):
    """Build a display title with the episode written as "<season>x<episode>".

    Title and subtitle are joined with " - ".  When the combined text holds
    both "series <n>" and "episode <n>" (matched case-insensitively), those
    four tokens are removed and a single "<season>x<episode>" token, with
    the episode zero-padded to two digits, is inserted at the position where
    the subtitle began (or at the end if fewer tokens remain).  A "-" is
    added after it when other tokens follow.

    Args:
        title: show title string.
        subtitle: episode subtitle string; may be empty or None.

    Returns:
        The rewritten title, or the plain joined text when the
        series/episode pair cannot be extracted.
    """
    tokens = title.split()
    insert_pos = len(tokens)
    if subtitle:
        insert_pos += 1
        tokens += ["-"] + subtitle.split()

    try:
        season, remaining = extract_and_remove(tokens, "series")
        episode, remaining = extract_and_remove(remaining, "episode")
    except (ValueError, IndexError):
        # Keep the text untouched unless BOTH numbers were found: committing
        # after only "series" succeeded would silently drop it from the
        # title.  IndexError covers a keyword that is the very last token.
        return " ".join(tokens)

    if insert_pos < len(remaining):
        remaining.insert(insert_pos, "-")
    remaining.insert(insert_pos, "%sx%s" % (season, str(episode).zfill(2)))

    return " ".join(remaining)
53
class Plus7Node(Node):
    """A single downloadable Plus7 episode, identified by its page URL."""

    def __init__(self, title, parent, url):
        Node.__init__(self, title, parent)
        self.url = url
        self.can_download = True

    def get_video_id(self):
        """Scrape the episode page for its video id.

        Looks through every <script> element for a statement containing
        "new Y.VideoPlatform.VideoPlayer" and returns the quoted value of
        the first "id" key that follows "mediaItems" in that statement.

        Raises:
            Exception: no script on the page yields an id.
        """
        doc = grab_html(self.url, 3600)
        for script in doc.xpath("//script"):
            if not script.text:
                continue
            for line in script.text.split(";"):
                line = line.strip()
                # Membership test rather than find() <= 0, which also
                # rejected a statement that *starts* with the constructor.
                if "new Y.VideoPlatform.VideoPlayer" not in line:
                    continue

                # The constructor argument cannot be parsed as JSON, so the
                # value at playlist.mediaItems[0].id is located textually.
                pos1 = line.find('"mediaItems":')
                if pos1 < 0:
                    continue
                pos2 = line.find('"id":', pos1)
                if pos2 < 0:
                    continue
                # Opening quote of the value (5 == len('"id":')) ...
                pos3 = line.find('"', pos2 + 5)
                if pos3 < 0:
                    continue
                # ... and the closing quote, searched from just past the
                # opening one so whitespace after the colon is tolerated.
                pos4 = line.find('"', pos3 + 1)
                if pos4 < 0:
                    continue
                return line[pos3 + 1:pos4]

        raise Exception("Could not find video id on page " + self.url)

    def generate_session(self):
        """Return 22 random ASCII letters, used as the query's plrs value."""
        return "".join(random.choice(string.ascii_letters) for _ in range(22))

    def download(self):
        """Look up the stream for this episode and fetch it over RTMP.

        Returns:
            Whatever download_rtmp returns.
        """
        vid_id = self.get_video_id()
        # Copy so the shared template keeps its %s placeholders.
        qs = dict(METADATA_QUERY)
        qs["q"] = qs["q"] % (vid_id, self.generate_session())
        url = METADATA_BASE + urllib.urlencode(qs)
        # NOTE(review): the 0 is presumably a cache lifetime (grab_html is
        # called with 3600 elsewhere) - confirm in common.
        doc = grab_json(url, 0, skip_function=True)
        stream_data = doc["query"]["results"]["mediaObj"][0]["streams"][0]
        vbase = stream_data["host"]
        vpath = stream_data["path"]
        filename = self.title + ".flv"
        return download_rtmp(filename, vbase, vpath, HASH_URL)
103
104
class Plus7Series(Node):
    """A show whose children are the episodes listed on its page."""

    def __init__(self, title, parent, url):
        Node.__init__(self, title, parent)
        self.url = url

    def fill_children(self):
        """Create a Plus7Node for each entry under #related-episodes."""
        page = grab_html(self.url, 3600)

        select_entries = CSSSelector("#related-episodes div.itemdetails")
        select_title = CSSSelector("span.title")
        select_subtitle = CSSSelector("span.subtitle")
        select_link = CSSSelector("a")

        for entry in select_entries(page):
            raw_title = select_title(entry)[0].text
            # string() flattens any nested markup inside the subtitle span.
            raw_subtitle = select_subtitle(entry)[0].xpath("string()")
            name = demangle_title(raw_title, raw_subtitle)
            href = select_link(entry)[0].attrib["href"]
            Plus7Node(name, self, BASE + href)
118
class Plus7Root(Node):
    """Top-level "Yahoo Plus7" node; its children are the shows."""

    def __init__(self, parent):
        Node.__init__(self, "Yahoo Plus7", parent)

    def fill_children(self):
        """Create a Plus7Series for every show embedded in the browse page.

        The show list is read from a <script> whose text starts with
        "var shows = "; the rest of that text, minus trailing ';', spaces
        and newlines, is parsed as JSON.
        """
        prefix = "var shows = "
        page = grab_html(BROWSE, 3600)

        entries = []
        for script in page.xpath("//script"):
            text = script.text
            if text and text.startswith(prefix):
                payload = text[len(prefix):].rstrip("; \n")
                entries = json.loads(payload)

        for entry in entries:
            Plus7Series(entry["title"], self, entry["url"])
134
def fill_nodes(root_node):
    """Attach the Yahoo Plus7 tree beneath root_node."""
    Plus7Root(parent=root_node)
137