]>
code.delx.au - webdl/blob - plus7.py
2 # vim:ts=4:sts=4:sw=4:noet
8 from lxml
. cssselect
import CSSSelector
10 from common
import grab_html
, grab_json
, download_rtmp
, Node
# Site root; relative episode links scraped from series pages are appended
# to this.
BASE = "http://au.tv.yahoo.com"
# Listing page fetched by Plus7Root.fill_children to find the show list.
BROWSE = BASE + "/plus7/browse/"

# YQL endpoint; the url-encoded query parameters are appended directly
# after the trailing "?".
METADATA_BASE = "http://video.query.yahoo.com/v1/public/yql?"
17 'q' : 'SELECT streams,status FROM yahoo.media.video.streams WHERE id=" %s " AND format="mp4,flv" AND protocol="rtmp,http" AND plrs=" %s " AND offnetwork="false" AND site="autv_plus7" AND lang="en-AU" AND region="AU" AND override="none";' ,
18 'callback' : 'jsonp_callback' ,
# Flash player URL handed to download_rtmp() as its final argument.
HASH_URL = "http://d.yimg.com/m/up/ypp/au/player.swf"
def extract_and_remove(tokens, key):
	"""Pull the integer that follows ``key`` out of a list of word tokens.

	``key`` is matched case-insensitively against the tokens; it is expected
	to be given in lower case because only the tokens are lower-cased.

	Returns a ``(value, remaining)`` tuple where ``value`` is the token after
	the key converted with ``int`` and ``remaining`` is a new list with both
	the key token and the value token removed. The input list is not
	modified.

	Raises ``ValueError`` when the key is absent, when it is the last token
	(nothing follows it), or when the following token is not an integer.
	"""
	lowered = [tok.lower() for tok in tokens]
	pos = lowered.index(key)  # list.index raises ValueError when absent

	# Report a trailing key the same way as every other "not usable" case
	# instead of leaking an IndexError from the subscript below.
	if pos + 1 >= len(tokens):
		raise ValueError("no value follows %r" % (key,))

	value = int(tokens[pos + 1])
	remaining = tokens[:pos] + tokens[pos + 2:]

	return value, remaining
def demangle_title(title, subtitle):
	"""Build a display title of the form ``<title> - <S>x<EE> - <rest>``.

	The words of ``title`` and ``subtitle`` are joined with a "-" separator.
	If the combined words contain both a "series <n>" and an "episode <n>"
	pair, the two pairs are removed and replaced by a single ``<n>x<nn>``
	token (episode zero-padded to two digits) placed right after the
	separator. When either pair is missing or malformed the plain joined
	title is returned unchanged.
	"""
	tokens = title.split()
	insert_pos = len(tokens)
	# NOTE(review): this guard and the insert_pos bump were not visible in the
	# reviewed text; they are inferred from the insert logic below, which
	# otherwise produces a doubled "-". Confirm against upstream.
	if subtitle:
		insert_pos += 1  # step past the "-" separator added here
		tokens += ["-"] + subtitle.split()

	try:
		# Work on a separate list so that a failure on "episode" does not
		# leave the title with its "series" number already stripped.
		season, remaining = extract_and_remove(tokens, "series")
		episode, remaining = extract_and_remove(remaining, "episode")
	except (ValueError, IndexError):
		return " ".join(tokens)

	# Only add a second separator when something follows the episode marker.
	if insert_pos < len(remaining):
		remaining.insert(insert_pos, "-")
	remaining.insert(insert_pos, "%sx%s" % (season, str(episode).zfill(2)))

	return " ".join(remaining)
class Plus7Node(Node):
	"""Node for one Plus7 episode page; marked as downloadable."""

	def __init__(self, title, parent, url):
		"""Store the episode page ``url`` and flag the node as downloadable."""
		Node.__init__(self, title, parent)
		# NOTE(review): the assignment of self.url was not visible in the
		# reviewed text although get_video_id() reads it; restored here.
		self.url = url
		self.can_download = True

	@staticmethod
	def _find_media_item_id(line):
		"""Return the first quoted "id" value after "mediaItems", or None.

		The player constructor argument cannot be parsed as JSON, so the id
		is located by plain substring search.
		"""
		items_at = line.find('"mediaItems":')
		if items_at < 0:
			return None
		id_key = '"id":'
		id_at = line.find(id_key, items_at)
		if id_at < 0:
			return None
		open_quote = line.find('"', id_at + len(id_key))
		if open_quote < 0:
			return None
		# Search for the closing quote after the opening one; a fixed offset
		# lands on the opening quote again when whitespace follows the colon.
		close_quote = line.find('"', open_quote + 1)
		if close_quote < 0:
			return None
		return line[open_quote + 1:close_quote]

	def get_video_id(self):
		"""Scan the page's <script> elements for the video player's media id.

		Returns the id string found in the first statement that constructs
		``Y.VideoPlatform.VideoPlayer``. Raises ``Exception`` when no script
		on the page yields an id.
		"""
		doc = grab_html(self.url, 3600)
		for script in doc.xpath("//script"):
			# Script elements that only carry a src attribute have no text.
			if not script.text:
				continue
			for line in script.text.split(";"):
				# A membership test also accepts a match at index 0, which
				# the previous ``find(...) <= 0`` comparison rejected.
				if "new Y.VideoPlatform.VideoPlayer" not in line:
					continue
				video_id = self._find_media_item_id(line)
				if video_id is not None:
					return video_id

		raise Exception("Could not find video id on page " + self.url)

	def generate_session(self):
		"""Return a random string of 22 ASCII letters."""
		return "".join([random.choice(string.ascii_letters) for _ in range(22)])

	def download(self):
		"""Look up the stream for this episode and hand it to download_rtmp.

		NOTE(review): the ``def`` line of this method was not visible in the
		reviewed text; the name ``download`` is inferred from the
		``can_download`` flag -- confirm against the Node base class.

		Returns whatever ``download_rtmp`` returns.
		"""
		vid_id = self.get_video_id()
		qs = dict(METADATA_QUERY)  # copy so the shared template stays intact
		qs["q"] = qs["q"] % (vid_id, self.generate_session())
		url = METADATA_BASE + urllib.urlencode(qs)
		# Second argument 0 is passed where the page fetches pass 3600.
		doc = grab_json(url, 0, skip_function=True)
		stream_data = doc["query"]["results"]["mediaObj"][0]["streams"][0]
		vbase = stream_data["host"]
		vpath = stream_data["path"]
		# self.title is presumably set by Node.__init__ -- TODO confirm.
		filename = self.title + ".flv"
		return download_rtmp(filename, vbase, vpath, HASH_URL)
class Plus7Series(Node):
	"""Node for one show; its children are the episodes listed on its page."""

	def __init__(self, title, parent, url):
		"""Store the series page ``url`` for later use by fill_children."""
		Node.__init__(self, title, parent)
		# NOTE(review): the assignment of self.url was not visible in the
		# reviewed text although fill_children() reads it; restored here.
		self.url = url

	def fill_children(self):
		"""Create a Plus7Node for every entry in the related-episodes list."""
		doc = grab_html(self.url, 3600)

		# Compile each selector once instead of once per episode.
		select_items = CSSSelector("#related-episodes div.itemdetails")
		select_title = CSSSelector("span.title")
		select_subtitle = CSSSelector("span.subtitle")
		select_link = CSSSelector("a")

		for item in select_items(doc):
			# .text is None for an element without leading text; fall back to
			# an empty string so demangle_title can still split it.
			title = select_title(item)[0].text or ""
			subtitle = select_subtitle(item)[0].xpath("string()")
			title = demangle_title(title, subtitle)
			url = select_link(item)[0].attrib["href"]
			# The instance is not kept here; presumably Node.__init__ attaches
			# it to the parent -- TODO confirm.
			Plus7Node(title, self, BASE + url)
class Plus7Root(Node):
	"""Top-level node; its children are the shows on the browse page."""

	def __init__(self, parent):
		"""Register the root under ``parent`` with a fixed display title."""
		Node.__init__(self, "Yahoo Plus7 (broken!)", parent)

	def fill_children(self):
		"""Create a Plus7Series for every show in the page's ``shows`` script.

		The browse page embeds the show list as a script whose text starts
		with ``var shows = `` followed by a JSON value.
		"""
		prefix = "var shows = "
		doc = grab_html(BROWSE, 3600)

		for script in doc.xpath("//script"):
			if not script.text or not script.text.startswith(prefix):
				continue

			# Drop the assignment prefix and the trailing terminator so only
			# the JSON value remains.
			payload = script.text[len(prefix):]
			payload = payload.rstrip("; \n ")
			shows = json.loads(payload)

			# NOTE(review): the loop header binding ``show`` was not visible
			# in the reviewed text; iterating the parsed list is inferred.
			for show in shows:
				Plus7Series(show["title"], self, show["url"])
135 def fill_nodes ( root_node
):