# code.delx.au - webdl/blob - sbs.py
from common import (
    grab_html,
    grab_json,
    grab_xml,
    download_hls,
    download_mpd,
    Node,
    append_to_qs,
)
# Root of the SBS website; the endpoint URLs below are built from it.
BASE = "https://www.sbs.com.au"
# Video feed endpoint; queried with "range" and "byCategories" parameters
# to list entries page by page.
FULL_VIDEO_LIST = BASE + "/api/video_feed/f/Bgtm9B/sbs-section-programs/"
# Template for a single video's page; "%s" is filled with the video id.
VIDEO_URL = BASE + "/ondemand/video/single/%s"
14 "smil": "http://www.w3.org/2005/SMIL21/Language",
18 class SbsVideoNode(Node
):
def __init__(self, title, parent, url):
    """Create a downloadable video node.

    Args:
        title: Node title, passed through to ``Node.__init__``.
        parent: Parent node, passed through to ``Node.__init__``.
        url: Identifier or URL of the video; only the text after the
            last "/" is kept as the video id.
    """
    Node.__init__(self, title, parent)
    # Keep only the final path segment; a value without "/" is kept whole.
    self.video_id = url.split("/")[-1]
    self.can_download = True
25 with requests_cache
.disabled():
26 doc
= grab_html(VIDEO_URL
% self
.video_id
)
27 player_params
= self
.get_player_params(doc
)
29 error
= player_params
.get("error", None)
31 print("Cannot download:", error
)
34 release_url
= player_params
["releaseUrls"]["html"]
35 filename
= self
.title
+ ".ts"
37 hls_url
= self
.get_hls_url(release_url
)
39 return download_hls(filename
, hls_url
)
41 return download_mpd(filename
, release_url
)
43 def get_player_params(self
, doc
):
44 for script
in doc
.xpath("//script"):
47 for line
in script
.text
.split("\n"):
48 s
= "var playerParams = {"
50 p1
= line
.find(s
) + len(s
) - 1
51 p2
= line
.find("};", p1
) + 1
52 if p1
>= 0 and p2
> 0:
53 return json
.loads(line
[p1
:p2
])
54 raise Exception("Unable to find player params for %s: %s" % (self
.video_id
, self
.title
))
56 def get_hls_url(self
, release_url
):
57 with requests_cache
.disabled():
58 doc
= grab_xml("https:" + release_url
.replace("http:", "").replace("https:", ""))
59 video
= doc
.xpath("//smil:video", namespaces
=NS
)
62 video_url
= video
[0].attrib
["src"]
65 class SbsNavNode(Node
):
def create_video_node(self, entry_data):
    """Create a video node for one feed entry with this node as parent.

    Args:
        entry_data: Mapping that provides at least the "title" and "id"
            keys.
    """
    # The new node is not returned or stored here; presumably
    # Node.__init__ (defined in common, not visible here) links it to
    # the parent -- confirm.
    SbsVideoNode(entry_data["title"], self, entry_data["id"])
69 def find_existing_child(self
, path
):
70 for child
in self
.children
:
71 if child
.title
== path
:
74 class SbsRootNode(SbsNavNode
):
def __init__(self, parent):
    """Create the root navigation node, titled "SBS", under *parent*."""
    Node.__init__(self, "SBS", parent)
def fill_children(self):
    """Populate this node with category nodes and their videos.

    Loads every video entry, expands the entries into
    (category_path, entry_data) pairs, and for each pair obtains the
    nav node for that path via ``create_nav_node`` and asks it to
    create the video node.
    """
    all_video_entries = self.load_all_video_entries()
    category_and_entry_data = self.explode_videos_to_unique_categories(all_video_entries)
    for category_path, entry_data in category_and_entry_data:
        # Category lookup always starts from this root node.
        nav_node = self.create_nav_node(self, category_path)
        nav_node.create_video_node(entry_data)
85 def load_all_video_entries(self
):
89 "Channel/SBS VICELAND",
90 "Channel/SBS World Movies",
91 "Channel/Web Exclusive",
95 for channel
in channels
:
96 self
.load_all_video_entries_for_channel(all_entries
, channel
)
98 all_entries
= list(all_entries
.values())
99 print(" SBS fetched", len(all_entries
))
102 def load_all_video_entries_for_channel(self
, all_entries
, channel
):
105 duplicate_warning
= False
108 entries
= self
.fetch_entries_page(channel
, offset
, page_size
)
109 if len(entries
) == 0:
112 for entry
in entries
:
114 if guid
in entries
and not duplicate_warning
:
115 # https://bitbucket.org/delx/webdl/issues/102/recent-sbs-series-missing
116 logging
.warn("SBS returned a duplicate response, data is probably missing. Try decreasing page_size.")
117 duplicate_warning
= True
119 all_entries
[guid
] = entry
122 if os
.isatty(sys
.stdout
.fileno()):
123 sys
.stdout
.write(".")
126 def fetch_entries_page(self
, channel
, offset
, page_size
):
127 url
= append_to_qs(FULL_VIDEO_LIST
, {
128 "range": "%s-%s" % (offset
, offset
+page_size
-1),
129 "byCategories": channel
,
131 data
= grab_json(url
)
132 if "entries" not in data
:
133 raise Exception("Missing data in SBS response", data
)
134 return data
["entries"]
136 def explode_videos_to_unique_categories(self
, all_video_entries
):
137 for entry_data
in all_video_entries
:
138 for category_data
in entry_data
["media$categories"]:
139 category_path
= self
.calculate_category_path(
140 category_data
["media$scheme"],
141 category_data
["media$name"],
144 yield category_path
, entry_data
146 def calculate_category_path(self
, scheme
, name
):
151 name
= name
.split("/")
152 if name
[0] != scheme
:
153 name
.insert(0, scheme
)
156 def create_nav_node(self
, parent
, category_path
):
157 if not category_path
:
160 current_path
= category_path
[0]
161 current_node
= parent
.find_existing_child(current_path
)
163 current_node
= SbsNavNode(current_path
, parent
)
164 return self
.create_nav_node(current_node
, category_path
[1:])
def fill_nodes(root_node):
    """Create the SBS root node with *root_node* as its parent."""
    SbsRootNode(root_node)