import json
import logging
+import os
import sys
BASE = "https://www.sbs.com.au"
FULL_VIDEO_LIST = BASE + "/api/video_feed/f/Bgtm9B/sbs-section-programs/"
VIDEO_URL = BASE + "/ondemand/video/single/%s"
+PARAMS_URL = BASE + "/api/video_pdkvars/id/%s?form=json"
NS = {
"smil": "http://www.w3.org/2005/SMIL21/Language",
def download(self):
with requests_cache.disabled():
doc = grab_html(VIDEO_URL % self.video_id)
- player_params = self.get_player_params(doc)
+ player_params = grab_json(PARAMS_URL % self.video_id)
error = player_params.get("error", None)
if error:
else:
return download_mpd(filename, release_url)
- def get_player_params(self, doc):
- for script in doc.xpath("//script"):
- if not script.text:
- continue
- for line in script.text.split("\n"):
- s = "var playerParams = {"
- if s in line:
- p1 = line.find(s) + len(s) - 1
- p2 = line.find("};", p1) + 1
- if p1 >= 0 and p2 > 0:
- return json.loads(line[p1:p2])
- raise Exception("Unable to find player params for %s: %s" % (self.video_id, self.title))
-
def get_hls_url(self, release_url):
with requests_cache.disabled():
doc = grab_xml("https:" + release_url.replace("http:", "").replace("https:", ""))
nav_node.create_video_node(entry_data)
def load_all_video_entries(self):
    """Fetch the video entry metadata for every known SBS channel.

    Pages through each channel feed, de-duplicating entries by guid
    across channels, and returns the combined entries as a list.
    """
    channel_categories = (
        "Channel/SBS1",
        "Channel/SBS Food",
        "Channel/SBS VICELAND",
        "Channel/SBS World Movies",
        "Channel/Web Exclusive",
    )

    # Keyed by guid so an entry appearing in more than one channel
    # feed is only kept once.
    collected = {}
    for category in channel_categories:
        self.load_all_video_entries_for_channel(collected, category)

    entries = list(collected.values())
    print(" SBS fetched", len(entries))
    return entries
def load_all_video_entries_for_channel(self, all_entries, channel):
    """Page through one channel's feed, accumulating entries into all_entries.

    all_entries -- dict mapping guid -> entry, shared across channels so
                   duplicates (same programme in several feeds) collapse.
    channel     -- "byCategories" filter value, e.g. "Channel/SBS1".

    Mutates all_entries in place; returns None.
    """
    offset = 1
    page_size = 500
    duplicate_warning = False
    while True:
        entries = self.fetch_entries_page(channel, offset, page_size)
        if len(entries) == 0:
            # Past the end of the feed -- done with this channel.
            break
        for entry in entries:
            guid = entry["guid"]
            # BUG FIX: must test membership in the accumulated dict, not in
            # `entries` (the current page, a list of dicts -- a string guid
            # is never "in" it, so the warning could never fire).
            # https://bitbucket.org/delx/webdl/issues/102/recent-sbs-series-missing
            if guid in all_entries and not duplicate_warning:
                logging.warning("SBS returned a duplicate response, data is probably missing. Try decreasing page_size.")
                duplicate_warning = True
            all_entries[guid] = entry
        offset += page_size
        # Progress dots only when stdout is an interactive terminal.
        if os.isatty(sys.stdout.fileno()):
            sys.stdout.write(".")
            sys.stdout.flush()
+
+ def fetch_entries_page(self, channel, offset, page_size):
+ url = append_to_qs(FULL_VIDEO_LIST, {
+ "range": "%s-%s" % (offset, offset+page_size-1),
+ "byCategories": channel,
+ })
data = grab_json(url)
if "entries" not in data:
raise Exception("Missing data in SBS response", data)