code.delx.au - webdl/blob - sbs.py
Fixed silly bug in removing ffmpeg detection
[webdl] / sbs.py
1 import requests_cache
2 from common import grab_html, grab_json, grab_xml, download_hls, Node, append_to_qs
3
4 import json
5 import logging
6 import os
7 import sys
8
# Root of the SBS website; every endpoint below is built from it.
BASE = "https://www.sbs.com.au"

# Feed endpoint queried page by page in SbsRootNode.fetch_entries_page.
FULL_VIDEO_LIST = BASE + "/api/video_feed/f/Bgtm9B/sbs-section-programs/"

# Prefix for the SMIL lookup; SbsVideoNode.download appends the video id.
VIDEO_SMIL_URL = BASE + "/api/v3/video_smil?id="

# Namespace prefix map handed to xpath() when searching the SMIL document.
NS = dict(smil="http://www.w3.org/2005/SMIL21/Language")
16
17
class SbsVideoNode(Node):
    """Leaf node representing one downloadable SBS video."""

    def __init__(self, title, parent, url):
        """Create the node.

        title  -- display title; also used as the base of the output filename.
        parent -- node passed through to Node.__init__ as the parent.
        url    -- entry identifier; only the text after the last "/" is kept
                  as the video id.
        """
        Node.__init__(self, title, parent)
        self.video_id = url.split("/")[-1]
        self.can_download = True

    def download(self):
        """Resolve the stream URL from the SMIL document and download it.

        Returns False when the SMIL document contains no <video> element,
        otherwise whatever download_hls() returns.
        """
        filename = self.title + ".ts"

        # NOTE(review): the download is kept inside the cache-disabled block
        # so that nothing fetched here is served from or stored in the HTTP
        # cache; confirm this matches the intended scope.
        with requests_cache.disabled():
            doc = grab_xml(VIDEO_SMIL_URL + self.video_id)
            video_el = doc.xpath("//smil:video", namespaces=NS)
            if not video_el:
                # Report the id we looked up; the previous code referenced an
                # undefined name here and crashed with NameError instead.
                print("Cannot find video:", self.video_id)
                return False
            video_url = video_el[0].attrib["src"]

            return download_hls(filename, video_url)
36
class SbsNavNode(Node):
    """Intermediate (category) node in the SBS tree."""

    def create_video_node(self, entry_data):
        """Construct an SbsVideoNode for ``entry_data`` with this node as parent.

        Reads the "title" and "id" keys of ``entry_data``. The new node is not
        returned; presumably Node.__init__ registers it with the parent
        (verify against common.Node).
        """
        video_title = entry_data["title"]
        video_url = entry_data["id"]
        SbsVideoNode(video_title, self, video_url)

    def find_existing_child(self, path):
        """Return the first child whose title equals ``path``, or None."""
        matching = (node for node in self.children if node.title == path)
        return next(matching, None)
45
class SbsRootNode(SbsNavNode):
    """Top-level "SBS" node that fetches the video feed and builds the tree."""

    def __init__(self, parent):
        Node.__init__(self, "SBS", parent)

    def fill_children(self):
        """Fetch all video entries and create a video node under every
        category path each entry belongs to."""
        all_video_entries = self.load_all_video_entries()
        category_and_entry_data = self.explode_videos_to_unique_categories(all_video_entries)
        for category_path, entry_data in category_and_entry_data:
            nav_node = self.create_nav_node(self, category_path)
            nav_node.create_video_node(entry_data)

    def load_all_video_entries(self):
        """Return a list of the entries of every known channel, keyed
        internally by guid so each guid appears once."""
        channels = [
            "Channel/NITV",
            "Channel/SBS1",
            "Channel/SBS Food",
            "Channel/SBS VICELAND",
            "Channel/SBS World Movies",
            "Channel/Web Exclusive",
        ]

        all_entries = {}
        for channel in channels:
            self.load_all_video_entries_for_channel(all_entries, channel)

        all_entries = list(all_entries.values())
        print(" SBS fetched", len(all_entries))
        return all_entries

    def load_all_video_entries_for_channel(self, all_entries, channel):
        """Page through the feed for ``channel``, storing entries in
        ``all_entries`` (a dict mutated in place, keyed by entry guid).

        Paging stops at the first empty page. A single warning is logged if
        the same guid is returned more than once while paging this channel.
        """
        offset = 1
        page_size = 500
        duplicate_warning = False
        # Guids seen during this channel's paging only, so an entry that was
        # already stored by another channel does not trigger the warning.
        seen_guids = set()

        while True:
            entries = self.fetch_entries_page(channel, offset, page_size)
            if not entries:
                break

            for entry in entries:
                guid = entry["guid"]
                # The previous check tested the guid against the list of
                # entry dicts, which could never match.
                if guid in seen_guids and not duplicate_warning:
                    # https://bitbucket.org/delx/webdl/issues/102/recent-sbs-series-missing
                    logging.warning("SBS returned a duplicate response, data is probably missing. Try decreasing page_size.")
                    duplicate_warning = True
                seen_guids.add(guid)

                all_entries[guid] = entry

            offset += page_size
            # Progress indicator, one dot per page, only on a terminal.
            # isatty() on the stream also copes with a stdout replacement
            # that has no underlying file descriptor.
            if sys.stdout.isatty():
                sys.stdout.write(".")
                sys.stdout.flush()

    def fetch_entries_page(self, channel, offset, page_size):
        """Fetch one page of feed entries for ``channel``.

        The request covers the inclusive range ``offset`` to
        ``offset + page_size - 1``. Raises Exception if the response has no
        "entries" key.
        """
        url = append_to_qs(FULL_VIDEO_LIST, {
            "range": "%s-%s" % (offset, offset+page_size-1),
            "byCategories": channel,
        })
        data = grab_json(url)
        if "entries" not in data:
            raise Exception("Missing data in SBS response", data)
        return data["entries"]

    def explode_videos_to_unique_categories(self, all_video_entries):
        """Yield ``(category_path, entry_data)`` for every category of every
        entry that produces a non-empty category path."""
        for entry_data in all_video_entries:
            for category_data in entry_data["media$categories"]:
                category_path = self.calculate_category_path(
                    category_data["media$scheme"],
                    category_data["media$name"],
                )
                if category_path:
                    yield category_path, entry_data

    def calculate_category_path(self, scheme, name):
        """Split ``name`` on "/" into a list of path components, prefixed
        with ``scheme`` unless it already starts with it.

        Returns None when ``scheme`` is empty or equal to ``name``.
        """
        if not scheme:
            return
        if scheme == name:
            return
        name = name.split("/")
        if name[0] != scheme:
            name.insert(0, scheme)
        return name

    def create_nav_node(self, parent, category_path):
        """Walk ``category_path`` downwards from ``parent``, reusing children
        with a matching title and creating missing ones, and return the node
        for the last component (``parent`` itself for an empty path)."""
        if not category_path:
            return parent

        current_path = category_path[0]
        current_node = parent.find_existing_child(current_path)
        if not current_node:
            current_node = SbsNavNode(current_path, parent)
        return self.create_nav_node(current_node, category_path[1:])
138
def fill_nodes(root_node):
    """Module entry point: create the SBS root node with ``root_node`` as its parent."""
    SbsRootNode(parent=root_node)