]> code.delx.au - webdl/blob - common.py
cabfd936c30f65e8fb11df8758888f455ac14222
[webdl] / common.py
1 import python2_compat
2
3 import hashlib
4 import http.cookiejar
5 import json
6 import logging
7 import lxml.etree
8 import lxml.html
9 import os
10 import re
11 import shutil
12 import signal
13 import subprocess
14 import time
15 import urllib.parse
16 import urllib.request
17
18
# Optionally route all HTTP traffic through a SOCKS proxy: if the
# autosocks helper module is importable, let it install itself; when it
# is not installed, carry on with direct connections.
try:
    import autosocks
    autosocks.try_autosocks()
except ImportError:
    pass
24
25
# Log at INFO by default; setting the DEBUG environment variable (to any
# value) switches on debug-level logging.
logging.basicConfig(
    format = "%(levelname)s %(message)s",
    level = logging.INFO if os.environ.get("DEBUG", None) is None else logging.DEBUG,
)

# On-disk cache directory for downloaded responses, honouring
# XDG_CACHE_HOME and falling back to ~/.cache.
CACHE_DIR = os.path.join(
    os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache")),
    "webdl"
)

# Desktop-browser User-Agent sent with every request (some sites refuse
# unknown clients).
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:21.0) Gecko/20100101 Firefox/21.0"
38
class Node(object):
    """A node in the tree of downloadable content.

    Nodes form a parent/child hierarchy.  Subclasses that represent a
    downloadable item set ``can_download`` to True and override
    ``download()``; branch subclasses override ``fill_children()``.
    """

    def __init__(self, title, parent=None):
        self.title = title
        # Register with the parent first, preserving the original ordering
        # of side effects.
        if parent:
            parent.children.append(self)
        self.parent = parent
        self.children = []
        self.can_download = False

    def get_children(self):
        """Return child nodes, lazily populating them on first access."""
        if not self.children:
            self.fill_children()
        return self.children

    def fill_children(self):
        """Populate ``self.children``; overridden by branch subclasses."""
        pass

    def download(self):
        # BUG FIX: the original did ``raise NotImplemented``, which raises
        # the non-exception NotImplemented singleton and produces a
        # confusing TypeError in Python 3.  Raise the proper exception.
        raise NotImplementedError
58
59
def load_root_node():
    """Build and return the root of the content tree.

    Site scraper modules are imported here, not at module scope, so that
    loading this module does not drag in every scraper.
    """
    root = Node("Root")

    import iview
    import sbs
    import brightcove

    for site_module in (iview, sbs, brightcove):
        site_module.fill_nodes(root)

    return root
73
# Whitelist of characters permitted in filenames written to disk.
valid_chars = frozenset("-_.()!@#%^ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
def sanify_filename(filename):
    """Drop every character not in ``valid_chars``; result must be non-empty."""
    kept = [ch for ch in filename if ch in valid_chars]
    filename = "".join(kept)
    assert len(filename) > 0
    return filename
79
# One shared cookie jar and opener so cookies persist across requests.
cookiejar = http.cookiejar.CookieJar()
urlopener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookiejar))
def _urlopen(url, referrer=None):
    """Open *url* through the shared opener with a spoofed User-Agent.

    When *referrer* is given it is sent as the Referer header.
    """
    request = urllib.request.Request(url)
    request.add_header("User-Agent", USER_AGENT)
    if referrer:
        request.add_header("Referer", referrer)
    return urlopener.open(request)
88
def urlopen(url, max_age):
    """Fetch *url*, caching the response body on disk.

    *max_age* is the cache lifetime in seconds; a value <= 0 bypasses the
    cache entirely.  Returns a binary file-like object.
    """
    logging.debug("urlopen(%r, %r)", url, max_age)

    if not os.path.isdir(CACHE_DIR):
        os.makedirs(CACHE_DIR)

    if max_age <= 0:
        return _urlopen(url)

    # Cache key is the MD5 of the URL -- used only as a filename, not for
    # anything security sensitive.
    filename = hashlib.md5(url.encode("utf-8")).hexdigest()
    filename = os.path.join(CACHE_DIR, filename)
    if os.path.exists(filename):
        file_age = int(time.time()) - os.path.getmtime(filename)
        if file_age < max_age:
            logging.debug("loading from cache: %s", filename)
            return open(filename, "rb")

    logging.debug("downloading: %s -> %s", url, filename)
    src = _urlopen(url)
    try:
        # BUG FIX: the original leaked both handles when copyfileobj
        # raised, and re-raised with "raise e" which truncates the
        # traceback.  Close both handles in all cases and use a bare
        # raise to preserve the traceback.
        with open(filename, "wb") as dst:
            try:
                shutil.copyfileobj(src, dst)
            except Exception:
                # Remove the partial cache file so a truncated download is
                # never served from cache later.
                try:
                    os.unlink(filename)
                except OSError:
                    pass
                raise
    finally:
        src.close()

    return open(filename, "rb")
121
def grab_text(url, max_age):
    """Fetch *url* (cached for *max_age* seconds) and return UTF-8 text."""
    f = urlopen(url, max_age)
    try:
        return f.read().decode("utf-8")
    finally:
        f.close()
127
def grab_html(url, max_age):
    """Fetch *url* (cached) and parse it as HTML, recovering from errors."""
    parser = lxml.html.HTMLParser(encoding="utf-8", recover=True)
    f = urlopen(url, max_age)
    try:
        return lxml.html.parse(f, parser)
    finally:
        f.close()
133
def grab_xml(url, max_age):
    """Fetch *url* (cached) and parse it as XML, recovering from errors."""
    parser = lxml.etree.XMLParser(encoding="utf-8", recover=True)
    f = urlopen(url, max_age)
    try:
        return lxml.etree.parse(f, parser)
    finally:
        f.close()
139
def grab_json(url, max_age, skip_assignment=False, skip_function=False):
    """Fetch *url* (cached) and parse the body as JSON.

    skip_assignment -- drop everything up to the first "=", for bodies
    shaped like ``var data = {...};``.
    skip_function -- keep only the text between the first "(" and the
    last ")", for JSONP bodies shaped like ``callback({...})``.
    """
    f = urlopen(url, max_age)
    try:
        text = f.read().decode("utf-8")
    finally:
        # BUG FIX: the original only closed the handle after json.loads
        # succeeded, leaking it when parsing raised.
        f.close()

    if skip_assignment:
        pos = text.find("=")
        text = text[pos+1:]

    elif skip_function:
        pos = text.find("(")
        rpos = text.rfind(")")
        text = text[pos+1:rpos]

    return json.loads(text)
156
def exec_subprocess(cmd):
    """Run *cmd* (an argv list) and wait; return True on exit status 0.

    Ctrl-C terminates the child (SIGKILL on a second Ctrl-C); a missing
    executable is logged.  Both cases return False.
    """
    logging.debug("Executing: %s", cmd)
    try:
        child = subprocess.Popen(cmd)
        status = child.wait()
        if status == 0:
            return True
        logging.error("%s exited with error code: %s", cmd[0], status)
    except OSError as e:
        logging.error("Failed to run: %s -- %s", cmd[0], e)
    except KeyboardInterrupt:
        logging.info("Cancelled: %s", cmd)
        # Ask nicely first; a second Ctrl-C while waiting escalates to
        # SIGKILL.
        try:
            child.terminate()
            child.wait()
        except KeyboardInterrupt:
            child.send_signal(signal.SIGKILL)
            child.wait()
    return False
178
179
def check_command_exists(cmd):
    """Return True when running *cmd* succeeds, False on any failure."""
    try:
        subprocess.check_output(cmd)
    except Exception:
        return False
    return True
186
def generate_remux_cmd(infile, outfile):
    """Build an argv list remuxing *infile* into *outfile* without re-encoding.

    Prefers avconv and falls back to ffmpeg; raises when neither tool is
    installed.  Both tools take identical arguments here.
    """
    remux_args = [
        "-i", infile,
        "-bsf:a", "aac_adtstoasc",
        "-acodec", "copy",
        "-vcodec", "copy",
        outfile,
    ]
    for tool in ("avconv", "ffmpeg"):
        if check_command_exists([tool, "--help"]):
            return [tool] + remux_args

    raise Exception("You must install ffmpeg or libav-tools")
209
def remux(infile, outfile):
    """Remux *infile* into *outfile* (mp4); delete *infile* on success.

    The output is considered good when its size is within 10% of the
    input's.  Returns True on success, False otherwise.
    """
    logging.info("Converting %s to mp4", infile)
    cmd = generate_remux_cmd(infile, outfile)
    if not exec_subprocess(cmd):
        # failed, error has already been logged
        return False
    try:
        in_size = os.stat(infile).st_size
        out_size = os.stat(outfile).st_size
        if abs(in_size - out_size) >= 0.1 * in_size:
            logging.error("The size of %s is suspicious, did the remux fail?", outfile)
            return False
        os.unlink(infile)
        return True
    except Exception as e:
        logging.error("Conversion failed! %s", e)
        return False
228
def convert_to_mp4(filename):
    """Ensure *filename* ends up as a real mp4, remuxing .flv/.ts files.

    Files named .mp4 that actually contain FLV data (by magic number)
    are renamed to .flv first so the remux step picks them up.  Returns
    True when an mp4 exists afterwards.
    """
    with open(filename, "rb") as f:
        magic = f.read(4)
    basename, ext = os.path.splitext(filename)

    # Mislabelled FLV download: fix the extension before remuxing.
    if ext == ".mp4" and magic == b"FLV\x01":
        os.rename(filename, basename + ".flv")
        ext = ".flv"
        filename = basename + ext

    if ext not in (".flv", ".ts"):
        return ext == ".mp4"

    return remux(filename, basename + ".mp4")
244
245
def download_hds(filename, video_url, pvswf=None):
    """Download an HDS stream via livestreamer, then convert it to mp4.

    *pvswf* is the player-verification SWF URL some sites require.
    """
    filename = sanify_filename(filename)
    logging.info("Downloading: %s", filename)

    video_url = video_url.replace("http://", "hds://")
    param = "%s pvswf=%s" % (video_url, pvswf) if pvswf else video_url

    cmd = [
        "livestreamer",
        "-o", filename,
        param,
        "best",
    ]
    if not exec_subprocess(cmd):
        return False
    return convert_to_mp4(filename)
266
def download_hls(filename, video_url):
    """Download an HLS stream via livestreamer, then convert it to mp4."""
    filename = sanify_filename(filename)
    video_url = video_url.replace("http://", "hlsvariant://")
    logging.info("Downloading: %s", filename)

    cmd = [
        "livestreamer",
        "-o", filename,
        video_url,
        "best",
    ]
    if not exec_subprocess(cmd):
        return False
    return convert_to_mp4(filename)
282
def download_http(filename, video_url):
    """Download a plain HTTP URL with curl, then convert it to mp4."""
    filename = sanify_filename(filename)
    logging.info("Downloading: %s", filename)

    cmd = [
        "curl",
        "--fail", "--retry", "3",
        "-o", filename,
        video_url,
    ]
    if not exec_subprocess(cmd):
        return False
    return convert_to_mp4(filename)
297
def natural_sort(l, key=None):
    """Sort *l* "naturally": digit runs compare numerically (up to 5
    digits), comparison is case-insensitive, and the articles "a"/"the"
    are ignored.  *key* optionally extracts the string to sort by.
    """
    ignore_list = ["a", "the"]

    def sort_key(item):
        text = key(item) if key is not None else item
        text = text.lower()
        parts = []
        # Alternating non-digit / digit chunks; pad digits so "2" < "10".
        for chunk in re.split("([0-9]+)", text):
            chunk = chunk.strip()
            if chunk.isdigit():
                parts.append(chunk.zfill(5))
            else:
                parts.extend(word for word in chunk.split() if word not in ignore_list)
        return parts

    return sorted(l, key=sort_key)
316
def append_to_qs(url, params):
    """Return *url* with its query string updated from the dict *params*.

    A value of None removes that key; any other value sets or replaces
    it.  The resulting query keys are emitted in sorted order.
    """
    scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url)
    qs = urllib.parse.parse_qs(query)
    for key, value in params.items():
        if value is None:
            qs.pop(key, None)
        else:
            qs[key] = value
    query = urllib.parse.urlencode(sorted(qs.items()), True)
    return urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))
328