# webdl/common.py
from lxml import etree, html
import cookielib
import json
try:
    import hashlib
except ImportError:
    import md5 as hashlib
import os
import re
import shutil
import signal
import subprocess
import sys
import tempfile
import time
import urllib
import urllib2
import urlparse

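# Optionally route all HTTP traffic through a SOCKS proxy when the
# autosocks helper module is available.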
try:
    import autosocks
    autosocks.try_autosocks()
except ImportError:
    pass

CACHE_DIR = os.path.join(os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache")), "webdl")
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:21.0) Gecko/20100101 Firefox/21.0"

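# A node in the browse tree. Branch nodes override fill_children() to
# populate lazily; downloadable leaf nodes set can_download and
# override download().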
class Node(object):
    def __init__(self, title, parent=None):
        self.title = title
        if parent:
            parent.children.append(self)
        self.parent = parent
        self.children = []
        self.can_download = False

    def get_children(self):
        if not self.children:
            self.fill_children()
        return self.children

    def fill_children(self):
        pass

    def download(self):
        raise NotImplementedError()

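# Build the root menu by letting each site module attach its own nodes.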
def load_root_node():
    root_node = Node("Root")

    import iview
    iview.fill_nodes(root_node)

    import sbs
    sbs.fill_nodes(root_node)

    ### import plus7
    ### plus7.fill_nodes(root_node)

    import brightcove
    brightcove.fill_nodes(root_node)

    return root_node

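# Downloaded filenames are reduced to a safe ASCII whitelist.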
valid_chars = frozenset("-_.()!@#%^ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
def sanify_filename(filename):
    filename = filename.encode("ascii", "ignore")
    filename = "".join(c for c in filename if c in valid_chars)
    return filename

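# All requests share one cookie jar and send a browser User-Agent.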
cookiejar = cookielib.CookieJar()
urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
def _urlopen(url, referrer=None):
    req = urllib2.Request(url)
    req.add_header("User-Agent", USER_AGENT)
    if referrer:
        req.add_header("Referer", referrer)
    return urlopener.open(req)

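# Fetch a URL, caching the response under CACHE_DIR (keyed by the MD5 of
# the URL) for up to max_age seconds. max_age <= 0 bypasses the cache.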
def urlopen(url, max_age):
    ### print url
    if not os.path.isdir(CACHE_DIR):
        os.makedirs(CACHE_DIR)

    if max_age <= 0:
        return _urlopen(url)

    filename = hashlib.md5(url).hexdigest()
    filename = os.path.join(CACHE_DIR, filename)
    if os.path.exists(filename):
        file_age = int(time.time()) - os.path.getmtime(filename)
        if file_age < max_age:
            return open(filename)

    src = _urlopen(url)
    dst = open(filename, "wb")
    try:
        shutil.copyfileobj(src, dst)
    except Exception:
        # don't leave a partial download in the cache
        dst.close()
        try:
            os.unlink(filename)
        except OSError:
            pass
        raise
    src.close()
    dst.close()

    return open(filename)

def grab_text(url, max_age):
    f = urlopen(url, max_age)
    text = f.read().decode("utf-8")
    f.close()
    return text

def grab_html(url, max_age):
    f = urlopen(url, max_age)
    doc = html.parse(f, html.HTMLParser(encoding="utf-8", recover=True))
    f.close()
    return doc

def grab_xml(url, max_age):
    f = urlopen(url, max_age)
    doc = etree.parse(f, etree.XMLParser(encoding="utf-8", recover=True))
    f.close()
    return doc

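# skip_assignment parses everything after the first "=" (for
# "var x = {...}" responses); skip_function parses between the outermost
# parentheses (for JSONP-style "callback({...})" responses).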
def grab_json(url, max_age, skip_assignment=False, skip_function=False):
    f = urlopen(url, max_age)
    if skip_assignment:
        text = f.read()
        pos = text.find("=")
        doc = json.loads(text[pos+1:])
    elif skip_function:
        text = f.read()
        pos = text.find("(")
        rpos = text.rfind(")")
        doc = json.loads(text[pos+1:rpos])
    else:
        doc = json.load(f)
    f.close()
    return doc

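# Run a command, returning True on a zero exit status. Ctrl-C terminates
# the child process (SIGKILL on a second Ctrl-C).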
def exec_subprocess(cmd):
    try:
        p = subprocess.Popen(cmd)
        ret = p.wait()
        if ret != 0:
            print >>sys.stderr, cmd[0], "exited with error code:", ret
            return False
        else:
            return True
    except OSError, e:
        print >>sys.stderr, "Failed to run", cmd[0], e
    except KeyboardInterrupt:
        print "Cancelled", cmd
        try:
            p.terminate()
            p.wait()
        except KeyboardInterrupt:
            p.send_signal(signal.SIGKILL)
            p.wait()
    return False

def check_command_exists(cmd):
    try:
        subprocess.check_output(cmd)
        return True
    except Exception:
        return False

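# Prefer avconv, falling back to ffmpeg. Both invocations copy the audio
# and video streams unchanged and convert ADTS AAC framing for the MP4
# container.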
def generate_remux_cmd(infile, outfile):
    if check_command_exists(["avconv", "--help"]):
        return [
            "avconv",
            "-i", infile,
            "-bsf:a", "aac_adtstoasc",
            "-acodec", "copy",
            "-vcodec", "copy",
            outfile,
        ]

    if check_command_exists(["ffmpeg", "--help"]):
        return [
            "ffmpeg",
            "-i", infile,
            "-bsf:a", "aac_adtstoasc",
            "-acodec", "copy",
            "-vcodec", "copy",
            outfile,
        ]

    raise Exception("You must install ffmpeg or libav-tools")

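# Remux infile into outfile, then sanity check that the output is within
# 10% of the input size before deleting the original.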
def remux(infile, outfile):
    print "Converting %s to mp4" % infile
    cmd = generate_remux_cmd(infile, outfile)
    if not exec_subprocess(cmd):
        # failed, error has already been logged
        return False
    try:
        flv_size = os.stat(infile).st_size
        mp4_size = os.stat(outfile).st_size
        if abs(flv_size - mp4_size) < 0.1 * flv_size:
            os.unlink(infile)
            return True
        else:
            print >>sys.stderr, "The size of", outfile, "is suspicious, did avconv fail?"
            return False
    except Exception, e:
        print >>sys.stderr, "Conversion failed", e
        return False

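# Rename mislabelled FLV files (detected by their magic bytes), then
# remux any .flv or .ts file into an .mp4.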
def convert_to_mp4(filename):
    with open(filename) as f:
        fourcc = f.read(4)
    basename, ext = os.path.splitext(filename)

    if ext == ".mp4" and fourcc == "FLV\x01":
        os.rename(filename, basename + ".flv")
        ext = ".flv"
        filename = basename + ext

    if ext in (".flv", ".ts"):
        filename_mp4 = basename + ".mp4"
        return remux(filename, filename_mp4)

    return ext == ".mp4"

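# HDS downloads are delegated to livestreamer; the pvswf parameter points
# at the player SWF used for verification.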
def download_hds(filename, video_url, pvswf=None):
    filename = sanify_filename(filename)
    video_url = video_url.replace("http://", "hds://")
    print "Downloading: %s" % filename
    cmd = [
        "livestreamer",
        "-o", filename,
        "%s pvswf=%s" % (video_url, pvswf),
        "best",
    ]
    if exec_subprocess(cmd):
        return convert_to_mp4(filename)
    else:
        return False

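# RTMP downloads are delegated to rtmpdump, optionally with SWF
# verification via --swfVfy.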
def download_rtmp(filename, vbase, vpath, hash_url=None):
    filename = sanify_filename(filename)
    print "Downloading: %s" % filename
    if vpath.endswith(".flv"):
        vpath = vpath[:-4]
    cmd = [
        "rtmpdump",
        "-o", filename,
        "-r", vbase,
        "-y", vpath,
    ]
    if hash_url is not None:
        cmd += ["--swfVfy", hash_url]
    if exec_subprocess(cmd):
        return convert_to_mp4(filename)
    else:
        return False

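# Parse an HLS master playlist and return the variant URL with the
# highest advertised BANDWIDTH.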
def download_hls_get_stream(url):
    def parse_bandwidth(line):
        params = line.split(":", 1)[1].split(",")
        for kv in params:
            if "=" not in kv:
                # quoted attribute values (e.g. CODECS) may contain commas
                continue
            k, v = kv.split("=", 1)
            if k == "BANDWIDTH":
                return int(v)
        return 0

    m3u8 = grab_text(url, 0)
    best_bandwidth = None
    best_url = None
    for line in m3u8.split("\n"):
        line = line.strip()
        if not line:
            continue
        if line.startswith("#EXT-X-STREAM-INF:"):
            bandwidth = parse_bandwidth(line)
            if best_bandwidth is None or bandwidth > best_bandwidth:
                best_bandwidth = bandwidth
                best_url = None
        elif not line.startswith("#"):
            # the first URI line following the best STREAM-INF tag so far
            if best_url is None:
                best_url = line

    if not best_url:
        raise Exception("Failed to find best stream for HLS: " + url)

    return best_url

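# Append every media segment to outf. An HTTP error is tolerated only if
# it turns out to be the final segment; if more segments follow, the
# stored error is re-raised.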
def download_hls_segments(outf, url):
    m3u8 = grab_text(url, 0)

    fail_if_not_last_segment = None
    for line in m3u8.split("\n"):
        if not line.strip() or line.startswith("#"):
            continue

        if fail_if_not_last_segment:
            raise fail_if_not_last_segment

        try:
            download_hls_fetch_segment(outf, line)
        except urllib2.HTTPError, e:
            fail_if_not_last_segment = e
            continue
        sys.stdout.write(".")
        sys.stdout.flush()

    sys.stdout.write("\n")

def download_hls_fetch_segment(outf, segment_url):
    src = _urlopen(segment_url)
    try:
        shutil.copyfileobj(src, outf)
    finally:
        src.close()

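# Fetch the highest-bandwidth HLS variant into the target file, then
# remux it to mp4.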
def download_hls(filename, m3u8_master_url, hack_url_func=None):
    filename = sanify_filename(filename)
    print "Downloading: %s" % filename

    if hack_url_func is None:
        hack_url_func = lambda url: url

    tmpdir = tempfile.mkdtemp(prefix="webdl-hls")

    ts_file = None
    try:
        best_stream_url = download_hls_get_stream(hack_url_func(m3u8_master_url))
        ts_file = open(filename, "wb")
        download_hls_segments(ts_file, hack_url_func(best_stream_url))
    except KeyboardInterrupt:
        print "\nCancelled", m3u8_master_url
        return False
    finally:
        shutil.rmtree(tmpdir)
        if ts_file is not None:
            ts_file.close()

    return convert_to_mp4(filename)

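# Sort titles so that "Episode 2" comes before "Episode 10", ignoring
# the words "a" and "the".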
def natural_sort(l, key=None):
    ignore_list = ["a", "the"]
    def key_func(k):
        if key is not None:
            k = key(k)
        k = k.lower()
        newk = []
        for c in re.split("([0-9]+)", k):
            c = c.strip()
            if c.isdigit():
                newk.append(int(c))
            else:
                for subc in c.split():
                    if subc not in ignore_list:
                        newk.append(subc)
        return newk

    return sorted(l, key=key_func)

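# Merge params into the URL's query string; a value of None removes the
# key entirely.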
def append_to_qs(url, params):
    r = list(urlparse.urlsplit(url))
    qs = urlparse.parse_qs(r[3])
    for k, v in params.iteritems():
        if v is not None:
            qs[k] = v
        elif k in qs:
            del qs[k]
    r[3] = urllib.urlencode(qs, True)
    url = urlparse.urlunsplit(r)
    return url