# webdl / common.py
from lxml import etree, html
import cookielib
import json
try:
    import hashlib
except ImportError:
    # very old Pythons without hashlib fall back to the md5 module,
    # which also provides an md5() constructor
    import md5 as hashlib
import os
import re
import shutil
import signal
import subprocess
import sys
import tempfile
import time
import urllib
import urllib2
import urlparse


try:
    # optional SOCKS proxy support; silently skipped if autosocks is not installed
    import autosocks
    autosocks.try_autosocks()
except ImportError:
    pass

CACHE_DIR = os.path.join(os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache")), "webdl")
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:21.0) Gecko/20100101 Firefox/21.0"

class Node(object):
    def __init__(self, title, parent=None):
        self.title = title
        if parent:
            parent.children.append(self)
        self.parent = parent
        self.children = []
        self.can_download = False

    def get_children(self):
        # children are populated lazily, the first time they are asked for
        if not self.children:
            self.fill_children()
        return self.children

    def fill_children(self):
        pass

    def download(self):
        raise NotImplementedError()


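# Example (illustrative only; the titles below are made up, not from any real
# catalogue). Nodes attach themselves to their parent, so building a tree is
# just a matter of constructing them in order:
#
#   root = Node("Root")
#   show = Node("Example Show", parent=root)
#   episode = Node("Example Episode", parent=show)
#   [n.title for n in root.get_children()]   # -> ["Example Show"]
#
# Site modules such as iview or sbs are expected to subclass Node and override
# fill_children() and download().
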
def load_root_node():
    root_node = Node("Root")

    import iview
    iview.fill_nodes(root_node)

    import sbs
    sbs.fill_nodes(root_node)

    ### import plus7
    ### plus7.fill_nodes(root_node)

    import brightcove
    brightcove.fill_nodes(root_node)

    return root_node

valid_chars = frozenset("-_.()!@#%^ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
def sanify_filename(filename):
    # drop non-ASCII characters, then anything that is not filesystem-safe
    filename = filename.encode("ascii", "ignore")
    filename = "".join(c for c in filename if c in valid_chars)
    return filename

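# Example (illustrative): colons, slashes and question marks are not in
# valid_chars, so they are silently dropped:
#
#   sanify_filename(u"News: The 7.30 Report?")   # -> "News The 7.30 Report"
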
cookiejar = cookielib.CookieJar()
urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
def _urlopen(url, referrer=None):
    req = urllib2.Request(url)
    req.add_header("User-Agent", USER_AGENT)
    if referrer:
        req.add_header("Referer", referrer)
    return urlopener.open(req)

def urlopen(url, max_age):
    ### print url
    if not os.path.isdir(CACHE_DIR):
        os.makedirs(CACHE_DIR)

    if max_age <= 0:
        return _urlopen(url)

    # cache responses on disk, keyed by the MD5 of the URL
    filename = hashlib.md5(url).hexdigest()
    filename = os.path.join(CACHE_DIR, filename)
    if os.path.exists(filename):
        file_age = int(time.time()) - os.path.getmtime(filename)
        if file_age < max_age:
            return open(filename)

    src = _urlopen(url)
    dst = open(filename, "wb")
    try:
        shutil.copyfileobj(src, dst)
    except Exception, e:
        # don't leave a half-written cache file behind
        try:
            os.unlink(filename)
        except OSError:
            pass
        raise e
    src.close()
    dst.close()

    return open(filename)

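# Example (illustrative; the URL is made up): a max_age of one hour serves
# repeat requests from the on-disk copy in CACHE_DIR; max_age <= 0 always
# refetches.
#
#   f = urlopen("http://www.example.com/feed.xml", 3600)   # cached for an hour
#   f = urlopen("http://www.example.com/feed.xml", 0)      # bypasses the cache
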
def grab_text(url, max_age):
    f = urlopen(url, max_age)
    text = f.read().decode("utf-8")
    f.close()
    return text

def grab_html(url, max_age):
    f = urlopen(url, max_age)
    doc = html.parse(f, html.HTMLParser(encoding="utf-8", recover=True))
    f.close()
    return doc

def grab_xml(url, max_age):
    f = urlopen(url, max_age)
    doc = etree.parse(f, etree.XMLParser(encoding="utf-8", recover=True))
    f.close()
    return doc

def grab_json(url, max_age, skip_assignment=False, skip_function=False):
    f = urlopen(url, max_age)
    if skip_assignment:
        # response looks like "something = {...}"; parse everything after the "="
        text = f.read()
        pos = text.find("=")
        doc = json.loads(text[pos+1:])
    elif skip_function:
        # JSONP-style response "callback({...})"; parse the part inside the parens
        text = f.read()
        pos = text.find("(")
        rpos = text.rfind(")")
        doc = json.loads(text[pos+1:rpos])
    else:
        doc = json.load(f)
    f.close()
    return doc

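# Example (illustrative; the URLs and payloads are made up):
#
#   grab_json("http://www.example.com/data.json", 3600)
#       parses a plain JSON document
#   grab_json("http://www.example.com/data.js", 3600, skip_assignment=True)
#       handles payloads such as:  var data = {"shows": []}
#   grab_json("http://www.example.com/data.jsonp", 3600, skip_function=True)
#       handles payloads such as:  callback({"shows": []})
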
def exec_subprocess(cmd):
    try:
        p = subprocess.Popen(cmd)
        ret = p.wait()
        if ret != 0:
            print >>sys.stderr, cmd[0], "exited with error code:", ret
            return False
        else:
            return True
    except OSError, e:
        print >>sys.stderr, "Failed to run", cmd[0], e
    except KeyboardInterrupt:
        print "Cancelled", cmd
        # give the child a chance to exit cleanly, then kill it on a second Ctrl-C
        try:
            p.terminate()
            p.wait()
        except KeyboardInterrupt:
            p.send_signal(signal.SIGKILL)
            p.wait()
    return False

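# Example (illustrative, assuming a POSIX system): the return value simply
# reports whether the command ran and exited with status 0.
#
#   exec_subprocess(["true"])    # -> True
#   exec_subprocess(["false"])   # -> False (the failure is logged to stderr)
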

def check_command_exists(cmd):
    try:
        subprocess.check_output(cmd)
        return True
    except Exception:
        return False

def generate_remux_cmd(infile, outfile):
    if check_command_exists(["avconv", "--help"]):
        return [
            "avconv",
            "-i", infile,
            "-bsf:a", "aac_adtstoasc",
            "-acodec", "copy",
            "-vcodec", "copy",
            outfile,
        ]

    if check_command_exists(["ffmpeg", "--help"]):
        return [
            "ffmpeg",
            "-i", infile,
            "-bsf:a", "aac_adtstoasc",
            "-acodec", "copy",
            "-vcodec", "copy",
            outfile,
        ]

    raise Exception("You must install ffmpeg or libav-tools")

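# Example (illustrative; "episode.flv" / "episode.mp4" are made-up names):
# on a system with avconv installed this builds the equivalent of
#
#   avconv -i episode.flv -bsf:a aac_adtstoasc -acodec copy -vcodec copy episode.mp4
#
# i.e. the audio and video streams are copied into an mp4 container without
# re-encoding.
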
def remux(infile, outfile):
    print "Converting %s to mp4" % infile
    cmd = generate_remux_cmd(infile, outfile)
    if not exec_subprocess(cmd):
        # failed, error has already been logged
        return False
    try:
        flv_size = os.stat(infile).st_size
        mp4_size = os.stat(outfile).st_size
        # sanity check: remuxing only changes the container, so the output
        # should be within 10% of the input size before the input is deleted
        if abs(flv_size - mp4_size) < 0.1 * flv_size:
            os.unlink(infile)
            return True
        else:
            print >>sys.stderr, "The size of", outfile, "is suspicious, did avconv fail?"
            return False
    except Exception, e:
        print >>sys.stderr, "Conversion failed", e
        return False

def convert_to_mp4(filename):
    with open(filename) as f:
        fourcc = f.read(4)
    basename, ext = os.path.splitext(filename)

    # a file named .mp4 may actually contain FLV data; detect it by the FLV
    # magic bytes and rename it before remuxing
    if ext == ".mp4" and fourcc == "FLV\x01":
        os.rename(filename, basename + ".flv")
        ext = ".flv"
        filename = basename + ext

    if ext in (".flv", ".ts"):
        filename_mp4 = basename + ".mp4"
        return remux(filename, filename_mp4)

    return ext == ".mp4"


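# Example (illustrative; the filenames are made up):
#
#   convert_to_mp4("episode.flv")   remuxes to "episode.mp4" and, if the size
#                                   check in remux() passes, deletes the .flv
#   convert_to_mp4("episode.ts")    same, for HLS transport streams
#   convert_to_mp4("episode.mp4")   returns True unless the file really
#                                   contains FLV data, in which case it is
#                                   renamed to .flv and remuxed
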
def download_hds(filename, video_url, pvswf=None):
    filename = sanify_filename(filename)
    video_url = video_url.replace("http://", "hds://")
    print "Downloading: %s" % filename
    cmd = [
        "livestreamer",
        "-o", filename,
        "%s pvswf=%s" % (video_url, pvswf),
        "best",
    ]
    if exec_subprocess(cmd):
        return convert_to_mp4(filename)
    else:
        return False

def download_hls(filename, video_url):
    filename = sanify_filename(filename)
    video_url = video_url.replace("http://", "hlsvariant://")
    print "Downloading: %s" % filename
    cmd = [
        "livestreamer",
        "-o", filename,
        video_url,
        "best",
    ]
    if exec_subprocess(cmd):
        return convert_to_mp4(filename)
    else:
        return False

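# Example (illustrative; the URL and filename are made up):
#
#   download_hls("Example Episode.ts", "http://www.example.com/master.m3u8")
#
# runs the equivalent of
#
#   livestreamer -o "Example Episode.ts" "hlsvariant://www.example.com/master.m3u8" best
#
# and then remuxes the downloaded transport stream into "Example Episode.mp4".
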
def download_rtmp(filename, vbase, vpath, hash_url=None):
    filename = sanify_filename(filename)
    print "Downloading: %s" % filename
    if vpath.endswith(".flv"):
        vpath = vpath[:-4]
    cmd = [
        "rtmpdump",
        "-o", filename,
        "-r", vbase,
        "-y", vpath,
    ]
    if hash_url is not None:
        cmd += ["--swfVfy", hash_url]
    if exec_subprocess(cmd):
        return convert_to_mp4(filename)
    else:
        return False

def natural_sort(l, key=None):
    # sort strings the way a human would: numbers compare numerically
    # ("Episode 2" before "Episode 10") and the words "a" and "the" are ignored
    ignore_list = ["a", "the"]
    def key_func(k):
        if key is not None:
            k = key(k)
        k = k.lower()
        newk = []
        for c in re.split("([0-9]+)", k):
            c = c.strip()
            if c.isdigit():
                newk.append(int(c))
            else:
                for subc in c.split():
                    if subc not in ignore_list:
                        newk.append(subc)
        return newk

    return sorted(l, key=key_func)

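# Example (illustrative):
#
#   natural_sort(["Episode 10", "Episode 2", "The Episode 1"])
#   # -> ["The Episode 1", "Episode 2", "Episode 10"]
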
def append_to_qs(url, params):
    r = list(urlparse.urlsplit(url))
    qs = urlparse.parse_qs(r[3])
    for k, v in params.iteritems():
        if v is not None:
            qs[k] = v
        elif k in qs:
            # a value of None removes the parameter from the query string
            del qs[k]
    r[3] = urllib.urlencode(qs, True)
    url = urlparse.urlunsplit(r)
    return url

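# Example (illustrative; the URL is made up): setting a key replaces any
# existing value, and passing None removes it.
#
#   append_to_qs("http://www.example.com/api?a=1&b=2", {"b": "3", "a": None})
#   # -> "http://www.example.com/api?b=3"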