from lxml import etree, html
import cookielib
import json
try:
    import hashlib
except ImportError:
    import md5 as hashlib
import os
import re
import shutil
import signal
import subprocess
import sys
import tempfile
import time
import urllib
import urllib2
import urlparse


try:
    import autosocks
    autosocks.try_autosocks()
except ImportError:
    pass

CACHE_DIR = os.path.join(os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache")), "webdl")
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:21.0) Gecko/20100101 Firefox/21.0"

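# Node models one entry in the menu tree that the downloader presents:
# a title, an optional parent, lazily populated children, and a
# can_download flag. Site-specific subclasses are expected to override
# fill_children() and download().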
class Node(object):
    def __init__(self, title, parent=None):
        self.title = title
        if parent:
            parent.children.append(self)
        self.parent = parent
        self.children = []
        self.can_download = False

    def get_children(self):
        if not self.children:
            self.fill_children()
        return self.children

    def fill_children(self):
        pass

    def download(self):
        # NotImplemented is a value, not an exception type; raise the real exception
        raise NotImplementedError()


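# load_root_node builds the top of the menu tree by letting each site
# module (iview, sbs, brightcove) attach its own nodes to the root.
# Illustrative only -- a caller might walk the tree like this:
#   for node in load_root_node().get_children():
#       print node.title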
def load_root_node():
    root_node = Node("Root")

    import iview
    iview.fill_nodes(root_node)

    import sbs
    sbs.fill_nodes(root_node)

    import brightcove
    brightcove.fill_nodes(root_node)

    return root_node

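# sanify_filename strips a title down to a safe ASCII filename by
# dropping any character outside valid_chars. Illustrative example
# (assumed input, not from the codebase): u"News: 7:30 Report?" becomes
# "News 730 Report".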
valid_chars = frozenset("-_.()!@#%^ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
def sanify_filename(filename):
    filename = filename.encode("ascii", "ignore")
    filename = "".join(c for c in filename if c in valid_chars)
    return filename

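# All HTTP requests go through a single opener that shares one cookie
# jar and always sends a fixed Firefox User-Agent; _urlopen optionally
# adds a Referer header as well.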
cookiejar = cookielib.CookieJar()
urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
def _urlopen(url, referrer=None):
    req = urllib2.Request(url)
    req.add_header("User-Agent", USER_AGENT)
    if referrer:
        req.add_header("Referer", referrer)
    return urlopener.open(req)

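# urlopen wraps _urlopen with a simple on-disk cache: responses are
# stored under CACHE_DIR keyed by the MD5 of the URL and reused while
# they are younger than max_age seconds. A max_age of zero or less
# bypasses the cache entirely. Note that cache hits are plain file
# objects, so callers only ever see the response body, not the headers.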
def urlopen(url, max_age):
    ### print url
    if not os.path.isdir(CACHE_DIR):
        os.makedirs(CACHE_DIR)

    if max_age <= 0:
        return _urlopen(url)

    filename = hashlib.md5(url).hexdigest()
    filename = os.path.join(CACHE_DIR, filename)
    if os.path.exists(filename):
        file_age = int(time.time()) - os.path.getmtime(filename)
        if file_age < max_age:
            return open(filename)

    src = _urlopen(url)
    dst = open(filename, "wb")
    try:
        shutil.copyfileobj(src, dst)
    except Exception, e:
        try:
            os.unlink(filename)
        except OSError:
            pass
        raise e
    src.close()
    dst.close()

    return open(filename)

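# The grab_* helpers below fetch a URL through the cache and decode the
# body: grab_text returns a unicode string, grab_html and grab_xml
# return lxml documents parsed with a forgiving recover=True parser.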
def grab_text(url, max_age):
    f = urlopen(url, max_age)
    text = f.read().decode("utf-8")
    f.close()
    return text

def grab_html(url, max_age):
    f = urlopen(url, max_age)
    doc = html.parse(f, html.HTMLParser(encoding="utf-8", recover=True))
    f.close()
    return doc

def grab_xml(url, max_age):
    f = urlopen(url, max_age)
    doc = etree.parse(f, etree.XMLParser(encoding="utf-8", recover=True))
    f.close()
    return doc

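# grab_json handles plain JSON plus two common wrappings, illustrated
# here with made-up payloads:
#   skip_assignment: 'var data = {"a": 1}'  -> parse everything after the first "="
#   skip_function:   'callback({"a": 1});'  -> parse everything between the outermost parentheses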
def grab_json(url, max_age, skip_assignment=False, skip_function=False):
    f = urlopen(url, max_age)
    if skip_assignment:
        text = f.read()
        pos = text.find("=")
        doc = json.loads(text[pos+1:])
    elif skip_function:
        text = f.read()
        pos = text.find("(")
        rpos = text.rfind(")")
        doc = json.loads(text[pos+1:rpos])
    else:
        doc = json.load(f)
    f.close()
    return doc

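# exec_subprocess runs an external command and returns True only if it
# exited with status 0. Ctrl-C is forwarded to the child: first via
# terminate(), then with SIGKILL if the user interrupts again.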
def exec_subprocess(cmd):
    try:
        p = subprocess.Popen(cmd)
        ret = p.wait()
        if ret != 0:
            print >>sys.stderr, cmd[0], "exited with error code:", ret
            return False
        else:
            return True
    except OSError, e:
        print >>sys.stderr, "Failed to run", cmd[0], e
    except KeyboardInterrupt:
        print "Cancelled", cmd
        try:
            p.terminate()
            p.wait()
        except KeyboardInterrupt:
            p.send_signal(signal.SIGKILL)
            p.wait()
    return False


def check_command_exists(cmd):
    try:
        subprocess.check_output(cmd)
        return True
    except Exception:
        return False

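# generate_remux_cmd prefers avconv and falls back to ffmpeg. Both
# invocations copy the audio and video streams unchanged and apply the
# aac_adtstoasc bitstream filter so AAC audio taken from FLV/TS
# containers is valid inside MP4.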
def generate_remux_cmd(infile, outfile):
    if check_command_exists(["avconv", "--help"]):
        return [
            "avconv",
            "-i", infile,
            "-bsf:a", "aac_adtstoasc",
            "-acodec", "copy",
            "-vcodec", "copy",
            outfile,
        ]

    if check_command_exists(["ffmpeg", "--help"]):
        return [
            "ffmpeg",
            "-i", infile,
            "-bsf:a", "aac_adtstoasc",
            "-acodec", "copy",
            "-vcodec", "copy",
            outfile,
        ]

    raise Exception("You must install ffmpeg or libav-tools")

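# remux converts a downloaded FLV/TS file to MP4 without re-encoding.
# The original is deleted only when the output size is within 10% of
# the input, as a rough sanity check that the conversion did not fail
# silently.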
def remux(infile, outfile):
    print "Converting %s to mp4" % infile
    cmd = generate_remux_cmd(infile, outfile)
    if not exec_subprocess(cmd):
        # failed, error has already been logged
        return False
    try:
        flv_size = os.stat(infile).st_size
        mp4_size = os.stat(outfile).st_size
        if abs(flv_size - mp4_size) < 0.1 * flv_size:
            os.unlink(infile)
            return True
        else:
            print >>sys.stderr, "The size of", outfile, "is suspicious, did avconv fail?"
            return False
    except Exception, e:
        print >>sys.stderr, "Conversion failed", e
        return False

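# convert_to_mp4 inspects the first four bytes of the file: "FLV\x01"
# is the FLV magic number, so a file saved with a .mp4 extension but an
# FLV header is renamed to .flv before being remuxed. Returns True if
# the final file is a usable .mp4.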
def convert_to_mp4(filename):
    # read the fourcc in binary mode so the header bytes are untouched
    with open(filename, "rb") as f:
        fourcc = f.read(4)
    basename, ext = os.path.splitext(filename)

    if ext == ".mp4" and fourcc == "FLV\x01":
        os.rename(filename, basename + ".flv")
        ext = ".flv"
        filename = basename + ext

    if ext in (".flv", ".ts"):
        filename_mp4 = basename + ".mp4"
        return remux(filename, filename_mp4)

    return ext == ".mp4"


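# download_hds fetches an Adobe HDS stream by handing an hds:// URL to
# the livestreamer tool; the optional pvswf argument points at the
# player SWF used for SWF verification. The result is then remuxed to
# MP4 where necessary.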
def download_hds(filename, video_url, pvswf=None):
    filename = sanify_filename(filename)
    print "Downloading: %s" % filename

    video_url = video_url.replace("http://", "hds://")
    if pvswf:
        param = "%s pvswf=%s" % (video_url, pvswf)
    else:
        param = video_url

    cmd = [
        "livestreamer",
        "-o", filename,
        param,
        "best",
    ]
    if exec_subprocess(cmd):
        return convert_to_mp4(filename)
    else:
        return False

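# download_hls is the HLS equivalent: livestreamer is pointed at the
# variant playlist via the hlsvariant:// scheme and asked for the
# "best" quality stream.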
def download_hls(filename, video_url):
    filename = sanify_filename(filename)
    video_url = video_url.replace("http://", "hlsvariant://")
    print "Downloading: %s" % filename
    cmd = [
        "livestreamer",
        "-o", filename,
        video_url,
        "best",
    ]
    if exec_subprocess(cmd):
        return convert_to_mp4(filename)
    else:
        return False

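# natural_sort orders strings the way a person would read them: runs of
# digits compare numerically and the articles "a"/"the" are ignored.
# Illustrative (assumed inputs): ["Episode 10", "Episode 2"] sorts to
# ["Episode 2", "Episode 10"].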
def natural_sort(l, key=None):
    ignore_list = ["a", "the"]
    def key_func(k):
        if key is not None:
            k = key(k)
        k = k.lower()
        newk = []
        for c in re.split("([0-9]+)", k):
            c = c.strip()
            if c.isdigit():
                newk.append(int(c))
            else:
                for subc in c.split():
                    if subc not in ignore_list:
                        newk.append(subc)
        return newk

    return sorted(l, key=key_func)

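# append_to_qs merges params into a URL's query string; a value of None
# removes that key instead. Illustrative (assumed input):
#   append_to_qs("http://example.com/p?a=1", {"b": "2"})
# yields roughly "http://example.com/p?a=1&b=2" (parameter order may
# differ, since parse_qs returns an unordered dict).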
def append_to_qs(url, params):
    r = list(urlparse.urlsplit(url))
    qs = urlparse.parse_qs(r[3])
    for k, v in params.iteritems():
        if v is not None:
            qs[k] = v
        elif k in qs:
            del qs[k]
    r[3] = urllib.urlencode(qs, True)
    url = urlparse.urlunsplit(r)
    return url