code.delx.au - webdl - common.py
Channel 9 & 10 (Brightcove API) support
[webdl] / common.py
1 # vim:ts=4:sts=4:sw=4:noet
2
3 from lxml import etree, html
4 import cookielib
5 import json
6 try:
7 import hashlib
8 except ImportError:
9 import md5 as hashlib
10 import os
11 import re
12 import shutil
13 import signal
14 import subprocess
15 import sys
16 import tempfile
17 import time
18 import urllib
19 import urllib2
20 import urlparse
21
22
# Optional SOCKS proxy support: if the autosocks helper module is present,
# route all urllib traffic through the user's configured SOCKS proxy.
# Missing module is fine — direct connections are used instead.
try:
    import autosocks
    autosocks.try_autosocks()
except ImportError:
    pass
# On-disk cache for fetched URLs (see urlopen): one file per URL, named by MD5.
CACHE_DIR = os.path.expanduser("~/.cache/webdl")
# Browser-like User-Agent; some broadcaster sites reject unknown clients.
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:21.0) Gecko/20100101 Firefox/21.0"
31
class Node(object):
    """A node in the tree of browsable/downloadable shows.

    Channel modules attach subtrees under a shared root node. Leaf nodes
    override download() and set can_download = True.
    """

    def __init__(self, title, parent=None):
        self.title = title
        if parent:
            # Register ourselves with the parent so it can enumerate us.
            parent.children.append(self)
        self.parent = parent
        self.children = []
        self.can_download = False

    def get_children(self):
        # Children are populated lazily on first access.
        if not self.children:
            self.fill_children()
        return self.children

    def fill_children(self):
        # Subclasses populate self.children here.
        pass

    def download(self):
        # Bug fix: the original "raise NotImplemented" raises a TypeError,
        # because NotImplemented is a sentinel value, not an exception class.
        raise NotImplementedError()
51
52
def load_root_node():
    """Build and return the root menu node with every channel grabber attached."""
    root_node = Node("Root")
    # Each channel module registers its own subtree under the root.
    # Import order matches the original explicit import sequence.
    for module_name in ("iview", "sbs", "plus7", "brightcove"):
        module = __import__(module_name)
        module.fill_nodes(root_node)
    return root_node
69
# Whitelist of characters that are safe to keep in an on-disk filename.
valid_chars = frozenset("-_.()!@#%^ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
def sanify_filename(filename):
    """Return filename reduced to plain-ASCII, filesystem-safe characters."""
    # Drop anything outside ASCII first, then filter against the whitelist.
    ascii_name = filename.encode("ascii", "ignore")
    return "".join(filter(valid_chars.__contains__, ascii_name))
75
# One shared cookie jar / opener so logins and session cookies persist
# across requests within a run.
cookiejar = cookielib.CookieJar()
urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
def _urlopen(url, referrer=None):
    """Open url through the shared cookie-aware opener, spoofing a browser UA."""
    request = urllib2.Request(url)
    request.add_header("User-Agent", USER_AGENT)
    if referrer:
        request.add_header("Referer", referrer)
    return urlopener.open(request)
84
def urlopen(url, max_age):
    """Fetch url, caching the response body in CACHE_DIR for max_age seconds.

    Returns an open file object. max_age <= 0 bypasses the cache entirely.
    Cache entries are keyed by the MD5 hex digest of the URL.
    """
    if not os.path.isdir(CACHE_DIR):
        os.makedirs(CACHE_DIR)

    if max_age <= 0:
        return _urlopen(url)

    filename = hashlib.md5(url).hexdigest()
    filename = os.path.join(CACHE_DIR, filename)
    if os.path.exists(filename):
        file_age = int(time.time()) - os.path.getmtime(filename)
        if file_age < max_age:
            return open(filename)

    src = _urlopen(url)
    # Fix: the original leaked src/dst when copyfileobj (or the open) raised,
    # and "raise e" discarded the original traceback.
    try:
        dst = open(filename, "w")
        try:
            shutil.copyfileobj(src, dst)
        except Exception:
            # Don't leave a half-written cache entry behind.
            try:
                os.unlink(filename)
            except OSError:
                pass
            raise  # bare raise preserves the original traceback
        finally:
            dst.close()
    finally:
        src.close()

    return open(filename)
114
def grab_html(url, max_age):
    """Fetch url (cached for max_age seconds) and parse it as HTML.

    Returns an lxml document; the parser recovers from broken markup.
    """
    f = urlopen(url, max_age)
    # Fix: close the handle even if parsing raises (original leaked it).
    try:
        return html.parse(f, html.HTMLParser(encoding="utf-8", recover=True))
    finally:
        f.close()
120
def grab_xml(url, max_age):
    """Fetch url (cached for max_age seconds) and parse it as XML.

    Returns an lxml document; the parser recovers from malformed input.
    """
    f = urlopen(url, max_age)
    # Fix: close the handle even if parsing raises (original leaked it).
    try:
        return etree.parse(f, etree.XMLParser(encoding="utf-8", recover=True))
    finally:
        f.close()
126
def grab_json(url, max_age, skip_assignment=False, skip_function=False):
    """Fetch url (cached for max_age seconds) and parse it as JSON.

    skip_assignment: payload looks like "var x = {...}" — parse everything
        after the first "=".
    skip_function: payload is JSONP like "callback({...})" — parse the text
        between the outermost parentheses.
    """
    f = urlopen(url, max_age)
    # Fix: close the handle even if reading/parsing raises (original leaked it).
    try:
        if skip_assignment:
            text = f.read()
            return json.loads(text[text.find("=") + 1:])
        if skip_function:
            text = f.read()
            return json.loads(text[text.find("(") + 1:text.rfind(")")])
        return json.load(f)
    finally:
        f.close()
142
def exec_subprocess(cmd):
    # Run cmd (an argv list) and wait for it; returns True iff it exited 0.
    # On Ctrl-C the child is asked to terminate; a second Ctrl-C while
    # waiting escalates to SIGKILL. All failure paths return a falsy value.
    try:
        p = subprocess.Popen(cmd)
        ret = p.wait()
        if ret != 0:
            print >>sys.stderr, cmd[0], "exited with error code:", ret
            return False
        else:
            return True
    except OSError, e:
        # Typically "command not found"; fall through to return False.
        print >>sys.stderr, "Failed to run", cmd[0], e
    except KeyboardInterrupt:
        print "Cancelled", cmd
        # NOTE(review): if the interrupt lands before Popen returns, p is
        # unbound here and this raises NameError — confirm acceptable.
        try:
            p.terminate()
            p.wait()
        except KeyboardInterrupt:
            p.send_signal(signal.SIGKILL)
            p.wait()
    return False
163
164
165 def convert_flv_mp4(orig_filename):
166 basename = os.path.splitext(orig_filename)[0]
167 flv_filename = basename + ".flv"
168 mp4_filename = basename + ".mp4"
169 if orig_filename != flv_filename:
170 os.rename(orig_filename, flv_filename)
171 print "Converting %s to mp4" % flv_filename
172 cmd = [
173 "ffmpeg",
174 "-i", flv_filename,
175 "-acodec", "copy",
176 "-vcodec", "copy",
177 mp4_filename,
178 ]
179 if not exec_subprocess(cmd):
180 return
181 try:
182 flv_size = os.stat(flv_filename).st_size
183 mp4_size = os.stat(mp4_filename).st_size
184 if abs(flv_size - mp4_size) < 0.05 * flv_size:
185 os.unlink(flv_filename)
186 else:
187 print >>sys.stderr, "The size of", mp4_filename, "is suspicious, did ffmpeg fail?"
188 except Exception, e:
189 print "Conversion failed", e
190
def convert_filename(filename):
    """Remux filename to mp4 if its magic number shows it is really FLV data."""
    extension = os.path.splitext(filename.lower())[1]
    if extension not in (".mp4", ".flv"):
        return
    # Check the four-byte FLV signature rather than trusting the extension.
    handle = open(filename)
    fourcc = handle.read(4)
    handle.close()
    if fourcc == "FLV\x01":
        convert_flv_mp4(filename)
198
199 def download_rtmp(filename, vbase, vpath, hash_url=None):
200 filename = sanify_filename(filename)
201 print "Downloading: %s" % filename
202 if vpath.endswith(".flv"):
203 vpath = vpath[:-4]
204 cmd = [
205 "rtmpdump",
206 "-o", filename,
207 "-r", vbase,
208 "-y", vpath,
209 ]
210 if hash_url is not None:
211 cmd += ["--swfVfy", hash_url]
212 if exec_subprocess(cmd):
213 convert_filename(filename)
214 return True
215 else:
216 return False
217
218 def download_urllib(filename, url, referrer=None):
219 filename = sanify_filename(filename)
220 print "Downloading: %s" % filename
221 try:
222 src = _urlopen(url, referrer)
223 dst = open(filename, "w")
224 while True:
225 buf = src.read(1024*1024)
226 if not buf:
227 break
228 dst.write(buf)
229 sys.stdout.write(".")
230 sys.stdout.flush()
231 print
232 convert_filename(filename)
233 return True
234 except KeyboardInterrupt:
235 print "\nCancelled", url
236 finally:
237 try:
238 src.close()
239 except:
240 pass
241 try:
242 dst.close()
243 except:
244 pass
245 return False
246
def natural_sort(l, key=None):
    """Sort l naturally: digit runs compare numerically, case-insensitively.

    Leading articles ("a", "the") are ignored for ordering. An optional
    key callable extracts the string to sort by from each element.
    """
    stop_words = ("a", "the")

    def sort_key(item):
        text = item if key is None else key(item)
        tokens = []
        # Split on digit runs so "ep 10" sorts after "ep 2".
        for piece in re.split(r"([0-9]+)", text.lower()):
            piece = piece.strip()
            if piece.isdigit():
                tokens.append(int(piece))
            else:
                tokens.extend(word for word in piece.split() if word not in stop_words)
        return tokens

    return sorted(l, key=sort_key)
265
def append_to_qs(url, params):
    """Return url with params merged into its query string.

    A value of None removes that key from the query string entirely;
    list values are encoded as repeated parameters (doseq).
    """
    parts = list(urlparse.urlsplit(url))
    qs = urlparse.parse_qs(parts[3])
    # Fix: replace deprecated dict.has_key()/iteritems() with "in"/items().
    for key, value in params.items():
        if value is not None:
            qs[key] = value
        elif key in qs:
            # Explicit None means "drop this parameter".
            del qs[key]
    parts[3] = urllib.urlencode(qs, True)
    return urlparse.urlunsplit(parts)
277