# webdl / common.py
# Source: code.delx.au webdl repository (gitweb blob export)
# Change note: iView fixed for metered ISPs like Optus
1 # vim:ts=4:sts=4:sw=4:noet
2
3 from lxml import etree, html
4 import cookielib
5 import json
6 try:
7 import hashlib
8 except ImportError:
9 import md5 as hashlib
10 import os
11 import re
12 import shutil
13 import signal
14 import subprocess
15 import sys
16 import tempfile
17 import time
18 import urllib
19 import urllib2
20 import urlparse
21
22
# Optionally route all traffic through a SOCKS proxy: if the autosocks
# helper module is installed, activate it; otherwise carry on without it.
try:
	import autosocks
	autosocks.try_autosocks()
except ImportError:
	pass

# Directory used by urlopen() below to cache fetched URL bodies on disk.
CACHE_DIR = os.path.expanduser("~/.cache/webdl")
# Sent on every request by _urlopen(); presents as a desktop Firefox —
# presumably so the target sites don't reject scripted clients (TODO confirm).
USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1"
31
class Node(object):
	"""One entry in the menu tree of browsable/downloadable shows.

	Constructing a Node with a parent automatically appends the new node
	to that parent's ``children`` list, so the tree builds itself.
	"""

	def __init__(self, title, parent=None):
		self.title = title
		if parent:
			parent.children.append(self)
		self.parent = parent
		self.children = []  # populated lazily via fill_children()
		# NOTE(review): presumably set to True by site modules on leaf
		# episode nodes -- confirm against iview/sbs/plus7 subclasses.
		self.can_download = False

	def get_children(self):
		"""Return child nodes, populating them on first access."""
		if not self.children:
			self.fill_children()
		return self.children

	def fill_children(self):
		"""Hook for subclasses: populate self.children on demand."""
		pass

	def download(self):
		# BUG FIX: was `raise NotImplemented`, which raises a TypeError
		# because NotImplemented is a sentinel value, not an exception class.
		raise NotImplementedError("download() must be overridden by downloadable nodes")
52
def load_root_node():
	"""Build the root menu node and let each site module attach its subtree."""
	root_node = Node("Root")
	for module_name in ("iview", "sbs", "plus7"):
		site = __import__(module_name)
		site.fill_nodes(root_node)
	return root_node
66
# Whitelist of characters allowed to appear in generated filenames.
valid_chars = frozenset("-_.()!@#%^ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
def sanify_filename(filename):
	"""Return *filename* with every character outside ``valid_chars`` removed.

	BUG FIX: the old ``filename.encode("ascii", "ignore")`` step was
	redundant -- non-ASCII characters are not in ``valid_chars`` and get
	filtered out anyway -- and it breaks under Python 3, where iterating
	the resulting bytes yields ints that never match the whitelist.
	"""
	return "".join(c for c in filename if c in valid_chars)
72
# Shared opener so cookies persist across requests within one run.
cookiejar = cookielib.CookieJar()
urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
def _urlopen(url, referrer=None):
	"""Open *url* through the cookie-aware opener, spoofing our user agent.

	If *referrer* is truthy it is sent as the Referer header.
	"""
	headers = {"User-Agent": USER_AGENT}
	if referrer:
		headers["Referer"] = referrer
	return urlopener.open(urllib2.Request(url, headers=headers))
81
def urlopen(url, max_age):
	"""Open *url*, caching the response body on disk for *max_age* seconds.

	``max_age <= 0`` bypasses the cache entirely and returns the live HTTP
	response.  Otherwise returns an open file object on the cached copy,
	fetching and storing it first if missing or older than *max_age*.
	"""
	if not os.path.isdir(CACHE_DIR):
		os.makedirs(CACHE_DIR)

	if max_age <= 0:
		return _urlopen(url)

	# Cache key is the md5 of the URL itself.
	cache_file = os.path.join(CACHE_DIR, hashlib.md5(url).hexdigest())
	if os.path.exists(cache_file):
		file_age = int(time.time()) - os.path.getmtime(cache_file)
		if file_age < max_age:
			return open(cache_file)

	src = _urlopen(url)
	try:
		# NOTE(review): text mode, matching the open() on the read side;
		# harmless on POSIX, would need "wb"/"rb" on Windows -- confirm target.
		dst = open(cache_file, "w")
		try:
			shutil.copyfileobj(src, dst)
		except Exception:
			# Don't leave a half-written cache entry behind.
			try:
				os.unlink(cache_file)
			except OSError:
				pass
			# BUG FIX: bare raise preserves the traceback (`raise e` lost it).
			raise
		finally:
			dst.close()  # BUG FIX: handle leaked when copyfileobj raised
	finally:
		src.close()  # BUG FIX: handle leaked when copyfileobj raised

	return open(cache_file)
111
def grab_html(url, max_age):
	"""Fetch *url* via the cache and parse the body as (lenient, UTF-8) HTML."""
	parser = html.HTMLParser(encoding="utf-8", recover=True)
	fh = urlopen(url, max_age)
	doc = html.parse(fh, parser)
	fh.close()
	return doc
117
def grab_xml(url, max_age):
	"""Fetch *url* via the cache and parse the body as (lenient, UTF-8) XML."""
	parser = etree.XMLParser(encoding="utf-8", recover=True)
	fh = urlopen(url, max_age)
	doc = etree.parse(fh, parser)
	fh.close()
	return doc
123
def grab_json(url, max_age, skip_assignment=False, skip_function=False):
	"""Fetch *url* via the cache and parse the body as JSON.

	skip_assignment: drop everything up to and including the first "=",
	for feeds shaped like ``var foo = {...}``.
	skip_function: keep only the text between the first "(" and the last
	")", for JSON-P style ``callback({...})`` responses.
	"""
	fh = urlopen(url, max_age)
	if skip_assignment:
		text = fh.read()
		doc = json.loads(text[text.find("=") + 1:])
	elif skip_function:
		text = fh.read()
		doc = json.loads(text[text.find("(") + 1:text.rfind(")")])
	else:
		doc = json.load(fh)
	fh.close()
	return doc
139
def exec_subprocess(cmd):
	"""Run *cmd* (an argv list) and return True iff it exited with status 0.

	Returns False on a non-zero exit status, on failure to start the
	executable, or when the user hits Ctrl-C.  On Ctrl-C the child is asked
	to terminate and waited on; a second Ctrl-C while waiting escalates to
	SIGKILL so we never leave an orphaned child running.
	"""
	try:
		p = subprocess.Popen(cmd)
		ret = p.wait()
		if ret != 0:
			print >>sys.stderr, cmd[0], "exited with error code:", ret
			return False
		else:
			return True
	except OSError, e:
		# Executable missing or not runnable; report and fall through to False.
		print >>sys.stderr, "Failed to run", cmd[0], e
	except KeyboardInterrupt:
		print "Cancelled", cmd
		try:
			# NOTE(review): if the interrupt lands before Popen() returns,
			# `p` is unbound here and this raises NameError -- confirm intended.
			p.terminate()
			p.wait()
		except KeyboardInterrupt:
			# Second Ctrl-C: stop waiting politely, kill outright.
			p.send_signal(signal.SIGKILL)
			p.wait()
	return False
160
161
162 def convert_flv_mp4(orig_filename):
163 basename = os.path.splitext(orig_filename)[0]
164 flv_filename = basename + ".flv"
165 mp4_filename = basename + ".mp4"
166 if orig_filename != flv_filename:
167 os.rename(orig_filename, flv_filename)
168 print "Converting %s to mp4" % flv_filename
169 cmd = [
170 "ffmpeg",
171 "-i", flv_filename,
172 "-acodec", "copy",
173 "-vcodec", "copy",
174 mp4_filename,
175 ]
176 if not exec_subprocess(cmd):
177 return
178 try:
179 flv_size = os.stat(flv_filename).st_size
180 mp4_size = os.stat(mp4_filename).st_size
181 if abs(flv_size - mp4_size) < 0.05 * flv_size:
182 os.unlink(flv_filename)
183 else:
184 print >>sys.stderr, "The size of", mp4_filename, "is suspicious, did ffmpeg fail?"
185 except Exception, e:
186 print "Conversion failed", e
187
def convert_filename(filename):
	"""If *filename* (.mp4/.flv) actually contains FLV data, remux it to MP4.

	Sniffs the four-byte magic number instead of trusting the extension,
	since downloads are often FLV streams saved under an .mp4 name.
	"""
	if os.path.splitext(filename.lower())[1] in (".mp4", ".flv"):
		# BUG FIX: open in binary mode -- text mode can mangle the magic
		# bytes on platforms that translate newlines, and the sniff must
		# compare raw bytes.
		f = open(filename, "rb")
		try:
			fourcc = f.read(4)
		finally:
			f.close()
		if fourcc == b"FLV\x01":
			convert_flv_mp4(filename)
195
196 def download_rtmp(filename, vbase, vpath, hash_url=None):
197 filename = sanify_filename(filename)
198 print "Downloading: %s" % filename
199 if vpath.endswith(".flv"):
200 vpath = vpath[:-4]
201 cmd = [
202 "rtmpdump",
203 "-o", filename,
204 "-r", vbase,
205 "-y", vpath,
206 ]
207 if hash_url is not None:
208 cmd += ["--swfVfy", hash_url]
209 if exec_subprocess(cmd):
210 convert_filename(filename)
211 return True
212 else:
213 return False
214
215 def download_urllib(filename, url, referrer=None):
216 filename = sanify_filename(filename)
217 print "Downloading: %s" % filename
218 try:
219 src = _urlopen(url, referrer)
220 dst = open(filename, "w")
221 while True:
222 buf = src.read(1024*1024)
223 if not buf:
224 break
225 dst.write(buf)
226 sys.stdout.write(".")
227 sys.stdout.flush()
228 print
229 convert_filename(filename)
230 return True
231 except KeyboardInterrupt:
232 print "\nCancelled", url
233 finally:
234 try:
235 src.close()
236 except:
237 pass
238 try:
239 dst.close()
240 except:
241 pass
242 return False
243
def natural_sort(l, key=None):
	"""Sort *l* the way a human would list titles.

	Case-insensitive; runs of digits compare numerically ("Episode 2"
	before "Episode 10"); the articles "a" and "the" are ignored.  *key*
	optionally extracts the string to sort each element by.
	"""
	stop_words = ("a", "the")
	def sort_key(item):
		text = key(item) if key is not None else item
		parts = []
		for chunk in re.split("([0-9]+)", text.lower()):
			chunk = chunk.strip()
			if chunk.isdigit():
				parts.append(int(chunk))
			else:
				parts.extend(w for w in chunk.split() if w not in stop_words)
		return parts
	return sorted(l, key=sort_key)
262
263 def append_to_qs(url, params):
264 r = list(urlparse.urlsplit(url))
265 qs = urlparse.parse_qs(r[3])
266 for k, v in params.iteritems():
267 if v is not None:
268 qs[k] = v
269 elif qs.has_key(k):
270 del qs[k]
271 r[3] = urllib.urlencode(qs, True)
272 url = urlparse.urlunsplit(r)
273 return url
274