]> code.delx.au - webdl/blob - common.py
Remove print statement
[webdl] / common.py
1 # vim:ts=4:sts=4:sw=4:noet
2
3 from lxml import etree, html
4 import cookielib
5 import json
6 try:
7 import hashlib
8 except ImportError:
9 import md5 as hashlib
10 import os
11 import re
12 import shutil
13 import signal
14 import subprocess
15 import sys
16 import tempfile
17 import time
18 import urllib
19 import urllib2
20 import urlparse
21
22
23 import autosocks
24 autosocks.try_autosocks()
25
# Directory in which urlopen() stores cached copies of fetched URLs.
CACHE_DIR = os.path.expanduser("~/.cache/webdl")
# Value of the User-Agent header that _urlopen() adds to every request.
USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1"
28
class Node(object):
    """A titled entry in a tree of items.

    Every node keeps a reference to its parent and a list of its children.
    Subclasses override fill_children() to populate the children lazily and
    download() to fetch whatever the node represents.
    """

    def __init__(self, title, parent=None):
        """Create the node and attach it to parent when one is given.

        title: label stored on the node.
        parent: node whose children list receives this node, or None.
        """
        self.title = title
        # Compare with None explicitly so that attaching never depends on
        # how a parent object happens to evaluate in a boolean context.
        if parent is not None:
            parent.children.append(self)
        self.parent = parent
        self.children = []
        # NOTE(review): presumably consulted by the UI to decide whether
        # download() may be called and whether children get sorted -- the
        # readers of these flags are not in this file; confirm there.
        self.can_download = False
        self.sort_children = False

    def get_children(self):
        """Return the children, calling fill_children() first when empty.

        A node whose fill_children() adds nothing is asked again on every
        call, because emptiness is the only "not yet filled" marker.
        """
        if not self.children:
            self.fill_children()
        return self.children

    def fill_children(self):
        """Populate self.children; the base implementation adds nothing."""
        pass

    def download(self):
        """Download the item behind this node.

        Raises NotImplementedError unless a subclass overrides it.
        """
        # NotImplemented is a comparison sentinel, not an exception class;
        # raising it produced a confusing TypeError instead.
        raise NotImplementedError
49
50
def load_root_node():
    """Create the root Node and let each site module add its entries.

    The site modules are imported here rather than at the top of the file.
    NOTE(review): presumably this avoids a circular import, since those
    modules likely import this one -- confirm.

    Returns the populated root node.
    """
    tree_root = Node("Root")

    # Each provider is imported immediately before it is asked to fill the
    # tree, in this fixed order.
    import iview
    iview.fill_nodes(tree_root)

    import sbs
    sbs.fill_nodes(tree_root)

    import plus7
    plus7.fill_nodes(tree_root)

    return tree_root
64
# Characters allowed to survive in a generated file name.
valid_chars = frozenset("-_.()!@#%^ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")


def sanify_filename(filename):
    """Return filename with every character outside valid_chars removed.

    filename: text or byte string; bytes are decoded as ASCII with
        undecodable bytes dropped.

    Returns a native str containing only characters from valid_chars.  The
    result may be empty when nothing in the input is allowed.
    """
    # Decode byte strings explicitly.  Calling .encode() on them made
    # Python 2 attempt an implicit ASCII decode first, which raised
    # UnicodeDecodeError as soon as a non-ASCII byte was present.
    if isinstance(filename, bytes):
        filename = filename.decode("ascii", "ignore")
    # Filter characters rather than encoded bytes: iterating bytes yields
    # integers on Python 3, none of which are in valid_chars.
    kept = "".join(c for c in filename if c in valid_chars)
    # Everything kept is ASCII, so this conversion cannot fail; it keeps the
    # return type a plain str on Python 2 even for unicode input.
    return str(kept)
70
# One cookie jar and opener shared by every request made by this module.
cookiejar = cookielib.CookieJar()
urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))


def _urlopen(url, referrer=None):
    """Open url through the shared cookie-aware opener.

    url: address to request.
    referrer: value for the Referer header; omitted when empty or None.

    Returns the response object produced by the opener.
    """
    request_headers = {"User-Agent": USER_AGENT}
    if referrer:
        request_headers["Referer"] = referrer
    request = urllib2.Request(url, headers=request_headers)
    return urlopener.open(request)
79
def urlopen(url, max_age):
    """Open url, reusing a cached copy on disk when it is fresh enough.

    url: address to fetch.
    max_age: maximum age, in seconds, of a cached copy that may be reused.
        Zero or a negative value bypasses the cache completely.

    Returns a readable file-like object: the live response when the cache
    is bypassed, otherwise the cache file opened in binary mode.

    Errors from _urlopen() or from the file operations propagate to the
    caller.  A failed or interrupted download never leaves a partial file
    under the cache name.
    """
    if not os.path.isdir(CACHE_DIR):
        os.makedirs(CACHE_DIR)

    if max_age <= 0:
        return _urlopen(url)

    # md5 wants bytes.  Encoding text URLs as UTF-8 gives the same digest as
    # before for ASCII URLs and stops non-ASCII ones from raising.
    key = url if isinstance(url, bytes) else url.encode("utf-8")
    filename = os.path.join(CACHE_DIR, hashlib.md5(key).hexdigest())

    if os.path.exists(filename):
        file_age = int(time.time()) - os.path.getmtime(filename)
        if file_age < max_age:
            return open(filename, "rb")

    src = _urlopen(url)
    try:
        # Download into a temporary file beside the cache entry and rename
        # it into place once complete, so an interrupted transfer cannot be
        # mistaken for a fresh cache hit later.
        fd, tmp_filename = tempfile.mkstemp(dir=CACHE_DIR)
        try:
            dst = os.fdopen(fd, "wb")
            try:
                shutil.copyfileobj(src, dst)
            finally:
                dst.close()
            if os.name == "nt" and os.path.exists(filename):
                # os.rename() cannot replace an existing file on Windows.
                os.unlink(filename)
            os.rename(tmp_filename, filename)
        except BaseException:
            # Covers Ctrl-C as well: remove the leftover and re-raise with
            # the original traceback intact.
            try:
                os.unlink(tmp_filename)
            except OSError:
                pass
            raise
    finally:
        src.close()

    return open(filename, "rb")
109
def grab_html(url, max_age):
    """Fetch url through urlopen() and parse it as HTML with lxml.

    url: address to fetch.
    max_age: cache lifetime passed straight to urlopen().

    Returns the document produced by lxml's html.parse(), using a
    recovering parser that treats the input as UTF-8.
    """
    f = urlopen(url, max_age)
    try:
        return html.parse(f, html.HTMLParser(encoding="utf-8", recover=True))
    finally:
        # Close the handle even when parsing raises.
        f.close()
115
def grab_xml(url, max_age):
    """Fetch url through urlopen() and parse it as XML with lxml.

    url: address to fetch.
    max_age: cache lifetime passed straight to urlopen().

    Returns the document produced by lxml's etree.parse(), using a
    recovering parser that treats the input as UTF-8.
    """
    f = urlopen(url, max_age)
    try:
        return etree.parse(f, etree.XMLParser(encoding="utf-8", recover=True))
    finally:
        # Close the handle even when parsing raises.
        f.close()
121
def grab_json(url, max_age, skip_assignment=False):
    """Fetch url through urlopen() and decode the body as JSON.

    url: address to fetch.
    max_age: cache lifetime passed straight to urlopen().
    skip_assignment: when true, everything up to and including the first
        "=" is discarded before decoding, so a body shaped like
        "name = {...}" can be read.  If there is no "=", the whole body is
        decoded.

    Returns the decoded JSON value.
    """
    f = urlopen(url, max_age)
    try:
        if not skip_assignment:
            return json.load(f)
        text = f.read()
        # find() returns -1 when "=" is absent, which makes the slice start
        # at 0 and keep the entire text.
        pos = text.find("=")
        return json.loads(text[pos+1:])
    finally:
        # Close the handle even when reading or decoding raises.
        f.close()
132
def exec_subprocess(cmd):
    """Run cmd, wait for it to finish and report failures.

    cmd: argument list handed to subprocess.Popen; cmd[0] is the program.

    Returns True only when the program ran and exited with status 0.
    Returns False when it could not be started, exited with another status,
    or was interrupted with Ctrl-C (in which case the child is stopped
    before returning).
    """
    # Bound before the try block so the KeyboardInterrupt handler can tell
    # whether a child process was actually started.
    p = None
    try:
        p = subprocess.Popen(cmd)
        ret = p.wait()
        if ret == 0:
            return True
        sys.stderr.write("%s exited with error code: %s\n" % (cmd[0], ret))
    except OSError as e:
        sys.stderr.write("Failed to run %s %s\n" % (cmd[0], e))
    except KeyboardInterrupt:
        sys.stdout.write("Cancelled %s\n" % (cmd,))
        if p is not None:
            try:
                p.terminate()
                p.wait()
            except KeyboardInterrupt:
                # A second Ctrl-C while waiting: stop asking politely.
                # kill() sends SIGKILL on POSIX and also exists on
                # platforms that have no signal.SIGKILL.
                p.kill()
                p.wait()
    return False
153
154
def convert_flv_mp4(orig_filename):
    """Copy the streams of an FLV file into an MP4 container using ffmpeg.

    orig_filename is first renamed so that it carries a .flv extension,
    then ffmpeg writes a sibling .mp4 file with audio and video copied
    unchanged.  The .flv file is deleted only when the two sizes differ by
    less than 5% of the FLV size; otherwise a warning goes to stderr and
    both files are kept.

    Returns None in every case.
    """
    stem = os.path.splitext(orig_filename)[0]
    flv_filename = stem + ".flv"
    mp4_filename = stem + ".mp4"
    if flv_filename != orig_filename:
        os.rename(orig_filename, flv_filename)

    sys.stdout.write("Converting %s to mp4" % flv_filename)
    sys.stdout.write("\n")

    ffmpeg_cmd = ["ffmpeg", "-i", flv_filename]
    ffmpeg_cmd += ["-acodec", "copy"]
    ffmpeg_cmd += ["-vcodec", "copy"]
    ffmpeg_cmd.append(mp4_filename)
    if not exec_subprocess(ffmpeg_cmd):
        return

    try:
        flv_size = os.stat(flv_filename).st_size
        mp4_size = os.stat(mp4_filename).st_size
        size_gap = abs(flv_size - mp4_size)
        if not size_gap < 0.05 * flv_size:
            warning = ("The size of", mp4_filename, "is suspicious, did ffmpeg fail?")
            sys.stderr.write(" ".join(warning))
            sys.stderr.write("\n")
        else:
            os.unlink(flv_filename)
    except Exception as e:
        sys.stdout.write(" ".join(("Conversion failed", str(e))))
        sys.stdout.write("\n")
180
def convert_filename(filename):
    """Hand filename to convert_flv_mp4() when its content is FLV data.

    Only files whose extension is .mp4 or .flv (compared case-insensitively)
    are opened.  The first four bytes are compared with "FLV" followed by
    the byte 0x01; on a match the file is converted.

    Returns None.
    """
    extension = os.path.splitext(filename.lower())[1]
    if extension not in (".mp4", ".flv"):
        return
    # Read the signature in binary mode: the content is media data, which
    # text mode may translate or fail to decode.  The with-block closes the
    # handle even if the read raises.
    with open(filename, "rb") as f:
        fourcc = f.read(4)
    if fourcc == b"FLV\x01":
        convert_flv_mp4(filename)
188
def download_rtmp(filename, vbase, vpath, hash_url=None):
    """Download an RTMP stream with rtmpdump and convert it if needed.

    filename: desired output name; passed through sanify_filename() first.
    vbase: value given to rtmpdump's -r option.
    vpath: value given to rtmpdump's -y option; a trailing ".flv" is
        stripped before use.
    hash_url: when not None, passed to rtmpdump via --swfVfy.

    Returns True when rtmpdump succeeded (convert_filename() is then run on
    the result), False otherwise.
    """
    filename = sanify_filename(filename)
    sys.stdout.write("Downloading: %s" % filename)
    sys.stdout.write("\n")

    if vpath.endswith(".flv"):
        vpath = vpath[:-4]

    rtmpdump_cmd = ["rtmpdump", "-o", filename, "-r", vbase, "-y", vpath]
    if hash_url is not None:
        rtmpdump_cmd.extend(["--swfVfy", hash_url])

    if not exec_subprocess(rtmpdump_cmd):
        return False
    convert_filename(filename)
    return True
207
def download_urllib(filename, url, referrer=None):
    """Download url to a local file, printing a dot per chunk received.

    filename: desired output name; passed through sanify_filename() first.
    url: address to fetch.
    referrer: optional Referer header value forwarded to _urlopen().

    Returns True after a complete download (convert_filename() is then run
    on the result) and False when the user cancels with Ctrl-C.  Other
    errors propagate to the caller.
    """
    filename = sanify_filename(filename)
    sys.stdout.write("Downloading: %s\n" % filename)
    # Bound up front so the cleanup below can tell which handles exist.
    src = None
    dst = None
    try:
        src = _urlopen(url, referrer)
        # Binary mode: the payload is media data, not text.
        dst = open(filename, "wb")
        while True:
            buf = src.read(1024*1024)
            if not buf:
                break
            dst.write(buf)
            sys.stdout.write(".")
            sys.stdout.flush()
        # Flush and close the output before conversion; otherwise the
        # converter could read, rename and remux a file whose final
        # buffered bytes had not reached the disk yet.
        dst.close()
        dst = None
        convert_filename(filename)
        return True
    except KeyboardInterrupt:
        sys.stdout.write("\nCancelled %s\n" % url)
    finally:
        for stream in (src, dst):
            if stream is None:
                continue
            try:
                stream.close()
            except (IOError, OSError):
                # Best effort: a failure while closing must not mask the
                # outcome of the download itself.
                pass
    return False
235
def natural_sort(l, key=None):
    """Return a new list with the items of l in natural (human) order.

    Items are compared case-insensitively, runs of ASCII digits compare by
    numeric value, and the words "a" and "the" are ignored.  At the same
    position a number sorts before a word.

    l: iterable of strings, or of arbitrary items when key is given.
    key: optional function mapping an item to the string to sort by.
    """
    ignore_words = frozenset(["a", "the"])
    digit_runs = re.compile(r"([0-9]+)")

    def key_func(item):
        text = item if key is None else key(item)
        tokens = []
        # With one capturing group, split() alternates text, digits, text,
        # ... so odd positions are exactly the ASCII digit runs.  Relying
        # on that instead of str.isdigit() avoids calling int() on
        # characters such as a superscript two, which isdigit() accepts but
        # int() rejects.
        for index, chunk in enumerate(digit_runs.split(text.lower())):
            if index % 2:
                tokens.append((0, int(chunk)))
            else:
                tokens.extend(
                    (1, word) for word in chunk.split()
                    if word not in ignore_words
                )
        # Each token carries a type rank so a number and a word at the same
        # position stay comparable on Python 3, with numbers first as in
        # Python 2's mixed-type ordering.
        return tokens

    return sorted(l, key=key_func)
254
def append_to_qs(url, params):
    """Return url with its query string updated from params.

    url: address whose query string is to be edited.
    params: mapping of parameter name to new value.  A value of None
        removes the parameter; any other value replaces whatever the URL
        had for that name.

    Parameters not mentioned in params are kept.  Note that the query is
    re-encoded from parse_qs() output, which by default drops parameters
    with blank values.
    """
    parts = list(urlparse.urlsplit(url))
    query = urlparse.parse_qs(parts[3])
    for name, value in params.items():
        if value is None:
            # Removing an absent name is a no-op.
            query.pop(name, None)
        else:
            query[name] = value
    parts[3] = urllib.urlencode(query, True)
    return urlparse.urlunsplit(parts)
266