]> code.delx.au - webdl/blob - common.py
1ce0008aa4c1d2ca3808d8dc883fb5b696e94614
[webdl] / common.py
1 # vim:ts=4:sts=4:sw=4:noet
2
3 from lxml import etree, html
4 import json
5 try:
6 import hashlib
7 except ImportError:
8 import md5 as hashlib
9 import os
10 import shutil
11 import signal
12 import subprocess
13 import sys
14 import tempfile
15 import time
16 import urllib
17
18
19 import autosocks
# Runs as a side effect of importing this module. Presumably this enables a
# SOCKS proxy when one is configured -- see the autosocks module; TODO confirm.
autosocks.try_autosocks()

# Directory in which urlopen() stores its on-disk cache of fetched URLs.
CACHE_DIR = os.path.expanduser("~/.cache/webdl")
23
class Node(object):
    """One entry in the tree of titles built by load_root_node().

    Creating a node with a ``parent`` appends the new node to that parent's
    ``children`` list.

    Attributes:
        title: the title passed to the constructor.
        parent: the parent node, or None for a root.
        children: list of child nodes, in insertion order.
        can_download: False by default; presumably set to True by
            subclasses that implement download() -- TODO confirm.
    """

    def __init__(self, title, parent=None):
        self.title = title
        # Compare against None explicitly so that a parent whose class
        # defines __len__/__nonzero__ is still linked when it is "falsy".
        if parent is not None:
            parent.children.append(self)
        self.parent = parent
        self.children = []
        self.can_download = False

    def get_children(self):
        """Return the list of child nodes."""
        return self.children

    def download(self):
        """Download this node's content; the base class has nothing to fetch.

        Raises:
            NotImplementedError: always, in this base class.
        """
        # NotImplemented is a comparison sentinel, not an exception class;
        # raising it produces a confusing TypeError instead.
        raise NotImplementedError
38
39
def load_root_node():
    """Build the top-level "Root" node and let each provider populate it.

    The iview and sbs modules are each imported and then asked to fill the
    tree via their fill_nodes() function, in that order.

    Returns:
        The populated root Node.
    """
    root = Node("Root")

    # Provider modules are imported here rather than at module level;
    # presumably because they import this module themselves -- TODO confirm.
    import iview
    iview.fill_nodes(root)

    import sbs
    sbs.fill_nodes(root)

    return root
50
# Characters allowed to survive in a sanitised filename.
valid_chars = frozenset("-_.() abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
def sanify_filename(filename):
    """Return ``filename`` with every character not in ``valid_chars`` removed.

    Accepts both byte strings and unicode strings. Anything outside the
    whitelist -- including path separators and all non-ASCII characters --
    is dropped, so the result is always a plain ASCII ``str``.
    """
    # The whitelist already rejects every non-ASCII character, so no
    # separate .encode("ascii", "ignore") pass is needed. That call raised
    # UnicodeDecodeError for non-ASCII byte strings on Python 2.
    kept = "".join(c for c in filename if c in valid_chars)
    # str() keeps the historical return type (a native str) for unicode input.
    return str(kept)
56
57
def urlopen(url, max_age):
    """Open ``url``, optionally going through the on-disk cache in CACHE_DIR.

    Args:
        url: the URL to fetch.
        max_age: maximum acceptable age of a cached copy, in seconds. A
            value <= 0 bypasses the cache entirely.

    Returns:
        A readable file-like object: the live urllib response when the
        cache is bypassed, otherwise the cached file opened for reading.
        The caller is responsible for closing it.
    """
    if not os.path.isdir(CACHE_DIR):
        os.makedirs(CACHE_DIR)

    if max_age <= 0:
        return urllib.urlopen(url)

    # Cache entries are named after the MD5 hex digest of the URL.
    filename = hashlib.md5(url).hexdigest()
    filename = os.path.join(CACHE_DIR, filename)
    if os.path.exists(filename):
        file_age = int(time.time()) - os.path.getmtime(filename)
        if file_age < max_age:
            # Binary mode: return the same raw bytes a live response would.
            return open(filename, "rb")

    src = urllib.urlopen(url)
    try:
        dst = open(filename, "wb")
        complete = False
        try:
            shutil.copyfileobj(src, dst)
            complete = True
        finally:
            dst.close()
            if not complete:
                # Remove a partial download on ANY failure (including
                # Ctrl-C) so a truncated file is never served as a fresh
                # cache hit. The original exception keeps propagating with
                # its traceback intact.
                try:
                    os.unlink(filename)
                except OSError:
                    pass
    finally:
        src.close()

    return open(filename, "rb")
87
def grab_html(url, max_age):
    """Fetch ``url`` via urlopen() and parse it with lxml.html.

    Args:
        url: the URL to fetch.
        max_age: cache lifetime in seconds, passed through to urlopen().

    Returns:
        The document object returned by ``html.parse``.
    """
    f = urlopen(url, max_age)
    try:
        return html.parse(f)
    finally:
        # Close the handle even when parsing raises.
        f.close()
93
def grab_xml(url, max_age):
    """Fetch ``url`` via urlopen() and parse it with lxml.etree.

    Args:
        url: the URL to fetch.
        max_age: cache lifetime in seconds, passed through to urlopen().

    Returns:
        The document object returned by ``etree.parse``.
    """
    f = urlopen(url, max_age)
    try:
        return etree.parse(f)
    finally:
        # Close the handle even when parsing raises.
        f.close()
99
def grab_json(url, max_age):
    """Fetch ``url`` via urlopen() and decode the response as JSON.

    Args:
        url: the URL to fetch.
        max_age: cache lifetime in seconds, passed through to urlopen().

    Returns:
        The Python object produced by ``json.load``.
    """
    f = urlopen(url, max_age)
    try:
        return json.load(f)
    finally:
        # Close the handle even when decoding raises.
        f.close()
105
def exec_subprocess(cmd):
    """Run ``cmd`` and wait for it to finish.

    Args:
        cmd: argument list handed to ``subprocess.Popen``; ``cmd[0]`` is
            used as the program name in error messages.

    Returns:
        True if the process exited with status 0. False if it exited with
        a non-zero status, could not be started (OSError), or the wait was
        interrupted with Ctrl-C.
    """
    # Bound up front so the KeyboardInterrupt handler can tell whether a
    # child process was actually started.
    p = None
    try:
        p = subprocess.Popen(cmd)
        ret = p.wait()
        if ret != 0:
            sys.stderr.write("%s exited with error code: %s\n" % (cmd[0], ret))
            return False
        return True
    except OSError as e:
        sys.stderr.write("Failed to run %s %s\n" % (cmd[0], e))
    except KeyboardInterrupt:
        print("Cancelled %s" % (cmd,))
        if p is not None:
            try:
                # Ask politely first...
                p.terminate()
                p.wait()
            except KeyboardInterrupt:
                # ...and force-kill if the user interrupts again.
                p.send_signal(signal.SIGKILL)
                p.wait()
    return False
126
127
def convert_flv_mp4(orig_filename):
    """Remux an FLV file into an MP4 container using ffmpeg.

    The input is first renamed to ``<base>.flv`` and ffmpeg then writes
    ``<base>.mp4`` with the audio and video streams copied as-is. The .flv
    file is removed only when the two files differ in size by less than 5%
    of the .flv size; otherwise a warning is written to stderr and both
    files are kept. If ffmpeg does not run successfully the function
    returns early, leaving the renamed .flv in place.

    Args:
        orig_filename: path of the file to convert.
    """
    stem = os.path.splitext(orig_filename)[0]
    flv_path = stem + ".flv"
    mp4_path = stem + ".mp4"
    os.rename(orig_filename, flv_path)
    print("Converting %s to mp4" % flv_path)

    ffmpeg_args = ["ffmpeg", "-i", flv_path]
    ffmpeg_args += ["-acodec", "copy"]
    ffmpeg_args += ["-vcodec", "copy"]
    ffmpeg_args.append(mp4_path)
    if not exec_subprocess(ffmpeg_args):
        return

    try:
        flv_bytes = os.stat(flv_path).st_size
        mp4_bytes = os.stat(mp4_path).st_size
        # A stream copy should give a file of roughly the same size.
        if abs(flv_bytes - mp4_bytes) >= 0.05 * flv_bytes:
            sys.stderr.write(
                "The size of %s is suspicious, did ffmpeg fail?\n" % (mp4_path,)
            )
        else:
            os.unlink(flv_path)
    except Exception as e:
        print("Conversion failed %s" % (e,))
152
def convert_filename(filename):
    """Convert ``filename`` to real MP4 if it is an FLV stream named .mp4.

    Only files whose name ends in ".mp4" (case-insensitive) are inspected.
    If the first four bytes are the FLV signature, the file is handed to
    convert_flv_mp4(); otherwise nothing happens.

    Args:
        filename: path of the downloaded file.

    Raises:
        IOError: if a ".mp4" file cannot be opened or read.
    """
    if not filename.lower().endswith(".mp4"):
        return
    # Read the signature in binary mode: the file is media data, not text.
    # The with-block also closes the handle if the read fails.
    with open(filename, "rb") as f:
        fourcc = f.read(4)
    if fourcc == b"FLV\x01":
        convert_flv_mp4(filename)
160
def download_rtmp(filename, vbase, vpath, hash_url=None):
    """Download an RTMP stream with the external ``rtmpdump`` program.

    Args:
        filename: desired output name; it is passed through
            sanify_filename() before use.
        vbase: value for rtmpdump's ``-r`` option.
        vpath: value for rtmpdump's ``-y`` option; a trailing ".flv" is
            stripped first.
        hash_url: if not None, passed to rtmpdump as ``--swfVfy``.

    Returns:
        True if rtmpdump exited successfully, False otherwise.
    """
    filename = sanify_filename(filename)
    print("Downloading: %s" % filename)
    if vpath.endswith(".flv"):
        vpath = vpath[:-4]
    cmd = [
        "rtmpdump",
        "-o", filename,
        "-r", vbase,
        "-y", vpath,
    ]
    if hash_url is not None:
        cmd += ["--swfVfy", hash_url]
    if not exec_subprocess(cmd):
        # Do not try to convert after a failed download: the output file
        # may be missing (open() would raise) or only partially written.
        # download_urllib likewise converts only after a complete transfer.
        return False
    convert_filename(filename)
    return True
177
def download_urllib(filename, url):
    """Download ``url`` to a local file, printing a dot per chunk read.

    Args:
        filename: desired output name; it is passed through
            sanify_filename() before use.
        url: the URL to fetch.

    Returns:
        True once the whole response has been written and
        convert_filename() has run; False if the transfer was cancelled
        with Ctrl-C. Other errors propagate to the caller.
    """
    filename = sanify_filename(filename)
    print("Downloading: %s" % filename)
    # Bound up front so the cleanup code never touches an unbound name.
    src = None
    dst = None
    try:
        src = urllib.urlopen(url)
        # Binary mode: the payload is media data, not text.
        dst = open(filename, "wb")
        while True:
            buf = src.read(1024 * 1024)
            if not buf:
                break
            dst.write(buf)
            sys.stdout.write(".")
            sys.stdout.flush()
        # Close (and therefore flush) the output BEFORE converting, so that
        # convert_filename()/ffmpeg see the complete file on disk.
        dst.close()
        dst = None
        convert_filename(filename)
        return True
    except KeyboardInterrupt:
        print("\nCancelled %s" % (url,))
    finally:
        # Best-effort cleanup; a failure to close must not mask the result.
        for handle in (src, dst):
            if handle is None:
                continue
            try:
                handle.close()
            except EnvironmentError:
                pass
    return False
205