# common.py — shared download/cache helpers for webdl
# (extracted from the code.delx.au "webdl/blob — common.py" web view)
1 # vim:ts=4:sts=4:sw=4:noet
3 from lxml
import etree
, html
25 autosocks
.try_autosocks()
29 CACHE_DIR
= os
.path
.expanduser("~/.cache/webdl")
30 USER_AGENT
= "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:21.0) Gecko/20100101 Firefox/21.0"
33 def __init__(self
, title
, parent
=None):
36 parent
.children
.append(self
)
39 self
.can_download
= False
41 def get_children(self
):
46 def fill_children(self
):
54 root_node
= Node("Root")
57 iview
.fill_nodes(root_node
)
60 sbs
.fill_nodes(root_node
)
63 plus7
.fill_nodes(root_node
)
66 brightcove
.fill_nodes(root_node
)
# Whitelist of characters allowed in generated filenames.
valid_chars = frozenset("-_.()!@#%^ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")

def sanify_filename(filename):
	"""Return *filename* reduced to filesystem-safe ASCII.

	Non-ASCII characters are dropped by the "ignore" encode (Python 2
	semantics: str in, str out), then any remaining character outside
	valid_chars is stripped.
	"""
	filename = filename.encode("ascii", "ignore")
	filename = "".join(c for c in filename if c in valid_chars)
	# NOTE(review): the original's final line is missing from this extract;
	# a plain return of the cleaned name is assumed — verify.
	return filename
# One shared cookie-aware opener so session cookies persist across requests.
cookiejar = cookielib.CookieJar()
urlopener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))

def _urlopen(url, referrer=None):
	"""Open *url* with the shared opener, setting User-Agent and optional Referer.

	Returns the urllib2 response object (file-like).
	"""
	req = urllib2.Request(url)
	req.add_header("User-Agent", USER_AGENT)
	# NOTE(review): the guard line is missing from this extract; the original
	# presumably only sets the header when a referrer was supplied — verify.
	if referrer:
		req.add_header("Referer", referrer)
	return urlopener.open(req)
def urlopen(url, max_age):
	"""Fetch *url*, caching the response body under CACHE_DIR.

	A cached copy younger than *max_age* seconds is reused; otherwise the
	URL is re-fetched and the cache file rewritten.  Returns an open file
	object positioned at the start of the body.

	NOTE(review): several lines of this function are missing from the
	extract (including, presumably, a max_age <= 0 cache bypass and the
	original error handling around the copy) — reconstructed
	conservatively; verify against the full file.
	"""
	if not os.path.isdir(CACHE_DIR):
		os.makedirs(CACHE_DIR)

	# Cache key is the md5 of the URL itself.
	filename = hashlib.md5(url).hexdigest()
	filename = os.path.join(CACHE_DIR, filename)
	if os.path.exists(filename):
		file_age = int(time.time()) - os.path.getmtime(filename)
		if file_age < max_age:
			return open(filename)

	src = _urlopen(url)
	dst = open(filename, "w")
	try:
		shutil.copyfileobj(src, dst)
	finally:
		# Always release both handles before reopening the cache file.
		src.close()
		dst.close()

	return open(filename)
def grab_text(url, max_age):
	"""Fetch *url* via the cache and return its body decoded as UTF-8."""
	f = urlopen(url, max_age)
	text = f.read().decode("utf-8")
	# NOTE(review): trailing lines are missing from this extract; closing
	# the handle and returning the text is assumed — verify.
	f.close()
	return text
def grab_html(url, max_age):
	"""Fetch *url* via the cache and parse it leniently as HTML.

	Returns an lxml document tree; recover=True makes the parser tolerate
	malformed markup.
	"""
	f = urlopen(url, max_age)
	doc = html.parse(f, html.HTMLParser(encoding="utf-8", recover=True))
	# NOTE(review): trailing lines are missing from this extract; closing
	# the handle and returning the tree is assumed — verify.
	f.close()
	return doc
def grab_xml(url, max_age):
	"""Fetch *url* via the cache and parse it leniently as XML.

	Returns an lxml.etree document tree; recover=True tolerates slightly
	broken feeds.
	"""
	f = urlopen(url, max_age)
	doc = etree.parse(f, etree.XMLParser(encoding="utf-8", recover=True))
	# NOTE(review): trailing lines are missing from this extract; closing
	# the handle and returning the tree is assumed — verify.
	f.close()
	return doc
def grab_json(url, max_age, skip_assignment=False, skip_function=False):
	"""Fetch *url* via the cache and parse it as JSON.

	skip_assignment -- body looks like "var x = {...}"; parse after the "=".
	skip_function   -- body looks like "callback({...})"; parse between the
	                   outermost parentheses.

	NOTE(review): only the two json.loads() slices are visible in this
	extract; the reads, the pos/rpos searches and the plain-JSON fallback
	are reconstructed — verify against the full file.
	"""
	f = urlopen(url, max_age)
	if skip_assignment:
		text = f.read()
		pos = text.find("=")
		doc = json.loads(text[pos+1:])
	elif skip_function:
		text = f.read()
		pos = text.find("(")
		rpos = text.rfind(")")
		doc = json.loads(text[pos+1:rpos])
	else:
		doc = json.load(f)
	f.close()
	return doc
149 def exec_subprocess(cmd
):
151 p
= subprocess
.Popen(cmd
)
154 print >>sys
.stderr
, cmd
[0], "exited with error code:", ret
159 print >>sys
.stderr
, "Failed to run", cmd
[0], e
160 except KeyboardInterrupt:
161 print "Cancelled", cmd
165 except KeyboardInterrupt:
166 p
.send_signal(signal
.SIGKILL
)
171 def convert_flv_mp4(orig_filename
):
172 basename
= os
.path
.splitext(orig_filename
)[0]
173 flv_filename
= basename
+ ".flv"
174 mp4_filename
= basename
+ ".mp4"
175 if orig_filename
!= flv_filename
:
176 os
.rename(orig_filename
, flv_filename
)
177 print "Converting %s to mp4" % flv_filename
185 if not exec_subprocess(cmd
):
188 flv_size
= os
.stat(flv_filename
).st_size
189 mp4_size
= os
.stat(mp4_filename
).st_size
190 if abs(flv_size
- mp4_size
) < 0.05 * flv_size
:
191 os
.unlink(flv_filename
)
193 print >>sys
.stderr
, "The size of", mp4_filename
, "is suspicious, did ffmpeg fail?"
195 print "Conversion failed", e
def convert_filename(filename):
	"""Convert *filename* to mp4 if its contents are actually FLV.

	Sniffs the first four bytes for the "FLV\x01" magic rather than trusting
	the file extension.

	NOTE(review): the lines that open the file and read the fourcc are
	missing from this extract and have been reconstructed — verify.
	"""
	if os.path.splitext(filename.lower())[1] in (".mp4", ".flv"):
		f = open(filename)
		fourcc = f.read(4)
		f.close()
		if fourcc == "FLV\x01":
			convert_flv_mp4(filename)
205 def download_rtmp(filename
, vbase
, vpath
, hash_url
=None):
206 filename
= sanify_filename(filename
)
207 print "Downloading: %s" % filename
208 if vpath
.endswith(".flv"):
216 if hash_url
is not None:
217 cmd
+= ["--swfVfy", hash_url
]
218 if exec_subprocess(cmd
):
219 convert_filename(filename
)
224 def download_urllib(filename
, url
, referrer
=None):
225 filename
= sanify_filename(filename
)
226 print "Downloading: %s" % filename
228 src
= _urlopen(url
, referrer
)
229 dst
= open(filename
, "w")
231 buf
= src
.read(1024*1024)
235 sys
.stdout
.write(".")
238 convert_filename(filename
)
240 except KeyboardInterrupt:
241 print "\nCancelled", url
def natural_sort(l, key=None):
	"""Sort *l* so embedded numbers compare numerically ("ep 2" < "ep 10").

	Words listed in ignore_list (leading articles) are dropped from the
	comparison key.  *key*, if given, extracts the string to sort on from
	each item.

	NOTE(review): the interior of the key function is partly missing from
	this extract and has been reconstructed — verify against the full file.
	"""
	ignore_list = ["a", "the"]
	def key_func(k):
		if key is not None:
			k = key(k)
		k = k.lower()
		newk = []
		# Split into digit runs and text; digit runs compare as integers.
		for c in re.split("([0-9]+)", k):
			c = c.strip()
			if c.isdigit():
				newk.append(int(c))
			else:
				for subc in c.split():
					if subc not in ignore_list:
						newk.append(subc)
		return newk
	return sorted(l, key=key_func)
def append_to_qs(url, params):
	"""Return *url* with *params* merged into its query string.

	A value of None removes the key (if present); other values overwrite
	it.  List values are supported via urlencode's doseq flag.

	NOTE(review): the loop body and the final return are missing from this
	extract and have been reconstructed — verify against the full file.
	"""
	r = list(urlparse.urlsplit(url))
	qs = urlparse.parse_qs(r[3])
	for k, v in params.iteritems():
		if v is not None:
			qs[k] = v
		elif k in qs:
			del qs[k]
	r[3] = urllib.urlencode(qs, True)
	url = urlparse.urlunsplit(r)
	return url