# code.delx.au - webdl/blob - common.py
1 from lxml
import etree
, html
23 autosocks
.try_autosocks()
27 CACHE_DIR
= os
.path
.expanduser("~/.cache/webdl")
28 USER_AGENT
= "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:21.0) Gecko/20100101 Firefox/21.0"
31 def __init__(self
, title
, parent
=None):
34 parent
.children
.append(self
)
37 self
.can_download
= False
39 def get_children(self
):
44 def fill_children(self
):
52 root_node
= Node("Root")
55 iview
.fill_nodes(root_node
)
58 sbs
.fill_nodes(root_node
)
61 plus7
.fill_nodes(root_node
)
64 brightcove
.fill_nodes(root_node
)
68 valid_chars
= frozenset("-_.()!@#%^ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
69 def sanify_filename(filename
):
70 filename
= filename
.encode("ascii", "ignore")
71 filename
= "".join(c
for c
in filename
if c
in valid_chars
)
74 cookiejar
= cookielib
.CookieJar()
75 urlopener
= urllib2
.build_opener(urllib2
.HTTPCookieProcessor(cookiejar
))
76 def _urlopen(url
, referrer
=None):
77 req
= urllib2
.Request(url
)
78 req
.add_header("User-Agent", USER_AGENT
)
80 req
.add_header("Referer", referrer
)
81 return urlopener
.open(req
)
83 def urlopen(url
, max_age
):
85 if not os
.path
.isdir(CACHE_DIR
):
86 os
.makedirs(CACHE_DIR
)
91 filename
= hashlib
.md5(url
).hexdigest()
92 filename
= os
.path
.join(CACHE_DIR
, filename
)
93 if os
.path
.exists(filename
):
94 file_age
= int(time
.time()) - os
.path
.getmtime(filename
)
95 if file_age
< max_age
:
99 dst
= open(filename
, "w")
101 shutil
.copyfileobj(src
, dst
)
111 return open(filename
)
113 def grab_text(url
, max_age
):
114 f
= urlopen(url
, max_age
)
115 text
= f
.read().decode("utf-8")
119 def grab_html(url
, max_age
):
120 f
= urlopen(url
, max_age
)
121 doc
= html
.parse(f
, html
.HTMLParser(encoding
="utf-8", recover
=True))
125 def grab_xml(url
, max_age
):
126 f
= urlopen(url
, max_age
)
127 doc
= etree
.parse(f
, etree
.XMLParser(encoding
="utf-8", recover
=True))
131 def grab_json(url
, max_age
, skip_assignment
=False, skip_function
=False):
132 f
= urlopen(url
, max_age
)
136 doc
= json
.loads(text
[pos
+1:])
140 rpos
= text
.rfind(")")
141 doc
= json
.loads(text
[pos
+1:rpos
])
147 def exec_subprocess(cmd
):
149 p
= subprocess
.Popen(cmd
)
152 print >>sys
.stderr
, cmd
[0], "exited with error code:", ret
157 print >>sys
.stderr
, "Failed to run", cmd
[0], e
158 except KeyboardInterrupt:
159 print "Cancelled", cmd
163 except KeyboardInterrupt:
164 p
.send_signal(signal
.SIGKILL
)
169 def convert_flv_mp4(orig_filename
):
170 basename
= os
.path
.splitext(orig_filename
)[0]
171 flv_filename
= basename
+ ".flv"
172 mp4_filename
= basename
+ ".mp4"
173 if orig_filename
!= flv_filename
:
174 os
.rename(orig_filename
, flv_filename
)
175 print "Converting %s to mp4" % flv_filename
176 if not avconv_remux(flv_filename
, mp4_filename
):
177 # failed, error has already been logged
180 flv_size
= os
.stat(flv_filename
).st_size
181 mp4_size
= os
.stat(mp4_filename
).st_size
182 if abs(flv_size
- mp4_size
) < 0.05 * flv_size
:
183 os
.unlink(flv_filename
)
185 print >>sys
.stderr
, "The size of", mp4_filename
, "is suspicious, did avconv fail?"
187 print "Conversion failed", e
189 def avconv_remux(infile
, outfile
):
197 return exec_subprocess(cmd
)
199 def convert_filename(filename
):
200 if os
.path
.splitext(filename
.lower())[1] in (".mp4", ".flv"):
204 if fourcc
== "FLV\x01":
205 convert_flv_mp4(filename
)
207 def download_rtmp(filename
, vbase
, vpath
, hash_url
=None):
208 filename
= sanify_filename(filename
)
209 print "Downloading: %s" % filename
210 if vpath
.endswith(".flv"):
218 if hash_url
is not None:
219 cmd
+= ["--swfVfy", hash_url
]
220 if exec_subprocess(cmd
):
221 convert_filename(filename
)
226 def download_urllib(filename
, url
, referrer
=None):
227 filename
= sanify_filename(filename
)
228 print "Downloading: %s" % filename
230 src
= _urlopen(url
, referrer
)
231 dst
= open(filename
, "w")
233 buf
= src
.read(1024*1024)
237 sys
.stdout
.write(".")
240 except KeyboardInterrupt:
241 print "\nCancelled", url
253 convert_filename(filename
)
256 def download_hls_get_stream(url
, hack_url_func
):
257 url
= hack_url_func(url
)
259 def parse_bandwidth(line
):
260 params
= line
.split(":", 1)[1].split(",")
262 k
, v
= kv
.split("=", 1)
267 m3u8
= grab_text(url
, 0)
268 best_bandwidth
= None
270 for line
in m3u8
.split("\n"):
271 if line
.startswith("#EXT-X-STREAM-INF:"):
272 bandwidth
= parse_bandwidth(line
)
273 if best_bandwidth
is None or bandwidth
> best_bandwidth
:
274 best_bandwidth
= bandwidth
276 elif not line
.startswith("#"):
278 best_url
= line
.strip()
281 raise Exception("Failed to find best stream for HLS: " + url
)
285 def download_hls_segments(tmpdir
, url
, hack_url_func
):
286 m3u8
= grab_text(url
, 0)
289 local_m3u8_filename
= tmpdir
+ "/index.m3u8"
290 local_m3u8
= open(local_m3u8_filename
, "w")
293 fail_if_not_last_segment
= None
294 for line
in m3u8
.split("\n"):
297 if line
.startswith("#"):
298 local_m3u8
.write(line
+ "\n")
301 if fail_if_not_last_segment
:
304 outfile
= "%s/segment_%d.ts" % (tmpdir
, i
)
307 download_hls_fetch_segment(hack_url_func(line
), outfile
)
308 except urllib2
.HTTPError
, e
:
309 fail_if_not_last_segment
= e
311 local_m3u8
.write(outfile
+ "\n")
312 sys
.stdout
.write(".")
315 sys
.stdout
.write("\n")
318 return local_m3u8_filename
320 def download_hls_fetch_segment(segment
, outfile
):
322 src
= _urlopen(segment
)
323 dst
= open(outfile
, "w")
324 shutil
.copyfileobj(src
, dst
)
326 print >>sys
.stderr
, "Failed to fetch", segment
338 def download_hls(filename
, m3u8_master_url
, hack_url_func
=None):
339 if hack_url_func
is None:
340 hack_url_func
= lambda url
: url
342 tmpdir
= tempfile
.mkdtemp(prefix
="webdl-hls")
343 filename
= sanify_filename(filename
)
345 print "Downloading: %s" % filename
348 best_stream_url
= download_hls_get_stream(m3u8_master_url
, hack_url_func
)
349 local_m3u8
= download_hls_segments(tmpdir
, best_stream_url
, hack_url_func
)
350 avconv_remux(local_m3u8
, filename
)
352 except KeyboardInterrupt:
353 print "\nCancelled", m3u8_master_url
356 shutil
.rmtree(tmpdir
)
358 def natural_sort(l
, key
=None):
359 ignore_list
= ["a", "the"]
365 for c
in re
.split("([0-9]+)", k
):
370 for subc
in c
.split():
371 if subc
not in ignore_list
:
375 return sorted(l
, key
=key_func
)
377 def append_to_qs(url
, params
):
378 r
= list(urlparse
.urlsplit(url
))
379 qs
= urlparse
.parse_qs(r
[3])
380 for k
, v
in params
.iteritems():
385 r
[3] = urllib
.urlencode(qs
, True)
386 url
= urlparse
.urlunsplit(r
)