]>
code.delx.au - webdl/blob - common.py
1 from lxml
import etree
, html
23 autosocks
.try_autosocks()
27 CACHE_DIR
= os
.path
.join(os
.environ
.get("XDG_CACHE_HOME", os
.path
.expanduser("~/.cache")), "webdl")
28 USER_AGENT
= "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:21.0) Gecko/20100101 Firefox/21.0"
31 def __init__(self
, title
, parent
=None):
34 parent
.children
.append(self
)
37 self
.can_download
= False
39 def get_children(self
):
44 def fill_children(self
):
52 root_node
= Node("Root")
55 iview
.fill_nodes(root_node
)
58 sbs
.fill_nodes(root_node
)
61 brightcove
.fill_nodes(root_node
)
65 valid_chars
= frozenset("-_.()!@#%^ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
66 def sanify_filename(filename
):
67 filename
= filename
.encode("ascii", "ignore")
68 filename
= "".join(c
for c
in filename
if c
in valid_chars
)
71 cookiejar
= cookielib
.CookieJar()
72 urlopener
= urllib2
.build_opener(urllib2
.HTTPCookieProcessor(cookiejar
))
73 def _urlopen(url
, referrer
=None):
74 req
= urllib2
.Request(url
)
75 req
.add_header("User-Agent", USER_AGENT
)
77 req
.add_header("Referer", referrer
)
78 return urlopener
.open(req
)
80 def urlopen(url
, max_age
):
82 if not os
.path
.isdir(CACHE_DIR
):
83 os
.makedirs(CACHE_DIR
)
88 filename
= hashlib
.md5(url
).hexdigest()
89 filename
= os
.path
.join(CACHE_DIR
, filename
)
90 if os
.path
.exists(filename
):
91 file_age
= int(time
.time()) - os
.path
.getmtime(filename
)
92 if file_age
< max_age
:
96 dst
= open(filename
, "wb")
98 shutil
.copyfileobj(src
, dst
)
108 return open(filename
)
110 def grab_text(url
, max_age
):
111 f
= urlopen(url
, max_age
)
112 text
= f
.read().decode("utf-8")
116 def grab_html(url
, max_age
):
117 f
= urlopen(url
, max_age
)
118 doc
= html
.parse(f
, html
.HTMLParser(encoding
="utf-8", recover
=True))
122 def grab_xml(url
, max_age
):
123 f
= urlopen(url
, max_age
)
124 doc
= etree
.parse(f
, etree
.XMLParser(encoding
="utf-8", recover
=True))
128 def grab_json(url
, max_age
, skip_assignment
=False, skip_function
=False):
129 f
= urlopen(url
, max_age
)
133 doc
= json
.loads(text
[pos
+1:])
137 rpos
= text
.rfind(")")
138 doc
= json
.loads(text
[pos
+1:rpos
])
144 def exec_subprocess(cmd
):
146 p
= subprocess
.Popen(cmd
)
149 print >>sys
.stderr
, cmd
[0], "exited with error code:", ret
154 print >>sys
.stderr
, "Failed to run", cmd
[0], e
155 except KeyboardInterrupt:
156 print "Cancelled", cmd
160 except KeyboardInterrupt:
161 p
.send_signal(signal
.SIGKILL
)
166 def check_command_exists(cmd
):
168 subprocess
.check_output(cmd
)
173 def generate_remux_cmd(infile
, outfile
):
174 if check_command_exists(["avconv", "--help"]):
178 "-bsf:a", "aac_adtstoasc",
184 if check_command_exists(["ffmpeg", "--help"]):
188 "-bsf:a", "aac_adtstoasc",
194 raise Exception("You must install ffmpeg or libav-tools")
196 def remux(infile
, outfile
):
197 print "Converting %s to mp4" % infile
198 cmd
= generate_remux_cmd(infile
, outfile
)
199 if not exec_subprocess(cmd
):
200 # failed, error has already been logged
203 flv_size
= os
.stat(infile
).st_size
204 mp4_size
= os
.stat(outfile
).st_size
205 if abs(flv_size
- mp4_size
) < 0.1 * flv_size
:
209 print >>sys
.stderr
, "The size of", outfile
, "is suspicious, did avconv fail?"
212 print >>sys
.stderr
, "Conversion failed", e
215 def convert_to_mp4(filename
):
216 with
open(filename
) as f
:
218 basename
, ext
= os
.path
.splitext(filename
)
220 if ext
== ".mp4" and fourcc
== "FLV\x01":
221 os
.rename(filename
, basename
+ ".flv")
223 filename
= basename
+ ext
225 if ext
in (".flv", ".ts"):
226 filename_mp4
= basename
+ ".mp4"
227 return remux(filename
, filename_mp4
)
232 def download_hds(filename
, video_url
, pvswf
=None):
233 filename
= sanify_filename(filename
)
234 print "Downloading: %s" % filename
236 video_url
= video_url
.replace("http://", "hds://")
238 param
= "%s pvswf=%s" % (video_url
, pvswf
)
248 if exec_subprocess(cmd
):
249 return convert_to_mp4(filename
)
253 def download_hls(filename
, video_url
):
254 filename
= sanify_filename(filename
)
255 video_url
= video_url
.replace("http://", "hlsvariant://")
256 print "Downloading: %s" % filename
263 if exec_subprocess(cmd
):
264 return convert_to_mp4(filename
)
268 def natural_sort(l
, key
=None):
269 ignore_list
= ["a", "the"]
275 for c
in re
.split("([0-9]+)", k
):
280 for subc
in c
.split():
281 if subc
not in ignore_list
:
285 return sorted(l
, key
=key_func
)
287 def append_to_qs(url
, params
):
288 r
= list(urlparse
.urlsplit(url
))
289 qs
= urlparse
.parse_qs(r
[3])
290 for k
, v
in params
.iteritems():
295 r
[3] = urllib
.urlencode(qs
, True)
296 url
= urlparse
.urlunsplit(r
)