]>
code.delx.au - webdl/blob - common.py
21 autosocks
.try_autosocks()
27 format
= "%(levelname)s %(message)s",
28 level
= logging
.INFO
if os
.environ
.get("DEBUG", None) is None else logging
.DEBUG
,
31 CACHE_DIR
= os
.path
.join(
32 os
.environ
.get("XDG_CACHE_HOME", os
.path
.expanduser("~/.cache")),
36 USER_AGENT
= "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:21.0) Gecko/20100101 Firefox/21.0"
40 def __init__(self
, title
, parent
=None):
43 parent
.children
.append(self
)
46 self
.can_download
= False
48 def get_children(self
):
53 def fill_children(self
):
61 root_node
= Node("Root")
64 iview
.fill_nodes(root_node
)
67 sbs
.fill_nodes(root_node
)
70 brightcove
.fill_nodes(root_node
)
74 valid_chars
= frozenset("-_.()!@#%^ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
75 def sanify_filename(filename
):
76 filename
= "".join(c
for c
in filename
if c
in valid_chars
)
77 assert len(filename
) > 0
80 def ensure_scheme(url
):
81 parts
= urllib
.parse
.urlparse(url
)
86 return urllib
.parse
.urlunparse(parts
)
88 cookiejar
= http
.cookiejar
.CookieJar()
89 urlopener
= urllib
.request
.build_opener(urllib
.request
.HTTPCookieProcessor(cookiejar
))
90 def _urlopen(url
, referrer
=None):
91 url
= ensure_scheme(url
)
92 req
= urllib
.request
.Request(url
)
93 req
.add_header("User-Agent", USER_AGENT
)
95 req
.add_header("Referer", referrer
)
96 return urlopener
.open(req
)
98 def urlopen(url
, max_age
):
99 logging
.debug("urlopen(%r, %r)", url
, max_age
)
101 if not os
.path
.isdir(CACHE_DIR
):
102 os
.makedirs(CACHE_DIR
)
107 filename
= hashlib
.md5(url
.encode("utf-8")).hexdigest()
108 filename
= os
.path
.join(CACHE_DIR
, filename
)
109 if os
.path
.exists(filename
):
110 file_age
= int(time
.time()) - os
.path
.getmtime(filename
)
111 if file_age
< max_age
:
112 logging
.debug("loading from cache: %s", filename
)
113 return open(filename
, "rb")
115 logging
.debug("downloading: %s -> %s", url
, filename
)
117 dst
= open(filename
, "wb")
119 shutil
.copyfileobj(src
, dst
)
120 except Exception as e
:
129 return open(filename
, "rb")
131 def grab_text(url
, max_age
):
132 f
= urlopen(url
, max_age
)
133 text
= f
.read().decode("utf-8")
137 def grab_html(url
, max_age
):
138 f
= urlopen(url
, max_age
)
139 doc
= lxml
.html
.parse(f
, lxml
.html
.HTMLParser(encoding
="utf-8", recover
=True))
143 def grab_xml(url
, max_age
):
144 f
= urlopen(url
, max_age
)
145 doc
= lxml
.etree
.parse(f
, lxml
.etree
.XMLParser(encoding
="utf-8", recover
=True))
149 def grab_json(url
, max_age
, skip_assignment
=False, skip_function
=False):
150 f
= urlopen(url
, max_age
)
151 text
= f
.read().decode("utf-8")
159 rpos
= text
.rfind(")")
160 text
= text
[pos
+1:rpos
]
162 doc
= json
.loads(text
)
166 def exec_subprocess(cmd
):
167 logging
.debug("Executing: %s", cmd
)
169 p
= subprocess
.Popen(cmd
)
172 logging
.error("%s exited with error code: %s", cmd
[0], ret
)
177 logging
.error("Failed to run: %s -- %s", cmd
[0], e
)
178 except KeyboardInterrupt:
179 logging
.info("Cancelled: %s", cmd
)
183 except KeyboardInterrupt:
184 p
.send_signal(signal
.SIGKILL
)
189 def check_command_exists(cmd
):
191 subprocess
.check_output(cmd
, stderr
=subprocess
.STDOUT
)
197 for ffmpeg
in ["avconv", "ffmpeg"]:
198 if check_command_exists([ffmpeg
, "--help"]):
201 raise Exception("You must install ffmpeg or libav-tools")
204 for ffprobe
in ["avprobe", "ffprobe"]:
205 if check_command_exists([ffprobe
, "--help"]):
208 raise Exception("You must install ffmpeg or libav-tools")
210 def get_duration(filename
):
211 ffprobe
= find_ffprobe()
216 "-show_format_entry", "duration",
219 output
= subprocess
.check_output(cmd
).decode("utf-8")
220 for line
in output
.split("\n"):
221 if line
.startswith("duration="):
222 return float(line
.split("=")[1]) # ffprobe
223 if re
.match(R
'^[0-9.]*$', line
):
224 return float(line
) # avprobe
226 raise Exception("Unable to determine video duration of " + filename
)
228 def check_video_durations(flv_filename
, mp4_filename
):
229 flv_duration
= get_duration(flv_filename
)
230 mp4_duration
= get_duration(mp4_filename
)
232 if abs(flv_duration
- mp4_duration
) > 1:
234 "The duration of %s is suspicious, did the remux fail? Expected %s == %s",
235 mp4_filename
, flv_duration
, mp4_duration
241 def remux(infile
, outfile
):
242 logging
.info("Converting %s to mp4", infile
)
244 ffmpeg
= find_ffmpeg()
248 "-bsf:a", "aac_adtstoasc",
253 if not exec_subprocess(cmd
):
256 if not check_video_durations(infile
, outfile
):
262 def convert_to_mp4(filename
):
263 with
open(filename
, "rb") as f
:
265 basename
, ext
= os
.path
.splitext(filename
)
267 if ext
== ".mp4" and fourcc
== b
"FLV\x01":
268 os
.rename(filename
, basename
+ ".flv")
270 filename
= basename
+ ext
272 if ext
in (".flv", ".ts"):
273 filename_mp4
= basename
+ ".mp4"
274 return remux(filename
, filename_mp4
)
279 def download_hds(filename
, video_url
, pvswf
=None):
280 filename
= sanify_filename(filename
)
281 logging
.info("Downloading: %s", filename
)
283 video_url
= "hds://" + video_url
285 param
= "%s pvswf=%s" % (video_url
, pvswf
)
295 if exec_subprocess(cmd
):
296 return convert_to_mp4(filename
)
300 def download_hls(filename
, video_url
):
301 filename
= sanify_filename(filename
)
302 video_url
= "hlsvariant://" + video_url
303 logging
.info("Downloading: %s", filename
)
311 if exec_subprocess(cmd
):
312 return convert_to_mp4(filename
)
316 def download_http(filename
, video_url
):
317 filename
= sanify_filename(filename
)
318 logging
.info("Downloading: %s", filename
)
322 "--fail", "--retry", "3",
326 if exec_subprocess(cmd
):
327 return convert_to_mp4(filename
)
331 def natural_sort(l
, key
=None):
332 ignore_list
= ["a", "the"]
338 for c
in re
.split("([0-9]+)", k
):
341 newk
.append(c
.zfill(5))
343 for subc
in c
.split():
344 if subc
not in ignore_list
:
348 return sorted(l
, key
=key_func
)
350 def append_to_qs(url
, params
):
351 r
= list(urllib
.parse
.urlsplit(url
))
352 qs
= urllib
.parse
.parse_qs(r
[3])
353 for k
, v
in params
.items():
358 r
[3] = urllib
.parse
.urlencode(sorted(qs
.items()), True)
359 url
= urllib
.parse
.urlunsplit(r
)