]>
code.delx.au - webdl/blob - common.py
17 USER_AGENT
= "Mozilla/5.0 (X11; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0"
21 autosocks
.try_autosocks()
27 format
= "%(levelname)s %(message)s",
28 level
= logging
.INFO
if os
.environ
.get("DEBUG", None) is None else logging
.DEBUG
,
31 CACHE_FILE
= os
.path
.join(
32 os
.environ
.get("XDG_CACHE_HOME", os
.path
.expanduser("~/.cache")),
# Make sure the directory that holds CACHE_FILE exists before the cache is
# installed. exist_ok=True makes this a no-op when the directory is already
# present and removes the check-then-create race of a separate isdir() test.
os.makedirs(os.path.dirname(CACHE_FILE), exist_ok=True)
39 requests_cache
.install_cache(CACHE_FILE
, backend
='sqlite', expire_after
=3600)
43 def __init__(self
, title
, parent
=None):
46 parent
.children
.append(self
)
49 self
.can_download
= False
51 def get_children(self
):
54 self
.children
= natural_sort(self
.children
, key
=lambda node
: node
.title
)
57 def fill_children(self
):
65 root_node
= Node("Root")
68 iview
.fill_nodes(root_node
)
71 sbs
.fill_nodes(root_node
)
74 ten
.fill_nodes(root_node
)
# Characters allowed to survive in an output filename; anything else is dropped.
valid_chars = frozenset("-_.()!@#%^ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")


def sanify_filename(filename):
    """Return *filename* with every character outside ``valid_chars`` removed.

    Args:
        filename: the proposed output filename.

    Returns:
        The filtered filename, containing only characters from ``valid_chars``.

    Raises:
        ValueError: if nothing is left after filtering. This is an explicit
            check rather than an ``assert`` so it still runs under ``-O``.
    """
    filename = "".join(c for c in filename if c in valid_chars)
    if not filename:
        raise ValueError("filename contains no usable characters")
    return filename
84 def ensure_scheme(url
):
85 parts
= urllib
.parse
.urlparse(url
)
90 return urllib
.parse
.urlunparse(parts
)
92 http_session
= requests
.Session()
93 http_session
.headers
["User-Agent"] = USER_AGENT
96 logging
.debug("grab_text(%r)", url
)
97 request
= http_session
.prepare_request(requests
.Request("GET", url
))
98 response
= http_session
.send(request
)
102 logging
.debug("grab_html(%r)", url
)
103 request
= http_session
.prepare_request(requests
.Request("GET", url
))
104 response
= http_session
.send(request
, stream
=True)
105 doc
= lxml
.html
.parse(io
.BytesIO(response
.content
), lxml
.html
.HTMLParser(encoding
="utf-8", recover
=True))
110 logging
.debug("grab_xml(%r)", url
)
111 request
= http_session
.prepare_request(requests
.Request("GET", url
))
112 response
= http_session
.send(request
, stream
=True)
113 doc
= lxml
.etree
.parse(io
.BytesIO(response
.content
), lxml
.etree
.XMLParser(encoding
="utf-8", recover
=True))
118 logging
.debug("grab_json(%r)", url
)
119 request
= http_session
.prepare_request(requests
.Request("GET", url
))
120 response
= http_session
.send(request
)
121 return response
.json()
123 def exec_subprocess(cmd
):
124 logging
.debug("Executing: %s", cmd
)
126 p
= subprocess
.Popen(cmd
)
129 logging
.error("%s exited with error code: %s", cmd
[0], ret
)
134 logging
.error("Failed to run: %s -- %s", cmd
[0], e
)
135 except KeyboardInterrupt:
136 logging
.info("Cancelled: %s", cmd
)
140 except KeyboardInterrupt:
141 p
.send_signal(signal
.SIGKILL
)
146 def check_command_exists(cmd
):
148 subprocess
.check_output(cmd
, stderr
=subprocess
.STDOUT
)
154 if check_command_exists(["ffmpeg", "--help"]):
157 if check_command_exists(["avconv", "--help"]):
# logging.warn() is a deprecated alias; logging.warning() is the supported name.
logging.warning("Detected libav-tools! ffmpeg is recommended")
161 raise Exception("You must install ffmpeg or libav-tools")
164 if check_command_exists(["ffprobe", "--help"]):
167 if check_command_exists(["avprobe", "--help"]):
# logging.warn() is a deprecated alias; logging.warning() is the supported name.
logging.warning("Detected libav-tools! ffmpeg is recommended")
171 raise Exception("You must install ffmpeg or libav-tools")
173 def get_duration(filename
):
174 ffprobe
= find_ffprobe()
179 "-show_format_entry", "duration",
182 output
= subprocess
.check_output(cmd
).decode("utf-8")
183 for line
in output
.split("\n"):
184 m
= re
.search(R
"([0-9]+)", line
)
187 duration
= m
.group(1)
188 if duration
.isdigit():
# Pass filename as a logging argument. With "% filename" inside the string
# literal the name was never interpolated and the raw text was logged instead.
logging.debug("Falling back to full decode to find duration: %s", filename)
194 ffmpeg
= find_ffmpeg()
201 output
= subprocess
.check_output(cmd
, stderr
=subprocess
.STDOUT
).decode("utf-8")
203 for line
in re
.split(R
"[\r\n]", output
):
204 m
= re
.search(R
"time=([0-9:]*)\.", line
)
207 [h
, m
, s
] = m
.group(1).split(":")
208 # ffmpeg prints the duration as it reads the file, we want the last one
209 duration
= int(h
) * 3600 + int(m
) * 60 + int(s
)
214 raise Exception("Unable to determine video duration of " + filename
)
216 def check_video_durations(flv_filename
, mp4_filename
):
217 flv_duration
= get_duration(flv_filename
)
218 mp4_duration
= get_duration(mp4_filename
)
220 if abs(flv_duration
- mp4_duration
) > 1:
222 "The duration of %s is suspicious, did the remux fail? Expected %s == %s",
223 mp4_filename
, flv_duration
, mp4_duration
229 def remux(infile
, outfile
):
230 logging
.info("Converting %s to mp4", infile
)
232 ffmpeg
= find_ffmpeg()
236 "-bsf:a", "aac_adtstoasc",
242 if not exec_subprocess(cmd
):
245 if not check_video_durations(infile
, outfile
):
251 def convert_to_mp4(filename
):
252 with
open(filename
, "rb") as f
:
254 basename
, ext
= os
.path
.splitext(filename
)
256 if ext
== ".mp4" and fourcc
== b
"FLV\x01":
257 os
.rename(filename
, basename
+ ".flv")
259 filename
= basename
+ ext
261 if ext
in (".flv", ".ts"):
262 filename_mp4
= basename
+ ".mp4"
263 return remux(filename
, filename_mp4
)
268 def download_hds(filename
, video_url
, pvswf
=None):
269 filename
= sanify_filename(filename
)
270 logging
.info("Downloading: %s", filename
)
272 video_url
= "hds://" + video_url
274 param
= "%s pvswf=%s" % (video_url
, pvswf
)
281 "--output", filename
,
285 if exec_subprocess(cmd
):
286 return convert_to_mp4(filename
)
290 def download_hls(filename
, video_url
):
291 filename
= sanify_filename(filename
)
292 video_url
= "hlsvariant://" + video_url
293 logging
.info("Downloading: %s", filename
)
297 "--http-header", "User-Agent=" + USER_AGENT
,
299 "--output", filename
,
303 if exec_subprocess(cmd
):
304 return convert_to_mp4(filename
)
308 def download_mpd(filename
, video_url
):
309 filename
= sanify_filename(filename
)
310 video_url
= "dash://" + video_url
311 logging
.info("Downloading: %s", filename
)
316 "--output", filename
,
320 if exec_subprocess(cmd
):
321 return convert_to_mp4(filename
)
325 def download_http(filename
, video_url
):
326 filename
= sanify_filename(filename
)
327 logging
.info("Downloading: %s", filename
)
331 "--fail", "--retry", "3",
335 if exec_subprocess(cmd
):
336 return convert_to_mp4(filename
)
340 def natural_sort(l
, key
=None):
341 ignore_list
= ["a", "the"]
347 for c
in re
.split("([0-9]+)", k
):
350 newk
.append(c
.zfill(5))
352 for subc
in c
.split():
353 if subc
not in ignore_list
:
357 return sorted(l
, key
=key_func
)
359 def append_to_qs(url
, params
):
360 r
= list(urllib
.parse
.urlsplit(url
))
361 qs
= urllib
.parse
.parse_qs(r
[3])
362 for k
, v
in params
.items():
367 r
[3] = urllib
.parse
.urlencode(sorted(qs
.items()), True)
368 url
= urllib
.parse
.urlunsplit(r
)