X-Git-Url: https://code.delx.au/webdl/blobdiff_plain/a8c4bbb32465ac02fbc0b32b09c7dcdf1e9342ab..992e749d40b94d33d1703f90afb28d8c8b548dd5:/common.py diff --git a/common.py b/common.py index 58f54e1..cd4c266 100644 --- a/common.py +++ b/common.py @@ -11,9 +11,11 @@ import requests_cache import shutil import signal import subprocess +import sys import time import urllib.parse +USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0" try: import autosocks @@ -25,6 +27,7 @@ except ImportError: logging.basicConfig( format = "%(levelname)s %(message)s", level = logging.INFO if os.environ.get("DEBUG", None) is None else logging.DEBUG, + stream = sys.stdout, ) CACHE_FILE = os.path.join( @@ -50,6 +53,7 @@ class Node(object): def get_children(self): if not self.children: self.fill_children() + self.children = natural_sort(self.children, key=lambda node: node.title) return self.children def fill_children(self): @@ -68,8 +72,8 @@ def load_root_node(): import sbs sbs.fill_nodes(root_node) - import brightcove - brightcove.fill_nodes(root_node) + import ten + ten.fill_nodes(root_node) return root_node @@ -88,7 +92,7 @@ def ensure_scheme(url): return urllib.parse.urlunparse(parts) http_session = requests.Session() -http_session.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:21.0) Gecko/20100101 Firefox/21.0" +http_session.headers["User-Agent"] = USER_AGENT def grab_text(url): logging.debug("grab_text(%r)", url) @@ -100,7 +104,7 @@ def grab_html(url): logging.debug("grab_html(%r)", url) request = http_session.prepare_request(requests.Request("GET", url)) response = http_session.send(request, stream=True) - doc = lxml.html.parse(io.StringIO(response.text), lxml.html.HTMLParser(encoding="utf-8", recover=True)) + doc = lxml.html.parse(io.BytesIO(response.content), lxml.html.HTMLParser(encoding="utf-8", recover=True)) response.close() return doc @@ -108,7 +112,7 @@ def grab_xml(url): logging.debug("grab_xml(%r)", url) request = http_session.prepare_request(requests.Request("GET", url)) response = http_session.send(request, stream=True) - doc = lxml.etree.parse(io.StringIO(response.text), lxml.etree.XMLParser(encoding="utf-8", recover=True)) + doc = lxml.etree.parse(io.BytesIO(response.content), lxml.etree.XMLParser(encoding="utf-8", recover=True)) response.close() return doc @@ -149,16 +153,22 @@ def check_command_exists(cmd): return False def find_ffmpeg(): - for ffmpeg in ["avconv", "ffmpeg"]: - if check_command_exists([ffmpeg, "--help"]): - return ffmpeg + if check_command_exists(["ffmpeg", "--help"]): + return "ffmpeg" + + if check_command_exists(["avconv", "--help"]): + logging.warn("Detected libav-tools! ffmpeg is recommended") + return "avconv" raise Exception("You must install ffmpeg or libav-tools") def find_ffprobe(): - for ffprobe in ["avprobe", "ffprobe"]: - if check_command_exists([ffprobe, "--help"]): - return ffprobe + if check_command_exists(["ffprobe", "--help"]): + return "ffprobe" + + if check_command_exists(["avprobe", "--help"]): + logging.warn("Detected libav-tools! ffmpeg is recommended") + return "avprobe" raise Exception("You must install ffmpeg or libav-tools") @@ -173,12 +183,37 @@ def get_duration(filename): ] output = subprocess.check_output(cmd).decode("utf-8") for line in output.split("\n"): - if line.startswith("duration="): - return float(line.split("=")[1]) # ffprobe - if re.match(R'^[0-9.]*$', line): - return float(line) # avprobe + m = re.search(R"([0-9]+)", line) + if not m: + continue + duration = m.group(1) + if duration.isdigit(): + return int(duration) + - raise Exception("Unable to determine video duration of " + filename) + logging.debug("Falling back to full decode to find duration: %s % filename") + + ffmpeg = find_ffmpeg() + cmd = [ + ffmpeg, + "-i", filename, + "-vn", + "-f", "null", "-", + ] + output = subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode("utf-8") + duration = None + for line in re.split(R"[\r\n]", output): + m = re.search(R"time=([0-9:]*)\.", line) + if not m: + continue + [h, m, s] = m.group(1).split(":") + # ffmpeg prints the duration as it reads the file, we want the last one + duration = int(h) * 3600 + int(m) * 60 + int(s) + + if duration: + return duration + else: + raise Exception("Unable to determine video duration of " + filename) def check_video_durations(flv_filename, mp4_filename): flv_duration = get_duration(flv_filename) @@ -243,9 +278,9 @@ def download_hds(filename, video_url, pvswf=None): param = video_url cmd = [ - "livestreamer", - "-f", - "-o", filename, + "streamlink", + "--force", + "--output", filename, param, "best", ] @@ -260,9 +295,27 @@ def download_hls(filename, video_url): logging.info("Downloading: %s", filename) cmd = [ - "livestreamer", - "-f", - "-o", filename, + "streamlink", + "--http-header", "User-Agent=" + USER_AGENT, + "--force", + "--output", filename, + video_url, + "best", + ] + if exec_subprocess(cmd): + return convert_to_mp4(filename) + else: + return False + +def download_mpd(filename, video_url): + filename = sanify_filename(filename) + video_url = "dash://" + video_url + logging.info("Downloading: %s", filename) + + cmd = [ + "streamlink", + "--force", + "--output", filename, video_url, "best", ]