X-Git-Url: https://code.delx.au/webdl/blobdiff_plain/d31ff7a0e02a17a26a3605581b27f82d8831ae33..dde99023a3a72815f591afebb0ae770b6d30effb:/autograbber.py diff --git a/autograbber.py b/autograbber.py index 7ef6816..1927a26 100755 --- a/autograbber.py +++ b/autograbber.py @@ -4,42 +4,65 @@ from common import load_root_node import fnmatch import logging import os +import shutil import sys -DOWNLOAD_HISTORY_FILES = [ - ".downloaded_auto.txt", - "downloaded_auto.txt", -] +HISTORY_FILENAME = ".history.txt" +PATTERN_FILENAME = ".patterns.txt" +EXCLUDE_FILENAME = ".excludes.txt" class DownloadList(object): def __init__(self): + self.exclude_list = set() + self._load_exclude_list() + self.seen_list = set() - for filename in DOWNLOAD_HISTORY_FILES: - if os.path.isfile(filename): - break - else: - filename = DOWNLOAD_HISTORY_FILES[0] + self._load_history_file() + + self.f = open(HISTORY_FILENAME, "a") + + def _load_exclude_list(self): + try: + with open(EXCLUDE_FILENAME, "r") as f: + for line in f: + self.exclude_list.add(line.strip()) + except Exception as e: + pass + + def _load_history_file(self): + self._move_old_file("downloaded_auto.txt") + self._move_old_file(".downloaded_auto.txt") + try: - self.f = open(filename, "r") - for line in self.f: - self.seen_list.add(line.strip()) - self.f.close() + with open(HISTORY_FILENAME, "r") as f: + for line in f: + self.seen_list.add(line.strip()) except Exception as e: - logging.error("Could not open: %s -- %s", filename, e) - self.f = open(filename, "a") - - def has_seen(self, node): - return node.title in self.seen_list - + logging.error("Could not open history file: %s -- %s", HISTORY_FILENAME, e) + + def _move_old_file(self, old_filename): + if os.path.isfile(old_filename) and not os.path.isfile(HISTORY_FILENAME): + logging.info("Migrating download history from %s to %s", old_filename, HISTORY_FILENAME) + shutil.move(old_filename, HISTORY_FILENAME) + + def wants(self, node): + title = node.title.strip() + if title in self.seen_list: + return False + for exclude in self.exclude_list: + if fnmatch.fnmatch(title, exclude): + return False + return True + def mark_seen(self, node): - self.seen_list.add(node.title) - self.f.write(node.title + "\n") + self.seen_list.add(node.title.strip()) + self.f.write(node.title.strip() + "\n") self.f.flush() def match(download_list, node, pattern, count=0): if node.can_download: - if not download_list.has_seen(node): + if download_list.wants(node): if node.download(): download_list.mark_seen(node) else: @@ -55,7 +78,7 @@ def match(download_list, node, pattern, count=0): match(download_list, child, pattern, count+1) -def main(destdir, patternfile): +def process_one_dir(destdir, patternfile): os.chdir(destdir) node = load_root_node() download_list = DownloadList() @@ -64,15 +87,49 @@ def main(destdir, patternfile): search = line.strip().split("/") match(download_list, node, search) +def check_directories(download_dirs): + result = [] + failed = False + + for d in download_dirs: + d = os.path.abspath(d) + if not os.path.isdir(d): + print("Not a directory!", d) + failed = True + + pattern_filename = os.path.join(d, PATTERN_FILENAME) + if not os.path.isfile(pattern_filename): + print("Missing file!", pattern_filename) + failed = True + + result.append((d, pattern_filename)) + + if failed: + print("Exiting!") + sys.exit(1) + + return result + +def process_dirs(download_dirs): + for download_dir, pattern_filename in check_directories(download_dirs): + logging.info("Processing directory: %s", download_dir) + process_one_dir(download_dir, pattern_filename) + if __name__ == "__main__": - try: + if len(sys.argv) <= 1: + print("Usage: %s download_dir [download_dir ...]" % sys.argv[0]) + sys.exit(1) + + if len(sys.argv) == 3 and os.path.isfile(sys.argv[2]): + # Backwards compatibility with old argument format destdir = os.path.abspath(sys.argv[1]) patternfile = os.path.abspath(sys.argv[2]) - except IndexError: - print("Usage: %s destdir patternfile" % sys.argv[0]) - sys.exit(1) + run = lambda: process_one_dir(destdir, patternfile) + + else: + run = lambda: process_dirs(sys.argv[1:]) + try: - main(destdir, patternfile) + run() except (KeyboardInterrupt, EOFError): print("\nExiting...") -