#!/usr/bin/env python3
# Post-image of autograbber.py, reconstructed from the patch published at
# X-Git-Url: https://code.delx.au/webdl/blobdiff_plain/8caddd92c23d96414c445bc35a62d3f1ed0bc385..dde99023a3a72815f591afebb0ae770b6d30effb:/autograbber.py
"""Download every item matching a set of patterns, once per item.

Each download directory holds three optional/required dot-files:

* ``.patterns.txt`` (required) -- one ``/``-separated fnmatch pattern per
  line; each component is matched against the titles of one tree level.
* ``.excludes.txt`` (optional) -- fnmatch patterns for titles to skip.
* ``.history.txt`` -- titles already downloaded, appended to as we go.
"""

from common import load_root_node
import fnmatch
import logging
import os
import shutil
import sys

HISTORY_FILENAME = ".history.txt"
PATTERN_FILENAME = ".patterns.txt"
EXCLUDE_FILENAME = ".excludes.txt"


class DownloadList(object):
    """Tracks which titles were already downloaded or are excluded.

    All files are resolved relative to the current working directory, so the
    caller must ``chdir`` into the download directory before constructing it.
    The instance keeps the history file open for appending; call ``close()``
    or use it as a context manager to release the handle.
    """

    def __init__(self):
        self.exclude_list = set()
        self._load_exclude_list()

        self.seen_list = set()
        self._load_history_file()

        self.f = open(HISTORY_FILENAME, "a")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the history file; safe to call more than once."""
        if not self.f.closed:
            self.f.close()

    def _load_exclude_list(self):
        """Read exclude patterns, one per line, from EXCLUDE_FILENAME."""
        try:
            with open(EXCLUDE_FILENAME, "r") as f:
                for line in f:
                    self.exclude_list.add(line.strip())
        except FileNotFoundError:
            # The exclude file is optional.
            pass
        except (OSError, UnicodeError) as e:
            # Stay best-effort, but do not hide a real read problem.
            logging.warning("Could not read exclude file: %s -- %s", EXCLUDE_FILENAME, e)

    def _load_history_file(self):
        """Migrate legacy history files, then load the seen titles."""
        self._move_old_file("downloaded_auto.txt")
        self._move_old_file(".downloaded_auto.txt")

        try:
            with open(HISTORY_FILENAME, "r") as f:
                for line in f:
                    self.seen_list.add(line.strip())
        except (OSError, UnicodeError) as e:
            logging.error("Could not open history file: %s -- %s", HISTORY_FILENAME, e)

    def _move_old_file(self, old_filename):
        """Rename *old_filename* to HISTORY_FILENAME unless that already exists."""
        if os.path.isfile(old_filename) and not os.path.isfile(HISTORY_FILENAME):
            logging.info("Migrating download history from %s to %s", old_filename, HISTORY_FILENAME)
            shutil.move(old_filename, HISTORY_FILENAME)

    def wants(self, node):
        """Return True if *node*'s title is neither seen nor excluded."""
        title = node.title.strip()
        if title in self.seen_list:
            return False
        return not any(
            fnmatch.fnmatch(title, exclude) for exclude in self.exclude_list
        )

    def mark_seen(self, node):
        """Record *node*'s title in memory and append it to the history file."""
        title = node.title.strip()
        self.seen_list.add(title)
        self.f.write(title + "\n")
        # Flush per title so an interrupted run still remembers this download.
        self.f.flush()


def match(download_list, node, pattern, count=0):
    """Walk the tree under *node*, downloading nodes that match *pattern*.

    *pattern* is a list of fnmatch patterns; component ``count`` is matched
    against the titles of *node*'s children. A downloadable node ends the
    descent: it is downloaded if ``download_list.wants`` it and marked as seen
    only when ``node.download()`` returns a true value.
    """
    if node.can_download:
        if download_list.wants(node):
            if node.download():
                download_list.mark_seen(node)
            else:
                logging.error("Failed to download! %s", node.title)
        return

    if count >= len(pattern):
        # The pattern ran out of components before reaching a downloadable node.
        logging.error("No match found for pattern: %s", "/".join(pattern))
        return
    p = pattern[count]
    for child in node.get_children():
        if fnmatch.fnmatch(child.title, p):
            match(download_list, child, pattern, count + 1)


def process_one_dir(destdir, patternfile):
    """Change into *destdir* and process every pattern in *patternfile*.

    *patternfile* is opened after the ``chdir``, so a relative path is
    resolved against *destdir*; the callers in this file pass absolute paths.
    """
    os.chdir(destdir)
    node = load_root_node()

    with DownloadList() as download_list, open(patternfile) as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line would otherwise become the pattern [""].
                continue
            match(download_list, node, line.split("/"))


def check_directories(download_dirs):
    """Validate *download_dirs* and return ``(abs_dir, pattern_file)`` pairs.

    Every problem is printed before the process exits with status 1, so the
    user sees all bad directories in a single run.
    """
    result = []
    failed = False

    for d in download_dirs:
        d = os.path.abspath(d)
        if not os.path.isdir(d):
            print("Not a directory!", d)
            failed = True

        pattern_filename = os.path.join(d, PATTERN_FILENAME)
        if not os.path.isfile(pattern_filename):
            print("Missing file!", pattern_filename)
            failed = True

        result.append((d, pattern_filename))

    if failed:
        print("Exiting!")
        sys.exit(1)

    return result


def process_dirs(download_dirs):
    """Validate all *download_dirs* up front, then process each in turn."""
    for download_dir, pattern_filename in check_directories(download_dirs):
        logging.info("Processing directory: %s", download_dir)
        process_one_dir(download_dir, pattern_filename)


def main(argv):
    """Run the grabber for the command line *argv*; return the exit status."""
    if len(argv) <= 1:
        print("Usage: %s download_dir [download_dir ...]" % argv[0])
        return 1

    try:
        if len(argv) == 3 and os.path.isfile(argv[2]):
            # Backwards compatibility with old argument format
            destdir = os.path.abspath(argv[1])
            patternfile = os.path.abspath(argv[2])
            process_one_dir(destdir, patternfile)
        else:
            process_dirs(argv[1:])
    except (KeyboardInterrupt, EOFError):
        print("\nExiting...")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))