code.delx.au - webdl/blobdiff - autograbber.py
Failsafe in case channel 10 returns bad results again
[webdl] / autograbber.py
index 7ef6816fb9583f4a04c6d370db2c2e4a804f30f2..1927a261554f4675ab9380890dbbcdf868f36a60 100755 (executable)
@@ -4,42 +4,65 @@ from common import load_root_node
 import fnmatch
 import logging
 import os
+import shutil
 import sys
 
-DOWNLOAD_HISTORY_FILES = [
-    ".downloaded_auto.txt",
-    "downloaded_auto.txt",
-]
+HISTORY_FILENAME = ".history.txt"
+PATTERN_FILENAME = ".patterns.txt"
+EXCLUDE_FILENAME = ".excludes.txt"
 
 class DownloadList(object):
     def __init__(self):
+        self.exclude_list = set()
+        self._load_exclude_list()
+
         self.seen_list = set()
-        for filename in DOWNLOAD_HISTORY_FILES:
-            if os.path.isfile(filename):
-                break
-        else:
-            filename = DOWNLOAD_HISTORY_FILES[0]
+        self._load_history_file()
+
+        self.f = open(HISTORY_FILENAME, "a")
+
+    def _load_exclude_list(self):
+        try:
+            with open(EXCLUDE_FILENAME, "r") as f:
+                for line in f:
+                    self.exclude_list.add(line.strip())
+        except Exception as e:
+            pass
+
+    def _load_history_file(self):
+        self._move_old_file("downloaded_auto.txt")
+        self._move_old_file(".downloaded_auto.txt")
+
         try:
-            self.f = open(filename, "r")
-            for line in self.f:
-                self.seen_list.add(line.strip())
-            self.f.close()
+            with open(HISTORY_FILENAME, "r") as f:
+                for line in f:
+                    self.seen_list.add(line.strip())
         except Exception as e:
-            logging.error("Could not open: %s -- %s", filename, e)
-        self.f = open(filename, "a")
-    
-    def has_seen(self, node):
-        return node.title in self.seen_list
-    
+            logging.error("Could not open history file: %s -- %s", HISTORY_FILENAME, e)
+
+    def _move_old_file(self, old_filename):
+        if os.path.isfile(old_filename) and not os.path.isfile(HISTORY_FILENAME):
+            logging.info("Migrating download history from %s to %s", old_filename, HISTORY_FILENAME)
+            shutil.move(old_filename, HISTORY_FILENAME)
+
+    def wants(self, node):
+        title = node.title.strip()
+        if title in self.seen_list:
+            return False
+        for exclude in self.exclude_list:
+            if fnmatch.fnmatch(title, exclude):
+                return False
+        return True
+
     def mark_seen(self, node):
-        self.seen_list.add(node.title)
-        self.f.write(node.title + "\n")
+        self.seen_list.add(node.title.strip())
+        self.f.write(node.title.strip() + "\n")
         self.f.flush()
 
 
 def match(download_list, node, pattern, count=0):
     if node.can_download:
-        if not download_list.has_seen(node):
+        if download_list.wants(node):
             if node.download():
                 download_list.mark_seen(node)
             else:
@@ -55,7 +78,7 @@ def match(download_list, node, pattern, count=0):
             match(download_list, child, pattern, count+1)
 
 
-def main(destdir, patternfile):
+def process_one_dir(destdir, patternfile):
     os.chdir(destdir)
     node = load_root_node()
     download_list = DownloadList()
@@ -64,15 +87,49 @@ def main(destdir, patternfile):
         search = line.strip().split("/")
         match(download_list, node, search)
 
+def check_directories(download_dirs):
+    result = []
+    failed = False
+
+    for d in download_dirs:
+        d = os.path.abspath(d)
+        if not os.path.isdir(d):
+            print("Not a directory!", d)
+            failed = True
+
+        pattern_filename = os.path.join(d, PATTERN_FILENAME)
+        if not os.path.isfile(pattern_filename):
+            print("Missing file!", pattern_filename)
+            failed = True
+
+        result.append((d, pattern_filename))
+
+    if failed:
+        print("Exiting!")
+        sys.exit(1)
+
+    return result
+
+def process_dirs(download_dirs):
+    for download_dir, pattern_filename in check_directories(download_dirs):
+        logging.info("Processing directory: %s", download_dir)
+        process_one_dir(download_dir, pattern_filename)
+
 if __name__ == "__main__":
-    try:
+    if len(sys.argv) <= 1:
+        print("Usage: %s download_dir [download_dir ...]" % sys.argv[0])
+        sys.exit(1)
+
+    if len(sys.argv) == 3 and os.path.isfile(sys.argv[2]):
+        # Backwards compatibility with old argument format
         destdir = os.path.abspath(sys.argv[1])
         patternfile = os.path.abspath(sys.argv[2])
-    except IndexError:
-        print("Usage: %s destdir patternfile" % sys.argv[0])
-        sys.exit(1)
+        run = lambda: process_one_dir(destdir, patternfile)
+
+    else:
+        run = lambda: process_dirs(sys.argv[1:])
+
     try:
-        main(destdir, patternfile)
+        run()
     except (KeyboardInterrupt, EOFError):
         print("\nExiting...")
-