code.delx.au - webdl/blob - autograbber.py
Fix issues with source listings including leading spaces in program names
[webdl] / autograbber.py
1 #!/usr/bin/env python3
2
3 from common import load_root_node
4 import fnmatch
5 import logging
6 import os
7 import shutil
8 import sys
9
# History of already-downloaded titles; opened relative to the current working directory.
HISTORY_FILENAME = ".history.txt"
# Pattern list that check_directories() expects to find inside each download directory.
PATTERN_FILENAME = ".patterns.txt"
12
class DownloadList(object):
    """Persistent record of the titles that have already been downloaded.

    The history lives in HISTORY_FILENAME in the current working directory,
    one whitespace-stripped title per line. Existing entries are loaded on
    construction, after which the file is held open in append mode so every
    newly seen title can be written out immediately.

    Instances may be used as context managers; leaving the ``with`` block
    closes the history file. Callers that construct the object directly can
    call ``close()`` themselves.
    """

    def __init__(self):
        self.seen_list = set()
        self._load_history_file()
        self.f = open(HISTORY_FILENAME, "a")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def close(self):
        """Close the history file. Calling this more than once is harmless."""
        self.f.close()

    def _load_history_file(self):
        """Fill ``seen_list`` from the history file, migrating old file names first."""
        self._move_old_file("downloaded_auto.txt")
        self._move_old_file(".downloaded_auto.txt")

        try:
            with open(HISTORY_FILENAME, "r") as f:
                self.seen_list.update(line.strip() for line in f)
        except FileNotFoundError:
            # Expected the first time a directory is used: nothing has been
            # recorded yet and __init__ creates the file right after this.
            logging.debug("No history file yet: %s", HISTORY_FILENAME)
        except Exception as e:
            # Best effort: an unreadable history must not stop the run.
            logging.error("Could not open history file: %s -- %s", HISTORY_FILENAME, e)

    def _move_old_file(self, old_filename):
        """Rename ``old_filename`` to HISTORY_FILENAME unless the latter already exists."""
        if os.path.isfile(old_filename) and not os.path.isfile(HISTORY_FILENAME):
            logging.info("Migrating download history from %s to %s", old_filename, HISTORY_FILENAME)
            shutil.move(old_filename, HISTORY_FILENAME)

    def has_seen(self, node):
        """Return True if ``node.title``, stripped of whitespace, is already recorded."""
        return node.title.strip() in self.seen_list

    def mark_seen(self, node):
        """Record ``node.title`` in memory and append it to the history file."""
        title = node.title.strip()
        self.seen_list.add(title)
        self.f.write(title + "\n")
        # Flushed after every entry -- presumably so an interrupted run does
        # not lose titles that were already downloaded.
        self.f.flush()
42
43
def match(download_list, node, pattern, count=0):
    """Walk the node tree along ``pattern`` and download every unseen match.

    Args:
        download_list: Object providing ``has_seen(node)`` and
            ``mark_seen(node)``; used to skip and record downloads.
        node: Tree node exposing ``title``, ``can_download`` and, depending
            on that flag, ``download()`` or ``get_children()``.
        pattern: Sequence of fnmatch-style globs, one per tree level below
            the node the search started from.
        count: Index into ``pattern`` of the glob applied to ``node``'s
            children. Callers normally leave this at 0.

    A downloadable node is fetched as soon as it is reached, whether or not
    the whole pattern has been consumed. Running out of pattern components
    on a node that cannot be downloaded is logged as an error.
    """
    if node.can_download:
        if download_list.has_seen(node):
            return
        if node.download():
            download_list.mark_seen(node)
        else:
            logging.error("Failed to download! %s", node.title)
        return

    if count >= len(pattern):
        # The placeholder is required: logging formats lazily with
        # ``msg % args``, so an argument without a matching %s makes the
        # record fail to format and the message is never emitted.
        logging.error("No match found for pattern: %s", "/".join(pattern))
        return

    current = pattern[count]
    for child in node.get_children():
        if fnmatch.fnmatch(child.title, current):
            match(download_list, child, pattern, count + 1)
60
61
def process_one_dir(destdir, patternfile):
    """Change into ``destdir`` and run every pattern from ``patternfile``.

    Args:
        destdir: Directory to make the current working directory before any
            other work is done; the download history is opened relative to it.
        patternfile: Text file with one pattern per line, components
            separated by "/". It is opened after the chdir, so a relative
            path is resolved against ``destdir``.
    """
    os.chdir(destdir)
    node = load_root_node()
    download_list = DownloadList()

    # Use a context manager so the pattern file is closed even if a download
    # raises part-way through.
    with open(patternfile) as f:
        for line in f:
            search = line.strip().split("/")
            match(download_list, node, search)
70
def check_directories(download_dirs):
    """Validate the download directories and locate their pattern files.

    Each entry is converted to an absolute path and must be a directory
    containing a PATTERN_FILENAME file. Every problem found is printed; if
    there was at least one, the process exits with status 1 once all
    entries have been checked.

    Returns:
        A list of ``(absolute_directory, pattern_file_path)`` tuples, in the
        order the directories were given.
    """
    checked = []
    all_ok = True

    for raw_dir in download_dirs:
        directory = os.path.abspath(raw_dir)
        pattern_path = os.path.join(directory, PATTERN_FILENAME)

        if not os.path.isdir(directory):
            print("Not a directory!", directory)
            all_ok = False

        if not os.path.isfile(pattern_path):
            print("Missing file!", pattern_path)
            all_ok = False

        checked.append((directory, pattern_path))

    if not all_ok:
        print("Exiting!")
        sys.exit(1)

    return checked
93
def process_dirs(download_dirs):
    """Validate all of ``download_dirs`` up front, then process each one in order."""
    validated = check_directories(download_dirs)
    for directory, patterns in validated:
        logging.info("Processing directory: %s", directory)
        process_one_dir(directory, patterns)
98
if __name__ == "__main__":
    # Script entry point: autograbber.py download_dir [download_dir ...]
    args = sys.argv[1:]
    if not args:
        print("Usage: %s download_dir [download_dir ...]" % sys.argv[0])
        sys.exit(1)

    # Backwards compatibility with old argument format: exactly two
    # arguments where the second is an existing file means
    # "<destdir> <patternfile>".
    legacy_invocation = len(args) == 2 and os.path.isfile(args[1])

    try:
        if legacy_invocation:
            process_one_dir(os.path.abspath(args[0]), os.path.abspath(args[1]))
        else:
            process_dirs(args)
    except (KeyboardInterrupt, EOFError):
        print("\nExiting...")