]> code.delx.au - webdl/blob - autograbber.py
Added license
[webdl] / autograbber.py
1 #!/usr/bin/python2
2
3 from common import load_root_node
4 import fnmatch
5 import os
6 import sys
7
# Candidate names for the download-history file, looked up relative to the
# current working directory in this order. The first entry is also the name
# used when none of them exists yet.
DOWNLOAD_HISTORY_FILES = [
    ".downloaded_auto.txt",
    "downloaded_auto.txt",
]


class DownloadList(object):
    """Persistent set of node titles that have already been downloaded.

    The history lives in a UTF-8 text file with one title per line. On
    construction the first existing file from DOWNLOAD_HISTORY_FILES is
    loaded (falling back to the first name when none exists) and then kept
    open in append mode as ``self.f`` so new titles can be recorded.

    Titles are compared with surrounding whitespace removed, because that is
    how they come back when the file is re-read.
    """

    def __init__(self):
        self.seen_list = set()
        filename = self._find_history_file()

        # A missing file is the normal first-run case, not an error, so only
        # try to read (and only complain) when the file is actually there.
        if os.path.isfile(filename):
            try:
                # Binary mode + explicit decode behaves the same on
                # Python 2 and Python 3.
                with open(filename, "rb") as history:
                    for line in history:
                        self.seen_list.add(line.decode("utf-8").strip())
            except (IOError, OSError, UnicodeError) as e:
                # Best effort: keep whatever was read before the failure.
                sys.stderr.write("Could not open: %s %s\n" % (filename, e))

        self.f = open(filename, "ab")

    @staticmethod
    def _find_history_file():
        """Return the first existing history file name, else the default."""
        for filename in DOWNLOAD_HISTORY_FILES:
            if os.path.isfile(filename):
                return filename
        return DOWNLOAD_HISTORY_FILES[0]

    def has_seen(self, node):
        """Return True if ``node.title`` has already been recorded."""
        return node.title.strip() in self.seen_list

    def mark_seen(self, node):
        """Record ``node.title`` in memory and append it to the history file."""
        title = node.title.strip()
        self.seen_list.add(title)
        self.f.write(title.encode("utf-8") + b"\n")
        # Flush immediately so the entry survives an interrupted run.
        self.f.flush()
def match(download_list, node, pattern, count=0):
    """Walk the node tree along ``pattern`` and download unseen leaf nodes.

    download_list -- object providing has_seen(node) and mark_seen(node)
    node          -- current tree node; this function reads its
                     ``can_download`` and ``title`` attributes and calls its
                     ``download()`` and ``get_children()`` methods
    pattern       -- list of fnmatch-style patterns, one per tree level
    count         -- index into ``pattern`` for the current level

    A downloadable node is fetched unless it is already recorded, and is
    recorded only when ``download()`` returns a true value, so a failed
    download is attempted again on the next call. Returns None.
    """
    if node.can_download:
        if not download_list.has_seen(node):
            if node.download():
                download_list.mark_seen(node)
            else:
                sys.stderr.write("Failed to download! %s\n" % (node.title,))
        return

    # The pattern ran out before reaching anything downloadable.
    if count >= len(pattern):
        print("No match found for pattern: %s" % ("/".join(pattern),))
        return

    level_pattern = pattern[count]
    for child in node.get_children():
        if fnmatch.fnmatch(child.title, level_pattern):
            match(download_list, child, pattern, count + 1)
def main(destdir, patternfile):
    """Download everything matching the patterns listed in ``patternfile``.

    destdir     -- directory to change into before doing anything else; the
                   download history file is looked up relative to it
    patternfile -- text file with one "/"-separated pattern per line; it is
                   opened after the chdir, so a relative path is resolved
                   against ``destdir``
    """
    os.chdir(destdir)
    node = load_root_node()
    download_list = DownloadList()

    # The context manager closes the pattern file once every line is handled.
    with open(patternfile) as patterns:
        for line in patterns:
            line = line.strip()
            if not line:
                # A blank line carries no pattern; skip it.
                continue
            match(download_list, node, line.split("/"))
if __name__ == "__main__":
    # Resolve both paths up front: main() changes directory, which would
    # otherwise change what a relative patternfile path refers to.
    try:
        destdir = os.path.abspath(sys.argv[1])
        patternfile = os.path.abspath(sys.argv[2])
    except IndexError:
        sys.stderr.write("Usage: %s destdir patternfile\n" % (sys.argv[0],))
        sys.exit(1)
    try:
        main(destdir, patternfile)
    except (KeyboardInterrupt, EOFError):
        # Exit quietly, without a traceback, on Ctrl-C or end of input.
        print("\nExiting...")