pass
CACHE_DIR = os.path.expanduser("~/.cache/webdl")
-USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1"
+USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:21.0) Gecko/20100101 Firefox/21.0"
class Node(object):
def __init__(self, title, parent=None):
import plus7
plus7.fill_nodes(root_node)
+ import brightcove
+ brightcove.fill_nodes(root_node)
+
return root_node
valid_chars = frozenset("-_.()!@#%^ abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
return open(filename)
def grab_text(url, max_age):
    """Fetch ``url`` and return its body decoded as UTF-8 text.

    Args:
        url: Address handed unchanged to ``urlopen``.
        max_age: Handed unchanged to ``urlopen``; presumably the maximum
            acceptable age of a cached copy -- confirm against ``urlopen``.

    Returns:
        The complete body read from the opened handle, decoded as UTF-8.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    f = urlopen(url, max_age)
    try:
        # Close the handle even when read() or decode() raises, so a
        # failed fetch does not leak the open file.
        return f.read().decode("utf-8")
    finally:
        f.close()
+
def grab_html(url, max_age):
f = urlopen(url, max_age)
doc = html.parse(f, html.HTMLParser(encoding="utf-8", recover=True))