]> code.delx.au - bg-scripts/blob - lib/twisted_wget.py
RandomBG: Make Listener non-writeable
[bg-scripts] / lib / twisted_wget.py
1 #!/usr/bin/env python2.4
2
3 import GregDebug, base64, os, sys, urlparse
4
5 from twisted.internet import reactor, protocol
6 from twisted.web.client import HTTPClientFactory
7 from twisted.web.http import HTTPClient
8 from twisted.web.client import _parse as parseURL
9
# Names exported by a star-import of this module.
__all__ = ('downloadURL', )
11
def parseURL(url, defaultPort = None):
    """Split a URL into (scheme, auth, host, port, path).

    Based on twisted.web.client._parse, with extra handling for
    "user[:password]@" credentials in the network location.

    url         -- the URL to split
    defaultPort -- port to report when the URL does not name one; when None,
                   443 is used for the 'https' scheme and 80 otherwise

    Returns a 5-tuple:
      scheme -- scheme as reported by urlparse
      auth   -- None when the URL carries no credentials, otherwise
                (user, ) or (user, password)
      host   -- host part of the network location; a bracketed IPv6 literal
                is returned with its brackets
      port   -- port number as an int
      path   -- everything after the network location (path, parameters,
                query and fragment) re-assembled into one string

    Raises ValueError when the port is present but not numeric.
    """
    parsed = urlparse.urlparse(url)
    scheme = parsed[0]
    # Re-assemble the URL without scheme and network location.
    path = urlparse.urlunparse(('', '') + parsed[2:])
    if defaultPort is None:
        if scheme == 'https':
            defaultPort = 443
        else:
            defaultPort = 80
    host, port = parsed[1], defaultPort

    if '@' in host:
        authUser, host = host.split('@', 1)
        auth = (authUser, )
        if ':' in authUser:
            auth = tuple(authUser.split(':', 1))
    else:
        auth = None

    # A bracketed IPv6 literal such as "[::1]" is full of colons; only a
    # colon that follows the closing bracket introduces a port.
    if ':' in host and not host.endswith(']'):
        host, portText = host.rsplit(':', 1)
        # "host:" with nothing after the colon is legal and means "use the
        # default port", so only convert when there is something to convert.
        if portText:
            port = int(portText)

    return scheme, auth, host, port, path
37
class HTTPProxyFactory(protocol.ClientFactory):
    """Client factory that wraps another factory so its connections go
    through an HTTP proxy.

    Every protocol built by the wrapped factory is handed to an
    HTTPProxyProtocol. Attributes that are not found on this object are
    looked up on the wrapped factory instead.
    """

    def __init__(self, realFactory, proxyServer, proxyMethod = 'GET', proxyPassword = None):
        """Remember the wrapped factory and the proxy settings.

        realFactory   -- the factory whose protocols are being proxied
        proxyServer   -- (host, port) pair of the proxy
        proxyMethod   -- stored as-is; read by HTTPProxyProtocol
        proxyPassword -- stored as-is; read by HTTPProxyProtocol
        """
        self.realFactory = realFactory
        proxyHost, proxyPort = proxyServer
        self.proxyHost = proxyHost
        self.proxyPort = proxyPort
        self.proxyMethod = proxyMethod
        self.proxyPassword = proxyPassword

    def buildProtocol(self, addr):
        """Build the wrapped factory's protocol and return it wrapped in an
        HTTPProxyProtocol."""
        wrappedProtocol = self.realFactory.buildProtocol(addr)
        return HTTPProxyProtocol(self, wrappedProtocol)

    def __getattr__(self, key):
        """Fall back to the wrapped factory for attributes missing here."""
        wrappedFactory = self.realFactory
        return getattr(wrappedFactory, key)
51
class HTTPProxyProtocol(protocol.Protocol):
    """Protocol that sits between the transport and a real HTTP client
    protocol when the connection goes to an HTTP proxy.

    On connect it sends its own request line carrying the full URL, followed
    by an optional Proxy-Authorization header and then the rest of the request
    that the proxied protocol produced. Received data and connection loss are
    forwarded to the proxied protocol unchanged.
    """

    def __init__(self, factory, proxied):
        """Set up the wrapper.

        factory -- the HTTPProxyFactory that built this protocol; its
                   realFactory, proxyMethod and proxyPassword are read
        proxied -- the real client protocol being wrapped

        Raises NotImplementedError when factory.proxyMethod is not 'GET'.
        """
        self.factory = factory
        self.proxied = proxied
        self.proxyPassword = factory.proxyPassword
        if self.proxyPassword is not None:
            credentials = self.proxyPassword
            if isinstance(credentials, tuple) and len(credentials) == 1:
                # parseURL() yields (user, ) for "user@host"; formatting that
                # with '%s:%s' would raise TypeError, so use an empty password.
                credentials = (credentials[0], '')
            self.proxyPassword = base64.standard_b64encode('%s:%s' % credentials)
        if factory.proxyMethod == 'GET':
            self.__connectionMade = self.__connectionMade_GET
        else:
            raise NotImplementedError

    def __send(self, value):
        """Write value to the transport."""
        self.transport.write(value)

    def __getTransportWrites(self, function, *args, **kwargs):
        """Call function(*args, **kwargs) and return, as a list, every chunk
        it wrote to the transport instead of letting the chunks go out.

        The transport's write attribute is put back even when function
        raises.
        """
        realWrite = self.transport.write
        captured = []
        self.transport.write = captured.append
        try:
            function(*args, **kwargs)
        finally:
            self.transport.write = realWrite
        return captured

    def __connectionMade_GET(self):
        """Send the proxied client's request with the request line rewritten
        for the proxy."""
        realFactory = self.factory.realFactory
        # The request is addressed to the proxy, so it must name the full URL.
        realFactory.path = realFactory.url

        # Twisted documents makeConnection() as calling connectionMade(), so
        # the real client emits its request here. Capture it, exactly once,
        # so that nothing reaches the proxy ahead of our own request line.
        request = self.__getTransportWrites(self.proxied.makeConnection, self.transport)

        self.__send('GET %s HTTP/1.0\r\n' % realFactory.url)
        if self.proxyPassword is not None:
            self.__send('Proxy-Authorization: Basic %s\r\n' % self.proxyPassword)

        # Remove the real http client's get request (assumed to be its first
        # write) and pass the remaining writes through.
        for line in request[1:]:
            self.__send(line)

    def connectionMade(self):
        """Share the transport with the proxied protocol and send the
        request."""
        self.proxied.transport = self.transport
        self.__connectionMade()

    def dataReceived(self, data):
        """Forward received data to the proxied protocol."""
        self.proxied.dataReceived(data)

    def connectionLost(self, reason):
        """Forward the connection loss to the proxied protocol."""
        self.proxied.connectionLost(reason)
97
# Proxy table keyed by URL scheme. Each value is a 3-tuple
# ((host, port), auth, factoryClass): parseProxies() stores entries in this
# shape and downloadURL() unpacks them.
proxies = {}
def downloadURL(url, method = 'GET', successBack = None, errorBack = None):
    """Start downloading url and return the factory handling the connection.

    url         -- the URL to fetch
    method      -- HTTP method handed to HTTPClientFactory
    successBack -- optional callback added to the factory's deferred
    errorBack   -- optional errback added to the factory's deferred

    When the proxies table has an entry for the URL's scheme, the connection
    is made to that proxy and the returned object is the proxy factory
    wrapping the HTTPClientFactory; otherwise the HTTPClientFactory itself is
    returned. Nothing is transferred until the reactor runs.
    """
    factory = HTTPClientFactory(url, method = method)
    scheme, _, host, port, _ = parseURL(url)
    if successBack is not None:
        factory.deferred.addCallback(successBack)
    if errorBack is not None:
        factory.deferred.addErrback(errorBack)

    if scheme in proxies:
        (host, port), password, factory_type = proxies[scheme]
        # Change the factory to the proxies one
        # NOTE(review): the HTTP method doubles as proxyMethod here, and
        # HTTPProxyProtocol raises NotImplementedError for anything but 'GET'.
        factory = factory_type(realFactory = factory, proxyServer = (host, port), proxyMethod = method, proxyPassword = password)
        reactor.connectTCP(host, port, factory)
    elif scheme == 'https':
        # A plain TCP connection never performs the TLS handshake, so https
        # has to go through connectSSL.
        from twisted.internet import ssl
        reactor.connectSSL(host, port, factory, ssl.ClientContextFactory())
    else:
        reactor.connectTCP(host, port, factory)
    return factory
113
# Note: Does not currently honor the no-proxy variable
def parseProxies():
    """Fill the module-level proxies table from the environment.

    Looks at every non-empty environment variable whose name ends in
    '_proxy'. Only 'http_proxy' is currently acted upon: its value is parsed
    with parseURL() and stored under the key 'http' as
    ((host, port), auth, HTTPProxyFactory).

    A value without a scheme (for example "proxy.example.com:3128") is
    treated as an http:// URL. A value that yields no host is ignored.
    """
    for name, value in os.environ.items():
        if not value or not name.endswith('_proxy'):
            continue
        proxy_type = name[:-len('_proxy')]
        if proxy_type == 'http':
            if '://' not in value:
                # Without a scheme urlparse finds no network location, which
                # would register a proxy with an empty host.
                value = 'http://' + value
            _, auth, host, port, _ = parseURL(value)
            if not host:
                continue
            proxies[proxy_type] = (host, port), auth, HTTPProxyFactory
121
def main(urls):
    """Download every URL in urls, print a summary of each result and return
    once all downloads have finished.

    urls -- iterable of URL strings

    Successful bodies are printed shortened to about 100 characters; failures
    are printed as received. The reactor is stopped after the last download
    has succeeded or failed. With no URLs the function returns at once
    without starting the reactor.
    """
    def summerise(string, summerisedLen = 100):
        """Return string unchanged if it is short enough, otherwise its head
        and tail joined by ' ... '."""
        if len(string) <= summerisedLen:
            return string
        else:
            # Leave room for the five characters of ' ... '.
            summerisedLen -= 5
            start = summerisedLen // 2
            return '%s ... %s' % (string[:start], string[-(summerisedLen - start):])

    def s(data):
        print('Success: "%r"' % summerise(data))

    def e(data):
        print(data)

    urls = list(urls)
    if not urls:
        return

    # One-element list so the nested function can update the count.
    pending = [len(urls)]

    def finished(result):
        """Count one download as done and stop the reactor after the last."""
        pending[0] -= 1
        if pending[0] <= 0:
            reactor.stop()
        return result

    for url in urls:
        downloadURL(url, successBack = s, errorBack = e).deferred.addBoth(finished)
    reactor.run()
141
# Read the proxy servers from the environment variables. This runs on
# import as well as when the file is executed as a script.
parseProxies()

if __name__ == "__main__":
    commandLineURLs = sys.argv[1:]
    main(commandLineURLs)