code.delx.au - webdl/commitdiff
Old requests_cache versions break streaming, so don't use it
author James Bunton <jamesbunton@delx.net.au>
Tue, 14 Mar 2017 09:19:23 +0000 (20:19 +1100)
committer James Bunton <jamesbunton@delx.net.au>
Tue, 14 Mar 2017 09:19:23 +0000 (20:19 +1100)
common.py

index f0886c8f808fa734e40c41ffcab896d6578e9fb2..58f54e11807a9ed82ff31ef72a95bd27500eab1d 100644 (file)
--- a/common.py
+++ b/common.py
@@ -1,4 +1,5 @@
 import hashlib
 import hashlib
+import io
 import json
 import logging
 import lxml.etree
 import json
 import logging
 import lxml.etree
@@ -99,7 +100,7 @@ def grab_html(url):
     logging.debug("grab_html(%r)", url)
     request = http_session.prepare_request(requests.Request("GET", url))
     response = http_session.send(request, stream=True)
     logging.debug("grab_html(%r)", url)
     request = http_session.prepare_request(requests.Request("GET", url))
     response = http_session.send(request, stream=True)
-    doc = lxml.html.parse(response.raw, lxml.html.HTMLParser(encoding="utf-8", recover=True))
+    doc = lxml.html.parse(io.StringIO(response.text), lxml.html.HTMLParser(encoding="utf-8", recover=True))
     response.close()
     return doc
 
     response.close()
     return doc
 
@@ -107,7 +108,7 @@ def grab_xml(url):
     logging.debug("grab_xml(%r)", url)
     request = http_session.prepare_request(requests.Request("GET", url))
     response = http_session.send(request, stream=True)
     logging.debug("grab_xml(%r)", url)
     request = http_session.prepare_request(requests.Request("GET", url))
     response = http_session.send(request, stream=True)
-    doc = lxml.etree.parse(response.raw, lxml.etree.XMLParser(encoding="utf-8", recover=True))
+    doc = lxml.etree.parse(io.StringIO(response.text), lxml.etree.XMLParser(encoding="utf-8", recover=True))
     response.close()
     return doc
 
     response.close()
     return doc