From: James Bunton
Date: Tue, 14 Mar 2017 09:19:23 +0000 (+1100)
Subject: Old requests_cache versions break streaming, so don't use it
X-Git-Url: https://code.delx.au/webdl/commitdiff_plain/a8c4bbb32465ac02fbc0b32b09c7dcdf1e9342ab

Old requests_cache versions break streaming, so don't use it
---

diff --git a/common.py b/common.py
index f0886c8..58f54e1 100644
--- a/common.py
+++ b/common.py
@@ -1,4 +1,5 @@
 import hashlib
+import io
 import json
 import logging
 import lxml.etree
@@ -99,7 +100,7 @@ def grab_html(url):
     logging.debug("grab_html(%r)", url)
     request = http_session.prepare_request(requests.Request("GET", url))
     response = http_session.send(request, stream=True)
-    doc = lxml.html.parse(response.raw, lxml.html.HTMLParser(encoding="utf-8", recover=True))
+    doc = lxml.html.parse(io.StringIO(response.text), lxml.html.HTMLParser(encoding="utf-8", recover=True))
     response.close()
     return doc
 
@@ -107,7 +108,7 @@ def grab_xml(url):
     logging.debug("grab_xml(%r)", url)
     request = http_session.prepare_request(requests.Request("GET", url))
     response = http_session.send(request, stream=True)
-    doc = lxml.etree.parse(response.raw, lxml.etree.XMLParser(encoding="utf-8", recover=True))
+    doc = lxml.etree.parse(io.StringIO(response.text), lxml.etree.XMLParser(encoding="utf-8", recover=True))
     response.close()
     return doc
 