From a8c4bbb32465ac02fbc0b32b09c7dcdf1e9342ab Mon Sep 17 00:00:00 2001 From: James Bunton Date: Tue, 14 Mar 2017 20:19:23 +1100 Subject: [PATCH] Old requests_cache versions break streaming, so don't use it --- common.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common.py b/common.py index f0886c8..58f54e1 100644 --- a/common.py +++ b/common.py @@ -1,4 +1,5 @@ import hashlib +import io import json import logging import lxml.etree @@ -99,7 +100,7 @@ def grab_html(url): logging.debug("grab_html(%r)", url) request = http_session.prepare_request(requests.Request("GET", url)) response = http_session.send(request, stream=True) - doc = lxml.html.parse(response.raw, lxml.html.HTMLParser(encoding="utf-8", recover=True)) + doc = lxml.html.parse(io.StringIO(response.text), lxml.html.HTMLParser(encoding="utf-8", recover=True)) response.close() return doc @@ -107,7 +108,7 @@ def grab_xml(url): logging.debug("grab_xml(%r)", url) request = http_session.prepare_request(requests.Request("GET", url)) response = http_session.send(request, stream=True) - doc = lxml.etree.parse(response.raw, lxml.etree.XMLParser(encoding="utf-8", recover=True)) + doc = lxml.etree.parse(io.StringIO(response.text), lxml.etree.XMLParser(encoding="utf-8", recover=True)) response.close() return doc -- 2.39.2