Skip to content

Commit 052d0d6

Browse files
authored
Merge pull request #14 from scrapy-plugins/fix-decompression-error
remove 'Content-Encoding' header when returning responses
2 parents 84dac7d + fb0b412 commit 052d0d6

File tree

2 files changed

+44
-3
lines changed

2 files changed

+44
-3
lines changed

scrapy_zyte_api/responses.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@
88

99

1010
class ZyteAPIMixin:
11+
12+
REMOVE_HEADERS = {
13+
# Zyte API already decompresses the HTTP Response Body. Scrapy's
14+
# HttpCompressionMiddleware will error out when it attempts to
15+
# decompress an already decompressed body based on this header.
16+
"content-encoding"
17+
}
18+
1119
def __init__(self, *args, zyte_api_response: Dict = None, **kwargs):
1220
super().__init__(*args, **kwargs)
1321
self._zyte_api_response = zyte_api_response
@@ -27,11 +35,15 @@ def zyte_api_response(self) -> Optional[Dict]:
2735
"""
2836
return self._zyte_api_response
2937

30-
@staticmethod
31-
def _prepare_headers(init_headers: Optional[List[Dict[str, str]]]):
38+
@classmethod
39+
def _prepare_headers(cls, init_headers: Optional[List[Dict[str, str]]]):
3240
if not init_headers:
3341
return None
34-
return {h["name"]: h["value"] for h in init_headers}
42+
return {
43+
h["name"]: h["value"]
44+
for h in init_headers
45+
if h["name"].lower() not in cls.REMOVE_HEADERS
46+
}
3547

3648

3749
class ZyteAPITextResponse(ZyteAPIMixin, TextResponse):

tests/test_responses.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,3 +135,32 @@ def test_non_utf8_response():
135135
response = ZyteAPITextResponse.from_api_response(sample_zyte_api_response)
136136
assert response.text == content
137137
assert response.encoding == "utf-8"
138+
139+
140+
@pytest.mark.parametrize(
141+
"api_response,cls",
142+
[
143+
(api_response_browser, ZyteAPITextResponse),
144+
(api_response_body, ZyteAPIResponse),
145+
],
146+
)
147+
def test_response_headers_removal(api_response, cls):
148+
"""Headers like 'Content-Encoding' should be removed later in the response
149+
instance returned to Scrapy.
150+
151+
However, they should still be present inside 'zyte_api_response["httpResponseHeaders"]'.
152+
"""
153+
additional_headers = [
154+
{"name": "Content-Encoding", "value": "gzip"},
155+
{"name": "X-Some-Other-Value", "value": "123"},
156+
]
157+
raw_response = api_response()
158+
raw_response["httpResponseHeaders"] = additional_headers
159+
160+
response = cls.from_api_response(raw_response)
161+
162+
assert response.headers == {b"X-Some-Other-Value": [b"123"]}
163+
assert (
164+
response.zyte_api_response["httpResponseHeaders"]
165+
== raw_response["httpResponseHeaders"]
166+
)

0 commit comments

Comments
 (0)