File tree Expand file tree Collapse file tree 2 files changed +44
-3
lines changed
Expand file tree Collapse file tree 2 files changed +44
-3
lines changed Original file line number Diff line number Diff line change 88
99
1010class ZyteAPIMixin :
11+
12+ REMOVE_HEADERS = {
13+ # Zyte API already decompresses the HTTP Response Body. Scrapy's
14+ # HttpCompressionMiddleware will error out when it attempts to
15+ # decompress an already decompressed body based on this header.
16+ "content-encoding"  # keep lowercase: _prepare_headers lowercases names before the lookup
17+ }
18+
1119 def __init__ (self , * args , zyte_api_response : Dict = None , ** kwargs ):
1220 super ().__init__ (* args , ** kwargs )
1321 self ._zyte_api_response = zyte_api_response
@@ -27,11 +35,15 @@ def zyte_api_response(self) -> Optional[Dict]:
2735 """
2836 return self ._zyte_api_response
2937
30- @staticmethod
31- def _prepare_headers (init_headers : Optional [List [Dict [str , str ]]]):
38+ @classmethod
39+ def _prepare_headers (cls , init_headers : Optional [List [Dict [str , str ]]]):
3240 if not init_headers :  # None or an empty list: there are no headers to build
3341 return None
34- return {h ["name" ]: h ["value" ] for h in init_headers }
42+ return {
43+ h ["name" ]: h ["value" ]
44+ for h in init_headers
45+ if h ["name" ].lower () not in cls .REMOVE_HEADERS  # case-insensitive match; REMOVE_HEADERS entries are lowercase
46+ }
3547
3648
3749class ZyteAPITextResponse (ZyteAPIMixin , TextResponse ):
Original file line number Diff line number Diff line change @@ -135,3 +135,32 @@ def test_non_utf8_response():
135135 response = ZyteAPITextResponse .from_api_response (sample_zyte_api_response )
136136 assert response .text == content
137137 assert response .encoding == "utf-8"
138+
139+
140+ @pytest .mark .parametrize (
141+ "api_response,cls" ,
142+ [
143+ (api_response_browser , ZyteAPITextResponse ),
144+ (api_response_body , ZyteAPIResponse ),
145+ ],
146+ )
147+ def test_response_headers_removal (api_response , cls ):
148+ """Headers like 'Content-Encoding' should be removed from the response
149+ instance returned to Scrapy.
150+
151+ However, they should still be present in zyte_api_response["httpResponseHeaders"].
152+ """
153+ additional_headers = [
154+ {"name" : "Content-Encoding" , "value" : "gzip" },
155+ {"name" : "X-Some-Other-Value" , "value" : "123" },
156+ ]
157+ raw_response = api_response ()
158+ raw_response ["httpResponseHeaders" ] = additional_headers
159+
160+ response = cls .from_api_response (raw_response )
161+
162+ assert response .headers == {b"X-Some-Other-Value" : [b"123" ]}
163+ assert (
164+ response .zyte_api_response ["httpResponseHeaders" ]
165+ == raw_response ["httpResponseHeaders" ]
166+ )
You can’t perform that action at this time.
0 commit comments