Skip to content

Commit 97ea1e4

Browse files
committed
implement ZYTE_API_DEFAULT_PARAMS in the settings
1 parent 2b4a0fb commit 97ea1e4

File tree

3 files changed

+61
-8
lines changed

3 files changed

+61
-8
lines changed

README.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ You can see the full list of parameters in the `Zyte API Specification
7979

8080
On the other hand, you could also control it on a per request basis by setting the
8181
``zyte_api`` key in `Request.meta <https://docs.scrapy.org/en/latest/topics/request-response.html#scrapy.http.Request.meta>`_.
82+
When doing so, it will override any parameters that were set via the setting
83+
named ``ZYTE_API_DEFAULT_PARAMS``.
8284

8385
.. code-block:: python
8486

scrapy_zyte_api/handler.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def __init__(
3131
)
3232
self._stats = crawler.stats
3333
self._job_id = crawler.settings.get("JOB")
34+
self._zyte_api_default_params = settings.getdict("ZYTE_API_DEFAULT_PARAMS")
3435
self._session = create_session()
3536

3637
@classmethod
@@ -56,11 +57,14 @@ def download_request(self, request: Request, spider: Spider) -> Deferred:
5657
async def _download_request(
5758
self, request: Request, spider: Spider
5859
) -> Union[ZyteAPITextResponse, ZyteAPIResponse]:
59-
api_params: Dict[str, Any] = request.meta["zyte_api"]
60-
if not isinstance(api_params, dict):
60+
api_params: Dict[str, Any] = self._zyte_api_default_params or {}
61+
try:
62+
api_params.update(request.meta.get("zyte_api") or {})
63+
except TypeError:
6164
logger.error(
62-
"zyte_api parameters in the request meta should be "
63-
f"provided as dictionary, got {type(api_params)} instead ({request.url})."
65+
f"zyte_api parameters in the request meta should be "
66+
f"provided as dictionary, got {type(request.meta.get('zyte_api'))} "
67+
f"instead ({request.url})."
6468
)
6569
raise IgnoreRequest()
6670
# Define url by default

tests/test_api_requests.py

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22
from asyncio import iscoroutine
33
from typing import Any, Dict
4+
from unittest import mock
45

56
import pytest
67
from _pytest.logging import LogCaptureFixture # NOQA
@@ -23,17 +24,16 @@
2324

2425

2526
class TestAPI:
26-
2727
@staticmethod
28-
async def produce_request_response(meta):
28+
async def produce_request_response(meta, custom_settings=None):
2929
with MockServer() as server:
30-
async with make_handler({}, server.urljoin("/")) as handler:
30+
async with make_handler(custom_settings, server.urljoin("/")) as handler:
3131
req = Request(
3232
"http://example.com",
3333
method="POST",
3434
meta=meta,
3535
)
36-
coro = handler._download_request(req, Spider("test"))
36+
coro = handler._download_request(req, None)
3737
assert iscoroutine(coro)
3838
assert not isinstance(coro, Deferred)
3939
resp = await coro # type: ignore
@@ -101,6 +101,53 @@ async def test_http_response_headers_request(self, meta: Dict[str, Dict[str, Any
101101
assert resp.body == b"<html></html>"
102102
assert resp.headers == {b"Test_Header": [b"test_value"]}
103103

104+
@pytest.mark.parametrize(
105+
"meta,custom_settings,expected",
106+
[
107+
({}, {}, {}),
108+
({"zyte_api": {}}, {}, {}),
109+
(
110+
{},
111+
{"ZYTE_API_DEFAULT_PARAMS": {"browserHtml": True, "geolocation": "CA"}},
112+
{"browserHtml": True, "geolocation": "CA"},
113+
),
114+
(
115+
{"zyte_api": {}},
116+
{"ZYTE_API_DEFAULT_PARAMS": {"browserHtml": True, "geolocation": "CA"}},
117+
{"browserHtml": True, "geolocation": "CA"},
118+
),
119+
(
120+
{"zyte_api": {"javascript": True, "geolocation": "US"}},
121+
{"ZYTE_API_DEFAULT_PARAMS": {"browserHtml": True, "geolocation": "CA"}},
122+
{"browserHtml": True, "geolocation": "US", "javascript": True},
123+
),
124+
],
125+
)
126+
@mock.patch("tests.AsyncClient")
127+
@pytest.mark.asyncio
128+
async def test_empty_zyte_api_request_meta(
129+
self,
130+
mock_client,
131+
meta: Dict[str, Dict[str, Any]],
132+
custom_settings: Dict[str, str],
133+
expected: Dict[str, str],
134+
):
135+
try:
136+
# This would always error out since the mocked client doesn't
137+
# return the expected API response.
138+
await self.produce_request_response(meta, custom_settings=custom_settings)
139+
except:
140+
pass
141+
142+
request_call = [c for c in mock_client.mock_calls if "request_raw(" in str(c)]
143+
if not request_call:
144+
pytest.fail("The client's request_raw() method was not called.")
145+
146+
args_used = request_call[0].args[0]
147+
args_used.pop("url")
148+
149+
assert args_used == expected
150+
104151
@pytest.mark.parametrize(
105152
"meta, api_relevant",
106153
[

0 commit comments

Comments
 (0)