
Commit 8340ced

Update to use HttpResponse, which replaces ResponseData
Reference PR: scrapinghub/web-poet#30
1 parent e8f4c10 commit 8340ced

File tree

8 files changed: +48 -27 lines changed

docs/intro/advanced-tutorial.rst

Lines changed: 4 additions & 6 deletions
@@ -54,11 +54,10 @@ Suppose we have the following Page Object:
         }

         # Simulates clicking on a button that says "View All Images"
-        response: web_poet.ResponseData = await self.http_client.get(
+        response: web_poet.HttpResponse = await self.http_client.get(
             f"https://api.example.com/v2/images?id={item['product_id']}"
         )
-        page = web_poet.WebPage(response)
-        item["images"] = page.css(".product-images img::attr(src)").getall()
+        item["images"] = response.css(".product-images img::attr(src)").getall()
         return item


@@ -122,11 +121,10 @@ This basically acts as a switch to update the behavior of the Page Object:


         # Simulates clicking on a button that says "View All Images"
         if self.meta.get("enable_extracting_all_images"):
-            response: web_poet.ResponseData = await self.http_client.get(
+            response: web_poet.HttpResponse = await self.http_client.get(
                 f"https://api.example.com/v2/images?id={item['product_id']}"
             )
-            page = web_poet.WebPage(response)
-            item["images"] = page.css(".product-images img::attr(src)").getall()
+            item["images"] = response.css(".product-images img::attr(src)").getall()

         return item

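For readers skimming the diff: the change works because web_poet.HttpResponse now exposes the .css()/.xpath() selector shortcuts itself, so the intermediate web_poet.WebPage wrapper is no longer needed for additional requests. A minimal sketch of the updated Page Object; the class name, the http_client type annotation, and the product_id selector are hypothetical, everything else comes from the hunk above:

    import attr
    import web_poet


    @attr.define
    class ProductPage(web_poet.ItemWebPage):
        # Assumption: the tutorial's http_client dependency is web_poet's
        # async HTTP client.
        http_client: web_poet.HttpClient

        async def to_item(self):
            item = {
                # Hypothetical selector for the example.
                "product_id": self.css("#product::attr(data-id)").get(),
            }

            # Simulates clicking on a button that says "View All Images"
            response: web_poet.HttpResponse = await self.http_client.get(
                f"https://api.example.com/v2/images?id={item['product_id']}"
            )
            # Selectors run directly on the HttpResponse; no WebPage wrapper.
            item["images"] = response.css(".product-images img::attr(src)").getall()
            return item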

scrapy_poet/backend.py

Lines changed: 5 additions & 8 deletions
@@ -3,9 +3,11 @@
 import attr
 import scrapy
 from scrapy.utils.defer import deferred_to_future
-from web_poet.page_inputs import ResponseData
+from web_poet.page_inputs import HttpResponse, HttpResponseHeaders
 from web_poet.requests import Request, RequestBackendError

+from scrapy_poet.utils import scrapy_response_to_http_response
+

 logger = logging.getLogger(__name__)

@@ -28,13 +30,8 @@ async def scrapy_backend(request: Request):

     try:
         deferred = backend(request)
-        response = await deferred_to_future(deferred)
-        return ResponseData(
-            url=response.url,
-            html=response.text,
-            status=response.status,
-            headers=response.headers,
-        )
+        response: scrapy.http.Response = await deferred_to_future(deferred)
+        return scrapy_response_to_http_response(response)

     except scrapy.exceptions.IgnoreRequest:
         logger.warning(f"Additional Request Ignored: {request}")
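Pieced together from the hunk above and tests/test_backend.py below, the backend after this commit reads roughly as follows. Only the try/except body is taken verbatim from the diff; the create_scrapy_backend scaffolding and the type guard are reconstructed assumptions:

    import logging

    import scrapy
    from scrapy.utils.defer import deferred_to_future
    from web_poet.requests import Request, RequestBackendError

    from scrapy_poet.utils import scrapy_response_to_http_response

    logger = logging.getLogger(__name__)


    def create_scrapy_backend(backend):
        async def scrapy_backend(request: Request):
            if not isinstance(request, Request):
                # Guard inferred from test_incompatible_scrapy_request; the
                # exception type and message are assumptions.
                raise RequestBackendError(f"Not a web_poet.Request: {request}")
            try:
                deferred = backend(request)
                # deferred_to_future bridges Scrapy's Deferred into asyncio.
                response: scrapy.http.Response = await deferred_to_future(deferred)
                # Conversion to web_poet.HttpResponse now lives in one helper.
                return scrapy_response_to_http_response(response)
            except scrapy.exceptions.IgnoreRequest:
                logger.warning(f"Additional Request Ignored: {request}")
        return scrapy_backend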

scrapy_poet/middleware.py

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@


 DEFAULT_PROVIDERS = {
-    HttpResponseProvider: 500
+    HttpResponseProvider: 500,
     HttpClientProvider: 600,
     MetaProvider: 700,
 }

scrapy_poet/page_input_providers.py

Lines changed: 2 additions & 8 deletions
@@ -19,6 +19,7 @@
 from scrapy.utils.reqser import request_to_dict
 from scrapy.utils.request import request_fingerprint

+from scrapy_poet.utils import scrapy_response_to_http_response
 from scrapy_poet.injection_errors import MalformedProvidedClassesError
 from scrapy_poet.backend import create_scrapy_backend
 from web_poet import HttpResponse, HttpResponseHeaders, Meta

@@ -164,14 +165,7 @@ class HttpResponseProvider(PageObjectInputProvider, CacheDataProviderMixin):

     def __call__(self, to_provide: Set[Callable], response: Response):
         """Builds a ``HttpResponse`` instance using a Scrapy ``Response``"""
-        return [
-            HttpResponse(
-                url=response.url,
-                body=response.body,
-                status=response.status,
-                headers=HttpResponseHeaders.from_bytes(response.headers),
-            )
-        ]
+        return [scrapy_response_to_http_response(response)]

     def fingerprint(self, to_provide: Set[Callable], request: Request) -> str:
         request_keys = {"url", "method", "body"}

scrapy_poet/utils.py

Lines changed: 14 additions & 0 deletions
@@ -1,5 +1,7 @@
 import os

+from web_poet import HttpResponse, HttpResponseHeaders
+from scrapy.http import Response
 from scrapy.utils.project import project_data_dir, inside_project
 from tldextract import tldextract

@@ -28,3 +30,15 @@ def get_scrapy_data_path(createdir: bool = True, default_dir: str = ".scrapy") -
     if createdir:
         os.makedirs(path, exist_ok=True)
     return path
+
+
+def scrapy_response_to_http_response(response: Response):
+    """Convenience method to convert a ``scrapy.http.Response`` into a
+    ``web_poet.HttpResponse``.
+    """
+    return HttpResponse(
+        url=response.url,
+        body=response.body,
+        status=response.status,
+        headers=HttpResponseHeaders.from_bytes(response.headers),
+    )
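A quick, self-contained illustration of the new helper; the URL, body, and headers below are made up for the example:

    from scrapy.http import TextResponse

    from scrapy_poet.utils import scrapy_response_to_http_response

    scrapy_response = TextResponse(
        url="https://example.com",
        body=b"<html><body><p>hi</p></body></html>",
        status=200,
        headers={"Content-Type": "text/html; charset=utf-8"},
    )

    http_response = scrapy_response_to_http_response(scrapy_response)
    assert http_response.url == "https://example.com"
    assert http_response.status == 200
    # Per the tutorial change, selectors work directly on HttpResponse:
    assert http_response.css("p::text").get() == "hi"

Because both HttpResponseProvider and the additional-requests backend now call this helper, the HttpResponse a Page Object receives as a dependency is built the same way as the one returned by http_client.get().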

setup.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@
         'andi >= 0.4.1',
         'attrs',
         'parsel',
-        'web-poet @ git+https://[email protected]/scrapinghub/web-poet@additional-requests#egg=web-poet',
+        'web-poet @ git+https://[email protected]/scrapinghub/web-poet@tmp-dep-AR-1#egg=web-poet',
         'tldextract',
         'sqlitedict',
     ],

tests/test_backend.py

Lines changed: 20 additions & 2 deletions
@@ -38,21 +38,39 @@ async def test_incompatible_scrapy_request(scrapy_backend):
         await scrapy_backend(req)


+@pytest.fixture
+def fake_http_response():
+    return web_poet.HttpResponse(
+        "https://example.com",
+        b"some content",
+        200,
+        {"Content-Type": "text/html; charset=utf-8"},
+    )
+
+
 @pytest.mark.asyncio
-async def test_scrapy_poet_backend():
+async def test_scrapy_poet_backend(fake_http_response):
     req = web_poet.Request("https://example.com")

     with mock.patch(
         "scrapy_poet.backend.deferred_to_future", new_callable=AsyncMock
     ) as mock_dtf:

+        mock_dtf.return_value = fake_http_response
+
         mock_downloader = mock.MagicMock(return_value=AsyncMock)
         scrapy_backend = create_scrapy_backend(mock_downloader)

         response = await scrapy_backend(req)

         mock_downloader.assert_called_once()
-        assert isinstance(response, web_poet.ResponseData)
+        assert isinstance(response, web_poet.HttpResponse)
+
+        assert response.url == "https://example.com"
+        assert response.text == "some content"
+        assert response.status == 200
+        assert response.headers.get("Content-Type") == "text/html; charset=utf-8"
+        assert len(response.headers) == 1


 @pytest.mark.asyncio
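As a standalone illustration of the web_poet.HttpResponse behavior the new assertions exercise, with constructor arguments positional in url, body, status, headers order, mirroring the fixture above:

    import web_poet

    response = web_poet.HttpResponse(
        "https://example.com",                         # url
        b"some content",                               # raw body bytes
        200,                                           # HTTP status
        {"Content-Type": "text/html; charset=utf-8"},  # headers mapping
    )

    # .text decodes the raw body using the declared charset.
    assert response.text == "some content"
    # Headers act as a mapping with .get() lookups.
    assert response.headers.get("Content-Type") == "text/html; charset=utf-8"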

tox.ini

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ deps =
     pytest-asyncio
     scrapy >= 2.6.0
     pytest-twisted
-    web-poet @ git+https://[email protected]/scrapinghub/web-poet@meta#egg=web-poet
+    web-poet @ git+https://[email protected]/scrapinghub/web-poet@tmp-dep-AR-1#egg=web-poet

 commands =
     py.test \
