|
25 | 25 | PageObjectInputProvider |
26 | 26 | ) |
27 | 27 | from web_poet import default_registry |
28 | | -from web_poet.page_inputs import HttpResponse |
| 28 | +from web_poet.page_inputs import HttpResponse, RequestUrl |
29 | 29 | from scrapy_poet import DummyResponse |
30 | 30 | from tests.utils import (HtmlResource, |
31 | 31 | crawl_items, |
@@ -317,13 +317,70 @@ def test_skip_downloads(settings): |
317 | 317 | assert isinstance(item['response'], Response) is True |
318 | 318 | assert isinstance(item['response'], DummyResponse) is False |
319 | 319 | assert crawler.stats.get_stats().get('downloader/request_count', 0) == 1 |
| 320 | + assert crawler.stats.get_stats().get('scrapy_poet/dummy_response_count', 0) == 0 |
320 | 321 | assert crawler.stats.get_stats().get('downloader/response_count', 0) == 1 |
321 | 322 |
|
322 | 323 | item, url, crawler = yield crawl_single_item( |
323 | 324 | SkipDownloadSpider, ProductHtml, settings) |
324 | 325 | assert isinstance(item['response'], Response) is True |
325 | 326 | assert isinstance(item['response'], DummyResponse) is True |
326 | 327 | assert crawler.stats.get_stats().get('downloader/request_count', 0) == 0 |
| 328 | + assert crawler.stats.get_stats().get('scrapy_poet/dummy_response_count', 0) == 1 |
| 329 | + assert crawler.stats.get_stats().get('downloader/response_count', 0) == 1 |
| 330 | + |
| 331 | + |
class RequestUrlSpider(scrapy.Spider):
    """Spider whose callback asks for a ``DummyResponse`` plus ``RequestUrl``,
    so scrapy-poet can skip the actual download and inject the request URL."""

    url = None  # target URL, filled in by the test harness

    def start_requests(self):
        yield Request(url=self.url, callback=self.parse)

    def parse(self, response: DummyResponse, url: RequestUrl):
        # The response is a dummy (download skipped); surface both objects
        # in the item so the test can inspect them.
        item = {
            'response': response,
            'url': url,
        }
        return item
| 343 | + |
| 344 | + |
@inlineCallbacks
def test_skip_download_request_url(settings):
    """A callback annotated with ``DummyResponse`` + ``RequestUrl`` must not
    trigger a real download; the provider injects the request URL instead.

    Fix: dropped the redundant ``isinstance(...) is True`` comparisons —
    ``isinstance`` already returns a bool, and the ``RequestUrl`` assert in
    the original used the bare form, so this also makes the checks consistent.
    """
    item, url, crawler = yield crawl_single_item(
        RequestUrlSpider, ProductHtml, settings)
    assert isinstance(item['response'], Response)
    assert isinstance(item['response'], DummyResponse)
    assert isinstance(item['url'], RequestUrl)
    assert str(item['url']) == url
    # No real download request was issued; a dummy response was counted.
    stats = crawler.stats.get_stats()
    assert stats.get('downloader/request_count', 0) == 0
    assert stats.get('scrapy_poet/dummy_response_count', 0) == 1
    assert stats.get('downloader/response_count', 0) == 1
| 356 | + |
| 357 | + |
@attr.s(auto_attribs=True)
class RequestUrlPage(ItemPage):
    """Page object whose only dependency is the request URL."""

    url: RequestUrl  # injected by scrapy-poet's RequestUrl provider

    def to_item(self):
        # Expose the injected URL as the scraped item.
        return dict(url=self.url)
| 364 | + |
| 365 | + |
class RequestUrlPageSpider(scrapy.Spider):
    """Spider whose callback depends on a page object (``RequestUrlPage``)
    that itself only needs the request URL, so the download is skipped."""

    url = None  # target URL, filled in by the test harness

    def start_requests(self):
        request = Request(url=self.url, callback=self.parse)
        yield request

    def parse(self, response: DummyResponse, page: RequestUrlPage):
        # Delegate item construction to the page object.
        return page.to_item()
| 374 | + |
| 375 | + |
@inlineCallbacks
def test_skip_download_request_url_page(settings):
    """A page object needing only ``RequestUrl`` must not trigger a real
    download; the item it builds carries the request URL."""
    item, url, crawler = yield crawl_single_item(
        RequestUrlPageSpider, ProductHtml, settings)
    assert tuple(item.keys()) == ('url',)
    assert str(item['url']) == url
    # Hoist the stats dict instead of re-fetching it per assertion.
    stats = crawler.stats.get_stats()
    assert stats.get('downloader/request_count', 0) == 0
    assert stats.get('scrapy_poet/dummy_response_count', 0) == 1
    assert stats.get('downloader/response_count', 0) == 1
328 | 385 |
|
329 | 386 |
|
|
0 commit comments