Skip to content

Commit 61e6cbc

Browse files
authored
Merge pull request #102 from scrapinghub/testing-cmd
Command for creating test fixtures.
2 parents f119b6b + 8998f7f commit 61e6cbc

File tree

6 files changed

+192
-1
lines changed

6 files changed

+192
-1
lines changed

docs/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ To get started, see :ref:`intro-install` and :ref:`intro-tutorial`.
4545

4646
overrides
4747
providers
48+
testing
4849

4950
.. toctree::
5051
:caption: All the rest

docs/settings.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,13 @@ developing Page Objects.
7878

7979
It's recommended to set this to ``False`` by default since you might miss
out on sporadic errors.
81+
82+
83+
SCRAPY_POET_TESTS_DIR
84+
---------------------
85+
86+
Default: ``fixtures``
87+
88+
Sets the location where the ``savefixture`` command creates tests.
89+
90+
More info at :ref:`testing`.

docs/testing.rst

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
.. _testing:
2+
3+
======================
4+
Tests for Page Objects
5+
======================
6+
7+
``web-poet`` provides :ref:`tools for testing page objects
8+
<web-poet:web-poet-testing>`. ``scrapy-poet`` projects can use a Scrapy command
9+
to easily generate tests::
10+
11+
scrapy savefixture my_project.pages.MyItemPage 'https://quotes.toscrape.com/page/1/'
12+
13+
This will request the provided page, create an instance of the provided page
object for this page, call its :meth:`~web_poet.pages.ItemPage.to_item`
method and save both the page object dependencies and the resulting item as a
16+
test fixture. These fixtures can then be used with the ``pytest`` plugin
17+
provided by ``web-poet``.
18+
19+
Configuring the test location
20+
=============================
21+
22+
The ``SCRAPY_POET_TESTS_DIR`` setting specifies where to create the tests. It
23+
can be set in the project settings or with the ``-s`` command argument.
24+
25+
Handling time fields
26+
====================
27+
28+
The tests generated by ``savefixture`` set the :ref:`frozen_time metadata value
29+
<web-poet:web-poet-testing-frozen_time>` to the time of the test creation.

scrapy_poet/commands.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import datetime
2+
from pathlib import Path
3+
from typing import Type
4+
5+
import andi
6+
import scrapy
7+
import time_machine
8+
from scrapy import Request
9+
from scrapy.commands import ScrapyCommand
10+
from scrapy.crawler import Crawler
11+
from scrapy.exceptions import UsageError
12+
from scrapy.http import Response
13+
from scrapy.utils.misc import load_object
14+
from twisted.internet.defer import inlineCallbacks
15+
from web_poet import ItemPage
16+
from web_poet.testing import Fixture
17+
18+
from scrapy_poet import callback_for
19+
from scrapy_poet.downloadermiddlewares import DEFAULT_PROVIDERS, InjectionMiddleware
20+
from scrapy_poet.injection import Injector
21+
22+
saved_dependencies = []
23+
saved_items = []
24+
25+
26+
class SavingInjector(Injector):
27+
@inlineCallbacks
28+
def build_instances_from_providers(
29+
self, request: Request, response: Response, plan: andi.Plan
30+
):
31+
instances = yield super().build_instances_from_providers(
32+
request, response, plan
33+
)
34+
saved_dependencies.extend(instances.values())
35+
return instances
36+
37+
38+
class SavingPipeline:
39+
def process_item(self, item, spider):
40+
saved_items.append(item)
41+
return item
42+
43+
44+
class SavingInjectionMiddleware(InjectionMiddleware):
45+
def __init__(self, crawler: Crawler) -> None:
46+
super().__init__(crawler)
47+
self.injector = SavingInjector(
48+
crawler,
49+
default_providers=DEFAULT_PROVIDERS,
50+
overrides_registry=self.overrides_registry,
51+
)
52+
53+
54+
def spider_for(injectable: Type[ItemPage]) -> Type[scrapy.Spider]:
55+
class InjectableSpider(scrapy.Spider):
56+
name = "injectable"
57+
url = None
58+
59+
def start_requests(self):
60+
yield scrapy.Request(self.url, self.cb)
61+
62+
cb = callback_for(injectable)
63+
64+
return InjectableSpider
65+
66+
67+
class SaveFixtureCommand(ScrapyCommand):
68+
def syntax(self):
69+
return "<page object class> <URL>"
70+
71+
def short_desc(self):
72+
return "Generate a web-poet test for the provided page object and URL"
73+
74+
def run(self, args, opts):
75+
if len(args) != 2:
76+
raise UsageError()
77+
type_name = args[0]
78+
url = args[1]
79+
80+
cls = load_object(type_name)
81+
if not issubclass(cls, ItemPage):
82+
raise UsageError(f"Error: {type_name} is not a descendant of ItemPage")
83+
84+
spider_cls = spider_for(cls)
85+
self.settings["ITEM_PIPELINES"][SavingPipeline] = 100
86+
self.settings["DOWNLOADER_MIDDLEWARES"][SavingInjectionMiddleware] = 543
87+
88+
frozen_time = datetime.datetime.now(datetime.timezone.utc)
89+
with time_machine.travel(frozen_time):
90+
self.crawler_process.crawl(spider_cls, url=url)
91+
self.crawler_process.start()
92+
93+
deps = saved_dependencies
94+
item = saved_items[0]
95+
meta = {
96+
"frozen_time": frozen_time.isoformat(),
97+
}
98+
basedir = Path(self.settings.get("SCRAPY_POET_TESTS_DIR", "fixtures"))
99+
fixture = Fixture.save(basedir / type_name, inputs=deps, item=item, meta=meta)
100+
print(f"\nThe test fixture has been written to {fixture.path}.")

setup.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,20 @@
1515
author_email="[email protected]",
1616
url="https://github.com/scrapinghub/scrapy-poet",
1717
packages=find_packages(exclude=["tests", "example"]),
18+
entry_points={
19+
"scrapy.commands": ["savefixture = scrapy_poet.commands:SaveFixtureCommand"]
20+
},
1821
package_data={"scrapy_poet": ["VERSION"]},
1922
install_requires=[
2023
"andi >= 0.4.1",
2124
"attrs >= 21.3.0",
2225
"parsel >= 1.5.0",
2326
"scrapy >= 2.6.0",
2427
"sqlitedict >= 1.5.0",
28+
"time_machine",
2529
"twisted >= 18.9.0",
2630
"url-matcher >= 0.2.0",
27-
"web-poet >= 0.6.0",
31+
"web-poet >= 0.7.0",
2832
],
2933
classifiers=[
3034
"Development Status :: 3 - Alpha",

tests/test_commands.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import subprocess
2+
import sys
3+
import tempfile
4+
from pathlib import Path
5+
6+
from web_poet.testing import Fixture
7+
8+
9+
def call_scrapy_command(cwd: str, *args: str) -> None:
10+
with tempfile.TemporaryFile() as out:
11+
args = (sys.executable, "-m", "scrapy.cmdline") + args
12+
subprocess.call(args, stdout=out, stderr=out, cwd=cwd)
13+
14+
15+
def test_savefixture(tmp_path) -> None:
    """End-to-end check: ``savefixture`` writes a valid web-poet fixture."""
    project = "foo"
    root = Path(tmp_path)
    call_scrapy_command(str(root), "startproject", project)
    root /= project
    page_cls_path = "foo.po.BTSBookPage"
    page_source = """
import attrs
from web_poet import ResponseUrl
from web_poet.pages import WebPage


@attrs.define
class BTSBookPage(WebPage):

    response_url: ResponseUrl

    def to_item(self):
        return {
            'url': self.url,
            'name': self.css("title::text").get(),
        }
"""
    (root / project / "po.py").write_text(page_source)
    page_url = "http://books.toscrape.com/catalogue/the-wedding-pact-the-omalleys-2_767/index.html"
    call_scrapy_command(str(root), "savefixture", page_cls_path, page_url)
    fixture = Fixture(root / "fixtures" / page_cls_path / "test-1")
    assert fixture.is_valid()
    assert (fixture.input_path / "HttpResponse-body.html").exists()
    assert fixture.meta_path.exists()

0 commit comments

Comments
 (0)