import datetime
from pathlib import Path
from typing import Type

import andi
import scrapy
import time_machine
from scrapy import Request
from scrapy.commands import ScrapyCommand
from scrapy.crawler import Crawler
from scrapy.exceptions import UsageError
from scrapy.http import Response
from scrapy.utils.misc import load_object
from twisted.internet.defer import inlineCallbacks
from web_poet import ItemPage
from web_poet.testing import Fixture

from scrapy_poet import callback_for
from scrapy_poet.downloadermiddlewares import DEFAULT_PROVIDERS, InjectionMiddleware
from scrapy_poet.injection import Injector

# Module-level stores filled during the crawl: the dependency instances built
# for the page object and the items scraped with it.
saved_dependencies = []
saved_items = []
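

# An Injector that also records every dependency instance built by the
# providers, so they can later be written to the fixture as inputs.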
class SavingInjector(Injector):
    @inlineCallbacks
    def build_instances_from_providers(
        self, request: Request, response: Response, plan: andi.Plan
    ):
        instances = yield super().build_instances_from_providers(
            request, response, plan
        )
        saved_dependencies.extend(instances.values())
        return instances
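

# An item pipeline that records every scraped item; the first recorded item
# becomes the expected output stored in the fixture.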
class SavingPipeline:
    def process_item(self, item, spider):
        saved_items.append(item)
        return item
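

# An InjectionMiddleware that replaces the regular Injector with the saving
# one defined above.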
class SavingInjectionMiddleware(InjectionMiddleware):
    def __init__(self, crawler: Crawler) -> None:
        super().__init__(crawler)
        self.injector = SavingInjector(
            crawler,
            default_providers=DEFAULT_PROVIDERS,
            overrides_registry=self.overrides_registry,
        )
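

# Builds a minimal spider that requests a single URL and handles the response
# with a callback_for() callback for the given page object class.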
def spider_for(injectable: Type[ItemPage]) -> Type[scrapy.Spider]:
    class InjectableSpider(scrapy.Spider):
        name = "injectable"
        url = None

        def start_requests(self):
            yield scrapy.Request(self.url, self.cb)

        cb = callback_for(injectable)

    return InjectableSpider
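

# The command itself: crawl the given URL with the given page object class,
# then save the recorded dependencies and item as a web-poet test fixture.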
class SaveFixtureCommand(ScrapyCommand):
    def syntax(self):
        return "<page object class> <URL>"

    def short_desc(self):
        return "Generate a web-poet test for the provided page object and URL"

    def run(self, args, opts):
        if len(args) != 2:
            raise UsageError()
        type_name = args[0]
        url = args[1]

        cls = load_object(type_name)
        if not issubclass(cls, ItemPage):
            raise UsageError(f"Error: {type_name} is not a descendant of ItemPage")

        spider_cls = spider_for(cls)
        # Wire the saving pipeline and middleware into the crawl settings.
        self.settings["ITEM_PIPELINES"][SavingPipeline] = 100
        self.settings["DOWNLOADER_MIDDLEWARES"][SavingInjectionMiddleware] = 543
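
        # Freeze the clock for the whole crawl so that time-dependent page
        # object output stays reproducible when the fixture is replayed.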
        frozen_time = datetime.datetime.now(datetime.timezone.utc)
        with time_machine.travel(frozen_time):
            self.crawler_process.crawl(spider_cls, url=url)
            self.crawler_process.start()
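
        # The crawl has finished; assemble the fixture from the recorded data.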
        deps = saved_dependencies
        item = saved_items[0]
        meta = {
            "frozen_time": frozen_time.isoformat(),
        }
        basedir = Path(self.settings.get("SCRAPY_POET_TESTS_DIR", "fixtures"))
        fixture = Fixture.save(basedir / type_name, inputs=deps, item=item, meta=meta)
        print(f"\nThe test fixture has been written to {fixture.path}.")
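

# A rough usage sketch, assuming this module is exposed through Scrapy's
# COMMANDS_MODULE setting and the command is registered as "savefixture";
# the page object path and URL below are illustrative, not taken from the
# code above:
#
#   scrapy savefixture myproject.pages.BookPage 'http://example.com/book/1'
#
# The saved fixture can then be replayed with pytest, which web-poet's
# testing framework integrates with:
#
#   pytest fixtures/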