|
| 1 | +import asyncio |
| 2 | +import concurrent |
1 | 3 | import logging
|
| 4 | +import platform |
| 5 | +import threading |
2 | 6 | from typing import Awaitable, Iterator, Optional, Tuple, Union
|
3 | 7 |
|
| 8 | +import scrapy |
4 | 9 | from playwright.async_api import Error, Page, Request, Response
|
5 |
| -from scrapy import Spider |
6 | 10 | from scrapy.http.headers import Headers
|
7 | 11 | from scrapy.utils.python import to_unicode
|
| 12 | +from twisted.internet.defer import Deferred |
8 | 13 | from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
|
9 | 14 |
|
10 | 15 |
|
@@ -53,7 +58,7 @@ def _is_safe_close_error(error: Error) -> bool:
|
53 | 58 |
|
54 | 59 | async def _get_page_content(
|
55 | 60 | page: Page,
|
56 |
| - spider: Spider, |
| 61 | + spider: scrapy.Spider, |
57 | 62 | context_name: str,
|
58 | 63 | scrapy_request_url: str,
|
59 | 64 | scrapy_request_method: str,
|
@@ -89,3 +94,39 @@ async def _get_header_value(
|
89 | 94 | return await resource.header_value(header_name)
|
90 | 95 | except Exception:
|
91 | 96 | return None
|
| 97 | + |
| 98 | + |
if platform.system() == "Windows":

    class _WindowsAdapter:
        """Redirect coroutines to an asyncio event loop running in another thread.

        This makes it possible to use a ProactorEventLoop, which is supported
        by Playwright on Windows.
        """

        # Both attributes are created lazily by get_event_loop() and are shared
        # by every caller, since they live on the class itself.
        loop = None  # the ProactorEventLoop that executes the redirected coroutines
        thread = None  # daemon thread running ``loop.run_forever``

        @classmethod
        def get_event_loop(cls) -> asyncio.AbstractEventLoop:
            """Return the shared event loop, creating and starting it on first use.

            The loop is created from a ``WindowsProactorEventLoopPolicy`` and is
            run forever in a daemon thread, so it does not keep the process alive.
            """
            if cls.thread is None:
                if cls.loop is None:
                    policy = asyncio.WindowsProactorEventLoopPolicy()  # type: ignore
                    cls.loop = policy.new_event_loop()
                    # NOTE(review): this registers the loop as the current loop
                    # of the *calling* thread, not of the worker thread started
                    # below. Kept as-is to preserve behavior; confirm intent.
                    asyncio.set_event_loop(cls.loop)
                if not cls.loop.is_running():
                    cls.thread = threading.Thread(target=cls.loop.run_forever, daemon=True)
                    cls.thread.start()
            return cls.loop

        @classmethod
        async def get_result(cls, o) -> object:
            """Run the coroutine ``o`` in the shared loop and return its result.

            The coroutine is scheduled on the loop running in the worker thread.
            Its ``concurrent.futures.Future`` is wrapped into an awaitable asyncio
            future so the calling event loop keeps running while it waits
            (calling the blocking ``Future.result()`` here would stall the
            caller's loop until the coroutine finished). Any exception raised
            by ``o`` propagates to the awaiter.
            """
            future = asyncio.run_coroutine_threadsafe(coro=o, loop=cls.get_event_loop())
            return await asyncio.wrap_future(future)

    def deferred_from_coro(o) -> Deferred:
        """Convert ``o`` into a Deferred, running coroutines in the adapter's loop.

        Deferred instances are returned unchanged; anything else is handed to
        ``_WindowsAdapter.get_result`` and the resulting coroutine is converted
        with Scrapy's own ``deferred_from_coro``.
        """
        if isinstance(o, Deferred):
            return o
        return scrapy.utils.defer.deferred_from_coro(_WindowsAdapter.get_result(o))

else:
    # On other platforms no loop redirection is done; use Scrapy's helper as-is.
    deferred_from_coro = scrapy.utils.defer.deferred_from_coro
0 commit comments