Skip to content

Commit b350ef9

Browse files
committed
Connect to remote browser with BrowserType.connect
1 parent 030c8a1 commit b350ef9

File tree

2 files changed

+27
-3
lines changed

2 files changed

+27
-3
lines changed

scrapy_playwright/handler.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from scrapy import Spider, signals
2323
from scrapy.core.downloader.handlers.http import HTTPDownloadHandler
2424
from scrapy.crawler import Crawler
25+
from scrapy.exceptions import NotSupported
2526
from scrapy.http import Request, Response
2627
from scrapy.http.headers import Headers
2728
from scrapy.responsetypes import responsetypes
@@ -67,6 +68,8 @@ class BrowserContextWrapper:
6768
class Config:
6869
cdp_url: Optional[str]
6970
cdp_kwargs: dict
71+
connect_url: Optional[str]
72+
connect_kwargs: dict
7073
browser_type_name: str
7174
launch_options: dict
7275
max_pages_per_context: int
@@ -76,20 +79,27 @@ class Config:
7679

7780
@classmethod
7881
def from_settings(cls, settings: Settings) -> "Config":
82+
if settings.get("PLAYWRIGHT_CDP_URL") and settings.get("PLAYWRIGHT_CONNECT_URL"):
83+
msg = "Setting both PLAYWRIGHT_CDP_URL and PLAYWRIGHT_CONNECT_URL is not supported"
84+
logger.error(msg)
85+
raise NotSupported(msg)
7986
cfg = cls(
8087
cdp_url=settings.get("PLAYWRIGHT_CDP_URL"),
8188
cdp_kwargs=settings.getdict("PLAYWRIGHT_CDP_KWARGS") or {},
89+
connect_url=settings.get("PLAYWRIGHT_CONNECT_URL"),
90+
connect_kwargs=settings.getdict("PLAYWRIGHT_CONNECT_KWARGS") or {},
8291
browser_type_name=settings.get("PLAYWRIGHT_BROWSER_TYPE") or DEFAULT_BROWSER_TYPE,
8392
launch_options=settings.getdict("PLAYWRIGHT_LAUNCH_OPTIONS") or {},
8493
max_pages_per_context=settings.getint("PLAYWRIGHT_MAX_PAGES_PER_CONTEXT"),
8594
max_contexts=settings.getint("PLAYWRIGHT_MAX_CONTEXTS") or None,
8695
startup_context_kwargs=settings.getdict("PLAYWRIGHT_CONTEXTS"),
8796
)
8897
cfg.cdp_kwargs.pop("endpoint_url", None)
98+
cfg.connect_kwargs.pop("ws_endpoint", None)
8999
if not cfg.max_pages_per_context:
90100
cfg.max_pages_per_context = settings.getint("CONCURRENT_REQUESTS")
91-
if cfg.cdp_url and cfg.launch_options:
92-
logger.warning("PLAYWRIGHT_CDP_URL is set, ignoring PLAYWRIGHT_LAUNCH_OPTIONS")
101+
if (cfg.cdp_url or cfg.connect_url) and cfg.launch_options:
102+
logger.warning("Connecting to remote browser, ignoring PLAYWRIGHT_LAUNCH_OPTIONS")
93103
if "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT" in settings:
94104
with suppress(TypeError, ValueError):
95105
cfg.navigation_timeout = float(settings["PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT"])
@@ -172,6 +182,15 @@ async def _maybe_connect_devtools(self) -> None:
172182
)
173183
logger.info("Connected using CDP: %s", self.config.cdp_url)
174184

185+
async def _maybe_connect_remote(self) -> None:
    """Connect to a remote Playwright server unless a browser is already set.

    Calls ``self.browser_type.connect`` with ``self.config.connect_url`` and
    ``self.config.connect_kwargs`` and stores the result in ``self.browser``.
    The ``hasattr`` check is made while holding ``self.browser_launch_lock``,
    so a caller that waited on the lock sees the attribute set by the previous
    holder and does not connect a second time.
    """
    async with self.browser_launch_lock:
        if not hasattr(self, "browser"):
            logger.info("Connecting to remote Playwright: %s", self.config.connect_url)
            self.browser = await self.browser_type.connect(
                self.config.connect_url, **self.config.connect_kwargs
            )
            # Report the endpoint we connected to, mirroring the "Connecting"
            # message above. The kwargs dict is deliberately not logged: it is
            # not the endpoint, and it may hold values such as request headers
            # that should not end up in logs.
            logger.info("Connected to remote Playwright: %s", self.config.connect_url)
193+
175194
async def _create_browser_context(
176195
self,
177196
name: str,
@@ -193,6 +212,11 @@ async def _create_browser_context(
193212
context = await self.browser.new_context(**context_kwargs)
194213
persistent = False
195214
remote = True
215+
elif self.config.connect_url:
216+
await self._maybe_connect_remote()
217+
context = await self.browser.new_context(**context_kwargs)
218+
persistent = False
219+
remote = True
196220
else:
197221
await self._maybe_launch_browser()
198222
context = await self.browser.new_context(**context_kwargs)

tests/tests_asyncio/test_remote.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,5 +76,5 @@ async def test_devtools(self):
7676
assert (
7777
"scrapy-playwright",
7878
logging.WARNING,
79-
"PLAYWRIGHT_CDP_URL is set, ignoring PLAYWRIGHT_LAUNCH_OPTIONS",
79+
"Connecting to remote browser, ignoring PLAYWRIGHT_LAUNCH_OPTIONS",
8080
) in self._caplog.record_tuples

0 commit comments

Comments
 (0)