22
22
from scrapy import Spider , signals
23
23
from scrapy .core .downloader .handlers .http import HTTPDownloadHandler
24
24
from scrapy .crawler import Crawler
25
+ from scrapy .exceptions import NotSupported
25
26
from scrapy .http import Request , Response
26
27
from scrapy .http .headers import Headers
27
28
from scrapy .responsetypes import responsetypes
@@ -67,6 +68,8 @@ class BrowserContextWrapper:
67
68
class Config :
68
69
cdp_url : Optional [str ]
69
70
cdp_kwargs : dict
71
+ connect_url : Optional [str ]
72
+ connect_kwargs : dict
70
73
browser_type_name : str
71
74
launch_options : dict
72
75
max_pages_per_context : int
@@ -76,20 +79,27 @@ class Config:
76
79
77
80
@classmethod
78
81
def from_settings (cls , settings : Settings ) -> "Config" :
82
+ if settings .get ("PLAYWRIGHT_CDP_URL" ) and settings .get ("PLAYWRIGHT_CONNECT_URL" ):
83
+ msg = "Setting both PLAYWRIGHT_CDP_URL and PLAYWRIGHT_CONNECT_URL is not supported"
84
+ logger .error (msg )
85
+ raise NotSupported (msg )
79
86
cfg = cls (
80
87
cdp_url = settings .get ("PLAYWRIGHT_CDP_URL" ),
81
88
cdp_kwargs = settings .getdict ("PLAYWRIGHT_CDP_KWARGS" ) or {},
89
+ connect_url = settings .get ("PLAYWRIGHT_CONNECT_URL" ),
90
+ connect_kwargs = settings .getdict ("PLAYWRIGHT_CONNECT_KWARGS" ) or {},
82
91
browser_type_name = settings .get ("PLAYWRIGHT_BROWSER_TYPE" ) or DEFAULT_BROWSER_TYPE ,
83
92
launch_options = settings .getdict ("PLAYWRIGHT_LAUNCH_OPTIONS" ) or {},
84
93
max_pages_per_context = settings .getint ("PLAYWRIGHT_MAX_PAGES_PER_CONTEXT" ),
85
94
max_contexts = settings .getint ("PLAYWRIGHT_MAX_CONTEXTS" ) or None ,
86
95
startup_context_kwargs = settings .getdict ("PLAYWRIGHT_CONTEXTS" ),
87
96
)
88
97
cfg .cdp_kwargs .pop ("endpoint_url" , None )
98
+ cfg .connect_kwargs .pop ("ws_endpoint" , None )
89
99
if not cfg .max_pages_per_context :
90
100
cfg .max_pages_per_context = settings .getint ("CONCURRENT_REQUESTS" )
91
- if cfg .cdp_url and cfg .launch_options :
92
- logger .warning ("PLAYWRIGHT_CDP_URL is set , ignoring PLAYWRIGHT_LAUNCH_OPTIONS" )
101
+ if ( cfg .cdp_url or cfg . connect_url ) and cfg .launch_options :
102
+ logger .warning ("Connecting to remote browser , ignoring PLAYWRIGHT_LAUNCH_OPTIONS" )
93
103
if "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT" in settings :
94
104
with suppress (TypeError , ValueError ):
95
105
cfg .navigation_timeout = float (settings ["PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT" ])
@@ -172,6 +182,15 @@ async def _maybe_connect_devtools(self) -> None:
172
182
)
173
183
logger .info ("Connected using CDP: %s" , self .config .cdp_url )
174
184
185
async def _maybe_connect_remote(self) -> None:
    """Connect to the remote Playwright server the first time it is needed.

    The check and the connection both happen while ``browser_launch_lock``
    is held. A connection is only attempted when ``self.browser`` has not
    been set yet; otherwise this is a no-op.

    ``config.connect_url`` is passed positionally to ``browser_type.connect``
    and ``config.connect_kwargs`` is expanded as keyword arguments.
    """
    async with self.browser_launch_lock:
        # An existing ``browser`` attribute means a browser is already
        # available, so there is nothing to do.
        if hasattr(self, "browser"):
            return
        logger.info("Connecting to remote Playwright: %s", self.config.connect_url)
        self.browser = await self.browser_type.connect(
            self.config.connect_url, **self.config.connect_kwargs
        )
        # Log the endpoint (as the "Connecting" message and the CDP variant
        # do) rather than the kwargs dict, which is not the connection
        # target and may carry values that should not be logged at INFO.
        logger.info("Connected to remote Playwright: %s", self.config.connect_url)
193
+
175
194
async def _create_browser_context (
176
195
self ,
177
196
name : str ,
@@ -193,6 +212,11 @@ async def _create_browser_context(
193
212
context = await self .browser .new_context (** context_kwargs )
194
213
persistent = False
195
214
remote = True
215
+ elif self .config .connect_url :
216
+ await self ._maybe_connect_remote ()
217
+ context = await self .browser .new_context (** context_kwargs )
218
+ persistent = False
219
+ remote = True
196
220
else :
197
221
await self ._maybe_launch_browser ()
198
222
context = await self .browser .new_context (** context_kwargs )
0 commit comments