@@ -3,7 +3,7 @@ import { load } from 'cheerio';
3
3
import { CheerioCrawlingContext , htmlToText , log , PlaywrightCrawlingContext , sleep , Request } from 'crawlee' ;
4
4
5
5
import { ContentCrawlerStatus , ContentCrawlerTypes } from './const.js' ;
6
- import { addResultToResponse , responseData , sendResponseIfFinished } from './responses.js' ;
6
+ import { addResultToResponse , sendResponseIfFinished } from './responses.js' ;
7
7
import { Output , ContentCrawlerUserData } from './types.js' ;
8
8
import { addTimeMeasureEvent , transformTimeMeasuresToRelative } from './utils.js' ;
9
9
import { processHtml } from './website-content-crawler/html-processing.js' ;
@@ -27,22 +27,6 @@ async function waitForPlaywright({ page }: PlaywrightCrawlingContext, time: numb
27
27
return Promise . race ( [ page . waitForLoadState ( 'networkidle' , { timeout : 0 } ) , sleep ( time - hardDelay ) ] ) ;
28
28
}
29
29
30
- /**
31
- * Checks if the request should time out based on response timeout.
32
- * It verifies if the response data contains the responseId. If not, it sets the request's noRetry flag
33
- * to true and throws an error to cancel the request.
34
- *
35
- * @param {Request } request - The request object to be checked.
36
- * @param {string } responseId - The response ID to look for in the response data.
37
- * @throws {Error } Throws an error if the request times out.
38
- */
39
- function checkTimeoutAndCancelRequest ( request : Request , responseId : string ) {
40
- if ( ! responseData . has ( responseId ) ) {
41
- request . noRetry = true ;
42
- throw new Error ( 'Timed out. Cancelling the request...' ) ;
43
- }
44
- }
45
-
46
30
/**
47
31
* Decide whether to wait based on the remaining time left for the Actor to run.
48
32
* Always waits if the Actor is in the STANDBY_MODE.
@@ -164,9 +148,7 @@ export async function requestHandlerPlaywright(
164
148
context : PlaywrightCrawlingContext < ContentCrawlerUserData > ,
165
149
) {
166
150
const { request, response, page, closeCookieModals } = context ;
167
- const { contentScraperSettings : settings , responseId } = request . userData ;
168
-
169
- checkTimeoutAndCancelRequest ( request , responseId ) ;
151
+ const { contentScraperSettings : settings } = request . userData ;
170
152
171
153
log . info ( `Processing URL: ${ request . url } ` ) ;
172
154
addTimeMeasureEvent ( request . userData , 'playwright-request-start' ) ;
@@ -198,9 +180,6 @@ export async function requestHandlerCheerio(
198
180
context : CheerioCrawlingContext < ContentCrawlerUserData > ,
199
181
) {
200
182
const { $, request, response } = context ;
201
- const { responseId } = request . userData ;
202
-
203
- checkTimeoutAndCancelRequest ( request , responseId ) ;
204
183
205
184
log . info ( `Processing URL: ${ request . url } ` ) ;
206
185
addTimeMeasureEvent ( request . userData , 'cheerio-request-start' ) ;
0 commit comments