Skip to content

Commit 19bfbb6

Browse files
authored
Revert "fix: cancel timed out requests (#65)"
This reverts commit d9eddc7.
1 parent d9eddc7 commit 19bfbb6

File tree

3 files changed

+4
-30
lines changed

3 files changed

+4
-30
lines changed

Diff for: CHANGELOG.md

-5
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,5 @@
11
This changelog summarizes all changes of the RAG Web Browser
22

3-
### 1.0.13 (2025-03-27)
4-
5-
🐛 Bug Fixes
6-
- Cancel crawling requests from timed-out search queries
7-
83
### 1.0.12 (2025-03-24)
94

105
🐛 Bug Fixes

Diff for: src/request-handler.ts

+2-23
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ import { load } from 'cheerio';
33
import { CheerioCrawlingContext, htmlToText, log, PlaywrightCrawlingContext, sleep, Request } from 'crawlee';
44

55
import { ContentCrawlerStatus, ContentCrawlerTypes } from './const.js';
6-
import { addResultToResponse, responseData, sendResponseIfFinished } from './responses.js';
6+
import { addResultToResponse, sendResponseIfFinished } from './responses.js';
77
import { Output, ContentCrawlerUserData } from './types.js';
88
import { addTimeMeasureEvent, transformTimeMeasuresToRelative } from './utils.js';
99
import { processHtml } from './website-content-crawler/html-processing.js';
@@ -27,22 +27,6 @@ async function waitForPlaywright({ page }: PlaywrightCrawlingContext, time: numb
2727
return Promise.race([page.waitForLoadState('networkidle', { timeout: 0 }), sleep(time - hardDelay)]);
2828
}
2929

30-
/**
31-
* Checks if the request should time out based on response timeout.
32-
* It verifies if the response data contains the responseId. If not, it sets the request's noRetry flag
33-
* to true and throws an error to cancel the request.
34-
*
35-
* @param {Request} request - The request object to be checked.
36-
* @param {string} responseId - The response ID to look for in the response data.
37-
* @throws {Error} Throws an error if the request times out.
38-
*/
39-
function checkTimeoutAndCancelRequest(request: Request, responseId: string) {
40-
if (!responseData.has(responseId)) {
41-
request.noRetry = true;
42-
throw new Error('Timed out. Cancelling the request...');
43-
}
44-
}
45-
4630
/**
4731
* Decide whether to wait based on the remaining time left for the Actor to run.
4832
* Always waits if the Actor is in the STANDBY_MODE.
@@ -164,9 +148,7 @@ export async function requestHandlerPlaywright(
164148
context: PlaywrightCrawlingContext<ContentCrawlerUserData>,
165149
) {
166150
const { request, response, page, closeCookieModals } = context;
167-
const { contentScraperSettings: settings, responseId } = request.userData;
168-
169-
checkTimeoutAndCancelRequest(request, responseId);
151+
const { contentScraperSettings: settings } = request.userData;
170152

171153
log.info(`Processing URL: ${request.url}`);
172154
addTimeMeasureEvent(request.userData, 'playwright-request-start');
@@ -198,9 +180,6 @@ export async function requestHandlerCheerio(
198180
context: CheerioCrawlingContext<ContentCrawlerUserData>,
199181
) {
200182
const { $, request, response } = context;
201-
const { responseId } = request.userData;
202-
203-
checkTimeoutAndCancelRequest(request, responseId);
204183

205184
log.info(`Processing URL: ${request.url}`);
206185
addTimeMeasureEvent(request.userData, 'cheerio-request-start');

Diff for: src/responses.ts

+2-2
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ type ResponseData = {
1111
timeoutId?: NodeJS.Timeout;
1212
};
1313

14-
export const responseData = new Map<string, ResponseData>();
14+
const responseData = new Map<string, ResponseData>();
1515

1616
/**
1717
* Helper function to get response object by responseId.
@@ -39,7 +39,7 @@ export function createResponsePromise(responseId: string, timeoutSecs: number):
3939

4040
// Set a timeout to reject the promise if it takes too long
4141
data.timeoutId = setTimeout(() => {
42-
sendResponseError(responseId, 'Timed out.');
42+
sendResponseError(responseId, 'Timed out');
4343
}, timeoutSecs * 1000);
4444
});
4545
}

0 commit comments

Comments
 (0)