Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .actor/input_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,12 @@
"description": "If enabled, the Actor attempts to close or remove cookie consent dialogs to improve the quality of extracted text. Note that this setting increases the latency.",
"default": true
},
"blockMedia": {
"title": "Block media resources",
"type": "boolean",
"description": "If enabled, the Actor will block loading of images, videos and CSS resources when using the Playwright browser. This can improve performance and reduce bandwidth usage.",
"default": false
},
"debugMode": {
"title": "Enable debug mode",
"type": "boolean",
Expand Down
21 changes: 21 additions & 0 deletions src/input.ts
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,27 @@ function createPlaywrightCrawlerOptions(input: Input, proxy: ProxyConfiguration
maxConcurrency,
minConcurrency,
},
// Request blocking is opt-in: when `input.blockMedia` is falsy no hooks are registered.
preNavigationHooks: input.blockMedia ? [
    /**
     * Pre-navigation hook that installs a catch-all route (`**\/*`) on the page and
     * aborts requests for images, videos/media and stylesheets; every other request
     * is passed through unchanged.
     *
     * A request is blocked when either:
     *  - its reported resource type is one of the blocked types, or
     *  - its URL carries one of the blocked file extensions.
     */
    async ({ page }) => {
        // Created once per hook invocation rather than on every intercepted request.
        const blockedResourceTypes = new Set(['image', 'video', 'media', 'stylesheet']);

        // The extension must be followed by the end of the URL, a query string (`?`) or
        // a fragment (`#`). Anchoring on `$` alone would miss cache-busted assets such as
        // `style.css?v=3` or `photo.jpg#preview`.
        const blockedExtensionPattern = /\.(jpg|jpeg|png|gif|bmp|webp|mp4|webm|ogg|mov|css)(?=$|[?#])/i;

        await page.route('**/*', async (route) => {
            const request = route.request();
            const shouldBlock = blockedResourceTypes.has(request.resourceType())
                || blockedExtensionPattern.test(request.url());

            if (shouldBlock) {
                await route.abort();
            } else {
                await route.continue();
            }
        });
    },
] : [],
},
};
}
Expand Down
1 change: 1 addition & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ export type Input = {
removeElementsCssSelector: string;
removeCookieWarnings: boolean;
scrapingTool: 'browser-playwright' | 'raw-http';
blockMedia: boolean;
};

export type StandbyInput = Input & {
Expand Down
Loading