Skip to content

Commit 52b5066

Browse files
authored
fix: update selectors for organic search results (#63)
1 parent 9327510 commit 52b5066

File tree

2 files changed

+18
-7
lines changed

2 files changed

+18
-7
lines changed

Diff for: CHANGELOG.md

+10
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
This changelog summarizes all changes to the RAG Web Browser
22

3+
### 1.0.11 (2025-03-21)
4+
5+
🐛 Bug Fixes
6+
- Update selector for organic search results
7+
8+
### 1.0.10 (2025-03-19)
9+
10+
🚀 Features
11+
- Handle all query parameters in the standby mode (including proxy)
12+
313
### 1.0.9 (2025-03-14)
414

515
🚀 Features

Diff for: src/google-search/google-extractors-urls.ts

+8-7
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,15 @@ export const deduplicateResults = <T extends { title?: string; url?: string }>(r
2424
* Extracts search results from the given selectors (source: @apify/google-search).
2525
*/
2626
const extractResultsFromSelectors = ($: CheerioAPI, selectors: string[]) => {
27-
const searchResults = [];
27+
const searchResults: OrganicResult[] = [];
2828
const selector = selectors.join(', ');
2929
for (const resultEl of $(selector)) {
30-
searchResults.push(
31-
...$(resultEl)
32-
.map((_i, el) => parseResult($, el as Element))
33-
.toArray(),
34-
);
30+
const results = $(resultEl).map((_i, el) => parseResult($, el as Element)).toArray();
31+
for (const result of results) {
32+
if (result.title && result.url) {
33+
searchResults.push(result);
34+
}
35+
}
3536
}
3637
return searchResults;
3738
};
@@ -59,7 +60,7 @@ export const scrapeOrganicResults = ($: CheerioAPI) => {
5960
const resultSelectors2023January = [
6061
'.hlcw0c', // Top result with site links
6162
'.g.Ww4FFb', // General search results
62-
'.MjjYud .g', // General catch all. Used for one main + one nested from the same site. Added in Jun 2023, not very good selector
63+
'.MjjYud', // General search results (March 2025). This selector also matches image blocks, so results are kept only when they have both a title and a url
6364
'.g .tF2Cxc>.yuRUbf', // old search selector 2021 January
6465
'.g [data-header-feature="0"]', // old search selector 2022 January
6566
'.g .rc', // very old selector

0 commit comments

Comments
 (0)