-
-
Notifications
You must be signed in to change notification settings - Fork 1.5k
/
Copy pathalgolia.ts
207 lines (185 loc) · 6.56 KB
/
algolia.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
/* eslint-disable no-console */
/**
* This script is used to index the static docs HTML files generated by Next.js into Algolia.
*
* It's a migration from the Gatsby solution,
* which relied on the `gatsby-plugin-algolia`: https://github.com/getsentry/sentry-docs/blob/3c1361bdcb23a0fcee1f3019bca7c14a5d632162/src/gatsby/utils/algolia.ts
*
* The record generation logic is reused as is, with *two* notable changes:
* 1. We manually feed the HTML files to the record generation function
* 2. We manually upload the records to Algolia
*
* This script is meant to be run on a GitHub Action (see `.github/workflows/algolia-index.yml`).
*
* If you want to run it locally,
* 1. Make sure you have the required env vars set up
* 2. Be careful to change `DOCS_INDEX_NAME` to a value different
* from the production docs index name (specified in `@sentry-internal/global-search`)
* to avoid accidental deletions
* 3. Run a production build of the app before running this script
*/
import fs from 'fs';
import {join} from 'path';
import {
extrapolate,
htmlToAlgoliaRecord,
sentryAlgoliaIndexSettings,
standardSDKSlug,
} from '@sentry-internal/global-search';
import algoliasearch, {SearchIndex} from 'algoliasearch';
import {isDeveloperDocs} from 'sentry-docs/isDeveloperDocs';
import {getDevDocsFrontMatter, getDocsFrontMatter} from '../src/mdx';
import {FrontMatter} from '../src/types';
// Directory where Next.js writes the static HTML for the app directory.
// This layout is an undocumented implementation detail and may change;
// running the production server and fetching the HTML from it would
// probably be the more robust approach.
const staticHtmlFilesPath = join(process.cwd(), '.next', 'server', 'app');

// Required configuration, read from the environment.
const {ALGOLIA_APP_ID, ALGOLIA_API_KEY, DOCS_INDEX_NAME} = process.env;

// When the env var is exactly 'true', a page that fails to process is logged
// and skipped instead of aborting the whole run.
const ALOGOLIA_SKIP_ON_ERROR = process.env.ALOGOLIA_SKIP_ON_ERROR === 'true';

// Fail fast (in this order) if any required setting is missing.
if (!ALGOLIA_APP_ID) {
  throw new Error('`ALGOLIA_APP_ID` env var must be configured in repo secrets');
}

if (!ALGOLIA_API_KEY) {
  throw new Error('`ALGOLIA_API_KEY` env var must be configured in repo secrets');
}

if (!DOCS_INDEX_NAME) {
  throw new Error('`DOCS_INDEX_NAME` env var must be configured in repo secrets');
}

const client = algoliasearch(ALGOLIA_APP_ID, ALGOLIA_API_KEY);
const index = client.initIndex(DOCS_INDEX_NAME);

// Script entry point (the function declaration below is hoisted).
indexAndUpload();
/**
 * Regenerates the Algolia records for all docs pages and syncs them to the
 * `DOCS_INDEX_NAME` index.
 *
 * Steps:
 * 1. Build records from the page front matters.
 * 2. Read the object IDs that are in the index before this run.
 * 3. Save the new records.
 * 4. Delete every pre-existing record whose ID was not (re)saved in step 3.
 * 5. For non-developer docs, apply the shared index settings.
 *
 * Step 5 runs whether or not step 4 had anything to delete.
 */
async function indexAndUpload(): Promise<void> {
  // the page front matters are the source of truth for the static doc routes
  // as they are used directly by generateStaticParams() on [[...path]] page
  const pageFrontMatters = isDeveloperDocs
    ? getDevDocsFrontMatter()
    : await getDocsFrontMatter();
  const records = await generateAlogliaRecords(pageFrontMatters);
  console.log('π₯ Generated %d new Algolia records.', records.length);

  // Snapshot the IDs *before* saving so stale ones can be identified afterwards.
  const existingRecordIds = await fetchExistingRecordIds(index);
  console.log(
    'π₯ Found %d existing Algolia records in `%s`',
    existingRecordIds.length,
    DOCS_INDEX_NAME
  );

  console.log('π₯ Saving new records to `%s`...', DOCS_INDEX_NAME);
  const saveResult = await index.saveObjects(records, {
    batchSize: 10000,
    autoGenerateObjectIDIfNotExist: true,
  });
  const newRecordIDs = new Set(saveResult.objectIDs);
  console.log('π₯ Saved %d records', newRecordIDs.size);

  // Anything that was in the index before but was not written in this run is stale.
  const recordsToDelete = existingRecordIds.filter(id => !newRecordIDs.has(id));
  if (recordsToDelete.length === 0) {
    console.log('π₯ No stale records to delete');
  } else {
    console.log('π₯ Deleting old (stale) records ...');
    const deleteResult = await index.deleteObjects(recordsToDelete);
    console.log(
      'π₯ Deleted %d stale records from `%s`',
      deleteResult.objectIDs.length,
      DOCS_INDEX_NAME
    );
  }

  // Settings are independent of stale-record cleanup, so this must not be
  // skipped when there was nothing to delete.
  if (!isDeveloperDocs) {
    console.log('π₯ Applying custom index settings ...');
    await index.setSettings(sentryAlgoliaIndexSettings);
    console.log(`π₯ Applied custom settings to ${DOCS_INDEX_NAME}`);
  }
}
/**
 * Browses the given index, requesting only the `objectID` attribute, and
 * returns the distinct object IDs it yields.
 *
 * @param algoliaIndex index to browse
 * @returns de-duplicated object IDs, in first-seen order
 */
async function fetchExistingRecordIds(algoliaIndex: SearchIndex) {
  console.log('π₯ fetching existing records ids ...');

  // A Set drops any ID that shows up in more than one batch.
  const seenIds = new Set<string>();

  await algoliaIndex.browseObjects({
    attributesToRetrieve: ['objectID'],
    batch: hits => {
      for (const hit of hits) {
        seenIds.add(hit.objectID);
      }
    },
  });

  return Array.from(seenIds);
}
/**
 * Builds the Algolia records for every indexable page.
 *
 * A page is indexable when it is not a draft, is not flagged `noindex`,
 * and has a title. Each remaining page is handed to `getRecords`, and the
 * per-page results are flattened into one list.
 *
 * @param pageFrontMatters front matter of every docs page
 * @returns all records of all indexable pages
 */
async function generateAlogliaRecords(pageFrontMatters: FrontMatter[]) {
  const indexablePages = pageFrontMatters.filter(({draft, noindex, title}) => {
    if (draft || noindex) {
      return false;
    }
    return Boolean(title);
  });

  const recordsPerPage = await Promise.all(indexablePages.map(getRecords));
  return recordsPerPage.flat();
}
/**
 * Framework popularity ranking map - frameworks listed in order of priority
 * (a lower number means a higher priority).
 */
const frameworkPopularity: Record<string, number> = {
  nextjs: 1,
  react: 2,
  'react-native': 3,
  python: 4,
  laravel: 5,
  node: 6,
  vue: 7,
  ios: 8,
  angular: 9,
  nestjs: 10,
  django: 11,
  spring: 12,
  go: 13,
  ruby: 14,
  kotlin: 15,
  dart: 16,
  unity: 17,
};

/**
 * Resolves the popularity rank for a page.
 *
 * The SDK slug is looked up first, then the framework slug. If neither is
 * listed in `frameworkPopularity`, `Number.MAX_SAFE_INTEGER` is returned.
 *
 * @param sdk SDK slug of the page, if any
 * @param framework framework slug of the page, if any
 * @returns the rank from `frameworkPopularity`, or `Number.MAX_SAFE_INTEGER`
 */
const getPopularity = (sdk: string | undefined, framework: string | undefined) => {
  // Only own keys count: a plain `frameworkPopularity[slug]` lookup would also
  // find inherited members such as `constructor` or `toString`.
  const rankOf = (slug: string | undefined): number | undefined =>
    slug !== undefined &&
    Object.prototype.hasOwnProperty.call(frameworkPopularity, slug)
      ? frameworkPopularity[slug]
      : undefined;

  return rankOf(sdk) ?? rankOf(framework) ?? Number.MAX_SAFE_INTEGER;
};
/**
 * Builds the Algolia records for a single page from its pre-rendered HTML.
 *
 * The HTML is read from `<staticHtmlFilesPath>/<slug>.html` and passed to
 * `htmlToAlgoliaRecord` together with page metadata (title, url, path
 * segments, keywords, sdk/framework and, for non-developer docs, popularity).
 *
 * @param pageFm front matter of the page to index
 * @returns the records produced for the page, or `[]` when processing fails
 *   and `ALOGOLIA_SKIP_ON_ERROR` is enabled
 * @throws Error wrapping the original failure (as `cause`) when processing
 *   fails and `ALOGOLIA_SKIP_ON_ERROR` is not enabled
 */
async function getRecords(pageFm: FrontMatter) {
  console.log('processing:', pageFm.slug);

  let sdk: string | undefined;
  let framework: string | undefined;

  if (pageFm.slug.includes('platforms/')) {
    // Second path segment is passed to standardSDKSlug to resolve the SDK.
    // NOTE(review): the optional chain means this can still be undefined
    // despite the `as string` assertion.
    sdk = standardSDKSlug(pageFm.slug.split('/')[1])?.slug as string;
    framework = sdk;

    if (pageFm.slug.includes('/guides/')) {
      // For guide pages the fourth path segment is resolved as the framework.
      framework = standardSDKSlug(pageFm.slug.split('/')[3])?.slug as string;
    }
  }

  try {
    const htmlFile = join(staticHtmlFilesPath, pageFm.slug + '.html');
    const html = fs.readFileSync(htmlFile).toString();

    const pageRecords = await htmlToAlgoliaRecord(
      html,
      {
        title: pageFm.title,
        url: '/' + pageFm.slug + '/',
        pathSegments: extrapolate(pageFm.slug, '/').map(x => `/${x}/`),
        keywords: pageFm.keywords,
        sdk,
        framework,
        ...(!isDeveloperDocs && {popularity: getPopularity(sdk, framework)}),
      },
      '#main'
    );

    return pageRecords;
  } catch (e: unknown) {
    // Anything can be thrown, so only read `.message` when the value has one;
    // otherwise fall back to its string form.
    const reason =
      typeof e === 'object' && e !== null && 'message' in e
        ? String((e as {message: unknown}).message)
        : String(e);

    const error = new Error(`π΄ Error processing ${pageFm.slug}: ${reason}`, {
      cause: e,
    });

    if (ALOGOLIA_SKIP_ON_ERROR) {
      console.error(error);
      return [];
    }
    throw error;
  }
}