Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit ddc09fb

Browse files
committed Jan 6, 2025
feat(pdf): build pdf by new tsx script
1 parent 57de449 commit ddc09fb

29 files changed

+327
-33
lines changed
 

‎.gitignore

+3-5
Original file line numberDiff line numberDiff line change
@@ -35,20 +35,18 @@ pdf/kotlin-reference.pdf
3535
pdf/tmp.html
3636
*.pyc
3737
google-credentials.json
38-
./package-lock.json
38+
package-lock.json
3939
.env
4040
pages/api/
4141
external/
4242
assets/externals
4343
pages/docs/tutorials/kotlin-for-py/
4444
_teamcity
45-
dist
45+
/dist
4646
libs
4747
generated
4848
.next
4949
.teamcity/*.iml
5050

51-
/scripts/doindex/package-lock.json
52-
/search-report*
53-
/reports
51+
/reports*
5452
/data/page_views_map.json

‎.teamcity/builds/TemplateSearchIndex.kt

+2-6
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import jetbrains.buildServer.configs.kotlin.buildSteps.ScriptBuildStep
77
import jetbrains.buildServer.configs.kotlin.triggers.schedule
88
import vcsRoots.KotlinLangOrg
99

10-
const val SCRIPT_PATH = "scripts/doindex";
10+
const val SCRIPT_PATH = "scripts/dist";
1111

1212
fun scriptDistAnalyze(block: ScriptBuildStep.() -> Unit) = ScriptBuildStep {
1313
id = "script-dist-analyze"
@@ -42,11 +42,7 @@ abstract class TemplateSearchIndex(init: BuildType.() -> Unit) : BuildType({
4242
}
4343

4444
vcs {
45-
root(
46-
KotlinLangOrg, """
47-
$SCRIPT_PATH
48-
""".trimIndent()
49-
)
45+
root(KotlinLangOrg, "$SCRIPT_PATH/")
5046
cleanCheckout = true
5147
showDependenciesChanges = true
5248
}

‎.teamcity/builds/apiReferences/BuildApiPages.kt

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package builds.apiReferences
22

33
import BuildParams.DOKKA_TEMPLATES_VERSION
4+
import builds.SCRIPT_PATH
45
import builds.scriptDistAnalyze
56
import jetbrains.buildServer.configs.kotlin.BuildStep
67
import jetbrains.buildServer.configs.kotlin.BuildSteps
@@ -39,7 +40,7 @@ abstract class BuildApiPages(
3940
artifactRules = "$pagesRoot/** => pages.zip"
4041

4142
vcs {
42-
root(KotlinLangOrg, "scripts/doindex/")
43+
root(KotlinLangOrg, "$SCRIPT_PATH/")
4344
}
4445

4546
params {

‎.teamcity/builds/apiReferences/stdlib/BuildStdlibApiReference.kt

+3-5
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
package builds.apiReferences.stdlib
22

33
import BuildParams.KOTLIN_CORE_API_BUILD_ID
4+
import builds.SCRIPT_PATH
45
import builds.apiReferences.scriptGenerateSitemap
56
import builds.apiReferences.scriptNoRobots
67
import jetbrains.buildServer.configs.kotlin.AbsoluteId
78
import jetbrains.buildServer.configs.kotlin.BuildType
89
import jetbrains.buildServer.configs.kotlin.buildSteps.script
10+
import vcsRoots.KotlinLangOrg
911

1012
private const val PAGES_ROOT = "dist/api/core"
1113

@@ -14,11 +16,7 @@ object BuildStdlibApiReference : BuildType({
1416
description = "Build pages for Kotlin Core API"
1517

1618
vcs {
17-
root(
18-
vcsRoots.KotlinLangOrg, """
19-
scripts/doindex/
20-
""".trimIndent()
21-
)
19+
root(KotlinLangOrg, "$SCRIPT_PATH/")
2220
}
2321

2422
artifactRules = """

‎.teamcity/builds/kotlinlang/buidTypes/PdfGenerator.kt

+41-15
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,72 @@
11
package builds.kotlinlang.buidTypes
22

3+
import builds.SCRIPT_PATH
34
import builds.kotlinlang.templates.DockerImageBuilder
4-
import jetbrains.buildServer.configs.kotlin.AbsoluteId
55
import jetbrains.buildServer.configs.kotlin.BuildType
6+
import jetbrains.buildServer.configs.kotlin.FailureAction
67
import jetbrains.buildServer.configs.kotlin.buildSteps.script
78

8-
99
object PdfGenerator : BuildType({
1010
name = "PDF Generator"
1111
description = "Build PDF reference https://kotlinlang.org/docs/"
1212

1313
templates(DockerImageBuilder)
1414

15-
artifactRules = "pdf/kotlin-docs.pdf => kotlin-docs.pdf"
15+
artifactRules = """
16+
dist/docs/pdf.html
17+
pdf/kotlin-docs.pdf
18+
""".trimIndent()
19+
20+
requirements {
21+
doesNotContain("docker.server.osType", "windows")
22+
}
1623

1724
steps {
1825
script {
26+
id = "script-dist-pdf-html"
27+
name = "Generate pdf.html"
28+
//language=bash
29+
scriptContent = """
30+
#!/bin/sh
31+
set -e
32+
npm install
33+
npm run generate-pdf
34+
""".trimIndent()
35+
dockerImage = "node:22-alpine"
36+
workingDir = SCRIPT_PATH
37+
}
38+
script {
39+
name = "Generate PDF"
40+
//language=sh
1941
scriptContent = """
20-
#!/bin/bash
21-
22-
mv ./dist/docs/pdfSourceKR.html ./dist/docs/pdf.html
42+
# install legacy wkhtmltopdf deps
43+
apt update
44+
apt install -y xfonts-75dpi xfonts-100dpi libjpeg62-turbo xfonts-base
45+
wget https://deb.debian.org/debian/pool/main/o/openssl/libssl1.1_1.1.1w-0+deb11u1_amd64.deb
46+
wget https://deb.debian.org/debian/pool/main/o/openssl/libssl-dev_1.1.1w-0+deb11u1_amd64.deb
47+
wget https://deb.debian.org/debian/pool/main/o/openssl/openssl_1.1.1w-0+deb11u1_amd64.deb
48+
dpkg -i libssl1.1_1.1.1w-0+deb11u1_amd64.deb libssl-dev_1.1.1w-0+deb11u1_amd64.deb openssl_1.1.1w-0+deb11u1_amd64.deb
49+
ln -s /usr/lib/x86_64-linux-gnu/libjpeg.so.62 /usr/lib/x86_64-linux-gnu/libjpeg.so.8
50+
# refresh wkhtmltopdf
51+
wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.buster_amd64.deb
52+
dpkg -i wkhtmltox_0.12.6-1.buster_amd64.deb
2353
2454
## refresh packages
2555
pip install -r requirements.txt
26-
2756
python kotlin-website.py reference-pdf
2857
""".trimIndent()
29-
dockerImage = "%dep.Kotlin_KotlinSites_Builds_KotlinlangOrg_BuildPythonContainer.kotlin-website-image%"
58+
dockerImage = "python:3.9"
3059
}
3160
}
3261

3362
dependencies {
34-
dependency(AbsoluteId("Documentation_TransitioningProducts_KotlinReferenceWithCoroutines")) {
63+
dependency(BuildReferenceDocs) {
3564
snapshot {
65+
onDependencyFailure = FailureAction.FAIL_TO_START
66+
onDependencyCancel = FailureAction.CANCEL
3667
}
37-
3868
artifacts {
39-
cleanDestination = true
40-
artifactRules = """
41-
webHelpImages.zip!** => dist/docs/images
42-
pdfSourceKR.html => dist/docs/
43-
""".trimIndent()
69+
artifactRules = "+:docs.zip!** => dist/docs/"
4470
}
4571
}
4672
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

‎scripts/doindex/package.json ‎scripts/dist/package.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
"type": "module",
33
"scripts": {
44
"generate-metadata": "tsx analyzer/index.ts",
5-
"build": "tsc --outDir ../pages-js --project tsconfig.json"
5+
"generate-pdf": "tsx pdf/index.ts",
6+
"build": "tsc --outDir ../dist --project tsconfig.json"
67
},
78
"dependencies": {
89
"@types/fast-levenshtein": "^0.0.4",

‎scripts/dist/pdf/index.ts

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import { dirname, join } from 'node:path';
2+
import { EventEmitter } from 'events';
3+
4+
import { DIST_FOLDER } from '../lib/files/index.js';
5+
import { appendFile, open, readFile } from 'node:fs/promises';
6+
import { processTocToUrls, Result } from './lib.js';
7+
import { newTaskExecutor } from '../lib/pool.js';
8+
9+
// Entry script: collects the documentation pages listed in HelpTOC.json, has
// worker tasks render each page to an HTML fragment, and concatenates the
// fragments in TOC order into `<dist>/docs/pdf.html`.
console.time('Data successfully built');

// Module executed by the task pool for every page (see ./task.ts).
const TASK_PATH = import.meta.dirname + '/task';

EventEmitter.defaultMaxListeners = 15;

const DOCS_PATH = join(DIST_FOLDER, 'docs');
const TOC_PATH = join(DOCS_PATH, 'HelpTOC.json');

const TOC = JSON.parse(await readFile(TOC_PATH, { encoding: 'utf-8' }));

// Page path -> rendered HTML. A Map keeps insertion order, so the TOC order
// fixed here is the order the fragments are written in, regardless of the
// order in which tasks report back.
const nodes = new Map<string, string>(
    (await processTocToUrls(TOC))
        .map(id => new URL(id, 'https://kotlinlang.org/docs/'))
        // keep only pages that live directly under https://kotlinlang.org/docs/
        .filter(url => url.hostname === 'kotlinlang.org' && dirname(url.pathname) === '/docs')
        .map(url => url.pathname.slice(1))
        // explicit tuple type: a bare `[key, '']` is inferred as `string[]`,
        // which is not a valid Map entry
        .map((key): [string, string] => [key, ''])
);

if (nodes.size === 0) throw new Error('No nodes found');

/** Stores the HTML a task produced for the page `id`. */
async function onItem({ id, html }: Result) {
    nodes.set(id, html);
}

const [pool, finish] = newTaskExecutor(
    TASK_PATH, onItem
);

for (const id of nodes.keys()) {
    pool.push(id);
}

await finish;

const pdfFile = await open(join(DOCS_PATH, 'pdf.html'), 'w');

try {
    for (const html of nodes.values()) {
        await appendFile(pdfFile, html);
    }
} finally {
    // Release the handle even when a write fails.
    await pdfFile.close();
}

console.timeEnd('Data successfully built');

‎scripts/dist/pdf/lib.ts

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import { join } from 'node:path';
2+
import { readFile } from 'node:fs/promises';
3+
import { CheerioAPI, load } from 'cheerio';
4+
5+
import { FileType } from '../lib/files/type.js';
6+
import { ROOT_DIR } from '../lib/files/index.js';
7+
8+
/**
 * Payload a PDF task reports back for one processed page.
 */
export type Result = {
    /** Identifier of the page; `task.ts` fills it with the page's relative path. */
    id: string;
    /** HTML produced for the page; `task.ts` sends an empty string when nothing was extracted. */
    html: string;
};
12+
13+
/**
 * Tells whether a page that is hidden (or whose type is unknown) is one of the
 * allow-listed pages: the Kotlin tour pages and `docs/multiplatform.html`.
 *
 * @param relativePath page path such as `docs/kotlin-tour-welcome.html`
 * @param type detected file type, if any; only a missing type or `'Hidden'` can qualify
 * @returns `true` when the path is allow-listed and the type does not rule it out
 */
export function isHiddenProved(relativePath: string, type?: FileType) {
    if (type && type !== 'Hidden') return false;

    const isTourPage = relativePath.startsWith('docs/kotlin-tour-');
    return isTourPage || relativePath === 'docs/multiplatform.html';
}
19+
20+
// Location of the `kr.tree` file that describes the docs table of contents.
const TREE_XML_PATH = join(ROOT_DIR + '/docs/kr.tree');

// Parsed `kr.tree` document; stays `null` until the first successful load.
let TREE_XML: CheerioAPI | null = null;

/**
 * Returns the parsed `kr.tree` document.
 *
 * The file is read and parsed in XML mode only when no parsed copy is cached
 * yet; later calls return the cached instance.
 */
async function readTreeXMLEntities() {
    if (TREE_XML) return TREE_XML;

    const source = await readFile(TREE_XML_PATH, { encoding: 'utf-8' });
    TREE_XML = load(source, { xml: true });

    return TREE_XML;
}
32+
33+
/**
 * Lists the allow-listed hidden pages that are direct children of a page in
 * the `kr.tree` table of contents.
 *
 * @param id page file name ending in `.html`; it is matched against the tree
 *           as the corresponding `.md` topic
 * @returns `.html` file names of the direct child topics accepted by
 *          `isHiddenProved`, in tree order
 */
async function getSubtreeItems(id: string) {
    const $tree = await readTreeXMLEntities();
    const parentTopic = id.replace(/\.html$/, '.md');
    const children = $tree(`toc-element[topic="${parentTopic}"] > toc-element[topic]`).toArray();

    const pages: string[] = [];
    for (const child of children) {
        // child topics may be `.md` or `.topic` sources; both map to `.html` pages
        const file = child?.attribs?.topic?.replace(/\.(md|topic)$/, '.html');
        if (file && isHiddenProved(`docs/${file}`)) pages.push(file);
    }

    return pages;
}
44+
45+
/**
 * Flattens the help TOC into the ordered list of page URLs.
 *
 * The TOC is walked depth-first, starting from `topLevelIds` and following each
 * entry's `pages` in their listed order; every id is visited at most once and
 * ids missing from `entities.pages` are skipped. When an entry's URL is one of
 * the allow-listed hidden pages (see `isHiddenProved`), the hidden child pages
 * found in `kr.tree` are appended right after it.
 *
 * @param topLevelIds ids of the root TOC entries, in order; left unmodified
 * @param entities    TOC data; `entities.pages` maps an id to its optional
 *                    `url` and the ids of its child entries
 * @returns page URLs in traversal order
 */
export async function processTocToUrls({ topLevelIds, entities }: {
    entities: { pages: Record<string, { url?: string, pages: string[] }> },
    topLevelIds: string[]
}) {
    const result: string[] = [];
    const visited = new Set<string>();
    // Copy BEFORE reversing: Array.prototype.reverse() works in place, so
    // `[...topLevelIds.reverse()]` also flipped the caller's array.
    const stack = [...topLevelIds].reverse(); // reversed so pop() yields the original order
    const data = entities.pages;

    while (stack.length) {
        const id = stack.pop();

        if (id === undefined || visited.has(id)) continue;
        visited.add(id);

        const page = data[id];
        if (!page) continue;
        const { url, pages } = page;

        if (url) {
            result.push(url);

            // Only entries that have a URL can have hidden sub-pages to look up.
            if (isHiddenProved(`docs/${url}`)) {
                const hiddenPages = await getSubtreeItems(url);
                for (const hiddenPage of hiddenPages) {
                    if (hiddenPage) result.push(hiddenPage);
                }
            }
        }

        if (pages?.length) {
            // Push children last-to-first so they are popped first-to-last.
            for (let i = pages.length - 1; i >= 0; i--) {
                stack.push(pages[i]);
            }
        }
    }

    return result;
}

‎scripts/dist/pdf/task.ts

+142
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
import { basename, dirname, join } from 'node:path';
2+
import { CheerioAPI } from 'cheerio';
3+
import { Element } from 'domhandler';
4+
import { getType } from '../lib/files/type.js';
5+
import { DIST_FOLDER } from '../lib/files/index.js';
6+
import { isHiddenProved, Result } from './lib.js';
7+
import { replaceNode } from '../lib/html.js';
8+
9+
/** Forwards its arguments unchanged to `process.send`. */
function sendEvent(...args: Parameters<typeof process.send>) {
    process.send(...args);
}

/** Fallback used when `process.send` is missing: logs the problem; the message itself is dropped. */
function sendWarning() {
    console.error('process.send is not defined');
}

// `process.send` is only defined when this script runs as a child process with
// an IPC channel, so the sender is chosen once at load time.
const send = typeof process.send === 'function' ? sendEvent : sendWarning;
18+
19+
// Static markup emitted for `docs/home.html` in place of the page's own sections.
const HOME_HTML_CONTENT = [
    '<section class = "panel__content" ><div class = "container"><article class="article">',
    '<h1 id = "home.xml">Kotlin Docs</h1>',
    '</article><div id="disqus_thread"></div ></div></section>'
].join('');
23+
24+
25+
/**
 * Builds the in-document anchor used for a page inside the combined HTML.
 *
 * @param filename page file name; a trailing `.html` is replaced with `.md`
 * @param hash     URL fragment including its leading `#`, or an empty string
 * @returns `#<page>.md`, followed by `-<fragment>` when a fragment is given
 */
function buildAnchor(filename: string, hash: string) {
    const pageAnchor = '#' + filename.replace(/\.html$/, '.md');
    return hash ? `${pageAnchor}-${hash.substring(1)}` : pageAnchor;
}
30+
31+
/**
 * Extracts the video id from a YouTube URL.
 *
 * Recognises `/watch?v=ID`, `/v/ID` and `/embed/ID` on hosts ending in
 * `youtube.com` or `youtu.be`.
 *
 * @returns the id, or `''` when the URL cannot be parsed or is not recognised
 */
function youtubeVideoId(rawUrl: string): string {
    try {
        const { hostname, pathname, searchParams } = new URL(rawUrl, 'https://kotlinlang.org/');
        if (!hostname.endsWith('youtube.com') && !hostname.endsWith('youtu.be')) return '';

        // https://www.youtube.com/watch?v=...&feature=...
        if (pathname === '/watch') return searchParams.get('v') ?? '';
        // http://www.youtube.com/v/...?blabalab
        if (pathname.startsWith('/v/')) return pathname.substring('/v/'.length);
        // http://www.youtube.com/embed/...?blabla
        // (prefix length is 7; the former `substring(6)` left a leading '/' in the id)
        if (pathname.startsWith('/embed/')) return pathname.substring('/embed/'.length);
    } catch (e) {
        // unparsable URL: treat as "not a video"
    }

    return '';
}

/**
 * Prepares one `section.panel__content` of a documentation page for inclusion
 * in the single combined HTML document and returns its serialized markup.
 *
 * Steps, in order:
 *  - drops `.last-modified` and bottom navigation links;
 *  - points `img[data-gif-src]` at the GIF source;
 *  - turns `.code-collapse` into plain `.code-block` containers;
 *  - replaces embedded YouTube `<object>`s with a preview image and a link;
 *  - copies an image `title` onto its untitled `<figure>`;
 *  - strips leading whitespace inside `.code-block`;
 *  - prefixes element ids with the page's `h1` id so they stay unique;
 *  - rewrites in-page links and links to other `/docs/*.html` pages into local
 *    anchors, keeping the original target in `data-origin-href`;
 *  - removes `//sampleStart` / `//sampleEnd` markers from the output.
 *
 * @param $            cheerio instance the section belongs to
 * @param node         the section element to process (modified in place)
 * @param relativePath path of the page being processed, e.g. `docs/foo.html`
 * @returns the section's HTML
 */
async function fixSectionHtml($: CheerioAPI, node: Element, relativePath: string) {
    const $node = $(node);

    $node.find('.last-modified, .navigation-links._bottom').remove();
    $node.find('img[data-gif-src]').each(function(_i, img) {
        img.attribs.src = img.attribs['data-gif-src'];
    });

    replaceNode($node, '.code-collapse', function($el, _attrs, content) {
        return `<div class="code-block" data-lang="${$el.attr('data-lang')}">${content}</div>`;
    });

    replaceNode($node, 'object[data]', function($el, attrs, content) {
        const id = youtubeVideoId($el.attr('data'));

        if (id)
            return `<figure class="video"><img src="https://img.youtube.com/vi/${id}/maxresdefault.jpg" width="560"></figure>` +
                `<p><a href="https://youtube.com/v/${id}">Watch video online.</a></p>`;

        // not a recognised video: keep the object as it was
        return `<object ${attrs}>${content}</object>`;
    });

    $node.find('figure:not([title]) img[title]').each(function(_i, img) {
        $(img).closest('figure').attr('title', img.attribs.title);
    });

    $node.find('.code-block').each(function(_i, block) {
        // Trim leading text nodes until the first one with real content.
        let child = block.firstChild;
        while (child) {
            if (child.type === 'text') {
                child.data = child.data.replace(/^[\n\s]+/g, '').trim();
                child = child.data ? null : child.nextSibling;
            } else {
                // A non-text node is content too: stop here. Without this branch
                // the loop never advanced and spun forever.
                child = null;
            }
        }
    });

    $node.find('[id]:not(h1)').each(function(_i, el) {
        const article = $(el).closest('.article');
        const h1Id = article.find('h1[id$=".md"]').attr('id');
        if (h1Id && article.length === 1) el.attribs.id = h1Id + '-' + el.attribs.id;
    });

    $node.find('a[href]').each(function(_i, link) {
        const href = link.attribs.href;
        let anchor = '';

        if (href[0] === '#') {
            anchor = buildAnchor(basename(relativePath), href);
        } else {
            let url: URL;
            try {
                url = new URL(href, 'https://kotlinlang.org/docs/');
            } catch (e) {
                // Malformed href: leave the link untouched rather than failing the whole page.
                return;
            }

            if (url.hostname === 'kotlinlang.org' && dirname(url.pathname) === '/docs') {
                const filename = basename(url.pathname);
                if (filename.endsWith('.html')) anchor = buildAnchor(filename, url.hash);
            }
        }

        if (anchor) {
            link.attribs.href = anchor;
            link.attribs['data-origin-href'] = href;
        }
    });

    return $.html(node)
        // sample drop
        .replace(/\/\/sampleStart\n/g, '')
        .replace(/\n\/\/sampleEnd/g, '');
}
110+
111+
/**
 * Handles one task message: renders the page at `relativePath` and reports the
 * outcome with a `result` event.
 *
 * Only pages typed `Page_Documentation` (or allow-listed hidden pages, which
 * are treated as such) produce HTML. `docs/home.html` gets the static
 * `HOME_HTML_CONTENT`; any other page is the concatenation of its processed
 * `section.panel__content` elements. Everything else yields an empty string.
 *
 * @param relativePath page path relative to `DIST_FOLDER`; echoed back as the result id
 */
async function onMessage(relativePath: string) {
    const path = join(DIST_FOLDER, relativePath);
    const [detectedType, $] = await getType(relativePath, path);

    const type = isHiddenProved(relativePath, detectedType) ? 'Page_Documentation' : detectedType;

    let html = '';

    if (type === 'Page_Documentation') {
        if (relativePath === 'docs/home.html') {
            html = HOME_HTML_CONTENT;
        } else {
            for (const section of $('section.panel__content')) {
                html += await fixSectionHtml($, section, relativePath);
            }
        }
    }

    const data: Result = { id: relativePath, html };

    send({ event: 'result', data });
}

// Start listening for tasks, then announce that this worker is ready.
process.on('message', onMessage);
send({ event: 'inited' });
File renamed without changes.

0 commit comments

Comments
 (0)
Please sign in to comment.