Skip to content
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 158 additions & 1 deletion packages/fern-docs/bundle/src/server/llm-txt-md.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import {
toTree,
visit,
} from "@fern-docs/mdx";
import { applyTemplates } from "../mdx/components/code/Template";

export function convertToLlmTxtMarkdown(
markdown: string,
Expand All @@ -22,12 +23,168 @@ export function convertToLlmTxtMarkdown(
return [
`# ${title}`,
description != null ? `> ${description}` : undefined,
stripMdxFeatures(content, format),
stripMdxFeatures(expandMdxComponents(content, format), format),

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there anywhere we should be adding or modifying tests for this?

]
.filter(isNonNullish)
.join("\n\n");
}

/**
 * Rewrites selected custom MDX components into plain markdown nodes so the
 * later stripping pass keeps their content instead of discarding it.
 *
 * Components handled while walking the tree:
 * - `TSFetchCodeBlock`: swapped in place for a single `typescript` code node.
 * - `CodeGroup`: replaced by the code nodes extracted from its children.
 * - `Template`: its children are serialized, run through `applyTemplates`
 *   with the component's `data` attribute, re-parsed as plain markdown and
 *   spliced in.
 *
 * After the walk the tree is serialized and `applyTemplates` is run once
 * more with the document-wide template data.
 *
 * @param markdown - raw document source
 * @param format - source format; anything other than "mdx" is returned as-is
 * @returns markdown with the supported components expanded
 */
function expandMdxComponents(markdown: string, format: "mdx" | "md"): string {
  // Nothing to expand unless the input is MDX.
  if (format !== "mdx") {
    return markdown;
  }

  const { mdast } = toTree(markdown, {
    format,
    sanitize: true,
  });

  visit(mdast, (node, idx, parent) => {
    // Replacement needs a parent slot, and only JSX elements are of interest.
    if (parent == null || idx == null || !isMdxJsxElementHast(node)) {
      return;
    }

    switch (node.name) {
      case "TSFetchCodeBlock": {
        const snippet = extractCodeFromTSFetchCodeBlock(node);
        if (snippet) {
          const replacement = {
            type: "code",
            lang: "typescript",
            value: snippet,
          };
          parent.children[idx] = replacement;
        }
        return;
      }

      case "CodeGroup": {
        const snippets = extractCodeFromCodeGroup(node);
        if (snippets.length === 0) {
          return;
        }
        parent.children.splice(idx, 1, ...snippets);
        // Index handed back to `visit`: position of the last inserted node.
        return idx + snippets.length - 1;
      }

      case "Template": {
        const localData = extractTemplateData(node);
        if (!localData || !node.children) {
          return;
        }

        const innerMarkdown = mdastToMarkdown({ type: "root", children: node.children });
        const renderedMarkdown = applyTemplates(innerMarkdown, localData);

        // The substituted text is re-parsed as plain markdown, not MDX.
        const { mdast: renderedTree } = toTree(renderedMarkdown, { format: "md", sanitize: true });
        if (!renderedTree.children) {
          return;
        }
        parent.children.splice(idx, 1, ...renderedTree.children);
        // Index handed back to `visit`: position of the last inserted node.
        return idx + renderedTree.children.length - 1;
      }

      default:
        return;
    }
  });

  const serialized = mdastToMarkdown(mdast);

  // Second pass: document-wide template variables.
  const globalData = extractGlobalTemplateData(markdown);
  if (globalData && Object.keys(globalData).length > 0) {
    return applyTemplates(serialized, globalData);
  }

  return serialized;
}

/**
 * Extract code content from a TSFetchCodeBlock component.
 *
 * Sources are tried in this order:
 * 1. The first direct child whose type is "text" or "code": its `value`,
 *    else the `value` of its first child, else an empty string.
 * 2. The `content` attribute.
 * 3. The `src` attribute, rendered as a `// Code from: <src>` placeholder.
 *
 * Attribute values are only used when they are non-empty plain strings.
 * MDX attribute values may also be expression objects (e.g. `content={...}`)
 * or null for valueless attributes; those are skipped so that the result
 * always honours the `string | null` return type instead of leaking an
 * object or producing "// Code from: [object Object]".
 *
 * @param node - the JSX element node for the component
 * @returns the code text, or null when nothing usable is found
 */
function extractCodeFromTSFetchCodeBlock(node: any): string | null {
  if (node.children && node.children.length > 0) {
    const codeChild = node.children.find((child: any) => child.type === "text" || child.type === "code");
    if (codeChild) {
      return codeChild.value || codeChild.children?.[0]?.value || "";
    }
  }

  // Look up an attribute by name and accept it only if it is a non-empty string.
  const readStringAttribute = (name: string): string | null => {
    const value: unknown = node.attributes?.find((attr: any) => attr.name === name)?.value;
    return typeof value === "string" && value.length > 0 ? value : null;
  };

  const content = readStringAttribute("content");
  if (content != null) {
    return content;
  }

  const src = readStringAttribute("src");
  if (src != null) {
    return `// Code from: ${src}`;
  }

  return null;
}

/**
 * Collect code nodes from the direct children of a CodeGroup component.
 *
 * Two kinds of children are recognised:
 * - nodes of type "code", or elements whose `tagName` is "code": language
 *   comes from `lang`, content from `value` or the first child's `value`;
 * - JSX elements named `Code`: language comes from the `language`
 *   attribute, content from the first child's `value`.
 *
 * Missing languages fall back to "text" and missing content to "".
 * Any other child is ignored.
 *
 * @param node - the CodeGroup JSX element node
 * @returns one `{ type: "code", lang, value }` object per recognised child
 */
function extractCodeFromCodeGroup(node: any): any[] {
  if (!node.children) {
    return [];
  }

  return node.children.flatMap((entry: any): any[] => {
    const isPlainCode = entry.type === "code" || (entry.type === "element" && entry.tagName === "code");
    if (isPlainCode) {
      return [
        {
          type: "code",
          lang: entry.lang || "text",
          value: entry.value || entry.children?.[0]?.value || "",
        },
      ];
    }

    if (isMdxJsxElementHast(entry) && entry.name === "Code") {
      const languageAttr = entry.attributes?.find((attr: any) => attr.name === "language");
      return [
        {
          type: "code",
          lang: languageAttr?.value || "text",
          value: entry.children?.[0]?.value || "",
        },
      ];
    }

    return [];
  });
}

/**
 * Read the `data` attribute of a Template component.
 *
 * @param node - the Template JSX element node
 * @returns the attribute's value when it is a truthy object, otherwise null
 *          (no `data` attribute, or a string / null / other primitive value)
 */
function extractTemplateData(node: any): Record<string, string> | null {
  const candidate = node.attributes?.find((attr: any) => attr.name === "data")?.value;
  return candidate && typeof candidate === "object" ? candidate : null;
}

/**
 * Build the document-wide template variables.
 *
 * Starts from a fixed set of defaults (OpenRouter-oriented placeholder
 * values) and, when the document's frontmatter has a truthy `templateData`
 * entry, copies its properties over the defaults.
 *
 * @param markdown - raw document source, including frontmatter
 * @returns the merged variable map; never empty because of the defaults
 */
function extractGlobalTemplateData(markdown: string): Record<string, string> {
  const variables: Record<string, string> = {
    // Example value
    FREE_MODEL_CREDITS_THRESHOLD: "10",
    API_KEY_REF: "your-api-key",
    BASE_URL: "https://openrouter.ai/api/v1",
  };

  const frontmatter = getFrontmatter(markdown).data;
  if (!frontmatter.templateData) {
    return variables;
  }

  // Frontmatter entries win over the built-in defaults.
  Object.assign(variables, frontmatter.templateData);
  return variables;
}

/**
* This is a living list of mdx features that we don't want to include in the LLM TXT format:
* - esm imports
Expand Down