Skip to content

Commit

Permalink
feat: add llms.txt
Browse files Browse the repository at this point in the history
  • Loading branch information
thedaviddias committed Feb 16, 2025
1 parent 0ab080b commit 4ae9189
Show file tree
Hide file tree
Showing 4 changed files with 399 additions and 1 deletion.
112 changes: 112 additions & 0 deletions app/[lang]/llms.txt/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import fg from 'fast-glob';
import matter from 'gray-matter';
import * as fs from 'node:fs/promises';
import path from 'node:path';
import { remark } from 'remark';
import remarkGfm from 'remark-gfm';
import remarkStringify from 'remark-stringify';

// Next.js route segment config for this handler.
// NOTE(review): `false` presumably disables time-based revalidation so the generated
// text is cached until the next build — confirm against the Next.js documentation.
export const revalidate = false;

// Regular expressions for cleaning up the content.
// Every pattern carries the `g` flag so that String.prototype.replace removes all
// occurrences, and every pattern is applied to the raw text as-is (there is no
// awareness of Markdown structure such as fenced code blocks).

// `import` statements: an optional clause (`{ ... }`, `*`, or a single identifier)
// followed by `from`, then a quoted module specifier, an optional `;` and an optional
// trailing newline.
// NOTE(review): `import * as ns from '...'` and `import a, { b } from '...'` do not fit
// the optional clause, so they look like they are left in place — confirm.
const IMPORT_REGEX = /import\s+?(?:(?:{[^}]*}|\*|\w+)\s+from\s+)?['"](.*?)['"];?\n?/g;
// JSX tags whose name starts with an uppercase ASCII letter and continues with ASCII
// letters only, with optional attributes. Written to match either the tag alone
// (`<Foo ...>` / `<Foo ... />`) or the tag plus `<`-free text and a capitalised closing tag.
// NOTE(review): the tag-only alternative is tried first and succeeds whenever the tag
// ends in `>`, so the second alternative appears unreachable; inner text and `</Foo>`
// would then stay in the output — confirm.
const COMPONENT_USAGE_REGEX = /<[A-Z][a-zA-Z]*(?:\s+[^>]*)?(?:\/?>|>[^<]*<\/[A-Z][a-zA-Z]*>)/g;
// Callout / Steps / Tabs / Tab / FileTree elements whose inner text contains no `<`;
// the element is removed together with that text. The closing name may be any of the
// listed names, not necessarily the one that opened the element.
const NEXTRA_COMPONENT_REGEX = /<(?:Callout|Steps|Tabs|Tab|FileTree)[^>]*>[^<]*<\/(?:Callout|Steps|Tabs|Tab|FileTree)>/g;
// `{ ... }` expressions, allowing one level of nested braces inside.
const MDX_EXPRESSION_REGEX = /{(?:[^{}]|{[^{}]*})*}/g;
// `export` followed by an optional `default`, an optional declaration keyword,
// whitespace and an identifier, then (lazily) everything up to the first `;`, newline
// or end of input.
// NOTE(review): because the tail stops at the first `;` or newline, a multi-line export
// would only lose its first line — confirm whether the content uses those.
const EXPORT_REGEX = /export\s+(?:default\s+)?(?:const|let|var|function|class|interface|type)?\s+[a-zA-Z_$][0-9a-zA-Z_$]*[\s\S]*?(?:;|\n|$)/g;

/**
 * Route handler that concatenates the English pattern pages into a single
 * plain-text document.
 *
 * Each MDX file under `content/en/patterns` is read, its frontmatter parsed, and
 * its body passed through `processContent`. Files whose frontmatter sets `draft`
 * or `hidden` are skipped, and a file that fails to process is logged and
 * skipped rather than failing the whole response.
 *
 * @returns 200 with all entries joined by blank lines, 404 with
 *   `No content found` when no entry was produced, or 500 with
 *   `Internal Server Error` when the scan itself fails.
 */
export async function GET() {
  try {
    const files = await fg(['content/en/patterns/**/*.mdx']);

    const scan = files.map(async (file) => {
      try {
        const fileContent = await fs.readFile(file);
        const { content, data } = matter(fileContent.toString());

        // Skip if the file is marked as hidden or draft; checked first so no
        // further work is done for unpublished pages.
        if (data.draft || data.hidden) {
          return null;
        }

        // Get the filename without extension to use as fallback title
        const basename = path.basename(file, '.mdx');

        // Extract category from file path. fast-glob reports paths with forward
        // slashes on every platform, so split on '/' rather than `path.sep`
        // (which is '\\' on Windows and would leave every page as 'general').
        // Layout: content/en/patterns/<category>/<file>.mdx -> index 3.
        const pathParts = path.posix.dirname(file).split('/');
        let category = 'general';
        if (pathParts.length > 3 && pathParts[3]) {
          category = pathParts[3];
        }

        // Use filename as title if no title in frontmatter, and convert to Title Case
        const title =
          data.title ||
          basename
            .split('-')
            .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
            .join(' ');

        const processed = await processContent(content);

        // One entry: source path, heading, description (may be empty), body.
        return [
          `File: ${file}`,
          `# ${category.toUpperCase()}: ${title}`,
          `${data.description || ''}`,
          `${processed}`,
        ].join('\n');
      } catch (error) {
        console.error(`Error processing file ${file}:`, error);
        return null;
      }
    });

    // Drop skipped (null) entries.
    const scanned = (await Promise.all(scan)).filter(Boolean);

    if (!scanned.length) {
      return new Response('No content found', { status: 404 });
    }

    return new Response(scanned.join('\n\n'));
  } catch (error) {
    console.error('Error generating LLM content:', error);
    return new Response('Internal Server Error', { status: 500 });
  }
}

/**
 * Turns the body of an MDX page into plain Markdown text.
 *
 * MDX-only syntax is stripped with the module-level regexes, then the result is
 * parsed and re-serialised by remark (with GFM support).
 *
 * @param content - MDX body with the frontmatter already removed.
 * @returns The serialised Markdown. If remark fails, the error is logged and the
 *   regex-cleaned text is returned instead, so the fallback never re-exposes
 *   syntax (exports, Nextra components, fragments) that the main path removes.
 */
async function processContent(content: string): Promise<string> {
  // Cleaned once, outside the try block, so the fallback returns exactly the
  // same cleanup as the main path instead of a weaker subset of it.
  const cleanContent = stripMdxSyntax(content);

  try {
    // Simple markdown processing without MDX
    const file = await remark()
      .use(remarkGfm)
      .use(remarkStringify)
      .process(cleanContent);

    return String(file);
  } catch (error) {
    console.error('Error processing content:', error);
    return cleanContent;
  }
}

/**
 * Removes imports, exports, JSX components and `{...}` expressions from MDX text.
 *
 * NOTE(review): the patterns run over the whole string, so matching text inside
 * fenced code samples is stripped as well — confirm that is acceptable.
 */
function stripMdxSyntax(content: string): string {
  return (
    content
      // Remove imports first
      .replace(IMPORT_REGEX, '')
      // Remove exports
      .replace(EXPORT_REGEX, '')
      // Remove Nextra components with their content
      .replace(NEXTRA_COMPONENT_REGEX, '')
      // Remove other React components
      .replace(COMPONENT_USAGE_REGEX, '')
      // Remove MDX expressions
      .replace(MDX_EXPRESSION_REGEX, '')
      // Clean up multiple newlines
      .replace(/\n{3,}/g, '\n\n')
      // Remove empty JSX expressions (e.g. the `{}` left over from deeper nesting)
      .replace(/{[\s]*}/g, '')
      // Clean up any remaining JSX-like syntax
      .replace(/<>[\s\S]*?<\/>/g, '')
      .replace(/{\s*\/\*[\s\S]*?\*\/\s*}/g, '')
      .trim()
  );
}
2 changes: 1 addition & 1 deletion middleware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ export { middleware } from 'nextra/locales'
export const config = {
// Matcher ignoring `/_next/` and `/api/`
matcher: [
'/((?!api/mdx|api/email|api/patterns/random|api/og|_next/static|_next/image|favicon.ico|robots.txt|og/opengraph-image.png|covers|twitter-image|sitemap.xml|6ba7b811-9dad-11d1-80b4.txt|43mg4ybv6sxxanu24g7dngawd9up5w93.txt|apple-icon.png|manifest|_pagefind|examples).*)'
'/((?!api/mdx|api/email|api/patterns/random|api/og|_next/static|_next/image|llms.txt|favicon.ico|robots.txt|og/opengraph-image.png|covers|twitter-image|sitemap.xml|6ba7b811-9dad-11d1-80b4.txt|43mg4ybv6sxxanu24g7dngawd9up5w93.txt|apple-icon.png|manifest|_pagefind|examples).*)'
]
}
9 changes: 9 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,13 @@
"@mdn/browser-compat-data": "^5.6.37",
"@radix-ui/react-slot": "^1.1.2",
"@sentry/nextjs": "^8.54.0",
"@types/chalk": "^2.2.4",
"@types/dagre": "^0.7.52",
"chalk": "^5.4.1",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"dagre": "^0.8.5",
"fast-glob": "^3.3.3",
"gray-matter": "^4.0.3",
"html-to-image": "^1.11.11",
"lucide-react": "^0.475.0",
Expand All @@ -45,13 +48,17 @@
"next-plausible": "^3.12.4",
"nextra": "4.2.5",
"nextra-theme-docs": "4.2.5",
"openai": "^4.83.0",
"react": "19.0.0",
"react-dom": "19.0.0",
"react-intersection-observer": "^9.15.1",
"react-markdown": "^9.0.3",
"react-resizable-panels": "^2.1.7",
"reactflow": "^11.11.4",
"remark": "^15.0.1",
"remark-gfm": "^4.0.0",
"remark-mdx": "^3.1.0",
"remark-stringify": "^11.0.0",
"remove-markdown": "^0.6.0",
"require-in-the-middle": "^7.5.1",
"simple-icons": "^14.6.0",
Expand All @@ -69,11 +76,13 @@
"@types/node": "22.13.1",
"@types/react": "19.0.8",
"cross-env": "^7.0.3",
"dotenv": "^16.4.7",
"eslint": "^9.20.0",
"eslint-config-next": "15.1.6",
"pagefind": "^1.3.0",
"plop": "^4.0.1",
"tailwindcss": "4.0.5",
"ts-node": "^10.9.2",
"tsx": "^4.19.2",
"typescript": "^5.7.3"
}
Expand Down
Loading

0 comments on commit 4ae9189

Please sign in to comment.