From 62d1627412c04fa75fad89486de89da5d2d981cb Mon Sep 17 00:00:00 2001 From: Misha Bragin Date: Mon, 1 Dec 2025 20:39:03 +0100 Subject: [PATCH] Add llms.txt (#502) --- .gitignore | 4 + package.json | 5 +- scripts/generate-llm-docs.mjs | 238 ++++++++++++++++++++++++++++++++++ 3 files changed, 245 insertions(+), 2 deletions(-) create mode 100644 scripts/generate-llm-docs.mjs diff --git a/.gitignore b/.gitignore index 8b1254a5..69cdf01d 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,7 @@ package-lock.json /generator/openapi/ /generator/openapi.yml /generator/expandOpenAPIRef + +# LLM documentation (generated) +/public/llms/ +/public/llms.txt diff --git a/package.json b/package.json index d6527a3e..c9291b72 100644 --- a/package.json +++ b/package.json @@ -3,9 +3,10 @@ "version": "1.0.0", "private": true, "scripts": { - "dev": "next dev", - "build": "next build", + "dev": "npm run gen:llm && next dev", + "build": "npm run gen:llm && next build", "gen": "swagger-codegen generate -i https://raw.githubusercontent.com/netbirdio/netbird/main/management/server/http/api/openapi.yml -l openapi -o generator/openapi && npx ts-node generator/index.ts gen --input generator/openapi/openapi.json --output src/pages/ipa/resources", + "gen:llm": "node scripts/generate-llm-docs.mjs", "start": "next start", "lint": "next lint" }, diff --git a/scripts/generate-llm-docs.mjs b/scripts/generate-llm-docs.mjs new file mode 100644 index 00000000..5d13416c --- /dev/null +++ b/scripts/generate-llm-docs.mjs @@ -0,0 +1,238 @@ +#!/usr/bin/env node +/** + * LLM Documentation Generator + * + * Generates clean markdown files from MDX pages for LLM indexing. + * Creates: + * - public/llms/*.md - Clean markdown versions of each page + * - public/llms.txt - Index file linking to all pages + */ + +import fs from 'fs'; +import path from 'path'; +import { fileURLToPath } from 'url'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const ROOT_DIR = path.join(__dirname, '..'); +const PAGES_DIR = path.join(ROOT_DIR, 'src/pages'); +const OUTPUT_DIR = path.join(ROOT_DIR, 'public/llms'); +const LLMS_TXT_PATH = path.join(ROOT_DIR, 'public/llms.txt'); + +// Base URL for the docs site +const BASE_URL = 'https://docs.netbird.io'; + +/** + * Strip JSX/React components from MDX content, keeping clean markdown + */ +function stripJsx(content) { + let result = content; + + // Remove import statements + result = result.replace(/^import\s+.*?[;\n]/gm, ''); + + // Remove export statements (but keep the content if it's a description) + result = result.replace(/^export\s+const\s+description\s*=\s*(['"`])(.+?)\1;?\s*$/gm, ''); + result = result.replace(/^export\s+.*?[;\n]/gm, ''); + + // Remove JSX self-closing tags like ,