diff --git a/config/_default/hugo.toml b/config/_default/hugo.toml index 6a20bfd3891..4bdd8ad476b 100644 --- a/config/_default/hugo.toml +++ b/config/_default/hugo.toml @@ -63,9 +63,10 @@ replacements = "github.com/FortAwesome/Font-Awesome -> @fortawesome/fontawesome- # Comment out if you don't want the "print entire section" link enabled. [outputs] - section = ["HTML", "print"] + home = ["HTML", "LLMSTXT", "LLMSFULL", "PAGEMD"] + section = ["HTML", "print", "PAGEMD"] # MvM - add line below to print every page - page = ["HTML", "print"] + page = ["HTML", "print", "PAGEMD"] # MvM - add line below to activate production of RSS pages [outputFormats] @@ -73,6 +74,31 @@ replacements = "github.com/FortAwesome/Font-Awesome -> @fortawesome/fontawesome- mediatype = "application/rss" baseName = "rss" + # AI agent discovery: llms.txt index (home-level) + [outputFormats.LLMSTXT] + baseName = "llms" + mediaType = "text/plain" + isPlainText = true + notAlternative = true + + # AI agent discovery: llms-full.txt with full page content (home-level) + [outputFormats.LLMSFULL] + baseName = "llms-full" + mediaType = "text/plain" + isPlainText = true + notAlternative = true + + # AI agent discovery: clean Markdown version of each page at index.html.md + [outputFormats.PAGEMD] + baseName = "index.html" + mediaType = "text/markdown" + isPlainText = true + notAlternative = true + +[mediaTypes] + [mediaTypes."text/markdown"] + suffixes = ["md"] + # MvM - Set default priority for sitemap = 0.5 # Can override in front matter for each section using # cascade: diff --git a/config/production/hugo.toml b/config/production/hugo.toml index 1193ff913a5..daddcd6629b 100644 --- a/config/production/hugo.toml +++ b/config/production/hugo.toml @@ -8,7 +8,7 @@ title = "Mendix Documentation" # MvM - add line below to output MxDocsAlgolia at root of /public (master only) [outputs] _merge = 'shallow' - home = ["MxDocsAlgolia"] + home = ["MxDocsAlgolia", "LLMSTXT", "LLMSFULL", "PAGEMD"] # MvM - MxDocsAlgolia 
outputFormats parameters - to generate Algolia for Mendix [outputFormats] diff --git a/layouts/_default/list.llmsfull.txt b/layouts/_default/list.llmsfull.txt new file mode 100644 index 00000000000..06d45762a7e --- /dev/null +++ b/layouts/_default/list.llmsfull.txt @@ -0,0 +1,17 @@ +# {{ .Site.Title }} — Full Documentation + +> This file contains the complete Markdown content of all Mendix documentation pages for AI agent indexing. +> For a page index with descriptions, see llms.txt. + +{{- /* Walk the same tree as llms.txt: locate the landingpage root section, then emit it and its descendants */ -}} +{{- $docsRoot := index (where .Site.Sections "Type" "landingpage") 0 -}} + +{{- /* Include the root (home/landing) page itself */ -}} +{{- if $docsRoot -}} +{{- printf "\n---\n\n# %s\n\nURL: %s\nMarkdown: %sindex.html.md" $docsRoot.Title $docsRoot.Permalink $docsRoot.Permalink -}} +{{- with $docsRoot.Description -}}{{- printf "\nDescription: %s" . -}}{{- end -}} +{{- printf "\n\n%s" ($docsRoot.RawContent | strings.ReplaceRE `\]\((/[^:)#" ]+/)(#[^)"]*)?\)` `](${1}index.html.md${2})`) -}} +{{- range $docsRoot.Pages -}} + {{- partial "llms-tree-full.txt" (dict "page" .) -}} +{{- end -}} +{{- end }} diff --git a/layouts/_default/list.llmstxt.txt b/layouts/_default/list.llmstxt.txt new file mode 100644 index 00000000000..a005f48953c --- /dev/null +++ b/layouts/_default/list.llmstxt.txt @@ -0,0 +1,14 @@ +# {{ .Site.Title }} + +> Documentation for Mendix Studio Pro, platform features, APIs, and how-to guides. + +{{- /* The docs root section has type: landingpage and url: / + Its immediate children are the top-level sections (refguide, howto, etc.) */ -}} +{{- $docsRoot := index (where .Site.Sections "Type" "landingpage") 0 -}} +{{- if $docsRoot -}} +{{- partial "llms-tree.txt" (dict "page" $docsRoot "depth" 0) -}} +{{- end }} + +## Optional + +- [llms-full.txt]({{ .Site.BaseURL }}llms-full.txt): Full Markdown content of all documentation pages for offline indexing. 
diff --git a/layouts/_default/list.pagemd.md b/layouts/_default/list.pagemd.md new file mode 100644 index 00000000000..1b3c8b7af3d --- /dev/null +++ b/layouts/_default/list.pagemd.md @@ -0,0 +1,37 @@ +# {{ .Title | strings.TrimSpace -}} + +{{ $needSeparator := false -}} + +{{/* Description, emitted as a Markdown blockquote */}} +{{ with .Description | strings.TrimSpace }} + +> {{ replace . "\n" "\n> " -}} +{{ $needSeparator = true -}} +{{ end -}} + +{{/* Page content with index.html.md link rewriting */}} +{{ $content := .RenderShortcodes | strings.TrimSpace | strings.ReplaceRE `\]\((/[^:)#" ]+/)(#[^)"]*)?\)` `](${1}index.html.md${2})` -}} +{{ with $content -}} +{{ if $needSeparator }} +--- + +{{ else }} +{{ end -}} +{{ . }} +{{ $needSeparator = true -}} +{{ end -}} + +{{/* Child pages list: one bullet per child page, linking to that child's index.html.md */}} +{{ with .Pages -}} +{{ if $needSeparator }} +--- + +{{ else }} +{{ end -}} +Section pages: + +{{ range . -}} +- [{{ .Title | strings.TrimSpace }}]({{ .RelPermalink }}index.html.md) +{{- with .Description | strings.TrimSpace }}: {{ . }}{{ end }} +{{ end -}} +{{ end -}} diff --git a/layouts/_default/single.pagemd.md b/layouts/_default/single.pagemd.md new file mode 100644 index 00000000000..d4232e73345 --- /dev/null +++ b/layouts/_default/single.pagemd.md @@ -0,0 +1 @@ +{{ .RawContent | strings.ReplaceRE `\]\((/[^:)#" ]+/)(#[^)"]*)?\)` `](${1}index.html.md${2})` }} \ No newline at end of file diff --git a/layouts/partials/llms-tree-full.txt b/layouts/partials/llms-tree-full.txt new file mode 100644 index 00000000000..dcf6da95e20 --- /dev/null +++ b/layouts/partials/llms-tree-full.txt @@ -0,0 +1,7 @@ +{{- $page := .page -}} +{{- if and (not $page.Draft) (not $page.Params.private) -}} +{{- printf "\n---\n\n# %s\n\nURL: %s\nMarkdown: %sindex.html.md" $page.Title $page.Permalink $page.Permalink -}} +{{- with $page.Description -}}{{- printf "\nDescription: %s" . 
-}}{{- end -}} +{{- printf "\n\n%s" ($page.RawContent | strings.ReplaceRE `\]\((/[^:)#" ]+/)(#[^)"]*)?\)` `](${1}index.html.md${2})`) -}} +{{- if $page.IsSection -}}{{- range $page.Pages -}}{{- partial "llms-tree-full.txt" (dict "page" .) -}}{{- end -}}{{- end -}} +{{- end -}} diff --git a/layouts/partials/llms-tree.txt b/layouts/partials/llms-tree.txt new file mode 100644 index 00000000000..5fcc20cf574 --- /dev/null +++ b/layouts/partials/llms-tree.txt @@ -0,0 +1,8 @@ +{{- $page := .page -}} +{{- $depth := .depth -}} +{{- if and (not $page.Draft) (not $page.Params.private) -}} +{{- $indent := strings.Repeat $depth " " -}} +{{- $desc := "" -}}{{- with $page.Description -}}{{- $desc = printf ": %s" . -}}{{- end -}} +{{- printf "\n%s- [%s](%sindex.html.md)%s" $indent $page.Title $page.Permalink $desc -}} +{{- if $page.IsSection -}}{{- range $page.Pages -}}{{- partial "llms-tree.txt" (dict "page" . "depth" (add $depth 1)) -}}{{- end -}}{{- end -}} +{{- end -}} diff --git a/layouts/robots.txt b/layouts/robots.txt index d5bf6a73a6c..c8ebdf136aa 100644 --- a/layouts/robots.txt +++ b/layouts/robots.txt @@ -17,4 +17,28 @@ Disallow: /404.html Disallow: / {{- end }} +# AI crawlers — explicitly allow indexing and training data collection +{{- $aiBots := slice + "GPTBot" "OAI-SearchBot" "ChatGPT-User" + "ClaudeBot" "Claude-SearchBot" "Claude-User" + "Google-Extended" + "Meta-ExternalAgent" + "PerplexityBot" + "Applebot-Extended" + "Diffbot" + "CCBot" + "Bytespider" -}} +{{- range $aiBots }} + +User-agent: {{ . 
}} +{{- if $isProduction }} +Allow: / +Disallow: /_includes/ +Disallow: /_print/ +Disallow: /attachments/ +{{- else }} +Disallow: / +{{- end }} +{{- end }} + # End of robots.txt file diff --git a/package.json b/package.json index 3e386957570..02e56721187 100644 --- a/package.json +++ b/package.json @@ -31,5 +31,10 @@ "dependencies": { "@fortawesome/fontawesome-free": "^6.5.2", "bootstrap": "^5.3.3" + }, + "allowScripts": { + "github:google/docsy#01c827ea890e8e498f6046a7666a3031f318cc7f": true, + "hugo-extended@0.156.0": true, + "fsevents@2.3.3": true } }