From b74c36507502f049b5c784960596ab03b49abbc7 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 3 May 2026 12:37:06 -0400 Subject: [PATCH] docs: add llms.txt ecosystem hub at site root MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds docs/source/llms.txt following the llmstxt.org schema as a directory hub for the DataFusion ecosystem: links to the core Rust user/library/ contributor guides, Rust API docs, and the Python/Ballista/Comet subproject docs roots. Configures Sphinx html_extra_path so the file is served verbatim at https://datafusion.apache.org/llms.txt, and excludes it from the RAT license-header check (markdown body cannot carry the standard "..." comment header). Per the convention noted in the file, agents can probe each subproject docs root for its own llms.txt — keeps the hub future-proof without hardcoding pending URLs. Co-Authored-By: Claude Opus 4.7 (1M context) --- dev/release/rat_exclude_files.txt | 1 + docs/source/conf.py | 4 ++++ docs/source/llms.txt | 26 ++++++++++++++++++++++++++ 3 files changed, 31 insertions(+) create mode 100644 docs/source/llms.txt diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 7953a5b4e2913..f5ce368df724e 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -60,6 +60,7 @@ datafusion/proto-common/src/generated/prost.rs .github/ISSUE_TEMPLATE/bug_report.yml .github/ISSUE_TEMPLATE/feature_request.yml .github/workflows/docs.yaml +docs/source/llms.txt **/node_modules/* datafusion/wasmtest/pkg/* clippy.toml diff --git a/docs/source/conf.py b/docs/source/conf.py index 03dcfb5bfa61b..c8027fc71bd54 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -109,6 +109,10 @@ # so a file named "default.css" will overwrite the builtin "default.css". 
html_static_path = ["_static"] +# Copy agent-facing files (llms.txt) verbatim to the site root so they +# resolve at the conventional URL `https://datafusion.apache.org/llms.txt`. +html_extra_path = ["llms.txt"] + html_logo = "_static/images/2x_bgwhite_original.png" html_css_files = ["theme_overrides.css"] diff --git a/docs/source/llms.txt b/docs/source/llms.txt new file mode 100644 index 0000000000000..5d738107c8d33 --- /dev/null +++ b/docs/source/llms.txt @@ -0,0 +1,26 @@ +# Apache DataFusion + +> Apache DataFusion is an extensible query engine written in Rust that uses Apache Arrow as its in-memory format. This file is a directory of agent-facing entry points for the DataFusion ecosystem — the Rust core query engine and its subprojects. Subproject `llms.txt` files contain the project-specific guidance for writing code against each one. + +## Core DataFusion (Rust) + +- [User guide](https://datafusion.apache.org/user-guide/introduction.html): install, example usage, SQL, DataFrame, expressions, configuration, explain plans. +- [Library user guide](https://datafusion.apache.org/library-user-guide/index.html): embedding DataFusion, extending SQL, custom table providers, building logical plans, the query optimizer. +- [Contributor guide](https://datafusion.apache.org/contributor-guide/index.html): development environment, architecture, testing, release management, governance. +- [Rust API docs (`docs.rs`)](https://docs.rs/datafusion/latest/datafusion/): generated reference for the `datafusion` crate. +- [GitHub repository](https://github.com/apache/datafusion): source, issues, pull requests. + +## Subprojects + +Each subproject may expose its own `llms.txt` at `<docs-root>/llms.txt` — agents following the [llmstxt.org](https://llmstxt.org) convention can probe these paths for project-specific guidance. + +- [DataFusion Python](https://datafusion.apache.org/python/): Python bindings — SQL and lazy DataFrame API over Apache Arrow. 
+- [DataFusion Ballista](https://datafusion.apache.org/ballista/): distributed execution extension for DataFusion. +- [DataFusion Comet](https://datafusion.apache.org/comet/): Apache Spark accelerator built on DataFusion. + +## Optional + +- [Blog](https://datafusion.apache.org/blog/): release notes and ecosystem updates. +- [crates.io `datafusion`](https://crates.io/crates/datafusion): published crate. +- [Code of conduct](https://github.com/apache/datafusion/blob/main/CODE_OF_CONDUCT.md) +- [Apache Software Foundation](https://apache.org)