hud-evals · mintlify · Jun 19, 2026
diff --git a/docs/migrate-v6.mdx b/docs/migrate-v6.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Migrate to v6"
-description: "Convert v5 environments (scenarios + tools + MCP serving) to the leaner v6 spec (tasks + capabilities)."
+description: "Migrate HUD environments from v5 to v6: replace MCP tools with capabilities, swap @env.scenario for @env.template, and convert task runs."
 icon: "arrows-rotate"
 ---
 

diff --git a/docs/platform/agents/chats.mdx b/docs/platform/agents/chats.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Chat Agents"
-description: "Multi-turn conversational agents served over A2A — pair a scenario's tools with a model"
+description: "Build multi-turn chat agents on the HUD platform, served over the A2A protocol, by pairing a chat-enabled scenario with a language model."
 icon: "comments"
 ---
 

diff --git a/docs/platform/environments.mdx b/docs/platform/environments.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Environments"
-description: "Deploy, manage, and monitor agent environments on the HUD platform."
+description: "Deploy and monitor HUD agent environments via GitHub repo import, CLI deploy, or prebuilt templates, with live build logs and auto-rebuilds."
 icon: "cube"
 ---
 

diff --git a/docs/platform/index.mdx b/docs/platform/index.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Overview"
-description: "Navigate the HUD platform to manage models, environments, and tasksets."
+description: "Overview of the HUD platform dashboard for managing AI agent models, environments, tasksets, traces, API keys, and team settings at hud.ai."
 icon: "building"
 ---
 

diff --git a/docs/platform/internal/trace-analysis.mdx b/docs/platform/internal/trace-analysis.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Trace Analysis Environment"
-description: "How we built an environment to let agents analyze their own traces—and how you can apply the same pattern"
+description: "How HUD built an agent-driven trace analysis environment using coding tools and preprocessed files to debug failed evaluations at scale."
 icon: "magnifying-glass-chart"
 ---
 

diff --git a/docs/platform/models.mdx b/docs/platform/models.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Models"
-description: "Browse, fork, and train AI models on the HUD platform."
+description: "Browse, fork, and train AI models on the HUD platform: manage checkpoints, inspect inference logs, and route inference through the HUD Gateway."
 icon: "robot"
 ---
 

diff --git a/docs/platform/slack.mdx b/docs/platform/slack.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Slack Integration"
-description: "Run HUD scenarios directly from Slack by @mentioning the HUD bot."
+description: "Run HUD agent scenarios from Slack by @mentioning the HUD bot: trigger evaluations, list scenarios, and post trace links back to channels."
 icon: "slack"
 ---
 

diff --git a/docs/v6/advanced/chat.mdx b/docs/v6/advanced/chat.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Chat"
-description: "Multi-turn conversational tasks and the Chat runner."
+description: "Build multi-turn chat tasks in HUD v6 using message-list prompts and the Chat runner to drive conversations, manage history, and grade replies."
 icon: "comments"
 ---
 

diff --git a/docs/v6/advanced/harbor-convert.mdx b/docs/v6/advanced/harbor-convert.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Harbor interop"
-description: "Load Harbor tasks into the HUD runtime, or export HUD tasks as Harbor folders."
+description: "Load Harbor task directories into HUD as tasksets, or export HUD tasks to Harbor folders with Dockerfiles, instruction.md, and graded test scripts."
 icon: "ship"
 ---
 

diff --git a/docs/v6/advanced/integrations.mdx b/docs/v6/advanced/integrations.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Integrations"
-description: "Use HUD with external agent frameworks and endpoints."
+description: "Integrate HUD with external agent frameworks: build custom harnesses, wrap browser-use, point at OpenAI-compatible endpoints, or serve via A2A."
 icon: "puzzle-piece"
 ---
 

diff --git a/docs/v6/advanced/patterns.mdx b/docs/v6/advanced/patterns.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Patterns"
-description: "Compose capabilities, manage state, and structure larger task sets."
+description: "Patterns for HUD v6 environments: compose capabilities, manage stateful daemons, parameterize task difficulty, and group rollouts for variance."
 icon: "shapes"
 ---
 

diff --git a/docs/v6/advanced/subagents.mdx b/docs/v6/advanced/subagents.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Subagents as tools"
-description: "Expose a specialist sub-agent as a plain MCP tool an orchestrator can call."
+description: "Expose a specialist HUD subagent as an MCP tool an orchestrator can call, by wrapping a task rollout in a FastMCP-registered function."
 icon: "diagram-project"
 ---
 

diff --git a/docs/v6/cookbooks/a2a-chat.mdx b/docs/v6/cookbooks/a2a-chat.mdx
@@ -1,6 +1,6 @@
 ---
 title: "A2A chat"
-description: "Serve a chat task over the A2A protocol and talk to it from any client."
+description: "Serve a HUD chat task over the A2A protocol with a reference server: per-context Chat sessions, agent card, and citation artifacts."
 icon: "plug"
 ---
 

diff --git a/docs/v6/cookbooks/coding-agent.mdx b/docs/v6/cookbooks/coding-agent.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Coding agent"
-description: "Run a coding agent against a shell + files environment, graded by tests."
+description: "Run a HUD coding agent against a shell + files workspace with a seeded bug, graded by pytest via BashGrader to prevent reward hacking."
 icon: "code"
 ---
 

diff --git a/docs/v6/cookbooks/ops-diagnostics.mdx b/docs/v6/cookbooks/ops-diagnostics.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Ops diagnostics"
-description: "An investigation task where the agent integrates evidence to produce a diagnosis."
+description: "Build a HUD investigation task where an agent reads logs, deploy history, and traces to diagnose a root cause, graded by an LLM judge."
 icon: "stethoscope"
 ---
 

diff --git a/docs/v6/cookbooks/robot-benchmark.mdx b/docs/v6/cookbooks/robot-benchmark.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Robot benchmark"
-description: "Run a VLA policy against a containerized robot sim, graded by task success."
+description: "Run a pi0.5 VLA policy against the LIBERO robot manipulation benchmark in HUD using a Dockerized sim and the beta robot capability."
 icon: "robot"
 tag: "Beta"
 ---

diff --git a/docs/v6/faq.mdx b/docs/v6/faq.mdx
@@ -1,6 +1,6 @@
 ---
 title: "FAQ"
-description: "Answers to the questions that come up most when getting started with HUD."
+description: "Answers common HUD questions about API keys, Docker, GPU needs, platform support, costs, and when to use hud eval, serve, or deploy."
 icon: "circle-question"
 ---
 

diff --git a/docs/v6/index.mdx b/docs/v6/index.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Introduction"
-description: "Build, evaluate, and train AI agents on RL environments you define once and run anywhere."
+description: "Introduction to the HUD SDK for building reusable RL environments and capabilities that any AI agent, model, or training harness can run."
 icon: "book"
 ---
 

diff --git a/docs/v6/quickstart.mdx b/docs/v6/quickstart.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Quickstart"
-description: "Install, write a task, run it against a model, and read the reward."
+description: "Install the HUD CLI, write your first task with a prompt and reward, then run it against Claude through the gateway to get a graded trace."
 icon: "bolt"
 ---
 

diff --git a/docs/v6/reference/agents.mdx b/docs/v6/reference/agents.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Agents"
-description: "Built-in agents, their configs, create_agent, and the Run contract."
+description: "Reference for HUD's built-in agent classes, create_agent, provider configs, and the Run contract used to drive rollouts against capabilities."
 icon: "robot"
 ---
 

diff --git a/docs/v6/reference/capabilities.mdx b/docs/v6/reference/capabilities.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Capabilities"
-description: "The connections an environment exposes, how to spin each one up, and the clients that attach to them."
+description: "Reference for HUD environment capabilities: ssh shell, mcp tools, cdp browser, rfb VNC, and robot, including how to spin each one up."
 icon: "plug"
 ---
 

diff --git a/docs/v6/reference/cli.mdx b/docs/v6/reference/cli.mdx
@@ -1,6 +1,6 @@
 ---
 title: "CLI"
-description: "The hud command reference across the environment lifecycle."
+description: "Reference for the hud CLI commands: init, serve, eval, deploy, task, and sync, covering environment build, evaluation, and platform sync."
 icon: "terminal"
 ---
 

diff --git a/docs/v6/reference/environment.mdx b/docs/v6/reference/environment.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Environment"
-description: "The Environment class: tasks, capabilities, initializers, and serving."
+description: "Reference for the HUD Environment class: registering task templates, attaching capabilities, lifecycle hooks, and serving the control channel."
 icon: "cube"
 ---
 

diff --git a/docs/v6/reference/graders.mdx b/docs/v6/reference/graders.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Graders"
-description: "Native graders, comparison helpers, and the native grade combiner."
+description: "Reference for HUD graders including BashGrader, LLMJudgeGrader, comparison helpers like exact_match and contains, and the combine function."
 icon: "scale-balanced"
 ---
 

diff --git a/docs/v6/reference/robots.mdx b/docs/v6/reference/robots.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Robots"
-description: "The robot capability: contracts, bridges, and the agent harness."
+description: "Reference for HUD's beta robot capability: RobotBridge, RobotEndpoint, RobotAgent, and the openpi-style observation and action contract over WebSocket."
 icon: "robot"
 tag: "Beta"
 ---

diff --git a/docs/v6/reference/tasks.mdx b/docs/v6/reference/tasks.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Tasks & Tasksets"
-description: "The Task, Taskset, Job, and SyncPlan API."
+description: "Reference for HUD Task, Taskset, and Job APIs, including task authoring with templates and runtime placement options for local, Docker, or hosted runs."
 icon: "list-check"
 ---
 

diff --git a/docs/v6/reference/training.mdx b/docs/v6/reference/training.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Training"
-description: "The TrainingClient API, loss set, custom losses, and the hud models CLI."
+description: "Reference for the HUD TrainingClient: forward_backward, optim_step, built-in and custom losses, and the hud models CLI for trainable checkpoints."
 icon: "dumbbell"
 ---
 

diff --git a/docs/v6/reference/types.mdx b/docs/v6/reference/types.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Types"
-description: "Run, Trace, answer and result types, and typed task I/O."
+description: "Reference for HUD's serializable types: Run, Trace, Grade, Step, and Answer, exchanged between agents, tasks, and graders during a rollout."
 icon: "code"
 ---
 

diff --git a/docs/v6/run/models.mdx b/docs/v6/run/models.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Run on any model"
-description: "Evaluate a task with Claude, OpenAI, Gemini, or any OpenAI-compatible endpoint."
+description: "Run HUD evaluations against Claude, OpenAI, Gemini, or any OpenAI-compatible endpoint like vLLM using hud eval or the create_agent API in code."
 icon: "robot"
 ---
 

diff --git a/docs/v6/run/signal.mdx b/docs/v6/run/signal.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Designing tasks for signal"
-description: "Build tasks that produce learnable, well-calibrated training signal."
+description: "Design HUD tasks that produce learnable RL signal: within-group reward spread, calibrated difficulty, and graders that resist reward hacking."
 icon: "wave-square"
 ---
 

diff --git a/docs/v6/run/training.mdx b/docs/v6/run/training.mdx
@@ -1,6 +1,6 @@
 ---
 title: "Train on rewards"
-description: "Turn rewarded rollouts into weight updates — on HUD's managed trainer or your own loop."
+description: "Train models on HUD rewards using the managed TrainingClient or your own GRPO or PPO loop, with custom losses and trainable forked model checkpoints."
 icon: "dumbbell"
 ---