diff --git a/api/content-markdown.ts b/api/content-markdown.ts index c50349c..16a1ec3 100644 --- a/api/content-markdown.ts +++ b/api/content-markdown.ts @@ -10,9 +10,10 @@ import { hasContentSlug, hasSolutionSlug, readContentSections, + readCookbookGoal, readCookbookIntro, } from "../src/lib/content-markdown"; -import { joinContentSections } from "../src/lib/content-sections"; +import { goalOnly } from "../src/lib/content-sections"; import { buildCookbookMarkdownDocument } from "../src/lib/cookbook-composition"; import { expandMdxImports } from "../src/lib/expand-mdx"; import { @@ -168,7 +169,7 @@ function readRecipeMarkdown(rootDir: string, slug: string): string { if (!hasContentSlug(rootDir, "recipes", slug)) { throw new Error(`Recipe page not found: "${slug}"`); } - return joinContentSections(readContentSections(rootDir, "recipes", slug)); + return goalOnly(readContentSections(rootDir, "recipes", slug)); } function readExampleMarkdown(rootDir: string, slug: string): string { @@ -176,9 +177,7 @@ function readExampleMarkdown(rootDir: string, slug: string): string { throw new Error(`Example page not found: "${slug}"`); } - const content = joinContentSections( - readContentSections(rootDir, "examples", slug), - ); + const content = goalOnly(readContentSections(rootDir, "examples", slug)); const example = examples.find((e) => e.id === slug); if (!example) { @@ -230,11 +229,14 @@ function readCookbookMarkdown(rootDir: string, slug: string): string { }; }); + const goal = readCookbookGoal(rootDir, slug); return buildCookbookMarkdownDocument({ cookbookName: cookbook.name, cookbookDescription: cookbook.description, - intro: readCookbookIntro(rootDir, slug), + goal, + intro: goal ? 
undefined : readCookbookIntro(rootDir, slug), recipes: recipeInputs, + mode: "agent", }); } @@ -345,7 +347,7 @@ export function loadAgentPromptParts( intentRecipe: readContent("intent-recipe"), intentCookbook: readContent("intent-cookbook"), intentExample: readContent("intent-example"), - localBootstrap: joinContentSections( + localBootstrap: goalOnly( readContentSections(rootDir, "recipes", LOCAL_BOOTSTRAP_SLUG), ), }; diff --git a/content/cookbooks/ai-chat-app/intro.md b/content/cookbooks/ai-chat-app/goal.md similarity index 98% rename from content/cookbooks/ai-chat-app/intro.md rename to content/cookbooks/ai-chat-app/goal.md index 2c02796..7dd70d1 100644 --- a/content/cookbooks/ai-chat-app/intro.md +++ b/content/cookbooks/ai-chat-app/goal.md @@ -1,5 +1,3 @@ -## What you are building - A streaming AI chat app on Databricks: a user sends a message, the server authenticates with the Databricks CLI profile (or a service-principal token in production), calls an AI Gateway chat endpoint via the OpenAI-compatible provider, and streams the answer back token-by-token. Chat sessions and messages are persisted in Lakebase Postgres so conversations survive page refreshes and redeploys. ### How the steps fit together diff --git a/content/cookbooks/app-with-lakebase/goal.md b/content/cookbooks/app-with-lakebase/goal.md new file mode 100644 index 0000000..3bd7c8f --- /dev/null +++ b/content/cookbooks/app-with-lakebase/goal.md @@ -0,0 +1,6 @@ +A Databricks App with Lakebase Postgres for persistent data storage. The app has schema setup, full CRUD API routes, and deploys to the Databricks Apps platform. + +### Components + +1. **Create a Lakebase Instance** — provision a managed Postgres project with an endpoint and database, and collect the connection values. +2. **Lakebase Data Persistence** — add the Lakebase plugin to your app with schema initialization, CRUD routes, and data access patterns. 
diff --git a/content/cookbooks/genie-analytics-app/goal.md b/content/cookbooks/genie-analytics-app/goal.md new file mode 100644 index 0000000..8755fcb --- /dev/null +++ b/content/cookbooks/genie-analytics-app/goal.md @@ -0,0 +1,5 @@ +A minimal Databricks App with AI/BI Genie conversational analytics. Users ask natural-language questions about their data and get SQL-powered answers through an embedded Genie chat interface. + +### Components + +1. **Genie Conversational Analytics** — configure a Genie space, wire up the server and client plugins, declare app resources, and deploy. diff --git a/content/cookbooks/lakebase-off-platform/goal.md b/content/cookbooks/lakebase-off-platform/goal.md new file mode 100644 index 0000000..6cbdad1 --- /dev/null +++ b/content/cookbooks/lakebase-off-platform/goal.md @@ -0,0 +1,7 @@ +A connection from an app hosted outside the Databricks Apps platform (for example on AWS, Vercel, or Netlify) to Lakebase Postgres. The app uses portable environment configuration, token management with automatic credential refresh, and Drizzle ORM for type-safe database access. + +### Components + +1. **Lakebase Environment Management** — set up a Zod-validated environment configuration for secure Lakebase connection values. +2. **Lakebase Token Management** — implement token fetch, cache, and automatic refresh for Lakebase Postgres credentials. +3. **Drizzle ORM with Lakebase** — configure a Drizzle ORM pool with auto-refreshing credentials and migration support. 
diff --git a/content/cookbooks/operational-data-analytics/goal.md b/content/cookbooks/operational-data-analytics/goal.md new file mode 100644 index 0000000..371f320 --- /dev/null +++ b/content/cookbooks/operational-data-analytics/goal.md @@ -0,0 +1,8 @@ +An end-to-end operational data analytics pipeline: data flows from an OLTP database (Lakebase Postgres) through CDC replication into Unity Catalog, gets transformed through a medallion architecture (bronze/silver/gold layers), and is ready for dashboards and downstream consumers. + +### Components + +1. **Unity Catalog Setup** — configure Unity Catalog with external S3 storage for your destination catalog and schema. +2. **Create a Lakebase Instance** — provision a managed Postgres project as the OLTP source. +3. **Lakehouse Sync CDC** — enable change data capture replication from Lakebase tables to Unity Catalog Delta history tables. +4. **Medallion Architecture from CDC** — build silver (current-state) and gold (analytical) layers from the CDC history tables using Lakeflow Declarative Pipelines. diff --git a/content/examples/agentic-support-console/content.md b/content/examples/agentic-support-console/goal.md similarity index 98% rename from content/examples/agentic-support-console/content.md rename to content/examples/agentic-support-console/goal.md index fbe804a..7788369 100644 --- a/content/examples/agentic-support-console/content.md +++ b/content/examples/agentic-support-console/goal.md @@ -1,5 +1,3 @@ -## Agentic Support Console - This template brings together the full Databricks developer stack into a single operational data application: an AI-powered support console where every customer message is automatically triaged by an LLM, and support agents review, approve, or override the suggestion from a purpose-built internal tool. 
### Data Flow diff --git a/content/examples/content-moderator/content.md b/content/examples/content-moderator/goal.md similarity index 99% rename from content/examples/content-moderator/content.md rename to content/examples/content-moderator/goal.md index 36eedad..133cab1 100644 --- a/content/examples/content-moderator/content.md +++ b/content/examples/content-moderator/goal.md @@ -1,5 +1,3 @@ -## Content Moderator - This template demonstrates an internal content moderation tool built on Databricks: authors submit content for different channels (company blog, LinkedIn, Twitter, newsletter, press releases), moderators maintain per-channel guidelines, and an LLM scores each submission against those guidelines before a human reviewer makes the final call. ### Data Flow diff --git a/content/examples/inventory-intelligence/content.md b/content/examples/inventory-intelligence/goal.md similarity index 99% rename from content/examples/inventory-intelligence/content.md rename to content/examples/inventory-intelligence/goal.md index e95119f..c261903 100644 --- a/content/examples/inventory-intelligence/content.md +++ b/content/examples/inventory-intelligence/goal.md @@ -1,5 +1,3 @@ -## Inventory Intelligence - This template builds a full retail inventory management system on the Databricks stack: a React app where store managers monitor stock health, review AI-generated replenishment recommendations, and approve purchase orders — all powered by a live medallion pipeline and pluggable demand forecast job. ### Data Flow diff --git a/content/examples/rag-chat/content.md b/content/examples/rag-chat/content.md index e54cf79..3551c04 100644 --- a/content/examples/rag-chat/content.md +++ b/content/examples/rag-chat/content.md @@ -1,36 +1,21 @@ -## RAG Chat App +### 2. 
Create the Lakebase Postgres prerequisites -This template demonstrates a Retrieval-Augmented Generation chat app built on Databricks: a user question is embedded, similar documents are retrieved from a pgvector store in Lakebase Postgres, and the retrieved context is injected into a Model Serving call that streams the answer back. Conversations and sources are persisted per chat in Lakebase. +The template's AppKit Lakebase plugin requires an existing Postgres **branch** and **database**. `databricks postgres create-project` automatically provisions a default branch named `production` and a default database on it, so one command is all you need. Pick a short lowercase project id and export the resolved resource names — the next step's `databricks apps init` command reads them as shell variables. -### Data Flow +```bash +PROJECT_ID=rag-chat -All retrieval and chat state live in Lakebase Postgres; generation uses AI Gateway: +databricks postgres create-project "$PROJECT_ID" -1. **Seeding** pulls a handful of Wikipedia articles on startup, chunks them by paragraph, embeds each chunk through the AI Gateway embeddings endpoint (`databricks-gte-large-en` by default), and writes rows into `rag.documents` with a `vector(1024)` column. -2. **User turns** are embedded with the same endpoint. The server runs a pgvector cosine-similarity search to retrieve the top-k matching chunks. -3. **Context injection**: the retrieved chunks are prepended as a system message before the user's conversation history is sent to the chat completion endpoint (`databricks-gpt-5-4-mini` by default) via AI Gateway. -4. **Streaming**: `streamText` streams tokens back to the client while an `onFinish` callback appends the assistant turn to Lakebase. -5. **Chat history**: every user and assistant turn is persisted in `chat.messages`, keyed by `chat_id`, so conversations can be resumed. 
+export BRANCH_NAME="projects/$PROJECT_ID/branches/production" +export DATABASE_NAME=$(databricks api get "/api/2.0/postgres/$BRANCH_NAME/databases" -o json | \ + python3 -c "import json,sys; print(json.load(sys.stdin)['databases'][0]['name'])") -### Template Approach +echo "Branch: $BRANCH_NAME" +echo "Database: $DATABASE_NAME" +``` -Unlike the other templates, **this template is designed to be consumed via `databricks apps init`**, not `git clone`. The init flow: +`create-project` is long-running; the CLI waits for it to finish by default. **If it reports `already exists`:** -- Prompts for the Lakebase Postgres branch and database resource names. -- Auto-resolves `PGHOST`, `PGDATABASE`, and `LAKEBASE_ENDPOINT` into your local `.env` by calling the Lakebase APIs. -- Writes `DATABRICKS_CONFIG_PROFILE` or `DATABRICKS_HOST` based on your Databricks CLI configuration. -- Drops you into a ready-to-run project directory named by `--name`. - -This validates the [AppKit templates system](/docs/appkit/v0/development/templates) as a way to ship DevHub templates — see `appkit.plugins.json` and `.env.tmpl` in the template for how it works. - -### What to Adapt - -Setup and provisioning are documented in the repository's **`template/README.md`**. - -To make this template your own: - -- **Lakebase**: Point the bundle at your own Lakebase project, branch, and database (prompted at init time). -- **Model Serving endpoint**: Override `DATABRICKS_ENDPOINT` for a different chat model (e.g. `databricks-claude-sonnet-4`). -- **Embeddings endpoint**: Override `DATABRICKS_EMBEDDING_ENDPOINT` if you want a different embedding model. Make sure the `vector(N)` dimension in `server/lib/rag-store.ts` matches. -- **Seed data**: Replace the Wikipedia article list in `server/lib/seed-data.ts` with your own corpus. The chunking function splits on paragraph boundaries — adapt if your source has different structure. -- **Retrieval**: The default top-k is 5 and the similarity metric is cosine. 
Tune in `retrieveSimilar()`. +- **Prefer picking a different `PROJECT_ID`** (e.g. append a short suffix) and re-export `BRANCH_NAME` / `DATABASE_NAME` from the new id. Lakebase projects can hold data that other apps and pipelines depend on, so do **not** run `databricks postgres delete-project` on an existing project without explicit confirmation from the user that nothing else uses it. +- **Eventual-consistency exception:** if you just deleted a project with this id in the same session and `databricks postgres list-projects` no longer shows it, wait 30–60s and retry `create-project` — the control plane is briefly inconsistent after deletion. diff --git a/content/examples/rag-chat/deployment.md b/content/examples/rag-chat/deployment.md deleted file mode 100644 index 7e63841..0000000 --- a/content/examples/rag-chat/deployment.md +++ /dev/null @@ -1,25 +0,0 @@ -### 4. Install and deploy - -`databricks apps init` already wrote `.env` with the resolved Lakebase connection details. For a deploy-only flow you can go straight to deploy — `DATABRICKS_WORKSPACE_ID` and the Lakebase variables are auto-injected into the deployed runtime from `app.yaml` and the bound `postgres` resource. - -```bash -cd rag-chat-app -npm install -npm run deploy -``` - -`npm run deploy` wraps three steps: hydrate the bundle variable overrides from `.env` + the Lakebase Postgres API (`scripts/sync-bundle-vars.mjs`), `databricks bundle deploy` (creates the Databricks app on first run), and `databricks bundle run app` (starts it and prints the URL). - -#### Optional — run locally before deploying - -Local `npm run dev` needs `DATABRICKS_WORKSPACE_ID` (the **numeric** id used to build the AI Gateway URL) in `.env`. 
In the deployed app this is auto-injected; locally you have to fetch and patch it yourself: - -```bash -WORKSPACE_ID=$(databricks api get /api/2.1/unity-catalog/current-metastore-assignment \ - | python3 -c "import json,sys;print(json.load(sys.stdin)['workspace_id'])") -sed -i.bak "s/^DATABRICKS_WORKSPACE_ID=.*/DATABRICKS_WORKSPACE_ID=$WORKSPACE_ID/" .env && rm .env.bak - -npm run dev -``` - -(Optionally override `DATABRICKS_ENDPOINT` / `DATABRICKS_EMBEDDING_ENDPOINT` in `.env` if you want different chat / embeddings endpoints — also applies to deploy via `app.yaml`.) diff --git a/content/examples/rag-chat/goal.md b/content/examples/rag-chat/goal.md new file mode 100644 index 0000000..88fb0e9 --- /dev/null +++ b/content/examples/rag-chat/goal.md @@ -0,0 +1,34 @@ +This template demonstrates a Retrieval-Augmented Generation chat app built on Databricks: a user question is embedded, similar documents are retrieved from a pgvector store in Lakebase Postgres, and the retrieved context is injected into a Model Serving call that streams the answer back. Conversations and sources are persisted per chat in Lakebase. + +### Data Flow + +All retrieval and chat state live in Lakebase Postgres; generation uses AI Gateway: + +1. **Seeding** pulls a handful of Wikipedia articles on startup, chunks them by paragraph, embeds each chunk through the AI Gateway embeddings endpoint (`databricks-gte-large-en` by default), and writes rows into `rag.documents` with a `vector(1024)` column. +2. **User turns** are embedded with the same endpoint. The server runs a pgvector cosine-similarity search to retrieve the top-k matching chunks. +3. **Context injection**: the retrieved chunks are prepended as a system message before the user's conversation history is sent to the chat completion endpoint (`databricks-gpt-5-4-mini` by default) via AI Gateway. +4. **Streaming**: `streamText` streams tokens back to the client while an `onFinish` callback appends the assistant turn to Lakebase. +5. 
**Chat history**: every user and assistant turn is persisted in `chat.messages`, keyed by `chat_id`, so conversations can be resumed. + +### Template Approach + +Unlike the other templates, **this template is designed to be consumed via `databricks apps init`**, not `git clone`. The init flow: + +- Prompts for the Lakebase Postgres branch and database resource names. +- Auto-resolves `PGHOST`, `PGDATABASE`, and `LAKEBASE_ENDPOINT` into your local `.env` by calling the Lakebase APIs. +- Writes `DATABRICKS_CONFIG_PROFILE` or `DATABRICKS_HOST` based on your Databricks CLI configuration. +- Drops you into a ready-to-run project directory named by `--name`. + +This validates the [AppKit templates system](/docs/appkit/v0/development/templates) as a way to ship DevHub templates — see `appkit.plugins.json` and `.env.tmpl` in the template for how it works. + +### What to Adapt + +Setup and provisioning are documented in the repository's **`template/README.md`**. + +To make this template your own: + +- **Lakebase**: Point the bundle at your own Lakebase project, branch, and database (prompted at init time). +- **Model Serving endpoint**: Override `DATABRICKS_ENDPOINT` for a different chat model (e.g. `databricks-claude-sonnet-4`). +- **Embeddings endpoint**: Override `DATABRICKS_EMBEDDING_ENDPOINT` if you want a different embedding model. Make sure the `vector(N)` dimension in `server/lib/rag-store.ts` matches. +- **Seed data**: Replace the Wikipedia article list in `server/lib/seed-data.ts` with your own corpus. The chunking function splits on paragraph boundaries — adapt if your source has different structure. +- **Retrieval**: The default top-k is 5 and the similarity metric is cosine. Tune in `retrieveSimilar()`. diff --git a/content/examples/rag-chat/prerequisites.md b/content/examples/rag-chat/prerequisites.md deleted file mode 100644 index 3551c04..0000000 --- a/content/examples/rag-chat/prerequisites.md +++ /dev/null @@ -1,21 +0,0 @@ -### 2. 
Create the Lakebase Postgres prerequisites - -The template's AppKit Lakebase plugin requires an existing Postgres **branch** and **database**. `databricks postgres create-project` automatically provisions a default branch named `production` and a default database on it, so one command is all you need. Pick a short lowercase project id and export the resolved resource names — the next step's `databricks apps init` command reads them as shell variables. - -```bash -PROJECT_ID=rag-chat - -databricks postgres create-project "$PROJECT_ID" - -export BRANCH_NAME="projects/$PROJECT_ID/branches/production" -export DATABASE_NAME=$(databricks api get "/api/2.0/postgres/$BRANCH_NAME/databases" -o json | \ - python3 -c "import json,sys; print(json.load(sys.stdin)['databases'][0]['name'])") - -echo "Branch: $BRANCH_NAME" -echo "Database: $DATABASE_NAME" -``` - -`create-project` is long-running; the CLI waits for it to finish by default. **If it reports `already exists`:** - -- **Prefer picking a different `PROJECT_ID`** (e.g. append a short suffix) and re-export `BRANCH_NAME` / `DATABASE_NAME` from the new id. Lakebase projects can hold data that other apps and pipelines depend on, so do **not** run `databricks postgres delete-project` on an existing project without explicit confirmation from the user that nothing else uses it. -- **Eventual-consistency exception:** if you just deleted a project with this id in the same session and `databricks postgres list-projects` no longer shows it, wait 30–60s and retry `create-project` — the control plane is briefly inconsistent after deletion. 
diff --git a/content/examples/saas-tracker/content.md b/content/examples/saas-tracker/goal.md similarity index 98% rename from content/examples/saas-tracker/content.md rename to content/examples/saas-tracker/goal.md index 9a57829..8451358 100644 --- a/content/examples/saas-tracker/content.md +++ b/content/examples/saas-tracker/goal.md @@ -1,5 +1,3 @@ -## SaaS Subscription Tracker - This template demonstrates a straightforward internal CRUD tool built on Databricks: a SaaS subscription tracker where teams log the tools they use, who owns each subscription, what it costs, and when it renews. A Genie space provides self-serve analytics over the subscription data. ### Data Flow diff --git a/content/examples/vacation-rentals/content.md b/content/examples/vacation-rentals/goal.md similarity index 98% rename from content/examples/vacation-rentals/content.md rename to content/examples/vacation-rentals/goal.md index 9e045a3..c91bb73 100644 --- a/content/examples/vacation-rentals/content.md +++ b/content/examples/vacation-rentals/goal.md @@ -1,5 +1,3 @@ -## Vacation Rentals Operations Console - This template demonstrates an internal operations console for a vacation rentals platform ("Wanderbricks"). Operators see revenue performance by destination, work through a booking queue with per-booking flags and agent notes, and ask natural-language questions about the business through an embedded Genie chat panel. ### Data Flow diff --git a/content/intent-cookbook.md b/content/intent-cookbook.md index 8225735..eda0e71 100644 --- a/content/intent-cookbook.md +++ b/content/intent-cookbook.md @@ -2,22 +2,23 @@ The user copied the prompt for a DevHub **cookbook** — **{{name}}** ({{url}}). -A cookbook is a step-by-step pattern guide that walks the user through building an **archetype application** end-to-end on Databricks. Cookbooks are composed from multiple recipes — they show how the recipes fit together into a working app (e.g. 
an AI chat app with persistence, a Lakebase-backed CRUD app, a RAG chat app). The cookbook is the recommended starting point when the user wants the whole archetype, not just one piece. +A cookbook is a composed pattern that builds an **archetype application** end-to-end on Databricks from multiple recipe goals. The cookbook goal below describes the overall app and its components. Your installed Databricks agent skills contain the implementation patterns for each component. + +Use the cookbook goal for scope and architecture; use the skills for implementation. Your job in this conversation is to: 1. Clarify the user's **goal for this archetype** — production app, learning project, or demo. 2. Verify the local Databricks dev environment is ready (block below). -3. Walk the user through the cookbook section by section, asking the questions each section surfaces, and stitching the included recipes together coherently. -4. When the cookbook content and your installed Databricks agent skills cover the same topic, **treat the skills as the source of truth** for implementation patterns, CLI commands, and code. The cookbook provides context and scope; the skills provide the authoritative how-to. +3. Use the component goals to understand scope, then **use your installed Databricks agent skills** to implement each component step by step. ## Step 1 — Clarify intent before touching code Ask **one** question, ideally with a multiple-choice tool: -- **New project from scratch** following this archetype end-to-end. → Run the local-bootstrap below, then scaffold a fresh project and walk through the cookbook step by step. +- **New project from scratch** following this archetype end-to-end. → Run the local-bootstrap below, then scaffold a fresh project and work through each component. - **Add this archetype to an existing Databricks app**. → Read the user's existing project first; introduce the archetype's pieces incrementally without breaking what's there. 
-- **Just learning the pattern**: the user wants to understand the archetype before deciding to build it. → Walk through the steps as a guided tour; do not execute commands. +- **Just learning the pattern**: the user wants to understand the archetype before deciding to build it. → Walk through the component goals as a guided tour; do not execute commands. - **Not sure — help me decide**: ask follow-ups about the user's end goal (who uses the app, what data, deployed where) and map back to one of the above. ## Step 2 — Pin down archetype-specific decisions @@ -31,6 +32,6 @@ Cookbooks compose multiple Databricks primitives — Lakebase, Agent Bricks, Mod ## Step 3 — Verify the local Databricks dev environment -Cookbooks run multiple `databricks` and AppKit CLI commands across their steps; a misconfigured CLI profile fails immediately and looks like a cookbook bug. **Walk the user through the local-bootstrap block below first**, even if they say their environment is already set up. +Cookbooks run multiple CLI and AppKit commands across their components; a misconfigured CLI profile fails immediately and looks like a cookbook bug. **Walk the user through the local-bootstrap block below first**, even if they say their environment is already set up. -The full cookbook content the user is focused on is attached after the local-bootstrap block. +The cookbook goal and component goals are attached after the local-bootstrap block. diff --git a/content/intent-recipe.md b/content/intent-recipe.md index 145cadf..3127a13 100644 --- a/content/intent-recipe.md +++ b/content/intent-recipe.md @@ -4,25 +4,26 @@ The user copied the prompt for a DevHub **recipe** — **{{name}}** ({{url}}). A recipe is a focused, opinionated how-to for a single Databricks pattern (e.g. wiring Lakebase Change Data Feed, creating a Model Serving endpoint, persisting chat history). Recipes are designed to be dropped into an existing project or composed into a larger build. 
They are deliberately narrow — they solve one thing well. +The recipe goal below describes **what** the user wants to build — the outcome and key decisions. Your installed Databricks agent skills contain the **how** — the implementation patterns, CLI commands, and code. Use the goal for scope and context; use the skills for implementation. + Your job in this conversation is to: 1. Clarify whether the user is **integrating this recipe into an existing project** or **starting fresh from scratch**, and adapt accordingly. 2. Verify the local Databricks dev environment is ready (block below). -3. Walk the user through the recipe step by step, asking the questions the recipe itself surfaces. -4. When the recipe content and your installed Databricks agent skills cover the same topic, **treat the skills as the source of truth** for implementation patterns, CLI commands, and code. The recipe provides context and scope; the skills provide the authoritative how-to. +3. Use the recipe goal to understand scope, then **use your installed Databricks agent skills** to implement it step by step. ## Step 1 — Clarify intent before touching code Ask **one** question, ideally with a multiple-choice tool (see guidelines): -- **Existing project**: the user already has a Databricks app / repo and wants to add this pattern to it. → Read the user's existing project structure first; the recipe steps will be applied surgically. -- **New project from this recipe**: the user wants this recipe as the starting point of a new app. → Run the local-bootstrap below first, then follow the recipe. -- **Just learning**: the user wants to read through the recipe and understand it without building anything yet. → Walk through the steps as a tutorial; do not execute commands. +- **Existing project**: the user already has a Databricks app / repo and wants to add this pattern to it. → Read the user's existing project structure first; apply the pattern surgically. 
+- **New project from this recipe**: the user wants this recipe as the starting point of a new app. → Run the local-bootstrap below first, then implement. +- **Just learning**: the user wants to understand the pattern without building anything yet. → Walk through the goal and explain what each piece does; do not execute commands. - **Not sure — help me decide**: ask the user what they're trying to accomplish at the project level, then map back to one of the above. ## Step 2 — Pin down recipe-specific decisions -Once the integration mode is clear, ask any follow-ups the recipe itself surfaces — typically about which Databricks resources to use: +Once the integration mode is clear, ask any follow-ups — typically about which Databricks resources to use: - Should we **create new resources** (catalog, schema, Lakebase instance, serving endpoint) or **reuse existing ones** the user already has? Never assume; always ask. - Which **Databricks profile** should the CLI commands target? (`databricks auth profiles` to list valid profiles.) @@ -30,6 +31,6 @@ Once the integration mode is clear, ask any follow-ups the recipe itself surface ## Step 3 — Verify the local Databricks dev environment -Whether integrating or starting fresh, the recipe's commands assume a working Databricks CLI profile and (for app-related recipes) an AppKit project. **Walk the user through the local-bootstrap block below before running any recipe commands** — even if they think the environment is already set up, the verification steps are quick and prevent confusing failures downstream. +Whether integrating or starting fresh, your skills' commands assume a working Databricks CLI profile and (for app-related recipes) an AppKit project. **Walk the user through the local-bootstrap block below before running any commands** — even if they think the environment is already set up. -The full recipe content the user is focused on is attached after the local-bootstrap block.
+The recipe goal the user is focused on is attached after the local-bootstrap block. diff --git a/content/recipes/ai-chat-model-serving/goal.md b/content/recipes/ai-chat-model-serving/goal.md new file mode 100644 index 0000000..fd5b08e --- /dev/null +++ b/content/recipes/ai-chat-model-serving/goal.md @@ -0,0 +1,8 @@ +Build a streaming AI chat experience in a Databricks App using Vercel AI SDK with Databricks Model Serving and OpenAI-compatible endpoints. + +When done, you will have: + +- A real-time streaming chat interface in your Databricks App +- Integration with Databricks Model Serving via AI Gateway +- Server-side chat transport and client-side chat UI wired together +- A deployed app where users can converse with a Databricks-hosted LLM diff --git a/content/recipes/embeddings-generation/goal.md b/content/recipes/embeddings-generation/goal.md new file mode 100644 index 0000000..f59d640 --- /dev/null +++ b/content/recipes/embeddings-generation/goal.md @@ -0,0 +1,7 @@ +Generate text embeddings from a Databricks AI Gateway endpoint using the Databricks SDK. + +When done, you will have: + +- A configured embedding endpoint in your app environment +- A reusable helper function that generates vector embeddings from text input +- Integration with your existing Databricks workspace client via AppKit diff --git a/content/recipes/foundation-models-api/goal.md b/content/recipes/foundation-models-api/goal.md new file mode 100644 index 0000000..0068164 --- /dev/null +++ b/content/recipes/foundation-models-api/goal.md @@ -0,0 +1,7 @@ +Access Databricks foundation models through AI Gateway endpoints with built-in governance, monitoring, and streaming support. 
+ +When done, you will have: + +- A configured AI Gateway endpoint for chat inference in your app +- A query helper using the Databricks SDK for standard request/response interactions +- An optional streaming setup using the Vercel AI SDK for real-time chat responses diff --git a/content/recipes/genie-conversational-analytics/content.md b/content/recipes/genie-conversational-analytics/content.md index 2ecc8d8..79ee0c9 100644 --- a/content/recipes/genie-conversational-analytics/content.md +++ b/content/recipes/genie-conversational-analytics/content.md @@ -53,27 +53,33 @@ databricks apps init \ **Warning: Fix generated `databricks.yml` before deploying** -The scaffold generates a `genie_space_name` variable and references it as `name: ${var.genie_space_name}`, but never assigns a value. `bundle deploy` will fail with _no value assigned to required variable genie_space_name_. +The scaffold generates a `genie_space_name` variable and references it as `name: ${var.genie_space_name}`, but never assigns a value in `targets:`. `bundle deploy` will fail with _no value assigned to required variable genie_space_name_. -Your `variables:` block should look like this after the fix — only `genie_space_id`, no `genie_space_name`: +Assign both variables in your `targets:` block: ```yaml variables: genie_space_id: - description: Default Genie Space ID + description: Genie Space ID + genie_space_name: + description: Genie Space name + +targets: + default: + variables: + genie_space_id: <your-genie-space-id> + genie_space_name: <your-genie-space-name> ``` -And the `genie_space` resource block should use a hardcoded label: +The `genie_space` resource block should reference both variables: ```yaml genie_space: - name: genie-space + name: ${var.genie_space_name} space_id: ${var.genie_space_id} permission: CAN_RUN ``` -The `name: genie-space` is an internal label used by `app.yaml` (`valueFrom: genie-space`), not the Genie space display title. - Skip to step 8 to deploy.
--- diff --git a/content/recipes/genie-conversational-analytics/goal.md b/content/recipes/genie-conversational-analytics/goal.md new file mode 100644 index 0000000..2ea9039 --- /dev/null +++ b/content/recipes/genie-conversational-analytics/goal.md @@ -0,0 +1,8 @@ +Embed a Databricks AI/BI Genie chat interface in your app so users can explore data through natural language. + +When done, you will have: + +- A configured AI/BI Genie space connected to your data tables +- A Databricks App with an embedded Genie chat interface +- Server and client plugins wired together with proper app resource declarations +- A deployed app where users can ask questions about their data in plain language diff --git a/content/recipes/genie-multi-space/goal.md b/content/recipes/genie-multi-space/goal.md new file mode 100644 index 0000000..f3efa3c --- /dev/null +++ b/content/recipes/genie-multi-space/goal.md @@ -0,0 +1,8 @@ +Upgrade a single-space Genie app to let users switch between multiple AI/BI Genie spaces from a dropdown. + +When done, you will have: + +- Multiple Genie spaces accessible from a single app +- A dropdown selector for switching between named Genie spaces +- Automatic conversation state cleanup when switching spaces +- Server and client configuration supporting multiple space aliases diff --git a/content/recipes/lakebase-agent-memory/goal.md b/content/recipes/lakebase-agent-memory/goal.md new file mode 100644 index 0000000..e4ab43f --- /dev/null +++ b/content/recipes/lakebase-agent-memory/goal.md @@ -0,0 +1,8 @@ +Persist AI agent chat conversations to Lakebase so users can resume sessions, view full message history, and let the agent reason over previous turns across requests and deploys. 
+ +When done, you will have: + +- A relational schema (chats and messages tables) in Lakebase for storing conversations +- Durable persistence of every chat turn: user input, assistant replies, and tool calls +- An app where users can return to previous chat sessions and continue where they left off +- Agent memory that survives restarts, deploys, and machine changes diff --git a/content/recipes/lakebase-change-data-feed-autoscaling/goal.md b/content/recipes/lakebase-change-data-feed-autoscaling/goal.md new file mode 100644 index 0000000..4163344 --- /dev/null +++ b/content/recipes/lakebase-change-data-feed-autoscaling/goal.md @@ -0,0 +1,7 @@ +Replicate Lakebase Autoscaling Postgres tables into Unity Catalog as managed Delta tables using Lakehouse Sync, capturing every row-level change as SCD Type 2 history. + +When done, you will have: + +- Delta history tables in Unity Catalog with full change tracking for every insert, update, and delete +- Continuous CDC replication from Lakebase Autoscaling Postgres to the lakehouse with no external compute +- Operational data queryable in Spark SQL, notebooks, BI tools, and downstream pipelines diff --git a/content/recipes/lakebase-create-instance/goal.md b/content/recipes/lakebase-create-instance/goal.md new file mode 100644 index 0000000..23701be --- /dev/null +++ b/content/recipes/lakebase-create-instance/goal.md @@ -0,0 +1,7 @@ +Provision a managed Lakebase Postgres project on Databricks and collect the connection values needed by downstream recipes. 
+ +When done, you will have: + +- A managed Postgres cluster running in your Databricks workspace +- A production branch with an active endpoint and default database +- Connection values (host, endpoint path, database path, database name) ready for use in other Lakebase recipes diff --git a/content/recipes/lakebase-data-persistence/content.md b/content/recipes/lakebase-data-persistence/content.md index 9bb5f3f..4c06bdc 100644 --- a/content/recipes/lakebase-data-persistence/content.md +++ b/content/recipes/lakebase-data-persistence/content.md @@ -9,21 +9,18 @@ The code examples below use a generic `items` resource as a placeholder. Replace ### 1. New app: scaffold with the Lakebase feature ```bash -databricks apps init \ - --name \ - --version latest \ - --features=lakebase \ - --set 'lakebase.postgres.branch=projects//branches/production' \ - --set 'lakebase.postgres.database=projects//branches/production/databases/' \ - --set 'lakebase.postgres.databaseName=' \ - --set 'lakebase.postgres.endpointPath=projects//branches/production/endpoints/primary' \ - --set 'lakebase.postgres.host=' \ - --set 'lakebase.postgres.port=5432' \ - --set 'lakebase.postgres.sslmode=require' \ +databricks apps init --name --features lakebase \ + --set "lakebase.postgres.branch=" \ + --set "lakebase.postgres.database=" \ --run none --profile ``` -Use the values returned by `list-databases` and `list-endpoints`. The generated template currently requires all postgres fields together during non-interactive scaffolding. +Where `` is the full branch resource name (e.g. `projects//branches/`) and `` is the full database resource name (e.g. `projects//branches//databases/`). Get these from: + +```bash +databricks postgres list-branches projects/ --profile +databricks postgres list-databases projects//branches/ --profile +``` This scaffolds a complete app with Lakebase already wired up, including a sample CRUD app. Skip to step 3 to configure environment variables, then step 5 to deploy. 
@@ -37,7 +34,7 @@ The scaffolded Lakebase sample uses `lakebase` in route names and file paths to ### 2. Existing app: add Lakebase manually -The following changes match what `apps init --features=lakebase` generates. Apply them to an existing scaffolded AppKit app. +The following changes match what `apps init --features lakebase` generates. Apply them to an existing scaffolded AppKit app. > **Tip:** The code below may be outdated. To get the latest, clone `https://github.com/databricks/appkit` and look in the `template/` directory. Search for `{{if .plugins.lakebase}}` to find all lakebase-conditional files and blocks. Files entirely wrapped in that conditional are lakebase-only; shared files like `App.tsx` and `server.ts` contain conditional blocks you can extract. diff --git a/content/recipes/lakebase-data-persistence/goal.md b/content/recipes/lakebase-data-persistence/goal.md new file mode 100644 index 0000000..42cfe94 --- /dev/null +++ b/content/recipes/lakebase-data-persistence/goal.md @@ -0,0 +1,8 @@ +Add a managed Postgres database to your Databricks App using the Lakebase plugin, with schema setup, table creation, and full CRUD REST API routes. + +When done, you will have: + +- A Databricks App connected to a Lakebase Postgres database +- Database schema and tables for your domain entities +- Working CRUD API routes (create, read, update, delete) backed by Lakebase +- A deployed app with persistent data storage diff --git a/content/recipes/lakebase-drizzle-off-platform/content.md b/content/recipes/lakebase-drizzle-off-platform/content.md index ef15bb1..2729e25 100644 --- a/content/recipes/lakebase-drizzle-off-platform/content.md +++ b/content/recipes/lakebase-drizzle-off-platform/content.md @@ -1,51 +1,31 @@ ## Drizzle ORM with Lakebase in an Off-Platform App -Connect Drizzle ORM to Lakebase in any Node.js server outside Databricks App Platform. Uses a `pg` Pool with a password callback for automatic credential refresh. 
+Connect Drizzle ORM to Lakebase in any Node.js server outside Databricks App Platform. Uses the `@databricks/lakebase` package for automatic OAuth token refresh. -### 1. Install Drizzle and the node-postgres driver +### 1. Install Drizzle and the Lakebase package ```bash -npm install drizzle-orm pg -npm install -D drizzle-kit @types/pg tsx +npm install drizzle-orm @databricks/lakebase +npm install -D drizzle-kit tsx ``` `drizzle-orm` and `drizzle-kit` must be on the same major version. If `drizzle-kit` errors with "This version of drizzle-kit is outdated," check that both packages share the same major (e.g. both 0.x or both 1.x). -### 2. Create a Lakebase-backed `pg` pool +### 2. Create a Lakebase-backed pool and Drizzle client -Create `src/lib/db/pool.ts`: +Create `src/lib/db/client.ts`. `createLakebasePool()` reads env vars automatically (`PGHOST`, `PGDATABASE`, `LAKEBASE_ENDPOINT`, `PGUSER`, etc.) and handles OAuth token refresh with a 2-minute buffer: ```typescript -import { Pool, type PoolConfig } from "pg"; -import { env } from "@/lib/env"; -import { getLakebasePostgresToken } from "@/lib/lakebase/tokens"; - -function sslConfig(mode: "require" | "prefer" | "disable"): PoolConfig["ssl"] { - switch (mode) { - case "require": - return { rejectUnauthorized: true }; - case "prefer": - return { rejectUnauthorized: false }; - case "disable": - return false; - } -} +import { drizzle } from "drizzle-orm/node-postgres"; +import { createLakebasePool } from "@databricks/lakebase"; +import * as itemsSchema from "@/lib/items/schema"; -export function createLakebasePool(): Pool { - return new Pool({ - host: env.PGHOST, - port: env.PGPORT, - database: env.PGDATABASE, - user: env.PGUSER, - password: () => getLakebasePostgresToken(), - ssl: sslConfig(env.PGSSLMODE), - max: 10, - idleTimeoutMillis: 30_000, - connectionTimeoutMillis: 10_000, - }); -} +const pool = createLakebasePool(); +export const db = drizzle({ client: pool, schema: { ...itemsSchema } }); ``` +> 
`@databricks/lakebase` is for **Lakebase Autoscaling only** (not compatible with Provisioned). See the manual alternative at the end if you need Provisioned support. + ### 3. Define a Drizzle schema Create `src/lib/items/schema.ts` with a starter table. Adapt the table name, columns, and types to your domain (e.g. `products`, `orders`, `users`): @@ -64,53 +44,25 @@ export const items = pgTable("items", { Add more schema files under `src/lib//schema.ts` as your app grows. The `drizzle.config.ts` glob (`./src/lib/*/schema.ts`) picks them all up automatically. -### 4. Initialize Drizzle with the pool +### 4. Write the migration script -Create `src/lib/db/client.ts`. Import every domain schema and spread it into the `schema` option: +Create `scripts/db-migrate.ts`. This uses the same `createLakebasePool()` with automatic credential handling — no need to build a temporary `DATABASE_URL`: ```typescript import { drizzle } from "drizzle-orm/node-postgres"; -import { createLakebasePool } from "@/lib/db/pool"; -import * as itemsSchema from "@/lib/items/schema"; +import { migrate } from "drizzle-orm/node-postgres/migrator"; +import { createLakebasePool } from "@databricks/lakebase"; const pool = createLakebasePool(); -export const db = drizzle({ client: pool, schema: { ...itemsSchema } }); +const db = drizzle({ client: pool }); +await migrate(db, { migrationsFolder: "./src/lib/db/migrations" }); +await pool.end(); +console.log("Migrations applied successfully"); ``` -### 5. Handle drizzle-kit migrations with a temporary `DATABASE_URL` +### 5. Keep `drizzle.config.ts` minimal -`drizzle-kit` needs a connection string and cannot use `pg` password callbacks. 
Build a one-time URL with a fresh Lakebase credential in `scripts/db-migrate.ts`: - -```typescript -import { execSync } from "node:child_process"; -import { env } from "@/lib/env"; -import { getLakebasePostgresToken } from "@/lib/lakebase/tokens"; - -async function runMigrations() { - const token = await getLakebasePostgresToken(); - const encodedUser = encodeURIComponent(env.PGUSER); - const encodedPassword = encodeURIComponent(token); - - const databaseUrl = - `postgresql://${encodedUser}:${encodedPassword}` + - `@${env.PGHOST}:${env.PGPORT}/${env.PGDATABASE}` + - `?sslmode=${env.PGSSLMODE}`; - - execSync("npx drizzle-kit migrate", { - stdio: "inherit", - env: { ...process.env, DATABASE_URL: databaseUrl }, - }); -} - -runMigrations().catch((error) => { - console.error(error); - process.exit(1); -}); -``` - -### 6. Keep `drizzle.config.ts` minimal - -Lakebase Postgres passwords are short-lived tokens, so there is no static `DATABASE_URL` to store in `.env`. The migration script from step 5 builds a temporary URL with a fresh credential and passes it as `DATABASE_URL` when it shells out to `drizzle-kit migrate`. Commands like `generate` only read schema files and never connect, so `dbCredentials` is optional: +Commands like `generate` only read schema files and never connect, so no `dbCredentials` are needed: ```typescript import { defineConfig } from "drizzle-kit"; @@ -119,13 +71,10 @@ export default defineConfig({ schema: "./src/lib/*/schema.ts", out: "./src/lib/db/migrations", dialect: "postgresql", - ...(process.env.DATABASE_URL && { - dbCredentials: { url: process.env.DATABASE_URL }, - }), }); ``` -### 7. Verify schema generation and migration +### 6. Verify schema generation and migration Generate reads schema files locally (no database connection): @@ -143,7 +92,50 @@ npx dotenv -e .env.local -- npx tsx scripts/db-migrate.ts If both commands succeed, your Drizzle schema and Lakebase connection are working. 
+### Manual alternative (Provisioned or full control) + +If you cannot use `@databricks/lakebase` (e.g. Lakebase Provisioned, or you need full control over SSL and token refresh), build a manual `pg.Pool` with a password callback: + +```bash +npm install drizzle-orm pg +npm install -D drizzle-kit @types/pg tsx +``` + +```typescript +import { Pool, type PoolConfig } from "pg"; +import { env } from "@/lib/env"; +import { getLakebasePostgresToken } from "@/lib/lakebase/tokens"; + +function sslConfig(mode: "require" | "prefer" | "disable"): PoolConfig["ssl"] { + switch (mode) { + case "require": + return { rejectUnauthorized: true }; + case "prefer": + return { rejectUnauthorized: false }; + case "disable": + return false; + } +} + +export function createLakebasePool(): Pool { + return new Pool({ + host: env.PGHOST, + port: env.PGPORT, + database: env.PGDATABASE, + user: env.PGUSER, + password: () => getLakebasePostgresToken(), + ssl: sslConfig(env.PGSSLMODE), + max: 10, + idleTimeoutMillis: 30_000, + connectionTimeoutMillis: 10_000, + }); +} +``` + +For the migration script with this approach, build a temporary `DATABASE_URL` with a fresh credential and pass it to `drizzle-kit migrate` via `execSync`. + #### References - [Drizzle ORM with PostgreSQL](https://orm.drizzle.team/docs/get-started-postgresql) +- [`@databricks/lakebase` README](https://github.com/databricks/appkit/tree/main/packages/lakebase) - [Lakebase credentials API](https://docs.databricks.com/api/workspace/postgres/credentials) diff --git a/content/recipes/lakebase-drizzle-off-platform/goal.md b/content/recipes/lakebase-drizzle-off-platform/goal.md new file mode 100644 index 0000000..97c6ef0 --- /dev/null +++ b/content/recipes/lakebase-drizzle-off-platform/goal.md @@ -0,0 +1,8 @@ +Connect Drizzle ORM to Lakebase in any Node.js server outside Databricks App Platform, with automatic credential refresh and migration support. 
+ +When done, you will have: + +- A Lakebase-backed connection pool with automatic OAuth token refresh via `@databricks/lakebase` +- Drizzle ORM initialized with your schema and ready for type-safe queries +- A migration script that applies Drizzle migrations through the same pool, with no temporary connection URL +- A working Drizzle Kit configuration for schema generation and migrations diff --git a/content/recipes/lakebase-off-platform-env-management/goal.md b/content/recipes/lakebase-off-platform-env-management/goal.md new file mode 100644 index 0000000..a88f62c --- /dev/null +++ b/content/recipes/lakebase-off-platform-env-management/goal.md @@ -0,0 +1,8 @@ +Define and validate the environment variables needed to connect to Lakebase from apps deployed outside Databricks App Platform. + +When done, you will have: + +- All Lakebase connection values collected from the Databricks CLI +- A Zod-based environment validation module that fails fast on missing or invalid variables +- Support for both token auth (local dev) and M2M OAuth (production) +- An `.env.example` file documenting every required variable for your team and CI diff --git a/content/recipes/lakebase-pgvector/goal.md b/content/recipes/lakebase-pgvector/goal.md new file mode 100644 index 0000000..13f7507 --- /dev/null +++ b/content/recipes/lakebase-pgvector/goal.md @@ -0,0 +1,8 @@ +Enable vector similarity search in your Lakebase Postgres database using the pgvector extension, with a server-side module for storing and querying embeddings. 
+ +When done, you will have: + +- The pgvector extension enabled on your Lakebase instance +- A vector embedding table with configurable dimensions +- Server-side functions for inserting documents and performing similarity search +- An IVFFlat or HNSW index for efficient nearest-neighbor queries diff --git a/content/recipes/lakebase-token-management/goal.md b/content/recipes/lakebase-token-management/goal.md new file mode 100644 index 0000000..c58a646 --- /dev/null +++ b/content/recipes/lakebase-token-management/goal.md @@ -0,0 +1,7 @@ +Fetch, cache, and automatically refresh the short-lived Postgres credentials that Lakebase requires, supporting both token auth and M2M OAuth. + +When done, you will have: + +- A token manager that fetches and caches Lakebase Postgres credentials with automatic refresh before expiry +- Support for direct token auth (local development) and M2M OAuth (production) +- An optional local dev script for refreshing your workspace token in your env file diff --git a/content/recipes/medallion-architecture-from-cdc/goal.md b/content/recipes/medallion-architecture-from-cdc/goal.md new file mode 100644 index 0000000..00e566b --- /dev/null +++ b/content/recipes/medallion-architecture-from-cdc/goal.md @@ -0,0 +1,9 @@ +Transform Lakehouse Sync CDC history tables into a layered medallion architecture with bronze, silver, and gold layers using Lakeflow Declarative Pipelines. 
+ +When done, you will have: + +- A bronze layer provided by the upstream Lakehouse Sync CDC history tables (input to this recipe) +- A silver layer with deduplicated, current-state materialized views for each entity +- A gold layer with business aggregations and metrics as materialized views +- A scheduled Lakeflow Declarative Pipeline refreshing silver and gold layers incrementally +- All layers queryable as Unity Catalog tables via SQL, Spark, BI tools, and Genie diff --git a/content/recipes/model-serving-endpoint-creation/goal.md b/content/recipes/model-serving-endpoint-creation/goal.md new file mode 100644 index 0000000..3bb0135 --- /dev/null +++ b/content/recipes/model-serving-endpoint-creation/goal.md @@ -0,0 +1,7 @@ +Provision and validate a Databricks Model Serving endpoint for AI chat inference. + +When done, you will have: + +- A model serving endpoint running in your Databricks workspace +- The endpoint tested and confirmed ready for inference requests +- The endpoint name configured in your app for local development and deployment diff --git a/content/recipes/onboard-your-coding-agent/goal.md b/content/recipes/onboard-your-coding-agent/goal.md new file mode 100644 index 0000000..3a94622 --- /dev/null +++ b/content/recipes/onboard-your-coding-agent/goal.md @@ -0,0 +1,8 @@ +Make a Databricks repo agent-ready so your coding agent understands the Databricks platform and can fetch DevHub documentation on demand. 
+ +When done, you will have: + +- Databricks platform skills installed at project scope, traveling with the repo +- The DevHub Docs MCP server wired up for on-demand access to any DevHub page +- An agent configuration file pinning workspace defaults for the codebase +- A coding agent that generates correct Databricks code instead of guessing diff --git a/content/recipes/set-up-your-local-dev-environment/content.md b/content/recipes/set-up-your-local-dev-environment/content.md index 692c110..ec297d4 100644 --- a/content/recipes/set-up-your-local-dev-environment/content.md +++ b/content/recipes/set-up-your-local-dev-environment/content.md @@ -53,6 +53,8 @@ databricks -v ### 3. Authenticate a profile +Always use OAuth for local development. Personal Access Tokens (PATs) are for CI/CD or non-interactive environments only. + Browser-based OAuth is the default for local use: ```bash diff --git a/content/recipes/set-up-your-local-dev-environment/goal.md b/content/recipes/set-up-your-local-dev-environment/goal.md new file mode 100644 index 0000000..6993699 --- /dev/null +++ b/content/recipes/set-up-your-local-dev-environment/goal.md @@ -0,0 +1,7 @@ +Install the Databricks CLI, authenticate a profile, and verify the handshake. Every other DevHub template assumes this has already passed. + +When done, you will have: + +- Databricks CLI `0.296+` installed and on `PATH` +- An authenticated CLI profile (`databricks auth profiles` shows `Valid: YES`) +- A successful smoke test (`databricks current-user me` returns your identity) diff --git a/content/recipes/spin-up-databricks-app/content.md b/content/recipes/spin-up-databricks-app/content.md index 51fc1ac..a13c7e3 100644 --- a/content/recipes/spin-up-databricks-app/content.md +++ b/content/recipes/spin-up-databricks-app/content.md @@ -73,7 +73,11 @@ agent-browser open http://localhost:3000 Otherwise share the localhost URL with the user and ask them to click through the key flows. 
Do not deploy until the local app behaves as intended — Databricks Apps deploys are not free and a broken local build will not magically fix itself in production. -### 6. Validate and deploy +### 6. Update smoke tests + +Before validating, update `tests/smoke.spec.ts` to match your app's actual content. The default selectors look for "Minimal Databricks App" and "hello world" text, which will fail on any customized app. Use `getByRole`, `getByText`, or `getByPlaceholder` — never `getByLabelText` (that is a React Testing Library method, not Playwright). Keep result sets under the 1 MB analytics-event payload cap — queries returning thousands of rows cause `INVALID_REQUEST: Event exceeds max size`. Use `LIMIT` or aggregated queries. + +### 7. Validate and deploy Run the project validator first (build + typecheck + lint) so the deploy does not fail on something that would have been caught locally: @@ -89,7 +93,7 @@ databricks apps deploy --profile The CLI uploads the project, builds it on Databricks, and starts the app. On success it prints the workspace URL. -### 7. Verify the deployed app +### 8. Verify the deployed app ```bash databricks apps get --profile -o json diff --git a/content/recipes/spin-up-databricks-app/goal.md b/content/recipes/spin-up-databricks-app/goal.md new file mode 100644 index 0000000..767db71 --- /dev/null +++ b/content/recipes/spin-up-databricks-app/goal.md @@ -0,0 +1,7 @@ +Generate a working AppKit Databricks App from scratch and deploy it to your workspace. + +When done, you will have: + +- A scaffolded Databricks App project with selected plugins (e.g. 
Lakebase, analytics, Genie, model serving) +- A locally running development server for testing +- A deployed app accessible in your Databricks workspace diff --git a/content/recipes/sync-tables-autoscaling/content.md b/content/recipes/sync-tables-autoscaling/content.md index 971e37d..4136757 100644 --- a/content/recipes/sync-tables-autoscaling/content.md +++ b/content/recipes/sync-tables-autoscaling/content.md @@ -37,31 +37,48 @@ Autoscaling CUs are physically 8x smaller than Provisioned CUs, so per-CU throug ### 1. Create a synced table +> Your Lakebase database must be registered as a UC catalog first (one-time setup per project). If not already done: +> +> ```bash +> databricks postgres create-catalog \ +> --json '{ +> "spec": { +> "postgres_database": "", +> "branch": "projects//branches/" +> } +> }' --profile +> ``` + ```bash -databricks database create-synced-database-table \ +databricks postgres create-synced-table .. \ --json '{ - "name": "..", - "database_instance_name": "", - "logical_database_name": "", "spec": { "source_table_full_name": "..", "primary_key_columns": [""], "scheduling_policy": "", - "create_database_objects_if_missing": true + "branch": "projects//branches/", + "postgres_database": "databricks_postgres", + "create_database_objects_if_missing": true, + "new_pipeline_spec": { + "storage_catalog": "", + "storage_schema": "default" + } } }' --profile ``` -> If your Lakebase database is **registered as a Unity Catalog catalog**, you can omit `database_instance_name` and `logical_database_name`. +`new_pipeline_spec.storage_catalog` must be a **regular** UC catalog for DLT pipeline metadata, not the Lakebase catalog. Long-running operation; the CLI waits by default. Use `--no-wait` to return immediately. + +> **DABs:** Do not use `synced_database_tables` in DABs with Autoscaling projects — it maps to the Provisioned Terraform resource and may create unintended Provisioned instances. 
DAB support for Autoscaling synced tables is not yet available. Use the CLI commands above. Verify: ```bash -databricks database get-synced-database-table .. --profile +databricks postgres get-synced-table \ + "synced_tables/.." \ + --profile ``` -> **Important:** If your Autoscaling project was created via the `/postgres/` API (not `/database/`), programmatic synced table creation is not yet available via CLI. Use the Databricks UI as a fallback. In **Catalog**, select the source table → **Create synced table**, then choose your Lakebase project, branch, sync mode, and pipeline. This gap is expected to close soon. - ### 2. Configure pipeline reuse How you set up pipelines depends on your sync mode: @@ -80,8 +97,8 @@ The initial snapshot runs automatically on creation. For **Snapshot** and **Trig Trigger a sync update programmatically via the Databricks CLI. Look up the pipeline ID for the synced table, then start an update: ```bash -PIPELINE_ID=$(databricks database get-synced-database-table \ - .. \ +PIPELINE_ID=$(databricks postgres get-synced-table \ + "synced_tables/.." \ --output json --profile \ | jq -r '.data_synchronization_status.pipeline_id') diff --git a/content/recipes/sync-tables-autoscaling/goal.md b/content/recipes/sync-tables-autoscaling/goal.md new file mode 100644 index 0000000..89ec199 --- /dev/null +++ b/content/recipes/sync-tables-autoscaling/goal.md @@ -0,0 +1,7 @@ +Serve lakehouse data through Lakebase Autoscaling Postgres so your applications can query it with sub-10ms latency using a synced table that stays up to date automatically. 
+ +When done, you will have: + +- A synced table in Unity Catalog tracking the replication pipeline +- A read-only Postgres table in Lakebase queryable with sub-10ms latency from any standard Postgres client +- A managed Lakeflow pipeline keeping the data in sync via snapshot, triggered, or continuous mode diff --git a/content/recipes/unity-catalog-setup/goal.md b/content/recipes/unity-catalog-setup/goal.md new file mode 100644 index 0000000..e946594 --- /dev/null +++ b/content/recipes/unity-catalog-setup/goal.md @@ -0,0 +1,8 @@ +Create a Unity Catalog catalog backed by an external S3 bucket for scenarios that require custom storage control. + +When done, you will have: + +- An IAM role granting Databricks access to your S3 bucket +- A storage credential and external location registered in Unity Catalog +- A Unity Catalog catalog using your external S3 bucket as its storage root +- Infrastructure ready for Sync Tables, cross-account access, or custom lifecycle policies diff --git a/content/recipes/volume-file-upload/goal.md b/content/recipes/volume-file-upload/goal.md new file mode 100644 index 0000000..b340426 --- /dev/null +++ b/content/recipes/volume-file-upload/goal.md @@ -0,0 +1,8 @@ +Add file upload, browsing, download, delete, file type validation, and CSV row preview to your Databricks App using Unity Catalog Volumes. 
+ +When done, you will have: + +- A Unity Catalog Volume configured as file storage +- A file management UI with upload, browse, download, and delete capabilities +- File type validation and CSV row preview functionality +- Automatically registered HTTP routes for all file operations diff --git a/plugins/about-devhub.ts b/plugins/about-devhub.ts index 97676f1..4671d79 100644 --- a/plugins/about-devhub.ts +++ b/plugins/about-devhub.ts @@ -2,7 +2,7 @@ import { readFileSync } from "fs"; import { resolve } from "path"; import type { LoadContext, Plugin } from "@docusaurus/types"; import { ABOUT_DEVHUB_SLUG } from "../src/lib/bootstrap-prompt"; -import { joinContentSections } from "../src/lib/content-sections"; +import { goalOnly } from "../src/lib/content-sections"; import { readContentSections } from "../src/lib/content-markdown"; import type { AgentPromptParts } from "../src/lib/copy-preamble"; @@ -29,7 +29,7 @@ function readMarkdownFile(siteDir: string, slug: string): string { } function readLocalBootstrap(siteDir: string): string { - return joinContentSections( + return goalOnly( readContentSections(siteDir, "recipes", LOCAL_BOOTSTRAP_SLUG), ); } diff --git a/plugins/content-entries.ts b/plugins/content-entries.ts index bd1f781..89662b0 100644 --- a/plugins/content-entries.ts +++ b/plugins/content-entries.ts @@ -6,10 +6,7 @@ import { getSolutionSlugs, readContentSections, } from "../src/lib/content-markdown"; -import { - joinContentSections, - type ContentSections, -} from "../src/lib/content-sections"; +import { goalOnly, type ContentSections } from "../src/lib/content-sections"; import { routePathWithBaseUrl } from "../src/lib/site-paths"; import { recipes, @@ -53,12 +50,21 @@ function createFolderRouteModuleSource( sections: ContentSections, ): string { const section = entryType === "recipe" ? 
"recipes" : "examples"; + const hasGoal = sections.goal !== undefined; const hasPrereqs = sections.prerequisites !== undefined; const hasDeploy = sections.deployment !== undefined; - const imports: string[] = [ - `import Content from "@site/content/${section}/${slug}/content.md";`, - ]; + const imports: string[] = []; + if (hasGoal) { + imports.push( + `import Goal from "@site/content/${section}/${slug}/goal.md";`, + ); + } + if (sections.content !== undefined) { + imports.push( + `import Content from "@site/content/${section}/${slug}/content.md";`, + ); + } if (hasPrereqs) { imports.push( `import Prerequisites from "@site/content/${section}/${slug}/prerequisites.md";`, @@ -77,7 +83,11 @@ function createFolderRouteModuleSource( ? '

Deployment

\n ' : null; - const children = [prereqsBlock, " ", deployBlock] + const goalBlock = hasGoal ? " " : null; + const contentBlock = + sections.content !== undefined ? " " : null; + + const children = [goalBlock, prereqsBlock, contentBlock, deployBlock] .filter(Boolean) .join("\n"); @@ -214,7 +224,7 @@ export default function contentEntriesPlugin( slug, ); sectionsBySlug[slug] = sections; - rawMarkdownBySlug[slug] = joinContentSections(sections); + rawMarkdownBySlug[slug] = goalOnly(sections); } else { const filePath = resolve( context.siteDir, diff --git a/plugins/cookbooks.ts b/plugins/cookbooks.ts index 239a95e..9258135 100644 --- a/plugins/cookbooks.ts +++ b/plugins/cookbooks.ts @@ -1,12 +1,15 @@ import type { LoadContext, Plugin } from "@docusaurus/types"; import { getCookbookSlugs, + readCookbookGoal, readCookbookIntro, } from "../src/lib/content-markdown"; import { cookbooks } from "../src/lib/recipes/recipes"; type CookbooksGlobalData = { - /** Raw `content/cookbooks//intro.md` bodies keyed by cookbook id. */ + /** Raw `content/cookbooks//goal.md` bodies keyed by cookbook id. Falls back to intro.md. */ + goalsBySlug: Record; + /** @deprecated Use goalsBySlug. Kept for backward compat during transition. */ introsBySlug: Record; }; @@ -27,15 +30,22 @@ export default function cookbooksPlugin(context: LoadContext): Plugin { const contentSlugs = getCookbookSlugs(context.siteDir); assertCookbookSlugParity(contentSlugs); + const goalsBySlug: Record = {}; const introsBySlug: Record = {}; for (const slug of contentSlugs) { + const goal = readCookbookGoal(context.siteDir, slug); const intro = readCookbookIntro(context.siteDir, slug); - if (intro) { - introsBySlug[slug] = intro; + const text = goal ?? 
intro; + if (text) { + goalsBySlug[slug] = text; + introsBySlug[slug] = text; } } - actions.setGlobalData({ introsBySlug } satisfies CookbooksGlobalData); + actions.setGlobalData({ + goalsBySlug, + introsBySlug, + } satisfies CookbooksGlobalData); }, }; } diff --git a/scripts/validate-content.mjs b/scripts/validate-content.mjs index 0d657b6..2f612ae 100644 --- a/scripts/validate-content.mjs +++ b/scripts/validate-content.mjs @@ -12,14 +12,16 @@ if (!existsSync(resolve(ROOT, "content"))) { } const RESOURCE_ALLOWED_FILES = new Set([ + "goal.md", "content.md", "prerequisites.md", "deployment.md", ]); -const RESOURCE_REQUIRED_FILE = "content.md"; +/** A folder must have at least one of these to be published. */ +const RESOURCE_REQUIRED_FILES = ["goal.md", "content.md"]; const RESOURCE_SECTIONS = /** @type {const} */ (["recipes", "examples"]); -const COOKBOOK_ALLOWED_FILES = new Set(["intro.md"]); +const COOKBOOK_ALLOWED_FILES = new Set(["goal.md", "intro.md"]); /** @type {string[]} */ const errors = []; @@ -33,7 +35,7 @@ const errors = []; * @param {string} opts.sectionPath e.g. 
"content/recipes" — used in error messages * @param {string} opts.sectionDir absolute filesystem path to the section * @param {Set} opts.allowedFiles whitelist of allowed direct-child filenames - * @param {string=} opts.requiredFile filename that must be present (omit for none) + * @param {string[]=} opts.requiredFiles at least one of these must be present (omit for none) * @param {string} opts.emptyHint trailing instruction appended to the "is empty" error * @param {string} opts.flatHint trailing instruction appended to the "is not a directory" error */ @@ -41,7 +43,7 @@ function validateContentFolder({ sectionPath, sectionDir, allowedFiles, - requiredFile, + requiredFiles, emptyHint, flatHint, }) { @@ -75,9 +77,13 @@ function validateContentFolder({ } } - if (requiredFile && !files.includes(requiredFile)) { + if ( + requiredFiles && + requiredFiles.length > 0 && + !requiredFiles.some((f) => files.includes(f)) + ) { errors.push( - `${sectionPath}/${entry}/ is missing the required ${requiredFile}.`, + `${sectionPath}/${entry}/ is missing a required file. Need at least one of: ${requiredFiles.join(", ")}.`, ); } } @@ -88,8 +94,8 @@ for (const section of RESOURCE_SECTIONS) { sectionPath: `content/${section}`, sectionDir: resolve(ROOT, "content", section), allowedFiles: RESOURCE_ALLOWED_FILES, - requiredFile: RESOURCE_REQUIRED_FILE, - emptyHint: "Add content.md.", + requiredFiles: RESOURCE_REQUIRED_FILES, + emptyHint: "Add goal.md or content.md.", flatHint: `Flat files are not allowed. 
Move to content/${section}//content.md.`, }); } @@ -136,7 +142,7 @@ if (existsSync(cookbooksDir)) { sectionPath: "content/cookbooks", sectionDir: cookbooksDir, allowedFiles: COOKBOOK_ALLOWED_FILES, - emptyHint: "Add at least intro.md or remove the folder.", + emptyHint: "Add goal.md or intro.md, or remove the folder.", flatHint: "Cookbook content lives under content/cookbooks//.", }); } diff --git a/src/components/examples/example-detail.tsx b/src/components/examples/example-detail.tsx index 1508ca7..f6c3233 100644 --- a/src/components/examples/example-detail.tsx +++ b/src/components/examples/example-detail.tsx @@ -23,7 +23,7 @@ import { import type { Example } from "@/lib/recipes/recipes"; import { cookbooks, recipes } from "@/lib/recipes/recipes"; import { useExampleSections } from "@/lib/use-raw-content-markdown"; -import { joinContentSections } from "@/lib/content-sections"; +import { goalOnly } from "@/lib/content-sections"; import { TemplateImageCarousel } from "@/components/examples/template-image-carousel"; import { TemplatePreviewImage } from "@/components/examples/template-preview-image"; import { FallbackCardArt } from "@/components/examples/fallback-card-art"; @@ -136,7 +136,7 @@ export function ExampleDetail({ const githubUrl = `${GITHUB_BASE}/${example.githubPath}/template`; const sections = useExampleSections(example.id) ?? { content: "" }; - const rawMarkdown = joinContentSections(sections); + const rawMarkdown = goalOnly(sections); const includedCookbooks = example.cookbookIds .map((id) => cookbooks.find((c) => c.id === id)) diff --git a/src/lib/content-markdown.ts b/src/lib/content-markdown.ts index cb12f2f..6d3976b 100644 --- a/src/lib/content-markdown.ts +++ b/src/lib/content-markdown.ts @@ -29,7 +29,10 @@ export function hasSolutionSlug(rootDir: string, slug: string): boolean { return getSolutionSlugs(rootDir).includes(slug); } -/** Recipes and examples live in `content/
//` folders with a required content.md. */ +/** + * Recipes and examples live in `content/
//` folders. + * A folder is published if it has either goal.md or content.md (or both). + */ export function getContentSlugs( rootDir: string, section: FolderContentSection, @@ -39,8 +42,9 @@ export function getContentSlugs( .filter((entry) => { const fullPath = resolve(directory, entry); if (!statSync(fullPath).isDirectory()) return false; - return existsSync( - resolve(fullPath, `${REQUIRED_CONTENT_SECTION_FILE}.md`), + return ( + existsSync(resolve(fullPath, "goal.md")) || + existsSync(resolve(fullPath, `${REQUIRED_CONTENT_SECTION_FILE}.md`)) ); }) .sort(); @@ -97,16 +101,27 @@ export function readCookbookIntro( return readFileSync(filePath, "utf-8"); } -/** Reads all present section files; throws when the required content.md is missing. */ +/** Reads `content/cookbooks//goal.md` if present. */ +export function readCookbookGoal( + rootDir: string, + slug: string, +): string | undefined { + const filePath = resolve(cookbookDirectory(rootDir), slug, "goal.md"); + if (!existsSync(filePath)) return undefined; + return readFileSync(filePath, "utf-8"); +} + +/** Reads all present section files; throws when neither goal.md nor content.md exists. 
*/ export function readContentSections( rootDir: string, section: FolderContentSection, slug: string, ): ContentSections { + const goal = readContentSection(rootDir, section, slug, "goal"); const content = readContentSection(rootDir, section, slug, "content"); - if (content === undefined) { + if (goal === undefined && content === undefined) { throw new Error( - `Missing required content.md for ${section} "${slug}" at content/${section}/${slug}/content.md`, + `Missing required goal.md or content.md for ${section} "${slug}" at content/${section}/${slug}/`, ); } const prerequisites = readContentSection( @@ -116,7 +131,9 @@ export function readContentSections( "prerequisites", ); const deployment = readContentSection(rootDir, section, slug, "deployment"); - const sections: ContentSections = { content }; + const sections: ContentSections = {}; + if (goal !== undefined) sections.goal = goal; + if (content !== undefined) sections.content = content; if (prerequisites !== undefined) sections.prerequisites = prerequisites; if (deployment !== undefined) sections.deployment = deployment; return sections; diff --git a/src/lib/content-sections.ts b/src/lib/content-sections.ts index 0269335..ce2f9b1 100644 --- a/src/lib/content-sections.ts +++ b/src/lib/content-sections.ts @@ -1,20 +1,36 @@ /** Allowed file names inside each content/// folder. */ const CONTENT_SECTION_FILES = [ + "goal", "content", "prerequisites", "deployment", ] as const; export type ContentSectionFile = (typeof CONTENT_SECTION_FILES)[number]; -/** Required file in every content folder — without it the slug is not published. */ +/** + * Legacy constant for backward compat. Slug detection in content-markdown.ts + * now accepts either goal.md or content.md as the required file. + */ export const REQUIRED_CONTENT_SECTION_FILE: ContentSectionFile = "content"; export type ContentSections = { - content: string; + goal?: string; + /** Present when content.md exists. Optional because examples may only have goal.md. 
*/ + content?: string; prerequisites?: string; deployment?: string; }; +/** + * Returns goal-only content for agent prompts. Falls back to + * joinContentSections() when no goal.md exists (backward compat + * during incremental migration). + */ +export function goalOnly(sections: ContentSections): string { + if (sections.goal) return sections.goal.trim(); + return joinContentSections(sections); +} + /** Joins present sections in display order (prerequisites → content → deployment). */ export function joinContentSections(sections: ContentSections): string { const parts = [ diff --git a/src/lib/cookbook-composition.ts b/src/lib/cookbook-composition.ts index 04bcbbd..35c02c8 100644 --- a/src/lib/cookbook-composition.ts +++ b/src/lib/cookbook-composition.ts @@ -1,4 +1,4 @@ -import type { ContentSections } from "@/lib/content-sections"; +import { goalOnly, type ContentSections } from "@/lib/content-sections"; export type CookbookRecipeInput = { id: string; @@ -6,11 +6,15 @@ export type CookbookRecipeInput = { sections: ContentSections; }; +export type CookbookCompositionMode = "agent" | "human"; + type CookbookCompositionInput = { cookbookName: string; cookbookDescription: string; + goal?: string; intro?: string; recipes: CookbookRecipeInput[]; + mode?: CookbookCompositionMode; }; /** Strips a leading `## Prerequisites` heading (and any blank line that follows) from a prereqs body. */ @@ -34,18 +38,36 @@ function wrapRecipeDeployment(recipe: CookbookRecipeInput): string | undefined { } /** - * Reshuffles a cookbook into: intro → combined Prerequisites → all recipe content bodies → - * optional combined Deployment. Recipe content.md bodies keep their own `## ` title - * so they land as peer sections; prereqs are demoted to H3 under one shared H2. + * Composes a cookbook from its constituent recipes. + * + * mode="human" (default): intro/goal → combined Prerequisites → all recipe + * content bodies → optional combined Deployment. 
+ * + * mode="agent": cookbook goal/intro → each recipe's goal under a + * "## Component: " heading. No prerequisites, content bodies, or deployment. */ export function composeCookbookMarkdown( input: CookbookCompositionInput, ): string { - const { intro, recipes } = input; + const mode = input.mode ?? "human"; + const introText = input.goal ?? input.intro; + const { recipes } = input; const parts: string[] = []; - if (intro && intro.trim()) { - parts.push(intro.trim()); + if (introText && introText.trim()) { + parts.push(introText.trim()); + } + + if (mode === "agent") { + for (const recipe of recipes) { + const recipeGoal = goalOnly(recipe.sections); + if (recipeGoal.trim()) { + parts.push( + `${heading(2, `Component: ${recipe.name}`)}\n\n${recipeGoal.trim()}`, + ); + } + } + return parts.join("\n\n"); } const prereqBlocks = recipes @@ -56,8 +78,8 @@ export function composeCookbookMarkdown( } const contentBlocks = recipes - .map((recipe) => recipe.sections.content.trim()) - .filter((block) => Boolean(block)); + .map((recipe) => recipe.sections.content?.trim()) + .filter((block): block is string => Boolean(block)); if (contentBlocks.length > 0) { parts.push(contentBlocks.join("\n\n---\n\n")); } diff --git a/src/lib/copy-preamble.ts b/src/lib/copy-preamble.ts index 10ad049..3df28ad 100644 --- a/src/lib/copy-preamble.ts +++ b/src/lib/copy-preamble.ts @@ -111,7 +111,7 @@ function buildLocalBootstrapBlock( return [ "# Verify your local Databricks dev environment", "", - "A working Databricks CLI profile is the prerequisite for every step that follows. Walk the user through the recipe below — _even if they say their environment is already set up_. The verification steps are quick and prevent confusing failures further down.", + "A working Databricks CLI profile is the prerequisite for every step that follows. The goal below describes what a ready environment looks like. 
Use your installed Databricks agent skills to verify and set up the environment — _even if the user says their environment is already set up_.", "", absolutizeMarkdown(localBootstrap.trim(), siteOrigin), ].join("\n"); @@ -127,7 +127,7 @@ function buildTemplateBlock(kind: AgentPromptKind, body: string): string { return [ `# The ${label} the user copied`, "", - `The full ${label} prompt is below. This is what the user wants to focus on today. Once the local-bootstrap above passes and the intent questions are answered, work through this content step by step.`, + `The ${label} goal is below — it describes what the user wants to build. Once the local-bootstrap above passes and the intent questions are answered, use your installed Databricks agent skills to implement it.`, "", body.trim(), ].join("\n"); diff --git a/src/lib/examples/build-example-markdown.ts b/src/lib/examples/build-example-markdown.ts index 8ddbbd6..832d422 100644 --- a/src/lib/examples/build-example-markdown.ts +++ b/src/lib/examples/build-example-markdown.ts @@ -80,22 +80,21 @@ export function buildFullPrompt( includedRecipes, baseUrl, } = opts; + const hasGoal = Boolean(sections.goal); const cliTemplateUrl = `https://github.com/databricks/devhub/tree/main/${example.githubPath}`; - const lines: string[] = [ - `# ${example.name}`, - "", - example.description, - "", - "## Get started", - "", - ]; + const lines: string[] = [`# ${example.name}`, "", example.description, ""]; - if (isInitCommand(example.initCommand)) { - const hasPrereqs = Boolean(sections.prerequisites); - const hasDeployBlock = Boolean(sections.deployment); + // When goal.md exists, use it as the body and skip prerequisites/content/deployment. + // The agent gets the outcome description + scaffold entry point; skills handle implementation. 
+ if (hasGoal) { + lines.push(sections.goal!.trim(), ""); + } - if (hasPrereqs) { - lines.push(sections.prerequisites!, ""); + lines.push("## Get started", ""); + + if (isInitCommand(example.initCommand)) { + if (!hasGoal && sections.prerequisites) { + lines.push(sections.prerequisites, ""); } lines.push( @@ -109,8 +108,8 @@ export function buildFullPrompt( "", ); - if (hasDeployBlock) { - lines.push(sections.deployment!, ""); + if (!hasGoal && sections.deployment) { + lines.push(sections.deployment, ""); } else { lines.push( "A **`README.md`** ships inside the scaffolded project. Follow it end to end to configure, run, and deploy the app.", @@ -138,7 +137,7 @@ export function buildFullPrompt( ); } - if (sections.content) { + if (!hasGoal && sections.content) { lines.push("", sections.content); } diff --git a/src/lib/use-cookbook-markdown.ts b/src/lib/use-cookbook-markdown.ts index b836541..d680ff8 100644 --- a/src/lib/use-cookbook-markdown.ts +++ b/src/lib/use-cookbook-markdown.ts @@ -1,7 +1,7 @@ import { cookbooks, recipes, type Cookbook } from "@/lib/recipes/recipes"; import { useAllRecipeSections, - useCookbookIntro, + useCookbookGoal, } from "@/lib/use-raw-content-markdown"; import { composeCookbookMarkdown } from "@/lib/cookbook-composition"; @@ -11,10 +11,9 @@ type UseCookbookMarkdownResult = { }; /** - * Resolves a cookbook by id and assembles its agent-ready markdown by joining - * each child recipe's sections via `composeCookbookMarkdown`. Throws on - * missing cookbook, recipe, or recipe sections so config typos surface at - * page render time rather than producing silently empty exports. + * Resolves a cookbook by id and assembles its agent-ready markdown using + * goal-only mode: the cookbook's goal.md + each recipe's goal.md as + * labeled components. Skills handle implementation. 
*/ export function useCookbookMarkdown( cookbookId: string, @@ -23,7 +22,7 @@ export function useCookbookMarkdown( if (!cookbook) throw new Error(`Cookbook ${cookbookId} not found`); const sectionsBySlug = useAllRecipeSections(); - const intro = useCookbookIntro(cookbookId); + const goal = useCookbookGoal(cookbookId); const recipeInputs = cookbook.recipeIds.map((id) => { const recipe = recipes.find((r) => r.id === id); @@ -37,8 +36,9 @@ export function useCookbookMarkdown( const rawMarkdown = composeCookbookMarkdown({ cookbookName: cookbook.name, cookbookDescription: cookbook.description, - intro, + goal, recipes: recipeInputs, + mode: "agent", }); return { cookbook, rawMarkdown }; diff --git a/src/lib/use-raw-content-markdown.ts b/src/lib/use-raw-content-markdown.ts index cf72f7d..14405b1 100644 --- a/src/lib/use-raw-content-markdown.ts +++ b/src/lib/use-raw-content-markdown.ts @@ -34,14 +34,15 @@ export function useRawSolutionMarkdown(slug: string): string | undefined { } type CookbooksGlobalData = { + goalsBySlug: Record; introsBySlug: Record; }; -export function useCookbookIntro(slug: string): string | undefined { +export function useCookbookGoal(slug: string): string | undefined { const data = usePluginData( "docusaurus-plugin-cookbooks", ) as CookbooksGlobalData; - return data.introsBySlug[slug]; + return data.goalsBySlug[slug]; } export function useExampleSections(slug: string): ContentSections | undefined { diff --git a/src/pages/templates/ai-chat-app.tsx b/src/pages/templates/ai-chat-app.tsx index ceab9b9..5106990 100644 --- a/src/pages/templates/ai-chat-app.tsx +++ b/src/pages/templates/ai-chat-app.tsx @@ -1,7 +1,7 @@ import type { ReactNode } from "react"; import { CookbookDetail } from "@/components/cookbooks/cookbook-detail"; import { useCookbookMarkdown } from "@/lib/use-cookbook-markdown"; -import Intro from "@site/content/cookbooks/ai-chat-app/intro.md"; +import Intro from "@site/content/cookbooks/ai-chat-app/goal.md"; import 
FoundationModelsApiPrereqs from "@site/content/recipes/foundation-models-api/prerequisites.md"; import FoundationModelsApiContent from "@site/content/recipes/foundation-models-api/content.md"; import AiChatModelServingPrereqs from "@site/content/recipes/ai-chat-model-serving/prerequisites.md"; diff --git a/src/pages/templates/app-with-lakebase.tsx b/src/pages/templates/app-with-lakebase.tsx index f9675a1..7de8c7a 100644 --- a/src/pages/templates/app-with-lakebase.tsx +++ b/src/pages/templates/app-with-lakebase.tsx @@ -1,6 +1,7 @@ import type { ReactNode } from "react"; import { CookbookDetail } from "@/components/cookbooks/cookbook-detail"; import { useCookbookMarkdown } from "@/lib/use-cookbook-markdown"; +import Goal from "@site/content/cookbooks/app-with-lakebase/goal.md"; import LakebaseCreateInstancePrereqs from "@site/content/recipes/lakebase-create-instance/prerequisites.md"; import LakebaseCreateInstanceContent from "@site/content/recipes/lakebase-create-instance/content.md"; import LakebaseDataPersistencePrereqs from "@site/content/recipes/lakebase-data-persistence/prerequisites.md"; @@ -11,6 +12,7 @@ export default function AppWithLakebasePage(): ReactNode { return ( +

Prerequisites

diff --git a/src/pages/templates/genie-analytics-app.tsx b/src/pages/templates/genie-analytics-app.tsx index 8610a5b..ad4a3ee 100644 --- a/src/pages/templates/genie-analytics-app.tsx +++ b/src/pages/templates/genie-analytics-app.tsx @@ -1,6 +1,7 @@ import type { ReactNode } from "react"; import { CookbookDetail } from "@/components/cookbooks/cookbook-detail"; import { useCookbookMarkdown } from "@/lib/use-cookbook-markdown"; +import Goal from "@site/content/cookbooks/genie-analytics-app/goal.md"; import GenieConversationalAnalyticsPrereqs from "@site/content/recipes/genie-conversational-analytics/prerequisites.md"; import GenieConversationalAnalyticsContent from "@site/content/recipes/genie-conversational-analytics/content.md"; @@ -9,6 +10,7 @@ export default function GenieAnalyticsAppPage(): ReactNode { return ( +

Prerequisites


diff --git a/src/pages/templates/lakebase-off-platform.tsx b/src/pages/templates/lakebase-off-platform.tsx index f5a4271..e0c50a6 100644 --- a/src/pages/templates/lakebase-off-platform.tsx +++ b/src/pages/templates/lakebase-off-platform.tsx @@ -1,6 +1,7 @@ import type { ReactNode } from "react"; import { CookbookDetail } from "@/components/cookbooks/cookbook-detail"; import { useCookbookMarkdown } from "@/lib/use-cookbook-markdown"; +import Goal from "@site/content/cookbooks/lakebase-off-platform/goal.md"; import LakebaseCreateInstancePrereqs from "@site/content/recipes/lakebase-create-instance/prerequisites.md"; import LakebaseCreateInstanceContent from "@site/content/recipes/lakebase-create-instance/content.md"; import LakebaseOffPlatformEnvManagementPrereqs from "@site/content/recipes/lakebase-off-platform-env-management/prerequisites.md"; @@ -17,6 +18,7 @@ export default function LakebaseOffPlatformPage(): ReactNode { return ( +

Prerequisites

diff --git a/src/pages/templates/operational-data-analytics.tsx b/src/pages/templates/operational-data-analytics.tsx index 57639d1..d661169 100644 --- a/src/pages/templates/operational-data-analytics.tsx +++ b/src/pages/templates/operational-data-analytics.tsx @@ -1,6 +1,7 @@ import type { ReactNode } from "react"; import { CookbookDetail } from "@/components/cookbooks/cookbook-detail"; import { useCookbookMarkdown } from "@/lib/use-cookbook-markdown"; +import Goal from "@site/content/cookbooks/operational-data-analytics/goal.md"; import UnityCatalogSetupPrereqs from "@site/content/recipes/unity-catalog-setup/prerequisites.md"; import UnityCatalogSetupContent from "@site/content/recipes/unity-catalog-setup/content.md"; import LakebaseCreateInstancePrereqs from "@site/content/recipes/lakebase-create-instance/prerequisites.md"; @@ -19,6 +20,7 @@ export default function OperationalDataAnalyticsPage(): ReactNode { return ( +

Prerequisites

diff --git a/tests/api-markdown.test.ts b/tests/api-markdown.test.ts index 9b09d50..cbb29de 100644 --- a/tests/api-markdown.test.ts +++ b/tests/api-markdown.test.ts @@ -179,7 +179,7 @@ describe("/api/markdown about-devhub preamble policy", () => { }); expect(result.statusCode).toBe(200); expect(result.body.startsWith("# About DevHub")).toBe(true); - expect(result.body).toContain("## Agentic Support Console"); + expect(result.body).toContain("Agentic Support Console"); }); test("templates index does NOT include the preamble", () => { diff --git a/tests/bootstrap-prompt.test.ts b/tests/bootstrap-prompt.test.ts index 1b535d2..e5229d1 100644 --- a/tests/bootstrap-prompt.test.ts +++ b/tests/bootstrap-prompt.test.ts @@ -101,7 +101,7 @@ describe("hero bootstrap prompt composition (matches /api/bootstrap-prompt)", () "# Verify your local Databricks dev environment", ); const localBootstrapBodyIdx = combined.indexOf( - "## Set Up Your Local Dev Environment", + "Install the Databricks CLI", ); expect(aboutIdx).toBe(0); @@ -115,13 +115,14 @@ describe("hero bootstrap prompt composition (matches /api/bootstrap-prompt)", () expect(combined).not.toContain("# The example the user copied"); }); - test("composed hero prompt includes recipe content (databricks -v) and llms.txt URL", () => { + test("composed hero prompt includes goal content and llms.txt URL", () => { const combined = composeAgentPrompt({ parts: loadAgentPromptParts(), kind: "hero", siteOrigin: "https://dev.databricks.com", }); - expect(combined).toContain("databricks -v"); + // goal.md has the outcome description, not CLI commands + expect(combined).toContain("Install the Databricks CLI"); expect(combined).toContain("https://dev.databricks.com/llms.txt"); }); @@ -130,7 +131,8 @@ describe("hero bootstrap prompt composition (matches /api/bootstrap-prompt)", () "recipes", "set-up-your-local-dev-environment", ); - expect(recipe).toContain("## Set Up Your Local Dev Environment"); - expect(recipe).toContain("databricks 
-v"); + // Agent prompt now returns goal.md content (outcome), not full implementation + expect(recipe).toContain("Install the Databricks CLI"); + expect(recipe).toContain("authenticated CLI profile"); }); }); diff --git a/tests/e2e/copy-markdown.spec.ts b/tests/e2e/copy-markdown.spec.ts index 8888418..1166fa1 100644 --- a/tests/e2e/copy-markdown.spec.ts +++ b/tests/e2e/copy-markdown.spec.ts @@ -43,7 +43,7 @@ async function clickCopyPromptAndWaitForToast( } test.describe("copy markdown exports raw markdown on recipe pages", () => { - test("recipe detail page copies actual markdown with code fences", async ({ + test("recipe detail page copies goal content with agent prompt wrapper", async ({ page, }) => { await setupClipboardMock(page); @@ -53,9 +53,8 @@ test.describe("copy markdown exports raw markdown on recipe pages", () => { const copied = await getCopiedText(page); expect(copied).toContain("# About DevHub"); - expect(copied).toContain("## Set Up Your Local Dev Environment"); - expect(copied).toContain("```bash"); - expect(copied).toContain("databricks -v"); + expect(copied).toContain("Install the Databricks CLI"); + expect(copied).toContain("authenticated CLI profile"); expect(copied).toContain("llms.txt"); }); }); @@ -75,14 +74,15 @@ test.describe("copy markdown exports raw markdown on template pages", () => { expect(copied).toContain("# Working with DevHub prompts"); expect(copied).toContain("# What the user just did"); expect(copied).toContain("# Verify your local Databricks dev environment"); - expect(copied).toContain("## Set Up Your Local Dev Environment"); + expect(copied).toContain("Install the Databricks CLI"); // Cookbook body comes after the meta-prompt, with its own frontmatter: expect(copied).toContain('title: "AI Chat App"'); expect(copied).toContain("# The cookbook the user copied"); - expect(copied).toContain("```bash"); + // Agent mode: recipe goals as components + expect(copied).toContain("## Component:"); }); - test("multi-recipe cookbook 
body no longer embeds the local-dev-environment recipe", async ({ + test("multi-recipe cookbook body uses agent mode with component headings", async ({ page, }) => { await setupClipboardMock(page); @@ -92,13 +92,8 @@ test.describe("copy markdown exports raw markdown on template pages", () => { const copied = await getCopiedText(page); expect(copied).toContain("# About DevHub"); - // The local-dev-environment recipe heading is present exactly once — - // injected by the meta-prompt, NOT duplicated inside the cookbook body. - const bootstrapHeadings = copied.match( - /^## Set Up Your Local Dev Environment$/gm, - ); - expect(bootstrapHeadings?.length).toBe(1); - expect(copied).toContain("## Lakebase Data Persistence"); + // Agent mode: recipe goals appear as labeled components, not full content + expect(copied).toContain("## Component: Lakebase Data Persistence"); expect(copied).toContain("---"); }); }); @@ -112,7 +107,7 @@ test.describe("copy markdown exports raw markdown on example pages", () => { const copied = await getCopiedText(page); expect(copied).toContain("# About DevHub"); - expect(copied).toContain("## Agentic Support Console"); + expect(copied).toContain("# Agentic Support Console"); expect(copied).toContain("Data Flow"); expect(copied).toContain("Lakehouse Sync"); }); @@ -125,7 +120,7 @@ test.describe("copy markdown exports raw markdown on example pages", () => { const copied = await getCopiedText(page); expect(copied).toContain("# About DevHub"); - expect(copied).toContain("## SaaS Subscription Tracker"); + expect(copied).toContain("# SaaS Subscription Tracker"); expect(copied).toContain("Data Flow"); }); @@ -152,12 +147,10 @@ test.describe("copy markdown exports raw markdown on example pages", () => { expect(copied).toContain( "These **templates** informed how this example was built", ); - expect(copied).toContain( - "### 1. 
Clone locally and follow `template/README.md`", - ); + expect(copied).toContain("### Clone and follow `template/README.md`"); }); - test("Banner Copy prompt copies full prompt with bash and ### substeps", async ({ + test("Banner Copy prompt copies full prompt with bash and clone substeps", async ({ page, }) => { await setupClipboardMock(page); @@ -168,9 +161,7 @@ test.describe("copy markdown exports raw markdown on example pages", () => { const copied = await getCopiedText(page); expect(copied).toContain("# About DevHub"); expect(copied).toContain("\n---\n\n# "); - expect(copied).toContain( - "### 1. Clone locally and follow `template/README.md`", - ); + expect(copied).toContain("### Clone and follow `template/README.md`"); expect(copied).toContain("```bash"); expect(copied).toContain( "git clone --depth 1 https://github.com/databricks/devhub.git", diff --git a/tests/e2e/navigation.spec.ts b/tests/e2e/navigation.spec.ts index 9f539ee..0aaf7ea 100644 --- a/tests/e2e/navigation.spec.ts +++ b/tests/e2e/navigation.spec.ts @@ -175,7 +175,7 @@ test.describe("home page link navigation", () => { expect(finalCopiedText).toContain( "# Verify your local Databricks dev environment", ); - expect(finalCopiedText).toContain("## Set Up Your Local Dev Environment"); + expect(finalCopiedText).toContain("Install the Databricks CLI"); expect(finalCopiedText).toContain("dev.databricks.com"); expect(finalCopiedText).toContain("llms.txt"); }); diff --git a/tests/markdown.test.ts b/tests/markdown.test.ts index 5901019..a4adedd 100644 --- a/tests/markdown.test.ts +++ b/tests/markdown.test.ts @@ -60,25 +60,27 @@ describe("detail markdown resolver", () => { expect(frontmatterCount).toBe(2); }); - test("resolves recipe markdown", () => { + test("resolves recipe markdown (returns goal when goal.md exists)", () => { const markdown = getDetailMarkdown( "recipes", "set-up-your-local-dev-environment", ); - expect(markdown).toContain("## Set Up Your Local Dev Environment"); - 
expect(markdown).toContain("databricks -v"); + // Agent prompt returns goal.md content (no heading, just description) + expect(markdown).toContain("Install the Databricks CLI"); + expect(markdown).toContain("authenticated CLI profile"); }); test("resolves example markdown", () => { const markdown = getDetailMarkdown("examples", "agentic-support-console"); - expect(markdown).toContain("## Agentic Support Console"); + expect(markdown).toContain("AI-powered support console"); expect(markdown).toContain("Data Flow"); }); - test("resolves template markdown", () => { + test("resolves template markdown (cookbook in agent mode)", () => { const markdown = getDetailMarkdown("templates", "ai-chat-app"); expect(markdown).toContain("# AI Chat App"); - expect(markdown).toContain("## Lakebase Agent Memory"); + // Agent mode uses "Component:" headings + expect(markdown).toContain("Component: Lakebase Agent Memory"); }); test("template markdown no longer embeds the local dev environment recipe (now injected by the meta-prompt)", () => { @@ -87,33 +89,21 @@ describe("detail markdown resolver", () => { expect(markdown).not.toMatch(/^### Set Up Your Local Dev Environment$/m); }); - test("template markdown hoists all recipe prereqs before any recipe content", () => { + test("template markdown for cookbook uses agent mode (goals only, no prereqs or full content)", () => { const markdown = getDetailMarkdown("templates", "ai-chat-app"); - - const firstLineStart = (pattern: RegExp): number => - markdown.search(pattern); - - const prereqIdx = firstLineStart(/^## Prerequisites$/m); - const foundationContentIdx = firstLineStart( - /^## Query AI Gateway Endpoints$/m, - ); - const lakebaseContentIdx = firstLineStart(/^## Lakebase Agent Memory$/m); - - expect(prereqIdx).toBeGreaterThanOrEqual(0); - expect(prereqIdx).toBeLessThan(foundationContentIdx); - expect(foundationContentIdx).toBeLessThan(lakebaseContentIdx); - // Only one combined `## Prerequisites` heading, with demoted H3 per recipe. 
- expect(markdown.match(/^## Prerequisites$/gm)?.length).toBe(1); - expect(markdown).toMatch(/^### Query AI Gateway Endpoints$/m); - expect(markdown).toMatch(/^### Lakebase Agent Memory$/m); + // Agent mode: cookbook goal + recipe goals as components + expect(markdown).toContain("streaming AI chat app on Databricks"); + expect(markdown).toContain("## Component:"); + // Agent mode should NOT include prerequisites section + expect(markdown).not.toContain("## Prerequisites"); }); - test("template markdown includes cookbook intro.md above Prerequisites when present", () => { + test("template markdown includes cookbook goal.md above recipe goals", () => { const markdown = getDetailMarkdown("templates", "ai-chat-app"); - const introIdx = markdown.indexOf("## What you are building"); - const prereqIdx = markdown.indexOf("## Prerequisites"); - expect(introIdx).toBeGreaterThanOrEqual(0); - expect(introIdx).toBeLessThan(prereqIdx); + const goalIdx = markdown.indexOf("streaming AI chat app on Databricks"); + const componentIdx = markdown.indexOf("## Component:"); + expect(goalIdx).toBeGreaterThanOrEqual(0); + expect(goalIdx).toBeLessThan(componentIdx); expect(markdown).toContain("How the steps fit together"); }); @@ -130,18 +120,18 @@ describe("templates section resolves recipes, examples, and cookbooks", () => { "templates", "set-up-your-local-dev-environment", ); - expect(markdown).toContain("## Set Up Your Local Dev Environment"); + expect(markdown).toContain("Install the Databricks CLI"); }); test("resolves an example slug via templates", () => { const markdown = getDetailMarkdown("templates", "agentic-support-console"); - expect(markdown).toContain("## Agentic Support Console"); + expect(markdown).toContain("AI-powered support console"); }); test("resolves a cookbook slug via templates", () => { const markdown = getDetailMarkdown("templates", "ai-chat-app"); expect(markdown).toContain("# AI Chat App"); - expect(markdown).toContain("## Lakebase Agent Memory"); + 
expect(markdown).toContain("Component: Lakebase Agent Memory"); }); test("throws for unknown template slug", () => { @@ -281,7 +271,7 @@ describe("slug normalization strips .md extension", () => { "recipes", "set-up-your-local-dev-environment.md", ); - expect(markdown).toContain("## Set Up Your Local Dev Environment"); + expect(markdown).toContain("Install the Databricks CLI"); }); test("templates slug with .md extension resolves", () => { @@ -289,6 +279,6 @@ describe("slug normalization strips .md extension", () => { "templates", "agentic-support-console.md", ); - expect(markdown).toContain("## Agentic Support Console"); + expect(markdown).toContain("AI-powered support console"); }); }); diff --git a/tests/validate-content.test.ts b/tests/validate-content.test.ts index 172648d..6ae3109 100644 --- a/tests/validate-content.test.ts +++ b/tests/validate-content.test.ts @@ -96,7 +96,7 @@ describe("validate-content script", () => { const result = runValidator(workDir); expect(result.status).toBe(1); expect(result.stderr).toContain("no-content"); - expect(result.stderr).toContain("missing the required content.md"); + expect(result.stderr).toContain("missing a required file"); }); test("fails when a folder contains a disallowed filename", () => {