diff --git a/docs/codelabs/periodic_materialization.md b/docs/codelabs/periodic_materialization.md index 2601abb..afd756a 100644 --- a/docs/codelabs/periodic_materialization.md +++ b/docs/codelabs/periodic_materialization.md @@ -164,45 +164,192 @@ Cloud Shell users can skip this step; credentials are already configured. (In Co ## Get the Codelab Artifacts Duration: 0:02 -The codelab ships a set of ready-to-use artifacts: the property-graph schema, the ontology, the binding, and a synthetic event generator. You do not author any of these yourself; the codelab uses them as-is, and the [README in the artifacts folder](https://github.com/GoogleCloudPlatform/BigQuery-Agent-Analytics-SDK/blob/main/examples/codelab/periodic_materialization/README.md) explains how to adapt them for your own decision domain. +The codelab ships a set of ready-to-use artifacts: the table DDL, the property-graph schema, the ontology, and the binding. You do not author any of these yourself; the codelab uses them as-is, and the [README in the artifacts folder](https://github.com/GoogleCloudPlatform/BigQuery-Agent-Analytics-SDK/blob/main/examples/codelab/periodic_materialization/README.md) explains how to adapt them for your own decision domain. -Download the artifacts to a working directory: +This codelab is self-contained: the cell below writes the four artifacts into a working directory, so there is nothing to download. They are the same files shipped in [`examples/codelab/periodic_materialization/`](https://github.com/GoogleCloudPlatform/BigQuery-Agent-Analytics-SDK/tree/main/examples/codelab/periodic_materialization). 
- -```bash -mkdir -p ~/bqaa-codelab && cd ~/bqaa-codelab - -BASE="https://raw.githubusercontent.com/GoogleCloudPlatform/BigQuery-Agent-Analytics-SDK/main/examples/codelab/periodic_materialization" -for f in property_graph.sql table_ddl.sql ontology.yaml binding.yaml seed_events.py; do - curl -fsSL "$BASE/$f" -o "$f" -done -ls -``` - - +```python +# This codelab is self-contained: rather than downloading files, this +# cell writes the four artifacts into a working directory. They are +# identical to the files in examples/codelab/periodic_materialization/. +from pathlib import Path + +work = Path.home() / "bqaa-codelab" +work.mkdir(exist_ok=True) + +artifacts = { + "table_ddl.sql": r"""-- Node and edge table DDL for the BQAA codelab. +-- +-- The materializer writes into these tables on every run. +-- ``session_id`` and ``extracted_at`` are SDK metadata columns the +-- materializer fills automatically; they are required on every +-- bound table. +-- +-- Apply with: +-- envsubst < table_ddl.sql | bq query --use_legacy_sql=false +-- +-- Required shell variables: +-- PROJECT_ID : your GCP project ID +-- DATASET : the BigQuery dataset that holds both raw agent_events +-- and the materialized graph tables +CREATE TABLE IF NOT EXISTS `${PROJECT_ID}.${DATASET}.decision_request` ( + request_id STRING, request_text STRING, requested_at TIMESTAMP, + session_id STRING, extracted_at TIMESTAMP +); +CREATE TABLE IF NOT EXISTS `${PROJECT_ID}.${DATASET}.decision_option` ( + option_id STRING, option_label STRING, confidence FLOAT64, + session_id STRING, extracted_at TIMESTAMP +); +CREATE TABLE IF NOT EXISTS `${PROJECT_ID}.${DATASET}.decision_outcome` ( + outcome_id STRING, status STRING, rationale STRING, decided_at TIMESTAMP, + session_id STRING, extracted_at TIMESTAMP +); +CREATE TABLE IF NOT EXISTS `${PROJECT_ID}.${DATASET}.evaluates_option` ( + request_id STRING, option_id STRING, + session_id STRING, extracted_at TIMESTAMP +); +CREATE TABLE IF NOT EXISTS 
`${PROJECT_ID}.${DATASET}.resulted_in` ( + request_id STRING, outcome_id STRING, + session_id STRING, extracted_at TIMESTAMP +); +""", + "property_graph.sql": r"""-- Property-graph DDL for the BQAA codelab. +-- +-- Models a generic agent decision flow: +-- DecisionRequest -> evaluatesOption -> DecisionOption +-- DecisionRequest -> resultedIn -> DecisionOutcome +-- +-- Apply with: +-- envsubst < property_graph.sql | bq query --use_legacy_sql=false +-- +-- Required shell variables: +-- PROJECT_ID : your GCP project ID +-- DATASET : the BigQuery dataset that holds both raw agent_events +-- and the materialized graph tables (single-dataset shape) +CREATE OR REPLACE PROPERTY GRAPH `${PROJECT_ID}.${DATASET}.agent_decisions_graph` + NODE TABLES ( + `${PROJECT_ID}.${DATASET}.decision_request` AS decision_request + KEY (request_id) + LABEL DecisionRequest PROPERTIES (request_id, request_text, requested_at), + `${PROJECT_ID}.${DATASET}.decision_option` AS decision_option + KEY (option_id) + LABEL DecisionOption PROPERTIES (option_id, option_label, confidence), + `${PROJECT_ID}.${DATASET}.decision_outcome` AS decision_outcome + KEY (outcome_id) + LABEL DecisionOutcome PROPERTIES (outcome_id, status, rationale, decided_at) + ) + EDGE TABLES ( + `${PROJECT_ID}.${DATASET}.evaluates_option` AS evaluates_option + KEY (request_id, option_id) + SOURCE KEY (request_id) REFERENCES decision_request (request_id) + DESTINATION KEY (option_id) REFERENCES decision_option (option_id) + LABEL evaluatesOption, + `${PROJECT_ID}.${DATASET}.resulted_in` AS resulted_in + KEY (request_id, outcome_id) + SOURCE KEY (request_id) REFERENCES decision_request (request_id) + DESTINATION KEY (outcome_id) REFERENCES decision_outcome (outcome_id) + LABEL resultedIn + ); +""", + "ontology.yaml": r"""# Ontology for the BQAA codelab. # -# By default, BASE points at the latest `main` branch. 
If you are running -# this notebook against a feature branch under review, override BASE in -# your shell before launching the notebook (or edit the line below). -import os -os.environ["BASE"] = os.environ.get( - "BASE", - "https://raw.githubusercontent.com/GoogleCloudPlatform/BigQuery-Agent-Analytics-SDK/main/examples/codelab/periodic_materialization", -) -print(f"Downloading codelab artifacts from: {os.environ['BASE']}") +# Names the entities and relationships in the codelab's sample +# agent decision flow. The materializer uses this vocabulary +# when it constructs the AI.GENERATE extraction prompt. +# +# Paired with binding.yaml (which maps these entities to +# physical BigQuery tables) and property_graph.sql (which +# stitches the tables into a queryable BigQuery property graph). +ontology: agent_decision_flow +entities: + - name: DecisionRequest + keys: + primary: [requestId] + properties: + - {name: requestId, type: string} + - {name: requestText, type: string} + - {name: requestedAt, type: timestamp} + - name: DecisionOption + keys: + primary: [optionId] + properties: + - {name: optionId, type: string} + - {name: optionLabel, type: string} + - {name: confidence, type: double} + - name: DecisionOutcome + keys: + primary: [outcomeId] + properties: + - {name: outcomeId, type: string} + - {name: status, type: string} + - {name: rationale, type: string} + - {name: decidedAt, type: timestamp} +relationships: + - {name: evaluatesOption, from: DecisionRequest, to: DecisionOption} + - {name: resultedIn, from: DecisionRequest, to: DecisionOutcome} +""", + "binding.yaml": r"""# Binding for the BQAA codelab. +# +# Maps the ontology's entities and relationships to physical +# BigQuery tables and columns. Paired with ontology.yaml +# and property_graph.sql. 
+# +# Before passing this file to bqaa context-graph, render +# the shell placeholders with envsubst: +# envsubst < binding.yaml > binding.yaml.tmp && mv binding.yaml.tmp binding.yaml +# +# Required shell variables: +# PROJECT_ID : your GCP project ID +# DATASET : the BigQuery dataset that holds both raw agent_events +# and the materialized graph tables +binding: agent_decisions_binding +ontology: agent_decision_flow +target: + backend: bigquery + project: ${PROJECT_ID} + dataset: ${DATASET} +entities: + - name: DecisionRequest + source: ${PROJECT_ID}.${DATASET}.decision_request + properties: + - {name: requestId, column: request_id} + - {name: requestText, column: request_text} + - {name: requestedAt, column: requested_at} + - name: DecisionOption + source: ${PROJECT_ID}.${DATASET}.decision_option + properties: + - {name: optionId, column: option_id} + - {name: optionLabel, column: option_label} + - {name: confidence, column: confidence} + - name: DecisionOutcome + source: ${PROJECT_ID}.${DATASET}.decision_outcome + properties: + - {name: outcomeId, column: outcome_id} + - {name: status, column: status} + - {name: rationale, column: rationale} + - {name: decidedAt, column: decided_at} +relationships: + - name: evaluatesOption + source: ${PROJECT_ID}.${DATASET}.evaluates_option + from_columns: [request_id] + to_columns: [option_id] + - name: resultedIn + source: ${PROJECT_ID}.${DATASET}.resulted_in + from_columns: [request_id] + to_columns: [outcome_id] +""", +} +for name, body in artifacts.items(): + (work / name).write_text(body) -!mkdir -p ~/bqaa-codelab && cd ~/bqaa-codelab && \ - for f in property_graph.sql table_ddl.sql ontology.yaml binding.yaml seed_events.py; do \ - curl -fsSL "$BASE/$f" -o "$f"; \ - done && \ - ls -la ---> +print("Wrote:", ", ".join(sorted(artifacts))) +``` -You should see five files: +The cell writes these four artifacts: ``` -binding.yaml ontology.yaml property_graph.sql seed_events.py table_ddl.sql +binding.yaml ontology.yaml 
property_graph.sql table_ddl.sql ``` The decision flow these artifacts describe has three node types and two heterogeneous edges: diff --git a/examples/codelab/periodic_materialization/colab_notebook.ipynb b/examples/codelab/periodic_materialization/colab_notebook.ipynb index e43e376..42940da 100644 --- a/examples/codelab/periodic_materialization/colab_notebook.ipynb +++ b/examples/codelab/periodic_materialization/colab_notebook.ipynb @@ -186,9 +186,9 @@ "\n", "## Get the Codelab Artifacts\n", "\n", - "The codelab ships a set of ready-to-use artifacts: the property-graph schema, the ontology, the binding, and a synthetic event generator. You do not author any of these yourself; the codelab uses them as-is, and the [README in the artifacts folder](https://github.com/GoogleCloudPlatform/BigQuery-Agent-Analytics-SDK/blob/main/examples/codelab/periodic_materialization/README.md) explains how to adapt them for your own decision domain.\n", + "The codelab ships a set of ready-to-use artifacts: the table DDL, the property-graph schema, the ontology, and the binding. You do not author any of these yourself; the codelab uses them as-is, and the [README in the artifacts folder](https://github.com/GoogleCloudPlatform/BigQuery-Agent-Analytics-SDK/blob/main/examples/codelab/periodic_materialization/README.md) explains how to adapt them for your own decision domain.\n", "\n", - "Download the artifacts to a working directory:" + "This codelab is self-contained: the cell below writes the four artifacts into a working directory, so there is nothing to download. They are the same files shipped in [`examples/codelab/periodic_materialization/`](https://github.com/GoogleCloudPlatform/BigQuery-Agent-Analytics-SDK/tree/main/examples/codelab/periodic_materialization)." 
] }, { @@ -197,33 +197,190 @@ "metadata": {}, "outputs": [], "source": [ - "# Source of the bundled codelab artifacts.\n", + "# This codelab is self-contained: rather than downloading files, this\n", + "# cell writes the four artifacts into a working directory. They are\n", + "# identical to the files in examples/codelab/periodic_materialization/.\n", + "from pathlib import Path\n", + "\n", + "work = Path.home() / \"bqaa-codelab\"\n", + "work.mkdir(exist_ok=True)\n", + "\n", + "artifacts = {\n", + " \"table_ddl.sql\": r\"\"\"-- Node and edge table DDL for the BQAA codelab.\n", + "--\n", + "-- The materializer writes into these tables on every run.\n", + "-- ``session_id`` and ``extracted_at`` are SDK metadata columns the\n", + "-- materializer fills automatically; they are required on every\n", + "-- bound table.\n", + "--\n", + "-- Apply with:\n", + "-- envsubst < table_ddl.sql | bq query --use_legacy_sql=false\n", + "--\n", + "-- Required shell variables:\n", + "-- PROJECT_ID : your GCP project ID\n", + "-- DATASET : the BigQuery dataset that holds both raw agent_events\n", + "-- and the materialized graph tables\n", + "CREATE TABLE IF NOT EXISTS `${PROJECT_ID}.${DATASET}.decision_request` (\n", + " request_id STRING, request_text STRING, requested_at TIMESTAMP,\n", + " session_id STRING, extracted_at TIMESTAMP\n", + ");\n", + "CREATE TABLE IF NOT EXISTS `${PROJECT_ID}.${DATASET}.decision_option` (\n", + " option_id STRING, option_label STRING, confidence FLOAT64,\n", + " session_id STRING, extracted_at TIMESTAMP\n", + ");\n", + "CREATE TABLE IF NOT EXISTS `${PROJECT_ID}.${DATASET}.decision_outcome` (\n", + " outcome_id STRING, status STRING, rationale STRING, decided_at TIMESTAMP,\n", + " session_id STRING, extracted_at TIMESTAMP\n", + ");\n", + "CREATE TABLE IF NOT EXISTS `${PROJECT_ID}.${DATASET}.evaluates_option` (\n", + " request_id STRING, option_id STRING,\n", + " session_id STRING, extracted_at TIMESTAMP\n", + ");\n", + "CREATE TABLE IF NOT EXISTS 
`${PROJECT_ID}.${DATASET}.resulted_in` (\n", + " request_id STRING, outcome_id STRING,\n", + " session_id STRING, extracted_at TIMESTAMP\n", + ");\n", + "\"\"\",\n", + " \"property_graph.sql\": r\"\"\"-- Property-graph DDL for the BQAA codelab.\n", + "--\n", + "-- Models a generic agent decision flow:\n", + "-- DecisionRequest -> evaluatesOption -> DecisionOption\n", + "-- DecisionRequest -> resultedIn -> DecisionOutcome\n", + "--\n", + "-- Apply with:\n", + "-- envsubst < property_graph.sql | bq query --use_legacy_sql=false\n", + "--\n", + "-- Required shell variables:\n", + "-- PROJECT_ID : your GCP project ID\n", + "-- DATASET : the BigQuery dataset that holds both raw agent_events\n", + "-- and the materialized graph tables (single-dataset shape)\n", + "CREATE OR REPLACE PROPERTY GRAPH `${PROJECT_ID}.${DATASET}.agent_decisions_graph`\n", + " NODE TABLES (\n", + " `${PROJECT_ID}.${DATASET}.decision_request` AS decision_request\n", + " KEY (request_id)\n", + " LABEL DecisionRequest PROPERTIES (request_id, request_text, requested_at),\n", + " `${PROJECT_ID}.${DATASET}.decision_option` AS decision_option\n", + " KEY (option_id)\n", + " LABEL DecisionOption PROPERTIES (option_id, option_label, confidence),\n", + " `${PROJECT_ID}.${DATASET}.decision_outcome` AS decision_outcome\n", + " KEY (outcome_id)\n", + " LABEL DecisionOutcome PROPERTIES (outcome_id, status, rationale, decided_at)\n", + " )\n", + " EDGE TABLES (\n", + " `${PROJECT_ID}.${DATASET}.evaluates_option` AS evaluates_option\n", + " KEY (request_id, option_id)\n", + " SOURCE KEY (request_id) REFERENCES decision_request (request_id)\n", + " DESTINATION KEY (option_id) REFERENCES decision_option (option_id)\n", + " LABEL evaluatesOption,\n", + " `${PROJECT_ID}.${DATASET}.resulted_in` AS resulted_in\n", + " KEY (request_id, outcome_id)\n", + " SOURCE KEY (request_id) REFERENCES decision_request (request_id)\n", + " DESTINATION KEY (outcome_id) REFERENCES decision_outcome (outcome_id)\n", + " LABEL 
resultedIn\n", + " );\n", + "\"\"\",\n", + " \"ontology.yaml\": r\"\"\"# Ontology for the BQAA codelab.\n", "#\n", - "# By default, BASE points at the latest `main` branch. If you are running\n", - "# this notebook against a feature branch under review, override BASE in\n", - "# your shell before launching the notebook (or edit the line below).\n", - "import os\n", - "os.environ[\"BASE\"] = os.environ.get(\n", - " \"BASE\",\n", - " \"https://raw.githubusercontent.com/GoogleCloudPlatform/BigQuery-Agent-Analytics-SDK/main/examples/codelab/periodic_materialization\",\n", - ")\n", - "print(f\"Downloading codelab artifacts from: {os.environ['BASE']}\")\n", + "# Names the entities and relationships in the codelab's sample\n", + "# agent decision flow. The materializer uses this vocabulary\n", + "# when it constructs the AI.GENERATE extraction prompt.\n", + "#\n", + "# Paired with binding.yaml (which maps these entities to\n", + "# physical BigQuery tables) and property_graph.sql (which\n", + "# stitches the tables into a queryable BigQuery property graph).\n", + "ontology: agent_decision_flow\n", + "entities:\n", + " - name: DecisionRequest\n", + " keys:\n", + " primary: [requestId]\n", + " properties:\n", + " - {name: requestId, type: string}\n", + " - {name: requestText, type: string}\n", + " - {name: requestedAt, type: timestamp}\n", + " - name: DecisionOption\n", + " keys:\n", + " primary: [optionId]\n", + " properties:\n", + " - {name: optionId, type: string}\n", + " - {name: optionLabel, type: string}\n", + " - {name: confidence, type: double}\n", + " - name: DecisionOutcome\n", + " keys:\n", + " primary: [outcomeId]\n", + " properties:\n", + " - {name: outcomeId, type: string}\n", + " - {name: status, type: string}\n", + " - {name: rationale, type: string}\n", + " - {name: decidedAt, type: timestamp}\n", + "relationships:\n", + " - {name: evaluatesOption, from: DecisionRequest, to: DecisionOption}\n", + " - {name: resultedIn, from: DecisionRequest, to: 
DecisionOutcome}\n", + "\"\"\",\n", + " \"binding.yaml\": r\"\"\"# Binding for the BQAA codelab.\n", + "#\n", + "# Maps the ontology's entities and relationships to physical\n", + "# BigQuery tables and columns. Paired with ontology.yaml\n", + "# and property_graph.sql.\n", + "#\n", + "# Before passing this file to bqaa context-graph, render\n", + "# the shell placeholders with envsubst:\n", + "# envsubst < binding.yaml > binding.yaml.tmp && mv binding.yaml.tmp binding.yaml\n", + "#\n", + "# Required shell variables:\n", + "# PROJECT_ID : your GCP project ID\n", + "# DATASET : the BigQuery dataset that holds both raw agent_events\n", + "# and the materialized graph tables\n", + "binding: agent_decisions_binding\n", + "ontology: agent_decision_flow\n", + "target:\n", + " backend: bigquery\n", + " project: ${PROJECT_ID}\n", + " dataset: ${DATASET}\n", + "entities:\n", + " - name: DecisionRequest\n", + " source: ${PROJECT_ID}.${DATASET}.decision_request\n", + " properties:\n", + " - {name: requestId, column: request_id}\n", + " - {name: requestText, column: request_text}\n", + " - {name: requestedAt, column: requested_at}\n", + " - name: DecisionOption\n", + " source: ${PROJECT_ID}.${DATASET}.decision_option\n", + " properties:\n", + " - {name: optionId, column: option_id}\n", + " - {name: optionLabel, column: option_label}\n", + " - {name: confidence, column: confidence}\n", + " - name: DecisionOutcome\n", + " source: ${PROJECT_ID}.${DATASET}.decision_outcome\n", + " properties:\n", + " - {name: outcomeId, column: outcome_id}\n", + " - {name: status, column: status}\n", + " - {name: rationale, column: rationale}\n", + " - {name: decidedAt, column: decided_at}\n", + "relationships:\n", + " - name: evaluatesOption\n", + " source: ${PROJECT_ID}.${DATASET}.evaluates_option\n", + " from_columns: [request_id]\n", + " to_columns: [option_id]\n", + " - name: resultedIn\n", + " source: ${PROJECT_ID}.${DATASET}.resulted_in\n", + " from_columns: [request_id]\n", + " to_columns: 
[outcome_id]\n", + "\"\"\",\n", + "}\n", + "for name, body in artifacts.items():\n", + " (work / name).write_text(body)\n", "\n", - "!mkdir -p ~/bqaa-codelab && cd ~/bqaa-codelab && \\\n", - " for f in property_graph.sql table_ddl.sql ontology.yaml binding.yaml seed_events.py; do \\\n", - " curl -fsSL \"$BASE/$f\" -o \"$f\"; \\\n", - " done && \\\n", - " ls -la" + "print(\"Wrote:\", \", \".join(sorted(artifacts)))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "You should see five files:\n", + "The cell writes these four artifacts:\n", "\n", "```\n", - "binding.yaml ontology.yaml property_graph.sql seed_events.py table_ddl.sql\n", + "binding.yaml ontology.yaml property_graph.sql table_ddl.sql\n", "```\n", "\n", "The decision flow these artifacts describe has three node types and two heterogeneous edges:\n", diff --git a/tests/test_codelab_embedded_artifacts.py b/tests/test_codelab_embedded_artifacts.py new file mode 100644 index 0000000..bbba440 --- /dev/null +++ b/tests/test_codelab_embedded_artifacts.py @@ -0,0 +1,44 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""The codelab embeds its artifacts inline so the notebook is self-contained. + +Those embedded copies must stay byte-for-byte in sync with the canonical files +in ``examples/codelab/periodic_materialization/``; otherwise the codelab would +write stale artifacts. This test fails if either side drifts. 
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+
+_REPO = Path(__file__).resolve().parents[1]
+_CODELAB = _REPO / "docs" / "codelabs" / "periodic_materialization.md"
+_ARTIFACTS = _REPO / "examples" / "codelab" / "periodic_materialization"
+# The notebook embeds the same artifacts as the Markdown codelab, so it has
+# to be checked for drift as well.
+_NOTEBOOK = _ARTIFACTS / "colab_notebook.ipynb"
+
+
+def _embedding_documents() -> dict[str, str]:
+  """Return the searchable text of every document that embeds the artifacts.
+
+  The Markdown codelab is returned verbatim. The notebook is JSON, where the
+  embedded text is escaped and split across ``source`` entries, so its cell
+  sources are decoded and re-joined before searching.
+
+  Returns:
+    A mapping from repo-relative document path to the text to search.
+  """
+  notebook = json.loads(_NOTEBOOK.read_text(encoding="utf-8"))
+  cell_sources = []
+  for cell in notebook.get("cells", []):
+    source = cell.get("source", "")
+    # A cell's ``source`` may be a single string or a list of line strings.
+    if not isinstance(source, str):
+      source = "".join(source)
+    cell_sources.append(source)
+  return {
+      _CODELAB.relative_to(_REPO).as_posix(): _CODELAB.read_text(
+          encoding="utf-8"
+      ),
+      _NOTEBOOK.relative_to(_REPO).as_posix(): "\n".join(cell_sources),
+  }
+
+
+@pytest.mark.parametrize(
+    "name",
+    ["table_ddl.sql", "property_graph.sql", "ontology.yaml", "binding.yaml"],
+)
+def test_embedded_artifact_matches_canonical(name: str) -> None:
+  """Check that each embedding document contains the canonical artifact.
+
+  The canonical file is compared with trailing newlines stripped, so only a
+  difference in trailing newlines is tolerated. Both the Markdown codelab
+  and the notebook are searched; checking only one of them would let the
+  other drift unnoticed.
+
+  Args:
+    name: File name of the artifact under the canonical artifacts folder.
+  """
+  canonical = (_ARTIFACTS / name).read_text(encoding="utf-8").rstrip("\n")
+  # An empty string is a substring of everything; without this guard an
+  # accidentally emptied artifact would pass the containment check below.
+  assert canonical, f"Canonical artifact {name} is empty."
+  for document, text in _embedding_documents().items():
+    assert canonical in text, (
+        f"The embedded copy of {name} in {document} has drifted from"
+        f" examples/codelab/periodic_materialization/{name}. Re-run the"
+        f" embed so the self-contained notebook writes the current"
+        f" artifact."
+    )