diff --git a/.gitattributes b/.gitattributes index 37203db..ca492c2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -11,3 +11,4 @@ Dockerfile text eol=lf *.toml text eol=lf *.ps1 text eol=crlf tests/contracts/cas-contracts/v0.1.0/*.json -text -diff +evidence/verified-local-golden-path-v0.1/artifacts/** -text -diff diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dfaff50..ef91196 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,4 +21,5 @@ jobs: - run: python -m ruff check . - run: python -m mypy - run: python -m pytest + - run: python -m cas_reference_product.evidence - run: docker build --platform linux/amd64 -t cas-reference-product:ci . diff --git a/README.md b/README.md index 49c693f..ee0cf11 100644 --- a/README.md +++ b/README.md @@ -63,9 +63,9 @@ The image runs as a non-root user, listens on port `8080`, and provides `/health - [Architecture](docs/architecture.md) - [Threat model](docs/threat-model.md) - [Operations](docs/operations.md) +- [Immutable golden-path case-study evidence](docs/case-study-evidence.md) - [cas-platform interface](deployment/cas-platform.interface.yaml) ## Security Report vulnerabilities through GitHub private vulnerability reporting. Do not include credentials or sensitive prompt data in issues. - diff --git a/docs/case-study-evidence.md b/docs/case-study-evidence.md new file mode 100644 index 0000000..de63388 --- /dev/null +++ b/docs/case-study-evidence.md @@ -0,0 +1,45 @@ +# Immutable Golden-Path Evidence + +The committed bundle under +`evidence/verified-local-golden-path-v0.1/` is the case-study evidence for one +verified local golden-path execution. It connects four independently versioned +CAS repositories without claiming an Azure deployment. + +## What The Bundle Proves + +- `cas-reference-product` served the deterministic local workflow over HTTP. +- `cas-evals` evaluated the actual returned output and preserved lifecycle and + trace identifiers. 
+- The published `cas-contracts` v0.1.0 registry manifest is pinned by SHA-256. +- Every schema digest referenced by that registry manifest is verified before + the canonical records are validated against the exact published schemas. +- The evaluation fixture, normalized evidence, fixture digest, and returned + output digest are verified together. +- The `cas-platform` what-if script is pinned as a non-deploying interface + reference. No what-if output or Azure deployment is claimed. +- Every included artifact digest and immutable source SHA is checked offline in + tests and CI. + +The canonical `artifact-manifest.json` and `verification-result.json` conform to +the vendored `cas-contracts` `ArtifactManifest` and `VerificationResult` +schemas. `bundle.json` maps their URN evidence identifiers to committed files +and records the claim boundaries. + +## Container Digest Boundary + +The bundle does not claim a container image digest. The repository builds a +local image in CI, but it does not publish a reproducible registry artifact +whose digest can be independently resolved. The canonical verification result +therefore marks this check as `skipped` rather than substituting a mutable tag +or local image ID. + +## Verify + +```powershell +./scripts/validate.ps1 +./scripts/verify-evidence.ps1 +``` + +The verifier is network-free. It fails when an artifact changes, a digest is +malformed, a source URI is not pinned to its full commit SHA, the golden result +does not pass exactly one case, or the platform evidence claims deployment. 
diff --git a/evidence/verified-local-golden-path-v0.1/artifact-manifest.json b/evidence/verified-local-golden-path-v0.1/artifact-manifest.json new file mode 100644 index 0000000..64a103c --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/artifact-manifest.json @@ -0,0 +1,43 @@ +{ + "kind": "ArtifactManifest", + "correlationId": "cas-reference-product-golden-v0.1", + "promptId": "reference-product-golden-workflow", + "runId": "cas-reference-product-golden-v0.1", + "repo": "Coding-Autopilot-System/cas-reference-product", + "actor": { + "id": "evidence-bundle-ci", + "type": "workflow" + }, + "timestamp": "2026-06-13T00:00:00Z", + "schemaVersion": "0.1.0", + "traceContext": { + "traceparent": "00-0f32a2f22b960e873c92cecabc6f8dea-afb7553009ebcb9d-01" + }, + "artifacts": [ + { + "kind": "source-provenance", + "uri": "urn:cas-evidence:verified-local-golden-path-v0.1:source-provenance", + "sha256": "d621003fea89b9e0a6af8ce87c1e6f60e42ad9a6df5f775a778dcccbafc6aa18" + }, + { + "kind": "contract-registry-manifest", + "uri": "urn:cas-evidence:verified-local-golden-path-v0.1:contract-registry-manifest", + "sha256": "92a09b0907e08b022f96b89ce09764b0416d268acc38653e08fd335b5e5198f9" + }, + { + "kind": "evaluation-result", + "uri": "urn:cas-evidence:verified-local-golden-path-v0.1:eval-evidence", + "sha256": "6d95d47fbdf422ff8fc3608b53b283e99b3f74b135d3876c7a1a4557dde2a8e7" + }, + { + "kind": "evaluation-fixture", + "uri": "urn:cas-evidence:verified-local-golden-path-v0.1:golden-fixture", + "sha256": "667fc47e19c3910dcc202063d8167f1acf6417e4cb35b705f8862a2886c5188a" + }, + { + "kind": "platform-what-if-reference", + "uri": "urn:cas-evidence:verified-local-golden-path-v0.1:platform-what-if-reference", + "sha256": "d6c57d4fb80aa168025368134ff8022f3a8328a92936503518e460aef03907ad" + } + ] +} diff --git a/evidence/verified-local-golden-path-v0.1/artifacts/contract-registry-manifest.json b/evidence/verified-local-golden-path-v0.1/artifacts/contract-registry-manifest.json new 
file mode 100644 index 0000000..97b19b3 --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/artifacts/contract-registry-manifest.json @@ -0,0 +1,45 @@ +{ + "version": "0.1.0", + "schemas": [ + { + "id": "https://schemas.coding-autopilot.dev/v0.1/artifact-manifest.schema.json", + "path": "artifact-manifest.schema.json", + "sha256": "2864e3ef2c7ab8114395d15a2f6b4b1e832049c0a3a2b8a1165ba287f61caa06" + }, + { + "id": "https://schemas.coding-autopilot.dev/v0.1/common.schema.json", + "path": "common.schema.json", + "sha256": "c7ce72a6f5da8394e48f2421820588a8142546962e05152997bd1e6ced994928" + }, + { + "id": "https://schemas.coding-autopilot.dev/v0.1/evaluation-result.schema.json", + "path": "evaluation-result.schema.json", + "sha256": "be6d3216c95cfa6d2ccda908ff089010765b1c70223a920bfe3cb70a0cd24df5" + }, + { + "id": "https://schemas.coding-autopilot.dev/v0.1/policy-decision.schema.json", + "path": "policy-decision.schema.json", + "sha256": "21a66d651f5c0190fe0b50b81350ab32d29190d1c44719e2584628a8e0e6614f" + }, + { + "id": "https://schemas.coding-autopilot.dev/v0.1/prompt-envelope.schema.json", + "path": "prompt-envelope.schema.json", + "sha256": "baf043344abe6bf9afbc31272bef8034ec46ffc4d3fc799a41fead002b7274d6" + }, + { + "id": "https://schemas.coding-autopilot.dev/v0.1/run-event.schema.json", + "path": "run-event.schema.json", + "sha256": "6e96cb3e690ab4fcbcbe75005b27c14d83595c4951cabc35bed86b41fc2f0a7f" + }, + { + "id": "https://schemas.coding-autopilot.dev/v0.1/verification-result.schema.json", + "path": "verification-result.schema.json", + "sha256": "3ef6085b19c726204b65c9f8fd815a8aa0dcda6a8db82acdbd61e56bce45b4c9" + }, + { + "id": "https://schemas.coding-autopilot.dev/v0.1/work-request.schema.json", + "path": "work-request.schema.json", + "sha256": "7a6d5aee580d0198336c704cd9b154bc6c08bca15ec77be9c5db965cf54a9606" + } + ] +} diff --git a/evidence/verified-local-golden-path-v0.1/artifacts/contracts/artifact-manifest.schema.json 
b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/artifact-manifest.schema.json new file mode 100644 index 0000000..af27f47 --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/artifact-manifest.schema.json @@ -0,0 +1,31 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.coding-autopilot.dev/v0.1/artifact-manifest.schema.json", + "title": "ArtifactManifest", + "type": "object", + "allOf": [ + { + "$ref": "common.schema.json#/$defs/lifecycleMetadata" + }, + { + "type": "object", + "required": [ + "kind", + "artifacts" + ], + "properties": { + "kind": { + "const": "ArtifactManifest" + }, + "artifacts": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "common.schema.json#/$defs/evidence" + } + } + } + } + ], + "unevaluatedProperties": false +} diff --git a/evidence/verified-local-golden-path-v0.1/artifacts/contracts/common.schema.json b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/common.schema.json new file mode 100644 index 0000000..0eec265 --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/common.schema.json @@ -0,0 +1,68 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.coding-autopilot.dev/v0.1/common.schema.json", + "title": "CAS Common Definitions", + "$defs": { + "actor": { + "type": "object", + "additionalProperties": false, + "required": ["id", "type"], + "properties": { + "id": { "type": "string", "minLength": 1, "maxLength": 256 }, + "type": { + "type": "string", + "enum": ["human", "agent", "service", "workflow"] + }, + "displayName": { "type": "string", "minLength": 1, "maxLength": 256 } + } + }, + "traceContext": { + "type": "object", + "additionalProperties": false, + "required": ["traceparent"], + "properties": { + "traceparent": { + "type": "string", + "pattern": "^[\\da-f]{2}-[\\da-f]{32}-[\\da-f]{16}-[\\da-f]{2}$" + }, + "tracestate": { "type": "string", "maxLength": 
512 } + } + }, + "lifecycleMetadata": { + "type": "object", + "required": [ + "correlationId", + "promptId", + "runId", + "repo", + "actor", + "timestamp", + "schemaVersion", + "traceContext" + ], + "properties": { + "correlationId": { "type": "string", "minLength": 1, "maxLength": 128 }, + "promptId": { "type": "string", "minLength": 1, "maxLength": 128 }, + "runId": { "type": "string", "minLength": 1, "maxLength": 128 }, + "repo": { + "type": "string", + "pattern": "^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$" + }, + "actor": { "$ref": "#/$defs/actor" }, + "timestamp": { "type": "string", "format": "date-time" }, + "schemaVersion": { "const": "0.1.0" }, + "traceContext": { "$ref": "#/$defs/traceContext" } + } + }, + "evidence": { + "type": "object", + "additionalProperties": false, + "required": ["kind", "uri"], + "properties": { + "kind": { "type": "string", "minLength": 1, "maxLength": 64 }, + "uri": { "type": "string", "format": "uri" }, + "sha256": { "type": "string", "pattern": "^[\\da-f]{64}$" } + } + } + } +} diff --git a/evidence/verified-local-golden-path-v0.1/artifacts/contracts/evaluation-result.schema.json b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/evaluation-result.schema.json new file mode 100644 index 0000000..719f46a --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/evaluation-result.schema.json @@ -0,0 +1,45 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.coding-autopilot.dev/v0.1/evaluation-result.schema.json", + "title": "EvaluationResult", + "type": "object", + "allOf": [ + { + "$ref": "common.schema.json#/$defs/lifecycleMetadata" + }, + { + "type": "object", + "required": [ + "kind", + "evaluator", + "outcome", + "metrics" + ], + "properties": { + "kind": { + "const": "EvaluationResult" + }, + "evaluator": { + "type": "string", + "minLength": 1, + "maxLength": 256 + }, + "outcome": { + "enum": [ + "passed", + "failed", + "inconclusive" + ] + }, + "metrics": 
{ + "type": "object", + "minProperties": 1, + "additionalProperties": { + "type": "number" + } + } + } + } + ], + "unevaluatedProperties": false +} diff --git a/evidence/verified-local-golden-path-v0.1/artifacts/contracts/policy-decision.schema.json b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/policy-decision.schema.json new file mode 100644 index 0000000..411482f --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/policy-decision.schema.json @@ -0,0 +1,47 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.coding-autopilot.dev/v0.1/policy-decision.schema.json", + "title": "PolicyDecision", + "type": "object", + "allOf": [ + { + "$ref": "common.schema.json#/$defs/lifecycleMetadata" + }, + { + "type": "object", + "required": [ + "kind", + "decision", + "policyVersion", + "reasons" + ], + "properties": { + "kind": { + "const": "PolicyDecision" + }, + "decision": { + "enum": [ + "allow", + "deny", + "require-approval" + ] + }, + "policyVersion": { + "type": "string", + "minLength": 1, + "maxLength": 64 + }, + "reasons": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "minLength": 1, + "maxLength": 1000 + } + } + } + } + ], + "unevaluatedProperties": false +} diff --git a/evidence/verified-local-golden-path-v0.1/artifacts/contracts/prompt-envelope.schema.json b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/prompt-envelope.schema.json new file mode 100644 index 0000000..24a6e2d --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/prompt-envelope.schema.json @@ -0,0 +1,44 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.coding-autopilot.dev/v0.1/prompt-envelope.schema.json", + "title": "PromptEnvelope", + "type": "object", + "allOf": [ + { + "$ref": "common.schema.json#/$defs/lifecycleMetadata" + }, + { + "type": "object", + "required": [ + "kind", + "intent", + "prompt" + 
], + "properties": { + "kind": { + "const": "PromptEnvelope" + }, + "intent": { + "type": "string", + "minLength": 1, + "maxLength": 256 + }, + "prompt": { + "type": "string", + "minLength": 1, + "maxLength": 50000 + }, + "constraints": { + "type": "array", + "items": { + "type": "string", + "minLength": 1, + "maxLength": 1000 + }, + "uniqueItems": true + } + } + } + ], + "unevaluatedProperties": false +} diff --git a/evidence/verified-local-golden-path-v0.1/artifacts/contracts/run-event.schema.json b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/run-event.schema.json new file mode 100644 index 0000000..3f89d28 --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/run-event.schema.json @@ -0,0 +1,48 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.coding-autopilot.dev/v0.1/run-event.schema.json", + "title": "RunEvent", + "type": "object", + "allOf": [ + { + "$ref": "common.schema.json#/$defs/lifecycleMetadata" + }, + { + "type": "object", + "required": [ + "kind", + "eventType", + "sequence", + "status" + ], + "properties": { + "kind": { + "const": "RunEvent" + }, + "eventType": { + "type": "string", + "minLength": 1, + "maxLength": 128 + }, + "sequence": { + "type": "integer", + "minimum": 0 + }, + "status": { + "enum": [ + "queued", + "running", + "succeeded", + "failed", + "cancelled" + ] + }, + "message": { + "type": "string", + "maxLength": 5000 + } + } + } + ], + "unevaluatedProperties": false +} diff --git a/evidence/verified-local-golden-path-v0.1/artifacts/contracts/verification-result.schema.json b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/verification-result.schema.json new file mode 100644 index 0000000..67719cc --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/verification-result.schema.json @@ -0,0 +1,62 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": 
"https://schemas.coding-autopilot.dev/v0.1/verification-result.schema.json", + "title": "VerificationResult", + "type": "object", + "allOf": [ + { + "$ref": "common.schema.json#/$defs/lifecycleMetadata" + }, + { + "type": "object", + "required": [ + "kind", + "outcome", + "checks" + ], + "properties": { + "kind": { + "const": "VerificationResult" + }, + "outcome": { + "enum": [ + "passed", + "failed", + "inconclusive" + ] + }, + "checks": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": [ + "name", + "outcome" + ], + "properties": { + "name": { + "type": "string", + "minLength": 1, + "maxLength": 256 + }, + "outcome": { + "enum": [ + "passed", + "failed", + "skipped" + ] + }, + "evidenceUri": { + "type": "string", + "format": "uri" + } + } + } + } + } + } + ], + "unevaluatedProperties": false +} diff --git a/evidence/verified-local-golden-path-v0.1/artifacts/contracts/work-request.schema.json b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/work-request.schema.json new file mode 100644 index 0000000..367cca8 --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/artifacts/contracts/work-request.schema.json @@ -0,0 +1,54 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.coding-autopilot.dev/v0.1/work-request.schema.json", + "title": "WorkRequest", + "type": "object", + "allOf": [ + { + "$ref": "common.schema.json#/$defs/lifecycleMetadata" + }, + { + "type": "object", + "required": [ + "kind", + "workType", + "objective", + "targetRef", + "riskLevel" + ], + "properties": { + "kind": { + "const": "WorkRequest" + }, + "workType": { + "enum": [ + "analyze", + "change", + "verify", + "repair", + "deploy" + ] + }, + "objective": { + "type": "string", + "minLength": 1, + "maxLength": 5000 + }, + "targetRef": { + "type": "string", + "minLength": 1, + "maxLength": 256 + }, + "riskLevel": { + "enum": [ + "low", + "medium", + "high", + "critical" 
+ ] + } + } + } + ], + "unevaluatedProperties": false +} diff --git a/evidence/verified-local-golden-path-v0.1/artifacts/eval-evidence.json b/evidence/verified-local-golden-path-v0.1/artifacts/eval-evidence.json new file mode 100644 index 0000000..4d2ca59 --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/artifacts/eval-evidence.json @@ -0,0 +1,126 @@ +{ + "evidence": [ + { + "caseId": "reference-product-golden-workflow", + "execution": { + "adapter": "cas-reference-product", + "events": [ + { + "correlationId": "eval-reference-product-golden-workflow", + "eventType": "workflow.started", + "promptId": "reference-product-golden-workflow", + "runId": "cas-reference-product-golden-v0.1", + "sequence": 0, + "status": "running", + "traceContext": { + "traceparent": "00-0f32a2f22b960e873c92cecabc6f8dea-afb7553009ebcb9d-01" + } + }, + { + "correlationId": "eval-reference-product-golden-workflow", + "eventType": "workflow.completed", + "promptId": "reference-product-golden-workflow", + "runId": "cas-reference-product-golden-v0.1", + "sequence": 1, + "status": "succeeded", + "traceContext": { + "traceparent": "00-0f32a2f22b960e873c92cecabc6f8dea-afb7553009ebcb9d-01" + } + } + ], + "lifecycle": { + "correlationId": "eval-reference-product-golden-workflow", + "promptId": "reference-product-golden-workflow", + "runId": "cas-reference-product-golden-v0.1", + "traceContext": { + "traceparent": "00-0f32a2f22b960e873c92cecabc6f8dea-afb7553009ebcb9d-01" + } + }, + "responseDigest": "sha256:41290a8442527585b7deaf674f029c88642225e84ac7f4392d3dec3c52d25504", + "target": "cas-reference-product/api/v1/workflows", + "timing": { + "latencyMs": 100.0, + "normalization": "fixture-observed" + } + }, + "fixtureDigest": "sha256:767c92c216c2ca062959b5183c826808c6a260dfcdc423654a4196643e0d7e2b", + "metrics": { + "cost_usd": { + "details": { + "source": "fixture" + }, + "passed": true, + "threshold": 0.01, + "value": 0.0 + }, + "latency_ms": { + "details": { + "source": "fixture" + }, + 
"passed": true, + "threshold": 1000.0, + "value": 100.0 + }, + "quality": { + "details": { + "expected": [ + "reference workflow accepted", + "golden-path", + "0 constraints" + ], + "matched": [ + "reference workflow accepted", + "golden-path", + "0 constraints" + ] + }, + "passed": true, + "threshold": 1.0, + "value": 1.0 + }, + "safety": { + "details": { + "violations": [] + }, + "passed": true, + "threshold": 1.0, + "value": 1.0 + } + }, + "passed": true + } + ], + "results": [ + { + "actor": { + "id": "cas-evals", + "type": "service" + }, + "correlationId": "eval-reference-product-golden-workflow", + "evaluator": "cas-evals/0.2.0", + "kind": "EvaluationResult", + "metrics": { + "costUsd": 0.0, + "latencyMs": 100.0, + "quality": 1.0, + "safety": 1.0 + }, + "outcome": "passed", + "promptId": "reference-product-golden-workflow", + "repo": "Coding-Autopilot-System/cas-evals", + "runId": "cas-reference-product-golden-v0.1", + "schemaVersion": "0.1.0", + "timestamp": "2026-06-12T00:00:00Z", + "traceContext": { + "traceparent": "00-0f32a2f22b960e873c92cecabc6f8dea-afb7553009ebcb9d-01" + } + } + ], + "schemaVersion": "0.2.0", + "suiteId": "cas-reference-product-golden-v0.1", + "summary": { + "failed": 0, + "passed": 1, + "total": 1 + } +} diff --git a/evidence/verified-local-golden-path-v0.1/artifacts/golden-fixture.json b/evidence/verified-local-golden-path-v0.1/artifacts/golden-fixture.json new file mode 100644 index 0000000..8ae918a --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/artifacts/golden-fixture.json @@ -0,0 +1,26 @@ +{ + "suiteId": "cas-reference-product-golden-v0.1", + "releasedAt": "2026-06-12T00:00:00Z", + "cases": [ + { + "id": "reference-product-golden-workflow", + "kind": "golden", + "capability": "golden-path", + "prompt": "Execute the deterministic reference workflow.", + "response": "Reference workflow accepted 'golden-path' with 0 constraints.", + "expected": { + "keywords": ["reference workflow accepted", "golden-path", "0 
constraints"], + "prohibited": ["failed", "secret"] + }, + "limits": { + "min_quality": 1.0, + "max_cost_usd": 0.01, + "max_latency_ms": 1000 + }, + "observed": { + "cost_usd": 0.0, + "latency_ms": 100 + } + } + ] +} diff --git a/evidence/verified-local-golden-path-v0.1/artifacts/platform-what-if.ps1 b/evidence/verified-local-golden-path-v0.1/artifacts/platform-what-if.ps1 new file mode 100644 index 0000000..3beee9f --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/artifacts/platform-what-if.ps1 @@ -0,0 +1,42 @@ +[CmdletBinding()] +param( + [Parameter(Mandatory)] + [ValidateSet('dev', 'test', 'prod')] + [string] $Environment, + + [string] $Location = 'northeurope', + + [string] $SubscriptionId +) + +$ErrorActionPreference = 'Stop' +$repoRoot = Split-Path -Parent $PSScriptRoot +$parameterFile = Join-Path $repoRoot "infra/parameters/$Environment.bicepparam" + +if (-not (Get-Command az -ErrorAction SilentlyContinue)) { + throw 'Azure CLI is required.' +} + +if ($SubscriptionId) { + az account set --subscription $SubscriptionId + if ($LASTEXITCODE -ne 0) { + throw 'Unable to select Azure subscription.' + } +} + +az account show --output none +if ($LASTEXITCODE -ne 0) { + throw 'Authenticate with Azure CLI before running what-if.' +} + +az deployment sub what-if ` + --name "cas-platform-$Environment-what-if" ` + --location $Location ` + --template-file (Join-Path $repoRoot 'infra/main.bicep') ` + --parameters $parameterFile ` + --no-pretty-print + +if ($LASTEXITCODE -ne 0) { + throw 'Azure what-if failed.' 
+} + diff --git a/evidence/verified-local-golden-path-v0.1/artifacts/source-provenance.json b/evidence/verified-local-golden-path-v0.1/artifacts/source-provenance.json new file mode 100644 index 0000000..68cfefa --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/artifacts/source-provenance.json @@ -0,0 +1,24 @@ +{ + "repositories": [ + { + "repository": "Coding-Autopilot-System/cas-reference-product", + "sha": "f09c271a4e6519693d7cee8f6120bf833ffb987f", + "uri": "https://github.com/Coding-Autopilot-System/cas-reference-product/tree/f09c271a4e6519693d7cee8f6120bf833ffb987f" + }, + { + "repository": "Coding-Autopilot-System/cas-contracts", + "sha": "7f924bfc95dce785442b6b8c02904a8e92fbb63a", + "uri": "https://github.com/Coding-Autopilot-System/cas-contracts/tree/7f924bfc95dce785442b6b8c02904a8e92fbb63a" + }, + { + "repository": "Coding-Autopilot-System/cas-evals", + "sha": "0e14f98384c561c81bea5ef1fb7dc1f7c1b4c105", + "uri": "https://github.com/Coding-Autopilot-System/cas-evals/tree/0e14f98384c561c81bea5ef1fb7dc1f7c1b4c105" + }, + { + "repository": "Coding-Autopilot-System/cas-platform", + "sha": "131135297a6c6fb4544becaa9f1d229615960d8d", + "uri": "https://github.com/Coding-Autopilot-System/cas-platform/tree/131135297a6c6fb4544becaa9f1d229615960d8d" + } + ] +} diff --git a/evidence/verified-local-golden-path-v0.1/bundle.json b/evidence/verified-local-golden-path-v0.1/bundle.json new file mode 100644 index 0000000..83c5768 --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/bundle.json @@ -0,0 +1,67 @@ +{ + "bundleVersion": "1.0.0", + "claim": "One deterministic cas-reference-product local HTTP golden path passed canonical contract and cas-evals verification.", + "deploymentStatus": "not-deployed", + "canonicalRecords": { + "artifactManifest": "artifact-manifest.json", + "verificationResult": "verification-result.json" + }, + "sourceProvenance": { + "path": "artifacts/source-provenance.json", + "sha256": 
"d621003fea89b9e0a6af8ce87c1e6f60e42ad9a6df5f775a778dcccbafc6aa18" + }, + "contractRegistry": { + "path": "artifacts/contract-registry-manifest.json", + "sourceUri": "https://coding-autopilot-system.github.io/cas-contracts/releases/v0.1.0/manifest.json", + "sha256": "92a09b0907e08b022f96b89ce09764b0416d268acc38653e08fd335b5e5198f9" + }, + "evaluation": { + "path": "artifacts/eval-evidence.json", + "fixturePath": "artifacts/golden-fixture.json", + "sourceUri": "https://github.com/Coding-Autopilot-System/cas-evals/tree/0e14f98384c561c81bea5ef1fb7dc1f7c1b4c105", + "sha256": "6d95d47fbdf422ff8fc3608b53b283e99b3f74b135d3876c7a1a4557dde2a8e7" + }, + "platformWhatIf": { + "path": "artifacts/platform-what-if.ps1", + "sourceUri": "https://github.com/Coding-Autopilot-System/cas-platform/blob/131135297a6c6fb4544becaa9f1d229615960d8d/scripts/what-if.ps1", + "sha256": "d6c57d4fb80aa168025368134ff8022f3a8328a92936503518e460aef03907ad", + "deploymentClaim": "not-deployed", + "evidenceScope": "Pinned non-deploying what-if command definition only; no Azure what-if output is claimed." + }, + "containerImage": { + "status": "unavailable", + "reason": "CI builds but does not publish a reproducible registry image; a local image ID is not an immutable cross-repository digest." 
+ }, + "artifacts": [ + { + "kind": "source-provenance", + "uri": "urn:cas-evidence:verified-local-golden-path-v0.1:source-provenance", + "path": "artifacts/source-provenance.json", + "sha256": "d621003fea89b9e0a6af8ce87c1e6f60e42ad9a6df5f775a778dcccbafc6aa18" + }, + { + "kind": "contract-registry-manifest", + "uri": "urn:cas-evidence:verified-local-golden-path-v0.1:contract-registry-manifest", + "path": "artifacts/contract-registry-manifest.json", + "sha256": "92a09b0907e08b022f96b89ce09764b0416d268acc38653e08fd335b5e5198f9" + }, + { + "kind": "evaluation-result", + "uri": "urn:cas-evidence:verified-local-golden-path-v0.1:eval-evidence", + "path": "artifacts/eval-evidence.json", + "sha256": "6d95d47fbdf422ff8fc3608b53b283e99b3f74b135d3876c7a1a4557dde2a8e7" + }, + { + "kind": "evaluation-fixture", + "uri": "urn:cas-evidence:verified-local-golden-path-v0.1:golden-fixture", + "path": "artifacts/golden-fixture.json", + "sha256": "667fc47e19c3910dcc202063d8167f1acf6417e4cb35b705f8862a2886c5188a" + }, + { + "kind": "platform-what-if-reference", + "uri": "urn:cas-evidence:verified-local-golden-path-v0.1:platform-what-if-reference", + "path": "artifacts/platform-what-if.ps1", + "sha256": "d6c57d4fb80aa168025368134ff8022f3a8328a92936503518e460aef03907ad" + } + ] +} diff --git a/evidence/verified-local-golden-path-v0.1/verification-result.json b/evidence/verified-local-golden-path-v0.1/verification-result.json new file mode 100644 index 0000000..b7a2beb --- /dev/null +++ b/evidence/verified-local-golden-path-v0.1/verification-result.json @@ -0,0 +1,43 @@ +{ + "kind": "VerificationResult", + "correlationId": "cas-reference-product-golden-v0.1", + "promptId": "reference-product-golden-workflow", + "runId": "cas-reference-product-golden-v0.1", + "repo": "Coding-Autopilot-System/cas-reference-product", + "actor": { + "id": "evidence-bundle-ci", + "type": "workflow" + }, + "timestamp": "2026-06-13T00:00:01Z", + "schemaVersion": "0.1.0", + "traceContext": { + "traceparent": 
"00-0f32a2f22b960e873c92cecabc6f8dea-afb7553009ebcb9d-01" + }, + "outcome": "passed", + "checks": [ + { + "name": "immutable-source-shas", + "outcome": "passed", + "evidenceUri": "urn:cas-evidence:verified-local-golden-path-v0.1:source-provenance" + }, + { + "name": "contract-registry-manifest-digest", + "outcome": "passed", + "evidenceUri": "urn:cas-evidence:verified-local-golden-path-v0.1:contract-registry-manifest" + }, + { + "name": "local-http-golden-path-evaluation", + "outcome": "passed", + "evidenceUri": "urn:cas-evidence:verified-local-golden-path-v0.1:eval-evidence" + }, + { + "name": "platform-what-if-reference-no-deployment", + "outcome": "passed", + "evidenceUri": "urn:cas-evidence:verified-local-golden-path-v0.1:platform-what-if-reference" + }, + { + "name": "reproducible-container-registry-digest", + "outcome": "skipped" + } + ] +} diff --git a/pyproject.toml b/pyproject.toml index c315082..e4ed256 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,8 @@ dev = [ "mypy>=1.14.0", "pytest>=8.3.0", "pytest-cov>=6.0.0", - "ruff>=0.9.0" + "ruff>=0.9.0", + "types-jsonschema>=4.23.0" ] [tool.hatch.build.targets.wheel] diff --git a/scripts/validate.ps1 b/scripts/validate.ps1 index 8d3e9c1..ead45af 100644 --- a/scripts/validate.ps1 +++ b/scripts/validate.ps1 @@ -16,4 +16,5 @@ Invoke-Checked { & .\.venv\Scripts\python.exe -m pip install --disable-pip-versi Invoke-Checked { & .\.venv\Scripts\python.exe -m ruff check . 
"""Offline verification for committed immutable evidence bundles.

Run as ``python -m cas_reference_product.evidence [BUNDLE_ROOT]``. The process
exits 0 when the bundle verifies and 1 when it does not; CI and
``scripts/verify-evidence.ps1`` rely on that exit status.
"""

from __future__ import annotations

import hashlib
import json
import re
import sys
from pathlib import Path
from typing import Any

from jsonschema import Draft202012Validator, FormatChecker
from referencing import Registry, Resource

ROOT = Path(__file__).parents[2]
DEFAULT_BUNDLE = ROOT / "evidence" / "verified-local-golden-path-v0.1"
SHA256_PATTERN = re.compile(r"^[0-9a-f]{64}$")
GIT_SHA_PATTERN = re.compile(r"^[0-9a-f]{40}$")
_CONTAINER_DIGEST_PATTERN = re.compile(r"sha256:[0-9a-f]{64}")
# Descriptor sections that pin exactly one committed file by path and SHA-256.
_PINNED_SECTIONS = ("sourceProvenance", "contractRegistry", "evaluation", "platformWhatIf")


class EvidenceVerificationError(ValueError):
    """Raised when committed evidence does not match its immutable claims."""


# Failures that reading a malformed or tampered bundle can raise. ``main`` turns
# all of them into a one-line message and exit status 1 instead of a traceback.
_BUNDLE_ERRORS = (
    EvidenceVerificationError,
    OSError,  # missing, unreadable, or non-file paths
    LookupError,  # absent keys and empty lists
    TypeError,  # a JSON value of the wrong kind (e.g. a string where an object is expected)
    AttributeError,
    ValueError,  # invalid JSON or invalid UTF-8
)


def _load_json(path: Path) -> dict[str, Any]:
    """Parse *path* as UTF-8 JSON and require the top-level value to be an object."""
    payload = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(payload, dict):
        raise EvidenceVerificationError(f"{path} must contain a JSON object")
    return payload


def _sha256(path: Path) -> str:
    """Return the lowercase hex SHA-256 of the bytes stored at *path*."""
    return hashlib.sha256(path.read_bytes()).hexdigest()


def _inside_bundle(bundle_root: Path, base: Path, relative_path: str) -> Path:
    """Resolve ``base / relative_path`` and reject anything outside *bundle_root*.

    Every path read from the bundle's own JSON goes through this check so a
    descriptor cannot pin a digest to a file that is not part of the bundle.
    """
    resolved = (base / relative_path).resolve()
    if not resolved.is_relative_to(bundle_root.resolve()):
        raise EvidenceVerificationError(f"{relative_path} escapes the bundle root")
    return resolved


def _contract_registry(contract_root: Path) -> Registry[Any]:
    """Index every ``*.schema.json`` file in *contract_root* by its ``$id``."""
    resources = []
    for path in sorted(contract_root.glob("*.schema.json")):
        schema = _load_json(path)
        resources.append((schema["$id"], Resource.from_contents(schema)))
    return Registry().with_resources(resources)


def _validate_contract(
    contract_root: Path,
    schema_name: str,
    instance: dict[str, Any],
    registry: Registry[Any] | None = None,
) -> None:
    """Validate *instance* against the vendored schema named *schema_name*.

    Args:
        contract_root: Directory holding the vendored contract schemas.
        schema_name: File name of the schema inside *contract_root*.
        instance: The JSON object to validate.
        registry: Pre-built schema registry; built from *contract_root* when omitted.

    Raises:
        EvidenceVerificationError: With the first error (ordered by instance path).
    """
    schema = _load_json(contract_root / schema_name)
    validator = Draft202012Validator(
        schema,
        registry=_contract_registry(contract_root) if registry is None else registry,
        format_checker=FormatChecker(),
    )
    errors = sorted(validator.iter_errors(instance), key=lambda error: list(error.path))
    if errors:
        raise EvidenceVerificationError(f"{schema_name}: {errors[0].message}")


def _verify_registry_digests(
    bundle_root: Path, contract_root: Path, descriptor: dict[str, Any]
) -> None:
    """Check each schema listed in the registry manifest against its pinned digest."""
    manifest_path = _inside_bundle(
        bundle_root, bundle_root, descriptor["contractRegistry"]["path"]
    )
    registry_manifest = _load_json(manifest_path)
    for schema_entry in registry_manifest["schemas"]:
        schema_path = _inside_bundle(bundle_root, contract_root, schema_entry["path"])
        if _sha256(schema_path) != schema_entry["sha256"]:
            raise EvidenceVerificationError(f"{schema_entry['path']} registry digest mismatch")


def _verify_artifact_digests(
    bundle_root: Path, descriptor: dict[str, Any], artifact_manifest: dict[str, Any]
) -> None:
    """Check every descriptor artifact digest and its agreement with the manifest."""
    artifacts = descriptor.get("artifacts")
    if not isinstance(artifacts, list) or not artifacts:
        raise EvidenceVerificationError("bundle artifacts must be a non-empty list")

    descriptor_digests: dict[str, str] = {}
    for artifact in artifacts:
        if not isinstance(artifact, dict):
            raise EvidenceVerificationError("bundle artifact entries must be objects")
        relative_path = artifact.get("path")
        expected = artifact.get("sha256")
        if not isinstance(relative_path, str) or not isinstance(expected, str):
            raise EvidenceVerificationError("artifact path and sha256 must be strings")
        if not SHA256_PATTERN.fullmatch(expected):
            raise EvidenceVerificationError(f"{relative_path} has an invalid SHA-256 digest")
        path = _inside_bundle(bundle_root, bundle_root, relative_path)
        if _sha256(path) != expected:
            raise EvidenceVerificationError(f"{relative_path} digest mismatch")
        uri = artifact.get("uri")
        if not isinstance(uri, str):
            raise EvidenceVerificationError(f"{relative_path} must declare a string uri")
        descriptor_digests[uri] = expected

    manifest_digests = {
        artifact["uri"]: artifact.get("sha256") for artifact in artifact_manifest["artifacts"]
    }
    if descriptor_digests != manifest_digests:
        raise EvidenceVerificationError("ArtifactManifest does not match bundle artifacts")


def _verify_section_digests(bundle_root: Path, descriptor: dict[str, Any]) -> None:
    """Check the file pinned by each descriptor section against its digest."""
    for section_name in _PINNED_SECTIONS:
        section = descriptor[section_name]
        path = _inside_bundle(bundle_root, bundle_root, section["path"])
        if section["sha256"] != _sha256(path):
            raise EvidenceVerificationError(f"{section_name} descriptor digest mismatch")


def _verify_source_pins(bundle_root: Path, descriptor: dict[str, Any]) -> None:
    """Require each source repository URI to embed its full 40-hex commit SHA."""
    provenance_path = _inside_bundle(
        bundle_root, bundle_root, descriptor["sourceProvenance"]["path"]
    )
    provenance = _load_json(provenance_path)
    for source in provenance["repositories"]:
        sha = source["sha"]
        uri = source["uri"]
        pinned = (
            isinstance(sha, str)
            and isinstance(uri, str)
            and GIT_SHA_PATTERN.fullmatch(sha) is not None
            and sha in uri
        )
        if not pinned:
            raise EvidenceVerificationError(f"invalid immutable source reference: {source}")


def _verify_evaluation(
    bundle_root: Path,
    contract_root: Path,
    descriptor: dict[str, Any],
    registry: Registry[Any],
) -> None:
    """Check the golden-path evaluation summary, results, and evidence digests."""
    section = descriptor["evaluation"]
    evaluation = _load_json(_inside_bundle(bundle_root, bundle_root, section["path"]))
    if evaluation.get("summary") != {"failed": 0, "passed": 1, "total": 1}:
        raise EvidenceVerificationError("golden path evaluation did not pass exactly one case")
    if evaluation.get("suiteId") != "cas-reference-product-golden-v0.1":
        raise EvidenceVerificationError("unexpected golden path evaluation suite")
    for result in evaluation["results"]:
        _validate_contract(contract_root, "evaluation-result.schema.json", result, registry)

    fixture = _load_json(_inside_bundle(bundle_root, bundle_root, section["fixturePath"]))
    case = fixture["cases"][0]
    # The fixture digest covers the case re-serialized with sorted keys and no
    # insignificant whitespace, not the bytes of the fixture file.
    canonical_case = json.dumps(case, sort_keys=True, separators=(",", ":")).encode("utf-8")
    eval_evidence = evaluation["evidence"][0]
    if eval_evidence["fixtureDigest"] != f"sha256:{hashlib.sha256(canonical_case).hexdigest()}":
        raise EvidenceVerificationError("evaluation fixture digest mismatch")
    response = case["response"].encode("utf-8")
    response_digest = f"sha256:{hashlib.sha256(response).hexdigest()}"
    if eval_evidence["execution"]["responseDigest"] != response_digest:
        raise EvidenceVerificationError("evaluation response digest mismatch")


def _verify_claim_boundaries(descriptor: dict[str, Any]) -> None:
    """Reject deployment claims and container digests the bundle cannot back up."""
    platform = descriptor["platformWhatIf"]
    if platform.get("deploymentClaim") != "not-deployed":
        raise EvidenceVerificationError("platform what-if evidence must not claim deployment")

    container = descriptor["containerImage"]
    if container.get("status") == "available":
        digest = container.get("digest", "")
        if not isinstance(digest, str) or not _CONTAINER_DIGEST_PATTERN.fullmatch(digest):
            raise EvidenceVerificationError("available container image requires a valid digest")
    elif "digest" in container:
        raise EvidenceVerificationError("unavailable container image must not claim a digest")


def verify_bundle(bundle_root: Path = DEFAULT_BUNDLE) -> None:
    """Verify every committed digest, source pin, canonical record, and claim boundary.

    Checks run in a fixed order: registry schema digests, schema validation of
    the canonical records, artifact digests, section digests, source pins, the
    evaluation evidence, the claim boundaries, and finally the recorded outcome.
    Paths read from the bundle's JSON must resolve inside *bundle_root*.

    Args:
        bundle_root: Directory containing ``bundle.json`` and its artifacts.

    Raises:
        EvidenceVerificationError: When a check fails.
        OSError, KeyError, json.JSONDecodeError: When a referenced file or key
            is missing or a file is not valid JSON.
    """
    descriptor = _load_json(bundle_root / "bundle.json")
    artifact_manifest = _load_json(bundle_root / "artifact-manifest.json")
    verification_result = _load_json(bundle_root / "verification-result.json")
    contract_root = bundle_root / "artifacts" / "contracts"

    _verify_registry_digests(bundle_root, contract_root, descriptor)

    # Built once, and only after the schema digests above have been checked.
    registry = _contract_registry(contract_root)
    _validate_contract(contract_root, "artifact-manifest.schema.json", artifact_manifest, registry)
    _validate_contract(
        contract_root, "verification-result.schema.json", verification_result, registry
    )

    _verify_artifact_digests(bundle_root, descriptor, artifact_manifest)
    _verify_section_digests(bundle_root, descriptor)
    _verify_source_pins(bundle_root, descriptor)
    _verify_evaluation(bundle_root, contract_root, descriptor, registry)
    _verify_claim_boundaries(descriptor)

    if verification_result["outcome"] != "passed":
        raise EvidenceVerificationError("canonical VerificationResult must pass")


def main() -> int:
    """Verify the bundle named by ``sys.argv[1]`` (default: the committed bundle).

    Returns:
        0 when verification succeeds, 1 after printing the failure to stderr.
    """
    bundle_root = Path(sys.argv[1]) if len(sys.argv) > 1 else DEFAULT_BUNDLE
    try:
        verify_bundle(bundle_root)
    except _BUNDLE_ERRORS as error:
        print(f"evidence verification failed: {error}", file=sys.stderr)
        return 1
    print(f"verified immutable evidence bundle: {bundle_root}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
"""Tamper tests for the offline evidence-bundle verifier.

Each test copies the committed bundle into ``tmp_path``, changes one thing, and
asserts the specific verification error that change must trigger.
"""

import hashlib
import json
from pathlib import Path
from typing import Any

import pytest

from cas_reference_product.evidence import DEFAULT_BUNDLE, EvidenceVerificationError, verify_bundle

# Descriptor sections that pin a single file by ``path`` and ``sha256``.
PINNED_SECTIONS = ("sourceProvenance", "contractRegistry", "evaluation", "platformWhatIf")


def copy_bundle(tmp_path: Path) -> Path:
    """Copy every file of the committed bundle into ``tmp_path / "bundle"``."""
    bundle = tmp_path / "bundle"
    bundle.mkdir()
    for source in DEFAULT_BUNDLE.rglob("*"):
        if source.is_file():
            destination = bundle / source.relative_to(DEFAULT_BUNDLE)
            destination.parent.mkdir(parents=True, exist_ok=True)
            destination.write_bytes(source.read_bytes())
    return bundle


def read_json(path: Path) -> dict[str, Any]:
    """Load a JSON object from *path*."""
    payload: dict[str, Any] = json.loads(path.read_text(encoding="utf-8"))
    return payload


def write_json(path: Path, payload: dict[str, object]) -> None:
    """Write *payload* to *path* as indented JSON with a trailing newline."""
    path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")


def repin_artifact(bundle: Path, relative_path: str) -> None:
    """Refresh every SHA-256 that pins *relative_path* after a test edits that file.

    Entries are located by path (descriptor) and by URI (manifest) rather than
    by list position, so the tests do not depend on artifact ordering.
    """
    target = (bundle / relative_path).resolve()
    digest = hashlib.sha256(target.read_bytes()).hexdigest()

    descriptor_path = bundle / "bundle.json"
    descriptor = read_json(descriptor_path)
    repinned_uris = set()
    for artifact in descriptor["artifacts"]:
        if (bundle / artifact["path"]).resolve() == target:
            artifact["sha256"] = digest
            repinned_uris.add(artifact["uri"])
    for section_name in PINNED_SECTIONS:
        section = descriptor[section_name]
        if (bundle / section["path"]).resolve() == target:
            section["sha256"] = digest
    write_json(descriptor_path, descriptor)

    manifest_path = bundle / "artifact-manifest.json"
    manifest = read_json(manifest_path)
    for artifact in manifest["artifacts"]:
        if artifact["uri"] in repinned_uris:
            artifact["sha256"] = digest
    write_json(manifest_path, manifest)


def change_fixture_response(bundle: Path) -> dict[str, Any]:
    """Change the first fixture case's response, re-pin the file, return the case."""
    fixture_relative = read_json(bundle / "bundle.json")["evaluation"]["fixturePath"]
    fixture_path = bundle / fixture_relative
    fixture = read_json(fixture_path)
    fixture["cases"][0]["response"] = "changed"
    write_json(fixture_path, fixture)
    repin_artifact(bundle, fixture_relative)
    case: dict[str, Any] = fixture["cases"][0]
    return case


def test_committed_immutable_evidence_bundle_verifies() -> None:
    verify_bundle()


def test_changed_artifact_fails_digest_verification(tmp_path: Path) -> None:
    bundle = copy_bundle(tmp_path)

    descriptor = read_json(bundle / "bundle.json")
    changed = bundle / descriptor["evaluation"]["path"]
    changed.write_text("{}\n", encoding="utf-8")

    with pytest.raises(EvidenceVerificationError, match="digest mismatch"):
        verify_bundle(bundle)


def test_descriptor_digest_drift_fails_verification(tmp_path: Path) -> None:
    bundle = copy_bundle(tmp_path)
    descriptor_path = bundle / "bundle.json"
    descriptor = read_json(descriptor_path)
    descriptor["sourceProvenance"]["sha256"] = "0" * 64
    write_json(descriptor_path, descriptor)

    with pytest.raises(
        EvidenceVerificationError,
        match="sourceProvenance descriptor digest mismatch",
    ):
        verify_bundle(bundle)


def test_platform_evidence_cannot_claim_deployment(tmp_path: Path) -> None:
    bundle = copy_bundle(tmp_path)
    descriptor_path = bundle / "bundle.json"
    descriptor = read_json(descriptor_path)
    descriptor["platformWhatIf"]["deploymentClaim"] = "deployed"
    write_json(descriptor_path, descriptor)

    with pytest.raises(EvidenceVerificationError, match="must not claim deployment"):
        verify_bundle(bundle)


def test_unavailable_container_cannot_claim_digest(tmp_path: Path) -> None:
    bundle = copy_bundle(tmp_path)
    descriptor_path = bundle / "bundle.json"
    descriptor = read_json(descriptor_path)
    descriptor["containerImage"]["digest"] = f"sha256:{'0' * 64}"
    write_json(descriptor_path, descriptor)

    with pytest.raises(EvidenceVerificationError, match="must not claim a digest"):
        verify_bundle(bundle)


def test_canonical_manifest_must_match_descriptor(tmp_path: Path) -> None:
    bundle = copy_bundle(tmp_path)
    manifest_path = bundle / "artifact-manifest.json"
    manifest = read_json(manifest_path)
    manifest["artifacts"].pop()
    write_json(manifest_path, manifest)

    with pytest.raises(EvidenceVerificationError, match="ArtifactManifest does not match"):
        verify_bundle(bundle)


def test_published_contract_registry_digest_is_mandatory(tmp_path: Path) -> None:
    bundle = copy_bundle(tmp_path)
    contract = bundle / "artifacts" / "contracts" / "artifact-manifest.schema.json"
    contract.write_text("{}\n", encoding="utf-8")

    with pytest.raises(EvidenceVerificationError, match="registry digest mismatch"):
        verify_bundle(bundle)


def test_canonical_verification_result_schema_is_mandatory(tmp_path: Path) -> None:
    bundle = copy_bundle(tmp_path)
    result_path = bundle / "verification-result.json"
    result = read_json(result_path)
    result["outcome"] = "unverified"
    write_json(result_path, result)

    with pytest.raises(EvidenceVerificationError, match="verification-result.schema.json"):
        verify_bundle(bundle)


def test_evaluation_fixture_digest_is_mandatory(tmp_path: Path) -> None:
    bundle = copy_bundle(tmp_path)
    # The file digest is re-pinned, so only the evidence's canonical-case digest is stale.
    change_fixture_response(bundle)

    with pytest.raises(EvidenceVerificationError, match="evaluation fixture digest mismatch"):
        verify_bundle(bundle)


def test_evaluation_response_digest_is_mandatory(tmp_path: Path) -> None:
    bundle = copy_bundle(tmp_path)
    case = change_fixture_response(bundle)

    # Bring the recorded fixture digest up to date as well, so verification gets
    # past the fixture check and the stale response digest is what fails.
    evaluation_relative = read_json(bundle / "bundle.json")["evaluation"]["path"]
    evaluation_path = bundle / evaluation_relative
    evaluation = read_json(evaluation_path)
    canonical_case = json.dumps(case, sort_keys=True, separators=(",", ":")).encode("utf-8")
    fixture_digest = hashlib.sha256(canonical_case).hexdigest()
    evaluation["evidence"][0]["fixtureDigest"] = f"sha256:{fixture_digest}"
    write_json(evaluation_path, evaluation)
    repin_artifact(bundle, evaluation_relative)

    with pytest.raises(EvidenceVerificationError, match="evaluation response digest mismatch"):
        verify_bundle(bundle)