diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 1e9b0a4bd..0b3f65d25 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -19,6 +19,7 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 15 permissions: + actions: read security-events: write contents: read diff --git a/.github/workflows/e2e-tests-full.yml b/.github/workflows/e2e-tests-full.yml index 50528a8b3..8c95c1015 100644 --- a/.github/workflows/e2e-tests-full.yml +++ b/.github/workflows/e2e-tests-full.yml @@ -30,7 +30,7 @@ jobs: steps: - uses: actions/checkout@v6 with: - ref: ${{ github.event_name == 'workflow_dispatch' && github.ref || 'main' }} + ref: main - uses: actions/setup-node@v6 with: node-version: '20.x' @@ -49,7 +49,7 @@ jobs: id: aws run: echo "account_id=$(aws sts get-caller-identity --query Account --output text)" >> "$GITHUB_OUTPUT" - name: Get API keys from Secrets Manager - uses: aws-actions/aws-secretsmanager-get-secrets@v3 + uses: aws-actions/aws-secretsmanager-get-secrets@v2 with: secret-ids: | E2E,${{ secrets.E2E_SECRET_ARN }} diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml index 025b7b600..eb93e98c4 100644 --- a/.github/workflows/e2e-tests.yml +++ b/.github/workflows/e2e-tests.yml @@ -73,7 +73,7 @@ jobs: id: aws run: echo "account_id=$(aws sts get-caller-identity --query Account --output text)" >> "$GITHUB_OUTPUT" - name: Get API keys from Secrets Manager - uses: aws-actions/aws-secretsmanager-get-secrets@v3 + uses: aws-actions/aws-secretsmanager-get-secrets@v2 with: secret-ids: | E2E,${{ secrets.E2E_SECRET_ARN }} diff --git a/.github/workflows/slack-issue-notification.yml b/.github/workflows/slack-issue-notification.yml index 4f2b76597..1d3bbc4ee 100644 --- a/.github/workflows/slack-issue-notification.yml +++ b/.github/workflows/slack-issue-notification.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Send issue details to Slack - uses: slackapi/slack-github-action@v3.0.1 + uses: 
slackapi/slack-github-action@v2.1.1 with: webhook: ${{ secrets.SLACK_WEBHOOK_URL }} webhook-type: webhook-trigger diff --git a/.github/workflows/slack-open-prs-notification.yml b/.github/workflows/slack-open-prs-notification.yml index 51ec8078b..82330527b 100644 --- a/.github/workflows/slack-open-prs-notification.yml +++ b/.github/workflows/slack-open-prs-notification.yml @@ -40,7 +40,7 @@ jobs: ); - name: Send open PRs summary to Slack - uses: slackapi/slack-github-action@v3.0.1 + uses: slackapi/slack-github-action@v2.1.1 with: webhook: ${{ secrets.SLACK_OPEN_PRS_WEBHOOK_URL }} webhook-type: webhook-trigger diff --git a/.github/workflows/sync-from-public.yml b/.github/workflows/sync-from-public.yml new file mode 100644 index 000000000..94e279079 --- /dev/null +++ b/.github/workflows/sync-from-public.yml @@ -0,0 +1,104 @@ +name: Sync from Public Repo + +on: + schedule: + - cron: '0 */6 * * *' # Every 6 hours + workflow_dispatch: # Manual trigger via Actions tab + +permissions: + contents: write + pull-requests: write + +jobs: + sync: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Configure Git + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + - name: Fetch public main + run: | + git remote add public https://github.com/aws/agentcore-cli.git + git fetch public main + + - name: Sync main with public/main + run: | + git checkout main + git reset --hard origin/main + + # Check if public/main is already merged + if git merge-base --is-ancestor public/main HEAD; then + echo "✅ main is already up to date with public/main" + exit 0 + fi + + # Merge but exclude .github/workflows/ (GITHUB_TOKEN lacks workflow permission) + if git merge public/main --no-commit --no-ff; then + git checkout HEAD -- .github/workflows/ 2>/dev/null || true + git commit -m "chore: sync main with public/main" + git push origin main 
+ echo "✅ main synced successfully" + else + echo "⚠️ Conflict detected in main" + + # Capture conflicted files before aborting + conflicted_files=$(git diff --name-only --diff-filter=U 2>/dev/null || echo "Unable to determine conflicted files") + git merge --abort + + # Check if a sync PR already exists + existing_pr=$(gh pr list --base "main" --search "Merge public/main" --state open --json number --jq '.[0].number' 2>/dev/null || echo "") + + if [ -n "$existing_pr" ]; then + echo "ℹ️ PR #$existing_pr already exists, skipping" + exit 0 + fi + + conflict_branch="sync-conflict-main-$(date +%Y%m%d-%H%M%S)" + git checkout -b "$conflict_branch" + + git merge public/main --no-commit --no-ff || true + git checkout HEAD -- .github/workflows/ 2>/dev/null || true + git add -A + git commit -m "chore: sync main with public/main (conflicts present) + + This automated sync detected merge conflicts that require manual resolution. + + Source: public/main (https://github.com/aws/agentcore-cli) + Target: main + + Please resolve conflicts and merge this PR." || true + + git push origin "$conflict_branch" + + gh pr create \ + --title "🔀 [Sync Conflict] Merge public/main → main" \ + --body "## Automated Sync Conflict + + This PR was automatically created because merging \`public/main\` into \`main\` encountered conflicts. + + **Source:** \`main\` from [aws/agentcore-cli](https://github.com/aws/agentcore-cli) + **Target:** \`main\` + + ### Action Required + 1. \`git fetch origin && git checkout $conflict_branch\` + 2. Resolve merge conflicts + 3. \`git add . && git commit\` + 4. \`git push origin $conflict_branch\` + 5. 
Merge this PR + + ### Files with Conflicts + \`\`\` + $conflicted_files + \`\`\`" \ + --base "main" \ + --head "$conflict_branch" || echo "⚠️ Failed to create PR" + fi + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/CHANGELOG.md b/CHANGELOG.md index ef9b03949..dc46b34ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,102 @@ All notable changes to this project will be documented in this file. +## [1.0.0-preview.1.0] - 2026-04-22 + +### Added +- feat: upgrade @aws/agentinspector to 0.2.0 (#919) (35ec0c1) +- feat: remove inline_function tool approval — all tools execute automatically (#119) (06deb79) +- feat: make update command aware of preview dist-tag (#118) (b8691c3) +- feat: add Custom JWT inbound auth support for harnesses (#109) (54b3f4e) +- feat: skip deploy when project config unchanged (#111) (fc764a6) +- feat: add --with-invoke-script and tools in TUI wizard (#105) (25466d3) +- feat: auto-deploy resources before dev server starts (#101) (202b7e9) +- feat: add session storage support for harnesses (#102) (c1d97f8) +- feat: add tool management commands for harnesses (#100) (26ea7b1) +- feat: add --harness support to traces and logs commands (#98) (765f9d5) +- feat: display harnesses in status screen (#96) (7a5305c) +- feat: add session filesystem storage support (#893) (b97e337) +- feat: add container support to harness create/add (#93) (10d8dd8) +- feat: add harness validation to validate command (#91) (08e4f2f) +- feat: add harness invoke support to CLI and TUI (#89) (c87c8af) +- feat: harness create/add with deploy and bug fixes (#90) (e77dc2d) +- feat: harness deploy support (#87) (cf762bd) +- feat: add shared SigV4 API client and Harness HTTP operations (#84) (89fed85) +- feat: add sync workflow (d15ce2e) + +### Fixed +- fix: update asset snapshots and fix useDevDeploy skip test (#921) (1bde086) +- fix: use nullish coalescing operator in CreateScreen (#918) (e850c9e) +- fix: revert vended CDK dep to ^0.1.0-alpha.19 (#910) (34b41e7) +- 
fix: show tools in harness review screen and wire up invoke --tools (#127) (7889e84) +- fix: align harness networkConfiguration with Smithy model (#126) (5f6f98c) +- fix: align harness model config with Smithy API and add invoke overrides (#122) (34b7639) +- fix: harness bug bash fixes — status duplicate, provider names, session storage, fetch access (#123) (3115e02) +- fix: avoid double JSON parse in harness invocation, extract parseToolResponseRequest (#125) (969cfe6) +- fix: add deploy hash check to CLI deploy path (#120) (4fe3ab7) +- fix: harness add bugs - no deploy, project name prefix, interactive mode (#116) (1c74d43) +- fix: auto-populate aws-targets.json on dev deploy (#117) (df03fe5) +- fix: harness TUI copy and usability improvements (#113) (ddfbfdb) +- fix: add harness to TUI remove flow and CLI status output (#112) (5d625a5) +- fix: TUI deploy not persisting deployed state (#110) (c2e206e) +- fix: add default memory strategies to harness memory creation (#108) (d9d1f81) +- fix: harness deploy bugfixes and model update (#99) (47882d0) +- fix: propagate sessionId as A2A contextId in Inspector proxy (#892) (08d452e) +- fix: make harnesses field optional in AgentCoreProjectSpec (#86) (62c432c) +- fix: codeql permissions (05258f3) +- fix: only sync to main branch (2acb841) +- fix: formatting (7d35986) + +### Documentation +- docs: add harness documentation and fix inline_function help text (#124) (1a1de9b) + +### Other Changes +- chore: bump version to 1.0.0-preview.0 and pin agentcore-cdk to alpha.20 (#128) (2c42299) +- Move Memory out of Advanced settings in harness wizard (#121) (5917220) +- removing bad files (971004c) +- fix(tui): polish harness wizard ordering, help text, and summary (#115) (dd2c8b1) +- avi harness progress (#114) (aa4e8b9) +- Merge origin/feat/harness-implementation and fix eslint errors (2ea3b6b) +- Merge remote-tracking branch 'origin/main' into feat/harness-implementation (5046f61) +- chore: sync main with public/main (7e836e4) 
+- fix(invoke): pass session ID to local invoke log files (#894) (e966cb6) +- chore(schema): regenerate JSON schema with harnesses registry (902c95c) +- feat(schema): add harnesses[] registry to AgentCoreProjectSpec (9eab540) +- chore: sync main with public/main (49932f2) +- feat(schema): add Harness primitive schema definitions (#85) (9151a14) +- chore: sync main with public/main (c5e80ca) +- chore: sync main with public/main (d3ad7f8) +- chore: sync main with public/main (874814a) +- chore: sync main with public/main (7877ead) +- chore: sync main with public/main (eeb444c) +- chore: sync main with public/main (ff86dc1) +- chore: sync main with public/main (50b389a) +- chore: sync main with public/main (b1f9711) +- chore: sync main with public/main (5d4bd38) +- chore: sync main with public/main (e9ab06e) +- chore: sync main with public/main (08872f5) +- chore: sync main with public/main v0.8.0 (63e05fd) +- chore: sync main with public/main (493e1f9) +- chore: sync main with public/main (5af06f5) +- chore: sync main with public/main (832a5c3) +- chore: sync main with public/main (584fd0e) +- fix(ci): exclude .github/workflows/ from public repo sync (#54) (37a0ff8) +- Merge remote-tracking branch 'public/main' (d3f6c81) +- chore: sync main with public/main (c2121ec) +- chore: sync main with public/main (d0c495f) +- chore: sync main with public/main (93d7bbc) +- chore: sync main with public/main (cfd1cdb) +- Merge remote-tracking branch 'origin/main' (ee71ff3) +- chore: sync main with public/main (4c2c674) +- chore: sync main with public/main (1d64fd8) +- chore: sync main with public/main (11ec86e) +- chore: sync main with public/main (6054e95) +- chore: sync main with public/main (cc83d81) +- chore: sync main with public/main (1a361af) +- Merge remote-tracking branch 'public/main' (c2bef91) +- chore: sync main with public/main (c6099d4) +- Merge pull request #1 from aws/aalpert/workflow (216140f) + ## [0.9.1] - 2026-04-17 ## [0.9.0] - 2026-04-17 diff --git 
a/create-bundle.mjs b/create-bundle.mjs new file mode 100644 index 000000000..89bf1d221 --- /dev/null +++ b/create-bundle.mjs @@ -0,0 +1,53 @@ +import { Sha256 } from '@aws-crypto/sha256-js'; +import { defaultProvider } from '@aws-sdk/credential-provider-node'; +import { SignatureV4 } from '@aws-sdk/signature-v4'; +import { randomUUID } from 'crypto'; + +const region = 'us-east-1'; +const endpoint = `https://gamma.${region}.elcapcp.genesis-primitives.aws.dev`; + +const body = JSON.stringify({ + bundleName: 'test_rec_bundle', + description: 'Test bundle for recommendation', + clientToken: randomUUID(), + components: { + ['arn:aws:bedrock-agentcore:us-east-1:998846730471:runtime/myproject_MyAgent-QMd093Gl4O']: { + configuration: { + system_prompt: 'You are a helpful assistant that helps users.', + modelId: 'anthropic.claude-sonnet-4-20250514', + }, + }, + }, + branchName: 'mainline', + commitMessage: 'Initial version for rec test', +}); + +const signer = new SignatureV4({ + credentials: defaultProvider(), + region, + service: 'bedrock-agentcore-control', + sha256: Sha256, +}); + +const url = new URL('/configuration-bundles/create', endpoint); +const request = { + method: 'POST', + hostname: url.hostname, + path: url.pathname, + headers: { + 'content-type': 'application/json', + host: url.hostname, + }, + body, +}; + +const signed = await signer.sign(request); +const resp = await fetch(`${endpoint}/configuration-bundles/create`, { + method: 'POST', + headers: signed.headers, + body, +}); + +const data = await resp.text(); +console.log(`Status: ${resp.status}`); +console.log(data); diff --git a/docs/evo-bug-bash.md b/docs/evo-bug-bash.md new file mode 100644 index 000000000..03f1c5655 --- /dev/null +++ b/docs/evo-bug-bash.md @@ -0,0 +1,430 @@ +# EVO Feature Bug Bash — `feat/evo-implementation` + +**Branch:** `feat/evo-implementation` **PRs merged:** #37, #46 (Config Bundles), #45 (Recommendations), #26 (Batch +Evaluation) **Total changes:** 92 files, ~9,000 lines added 
+ +--- + +## Installation + +### Option A: Install from tarball (recommended for testers) + +Download the tarball (`aws-agentcore-0.5.1-evo-pb-bug-bash.tgz`) and install globally: + +```bash +npm install -g ./aws-agentcore-0.5.1-evo-pb-bug-bash.tgz +``` + +Verify installation: + +```bash +agentcore --version +# Expected: 0.5.1-evo-pb-bug-bash +``` + +To uninstall after testing: + +```bash +npm uninstall -g @aws/agentcore +``` + +### Option B: Install from tarball URL + +If the tarball is hosted (e.g. S3 presigned URL, internal artifact store): + +```bash +npm install -g https://<host>/aws-agentcore-0.5.1-evo-pb-bug-bash.tgz +``` + +### Option C: Build from source + +```bash +git clone https://github.com/aws/private-agentcore-cli-staging.git +cd private-agentcore-cli-staging +git checkout feat/evo-implementation +npm install +npm run build +npm link +``` + +Verify: + +```bash +agentcore --version +# Expected: 0.5.1-evo-pb-bug-bash +``` + +To unlink after testing: + +```bash +npm unlink -g @aws/agentcore +``` + +--- + +## Prerequisites + +1. AWS credentials configured (SSO or environment variables) +2. An AgentCore project with at least one deployed agent (`agentcore deploy`) +3. Agent must have recent invocations (sessions/traces in CloudWatch) + +### Targeting Gamma us-east-1 + +All EVO APIs (batch eval, recommendations, config bundles) use **raw HTTP + SigV4** — not the AWS SDK. 
Endpoint +selection is controlled by `AGENTCORE_STAGE` env var: + +| `AGENTCORE_STAGE` | Data Plane (DP) | Control Plane (CP) | +| ----------------- | --------------------------------------------------- | --------------------------------------------------- | +| _(unset)_ | `bedrock-agentcore.{region}.amazonaws.com` | `bedrock-agentcore-control.{region}.amazonaws.com` | +| `gamma` | `gamma.{region}.elcapdp.genesis-primitives.aws.dev` | `gamma.{region}.elcapcp.genesis-primitives.aws.dev` | +| `beta` | `beta.{region}.elcapdp.genesis-primitives.aws.dev` | `beta.{region}.elcapcp.genesis-primitives.aws.dev` | + +**Set these before running any test:** + +```bash +export AGENTCORE_STAGE=gamma +export AWS_REGION=us-east-1 +``` + +Or pass `--region us-east-1` on individual commands. The `AGENTCORE_STAGE` env var **must** be set — there is no +`--stage` CLI flag. + +**Which APIs hit which plane:** + +- **Batch Evaluation** → DP (`elcapdp` / `bedrock-agentcore`) +- **Recommendations** → DP (`elcapdp` / `bedrock-agentcore`) +- **Config Bundles** → CP (`elcapcp` / `bedrock-agentcore-control`) + +--- + +## Feature 1: Config Bundles + +Config bundles let you version and track configuration changes for deployed agents. 
+ +### New Commands + +| Command | Description | +| -------------------------------------------------------------------- | ------------------------------------------------ | +| `agentcore add config-bundle` | Add a config bundle to the project (TUI wizard) | +| `agentcore remove config-bundle` | Remove a config bundle from the project | +| `agentcore config-bundle versions --bundle <name>` | List version history | +| `agentcore config-bundle diff --bundle <name> --from <v1> --to <v2>` | Diff two versions | +| `agentcore deploy` | Deploys config bundles alongside other resources | + +### Test Flows + +#### 1.1 — Add config bundle via TUI + +- [ ] Run `agentcore` (interactive TUI) +- [ ] Navigate to **Add** → select **Config Bundle** +- [ ] Walk through wizard: name, description, components +- [ ] Verify `agentcore.json` now contains the config bundle entry +- [ ] Verify Esc/back navigation works at each step + +#### 1.2 — Add config bundle via CLI + +- [ ] Run `agentcore add config-bundle` with flags (if supported) +- [ ] Verify it writes to `agentcore.json` correctly + +#### 1.3 — Deploy config bundle + +- [ ] Run `agentcore deploy` +- [ ] Verify config bundle is created in AWS (check CloudFormation outputs or `agentcore status`) +- [ ] Verify deployed state tracks `configBundleId` and `configBundleArn` + +#### 1.4 — List version history + +- [ ] `agentcore config-bundle versions --bundle <name>` — verify versions are listed with branch, creator, timestamp +- [ ] `agentcore config-bundle versions --bundle <name> --json` — verify JSON output +- [ ] `agentcore config-bundle versions --bundle <name> --branch main` — verify branch filter +- [ ] `agentcore config-bundle versions --bundle <name> --latest-per-branch` — verify filter +- [ ] `agentcore config-bundle versions --bundle <name> --created-by user` — verify creator filter +- [ ] Test with a bundle that has no versions yet +- [ ] Test with an invalid bundle name (should show error) + +#### 1.5 — Diff two versions + +- [ ] `agentcore config-bundle diff --bundle <name> --from <v1> 
--to <v2>` — verify diff output shows changes +- [ ] `agentcore config-bundle diff --bundle <name> --from <v1> --to <v2> --json` — verify JSON output +- [ ] Diff identical versions — should show "No differences found" +- [ ] Diff with invalid version ID — should show error + +#### 1.6 — Config bundle TUI hub + +- [ ] From TUI, navigate to **Config Bundle** command +- [ ] Verify hub shows options: Version History, Diff +- [ ] Walk through version history screen — verify versions display correctly +- [ ] Walk through diff screen — select two versions, verify diff renders +- [ ] Test Esc/back navigation at each screen + +#### 1.7 — Remove config bundle + +- [ ] `agentcore remove` → select Config Bundle → confirm removal +- [ ] Verify entry removed from `agentcore.json` +- [ ] Verify `agentcore deploy` cleans up the resource + +#### 1.8 — Alias + +- [ ] `agentcore cb versions --bundle <name>` — verify `cb` alias works + +--- + +## Feature 2: Recommendations + +Recommendations analyze agent traces and suggest optimized system prompts or tool descriptions. + +### New Commands + +| Command | Description | +| ----------------------------------- | ---------------------------------------------- | +| `agentcore run recommendation` | Run a recommendation (CLI) | +| `agentcore recommendations history` | View past recommendation runs (local) | +| TUI: **Recommendations** hub | Interactive wizard for running recommendations | + +### Test Flows + +#### 2.1 — Run recommendation via CLI (system prompt) + +- [ ] `````bash + agentcore run recommendation \ + --type system-prompt \ + --agent <agent> \ + --evaluator <evaluator> \ + --inline "You are a helpful assistant..." 
\ + --session-id <session-id> + ``` + ```` + ````` +- [ ] Verify output shows recommendation ID, explanation, and recommended system prompt +- [ ] `--json` flag outputs valid JSON +- [ ] Results saved locally (check `.cli/recommendation-results/`) + +#### 2.2 — Run recommendation via CLI (tool description) + +- [ ] `````bash + agentcore run recommendation \ + --type tool-description \ + --agent <agent> \ + --evaluator <evaluator> \ + --inline "search:Find documents" \ + --tools "search:Find documents,calculator:Compute math" \ + --session-id <session-id> + ``` + ```` + ````` +- [ ] Verify output shows tool-level recommendations + +#### 2.3 — Run recommendation with file input + +- [ ] Create a file with a system prompt +- [ ] `agentcore run recommendation --type system-prompt --agent <agent> --evaluator <evaluator> --prompt-file ./prompt.txt` +- [ ] Verify it reads from file correctly + +#### 2.4 — Run recommendation with CloudWatch traces + +- [ ] `````bash + agentcore run recommendation \ + --type system-prompt \ + --agent <agent> \ + --evaluator <evaluator> \ + --inline "You are helpful" \ + --lookback 7 + ``` + ```` + ````` +- [ ] Verify it discovers traces from CloudWatch (no `--session-id`) + +#### 2.5 — Run recommendation with spans file + +- [ ] Prepare a JSON spans file +- [ ] `agentcore run recommendation --type system-prompt --agent <agent> --evaluator <evaluator> --inline "..." --spans-file ./spans.json` +- [ ] Verify it uses inline spans instead of CloudWatch + +#### 2.6 — Recommendation TUI wizard + +- [ ] From TUI, navigate to **Recommendations** → **Run Recommendation** +- [ ] Step through wizard: + 1. Select type (System Prompt / Tool Description) + 2. Select agent + 3. Select evaluator(s) (multi-select) + 4. Choose input source (inline / file) + 5. Enter content + 6. (If tool-desc) Enter tools + 7. Choose trace source (CloudWatch / Session IDs) + 8. (If CloudWatch) Set lookback days + 9. (If Sessions) Multi-select discovered sessions + 10. 
Confirm and run +- [ ] Verify progress steps display (fetching spans → starting → polling → saving) +- [ ] Verify results screen shows recommendation ID, explanation, recommended content +- [ ] Verify "Run another recommendation" action works +- [ ] Verify Esc/back navigation works at each wizard step + +#### 2.7 — Recommendation TUI — tool description flow + +- [ ] Repeat 2.6 but select "Tool Description" type +- [ ] Verify CloudWatch trace source is disabled (only Sessions available) +- [ ] Verify tool input step appears +- [ ] Verify results show per-tool recommendations + +#### 2.8 — Recommendations history (CLI) + +- [ ] `agentcore recommendations history` — verify table output with date, type, agent, ID +- [ ] `agentcore recommendations history --json` — verify JSON output +- [ ] Run with no prior recommendations — should show helpful message + +#### 2.9 — Recommendations history (TUI) + +- [ ] From TUI, navigate to **Recommendations** → **History** +- [ ] Verify past runs are listed +- [ ] Verify navigation works + +#### 2.10 — Error handling + +- [ ] Run recommendation without `--agent` — should show error +- [ ] Run recommendation without `--evaluator` — should show error +- [ ] Run recommendation with invalid `--type` — should show error +- [ ] Run recommendation with non-existent agent — should show error +- [ ] Run with expired/invalid AWS credentials — should show credentials error + +--- + +## Feature 3: Batch Evaluation + +Batch evaluation runs evaluators against agent sessions in bulk via the DP API. 
+ +### New Commands + +| Command | Description | +| ------------------------------------------- | ------------------------------- | +| `agentcore run batch-evaluation` | Run a batch evaluation (CLI) | +| `agentcore stop batch-evaluation --id <id>` | Stop a running batch evaluation | +| TUI: **Run** → **Batch Evaluation** | Interactive wizard | + +### Test Flows + +#### 3.1 — Run batch evaluation via CLI + +- [ ] `````bash + agentcore run batch-evaluation \ + --agent <agent> \ + --evaluator Builtin.Faithfulness \ + --evaluator Builtin.Helpfulness + ``` + ```` + ````` +- [ ] Verify progress messages print (starting → polling → fetching → saving) +- [ ] Verify output shows evaluator scores grouped by evaluator +- [ ] Results saved locally (check `.cli/eval-job-results/`) +- [ ] `--json` flag outputs valid JSON + +#### 3.2 — Run batch evaluation with options + +- [ ] `--name my-eval-run` — verify custom name appears in output +- [ ] `--region us-west-2` — verify region override works +- [ ] `--execution-role <role-arn>` — verify role is passed (temporary flag) + +#### 3.3 — Stop batch evaluation via CLI + +- [ ] Start a batch evaluation +- [ ] While running, in another terminal: `agentcore stop batch-evaluation --id <id>` +- [ ] Verify success message with ID and status +- [ ] `--json` flag outputs valid JSON +- [ ] Test with invalid ID — should show error + +#### 3.4 — Batch evaluation TUI wizard + +- [ ] From TUI, navigate to **Run** → **Batch Evaluation** +- [ ] Step through wizard: + 1. Select agent + 2. Select evaluator(s) (multi-select) + 3. Choose session source (CloudWatch / Manual) + 4. (If CloudWatch) Set lookback days → multi-select discovered sessions + 5. (If Manual) Enter session IDs + 6. Enter run name (optional) + 7. 
Confirm and run +- [ ] Verify progress steps display with elapsed timer +- [ ] Verify results screen shows scores per evaluator +- [ ] Verify "Run another" action works +- [ ] Verify Esc/back at each step + +#### 3.5 — Batch evaluation TUI — CloudWatch session discovery + +- [ ] Select CloudWatch as session source +- [ ] Verify "Discovering sessions..." loading indicator +- [ ] Verify sessions appear with span counts and timestamps +- [ ] Select multiple sessions → confirm +- [ ] Test with agent that has no sessions — should show error message +- [ ] Test Esc during loading — should go back + +#### 3.6 — Error handling + +- [ ] Run without `--agent` — should show error (required option) +- [ ] Run without `--evaluator` — should show error (required option) +- [ ] Run with non-existent agent — should show resolution error +- [ ] Run with invalid evaluator ID — should show API error +- [ ] Run with expired AWS credentials — should show credentials error + +--- + +## Feature 4: Cross-Feature & General + +### 4.1 — TUI navigation + +- [ ] Open TUI (`agentcore`) — verify all new commands appear in the help/command list: + - `run` (now shows Batch Evaluation option) + - `recommendations` + - `config-bundle` + - `stop` +- [ ] Navigate to each new feature and back — no crashes +- [ ] Verify help text updates correctly per screen (multi-select hints, navigate hints, etc.) 
+ +### 4.2 — Stage-aware endpoints + +- [ ] Set `AGENTCORE_STAGE=gamma` and run commands — verify they hit gamma endpoints +- [ ] Unset `AGENTCORE_STAGE` — verify prod endpoints are used +- [ ] Set `AGENTCORE_STAGE=beta` — verify beta endpoints and SigV4 service name + +### 4.3 — Existing features not broken + +- [ ] `agentcore run eval` — still works as before +- [ ] `agentcore evals history` — still works +- [ ] `agentcore pause online-eval` / `agentcore resume online-eval` — still work +- [ ] `agentcore deploy` — deploys correctly with config bundles in project +- [ ] `agentcore status` — shows config bundle status alongside other resources +- [ ] `agentcore add agent` / `agentcore remove agent` — unchanged behavior + +### 4.4 — Local result storage + +- [ ] After running batch eval, check `.cli/eval-job-results/` for saved JSON +- [ ] After running recommendation, check `.cli/recommendation-results/` for saved JSON +- [ ] Verify files contain complete result data + +### 4.5 — Help output + +- [ ] `agentcore run --help` — shows `eval`, `batch-evaluation`, `recommendation` subcommands +- [ ] `agentcore stop --help` — shows `batch-evaluation` subcommand +- [ ] `agentcore config-bundle --help` — shows `versions`, `diff` subcommands +- [ ] `agentcore recommendations --help` — shows `history` subcommand +- [ ] `agentcore --help` — shows all new top-level commands + +--- + +## Known Issues / Limitations + +1. **Batch eval**: No aggregated scores persisted — averages computed on-the-fly +2. **Batch eval**: Fixed 5s poll interval, not configurable +3. **Batch eval**: No retry on transient poll failures +4. **Batch eval**: TUI results show evaluator-level averages only, no per-session drill-down +5. **Recommendations**: Tool description type with CloudWatch trace source is not supported (sessions only) +6. **Recommendations**: `--execution-role` is temporary and may be removed +7. 
**Config bundles**: Edit command removed — users should edit `agentcore.json` directly + +--- + +## Environment Info + +- **Account:** **\*\***\_\_\_**\*\*** +- **Region:** **\*\***\_\_\_**\*\*** +- **CLI version:** **\*\***\_\_\_**\*\*** +- **Date:** **\*\***\_\_\_**\*\*** +- **Tester:** **\*\***\_\_\_**\*\*** diff --git a/docs/evo-cli-equivalents.md b/docs/evo-cli-equivalents.md new file mode 100644 index 000000000..5373623b1 --- /dev/null +++ b/docs/evo-cli-equivalents.md @@ -0,0 +1,467 @@ +# EVO Private Beta — AgentCore CLI Equivalents + +This document maps each API operation from the EVO Private Beta Getting Started Guide to its AgentCore CLI equivalent. + +> **Note:** The CLI auto-detects region, resolves runtime IDs and log groups from your deployed project state, generates +> client tokens, and polls async operations to completion. Most commands also have an interactive TUI wizard that +> launches when required options are omitted. + +--- + +## 3. Evaluations (Batch) + +### 3.2 Start Batch Evaluation + +**API:** + +```bash +aws evodp start-batch-evaluation \ + --endpoint-url $DP_ENDPOINT \ + --region $REGION \ + --name "acme-baseline-eval" \ + --evaluation-config '{ "evaluators": [...] }' \ + --session-source '{ "cloudWatchSource": { ... 
} }' \ + --execution-role-arn "$ROLE_ARN" \ + --client-token "$(uuidgen)" +``` + +**CLI:** + +```bash +agentcore run batch-evaluation \ + -a acme-support-agent \ + -e Builtin.GoalSuccessRate Builtin.Helpfulness Builtin.Faithfulness \ + --execution-role "$ROLE_ARN" +``` + +**CLI with all options:** + +```bash +agentcore run batch-evaluation \ + --agent # Agent name from project config (required) + --evaluator # Evaluator ID(s) — Builtin.* or custom (required) + --name # Custom name (auto-generated if omitted) + --region # AWS region (auto-detected if omitted) + --execution-role # IAM execution role ARN (temporary) + --json # Output as JSON +``` + +**TUI:** `agentcore` → **Run** → **Batch Evaluation** — auto-discovers deployed agents, evaluators, and sessions via +CloudWatch. + +The CLI automatically: + +- Resolves the agent's runtime ID and CloudWatch log group from deployed state +- Generates a unique name and client token +- Polls `get-batch-evaluation` until `COMPLETED` or `FAILED` +- Displays per-evaluator average scores and session counts +- Saves results locally to `.cli/eval-job-results/` + +### 3.3 Check Results + +**API:** + +```bash +aws evodp get-batch-evaluation \ + --endpoint-url $DP_ENDPOINT \ + --region $REGION \ + --batch-evaluate-id "$BATCH_EVALUATE_ID" +``` + +**CLI:** The `run batch-evaluation` command polls automatically — no separate get call needed. 
+ +### 3.4 Stop Batch Evaluation + +**API:** + +```bash +aws evodp stop-batch-evaluation \ + --endpoint-url $DP_ENDPOINT \ + --region $REGION \ + --batch-evaluate-id "$BATCH_EVALUATE_ID" +``` + +**CLI:** + +```bash +agentcore stop batch-evaluation --id "$BATCH_EVALUATE_ID" +``` + +**CLI with all options:** + +```bash +agentcore stop batch-evaluation \ + --id # Batch evaluation ID to stop (required) + --region # AWS region (auto-detected if omitted) + --json # Output as JSON +``` + +### 3.5 List Batch Evaluations + +**API:** + +```bash +aws evodp list-batch-evaluations \ + --endpoint-url $DP_ENDPOINT \ + --region $REGION \ + --max-results 10 +``` + +**CLI:** _Not yet implemented as a CLI command._ The API client (`listBatchEvaluations`) exists internally. Use +`agentcore evals history` for locally saved results. + +--- + +## 5. Recommendations + +### 5.2 Start a System Prompt Recommendation + +**API (Option A — Inline Session Spans):** + +```bash +aws evodp start-recommendation \ + --endpoint-url $DP_ENDPOINT \ + --region $REGION \ + --name "improve-system-prompt" \ + --type "SYSTEM_PROMPT_RECOMMENDATION" \ + --recommendation-config "$(jq -n --slurpfile spans session_spans.json '{ ... }')" +``` + +**CLI:** + +```bash +# System prompt from CloudWatch traces (auto-discovered) +agentcore run recommendation \ + -t system-prompt \ + -a acme-support-agent \ + -e Builtin.GoalSuccessRate \ + --lookback 7 + +# System prompt inline with specific sessions +agentcore run recommendation \ + -t system-prompt \ + -a acme-support-agent \ + -e Builtin.GoalSuccessRate \ + --inline "You are a helpful customer support assistant for Acme Store." 
\ + -s SESSION_ID_1 SESSION_ID_2 + +# System prompt from file +agentcore run recommendation \ + -t system-prompt \ + -a acme-support-agent \ + -e Builtin.GoalSuccessRate \ + --prompt-file ./my-prompt.txt + +# With inline session spans file (Option A equivalent) +agentcore run recommendation \ + -t system-prompt \ + -a acme-support-agent \ + -e Builtin.GoalSuccessRate \ + --inline "You are helpful" \ + --spans-file session_spans.json + +# From a configuration bundle (Option C equivalent) +agentcore run recommendation \ + -t system-prompt \ + -a acme-support-agent \ + -e Builtin.GoalSuccessRate \ + --bundle-name acme-support-config \ + --bundle-version "$VERSION_ID" +``` + +**CLI with all options:** + +```bash +agentcore run recommendation \ + --type # system-prompt or tool-description (default: system-prompt) + --agent # Agent name from project (required) + --evaluator # Evaluator name(s) or Builtin.* ID(s) (required, repeatable) + --prompt-file # Load system prompt from file + --inline # Provide content inline + --bundle-name # Config bundle name + --bundle-version # Config bundle version + --tools # Comma-separated toolName:description pairs (tool-description type) + --spans-file # JSON file with session spans (instead of CloudWatch) + --lookback # Lookback window in days (default: 7) + --session-id # Specific session IDs for traces + --run # Run name prefix + --region # AWS region (auto-detected if omitted) + --json # Output as JSON +``` + +**TUI:** `agentcore` → **Recommendations** → **Run Recommendation** — wizard walks through type → agent → evaluators → +input source → trace source → confirm. 
+ +The CLI automatically: + +- Resolves the agent's deployed runtime, log group, and service name +- Discovers session spans from CloudWatch (or reads from `--spans-file`) +- Polls `get-recommendation` until `COMPLETED` +- Prints the recommended prompt and explanation +- Saves results locally to `.cli/recommendation-results/` + +### 5.3 Start a Tool Description Recommendation + +**API:** + +```bash +aws evodp start-recommendation \ + --endpoint-url $DP_ENDPOINT \ + --region $REGION \ + --name "improve-tool-desc" \ + --type "TOOL_DESCRIPTION_RECOMMENDATION" \ + --recommendation-config "$(jq -n --slurpfile spans session_spans.json '{ ... }')" +``` + +**CLI:** + +```bash +agentcore run recommendation \ + -t tool-description \ + -a acme-support-agent \ + -e Builtin.GoalSuccessRate \ + --inline "search_flights:Search for available flights" \ + --tools "search_flights:Search for available flights,book_seat:Book a seat on a flight" \ + -s SESSION_ID_1 + +# From a configuration bundle +agentcore run recommendation \ + -t tool-description \ + -a acme-support-agent \ + -e Builtin.GoalSuccessRate \ + --bundle-name acme-support-config \ + --bundle-version "$VERSION_ID" \ + --tools "search_flights,book_seat" +``` + +**Note:** Tool description recommendations only support session-based trace source (no CloudWatch discovery). + +### 5.4 Check Recommendation Status + +**API:** + +```bash +aws evodp get-recommendation \ + --endpoint-url $DP_ENDPOINT \ + --region $REGION \ + --recommendation-id "$RECOMMENDATION_ID" +``` + +**CLI:** The `run recommendation` command polls automatically — no separate get call needed. 
+ +### 5.5 View Past Recommendations + +**CLI:** + +```bash +# List locally saved recommendation runs +agentcore recommendations history + +# JSON output +agentcore recommendations history --json +``` + +**TUI:** `agentcore` → **Recommendations** → **History** + +### 5.6 List and Delete (Remote) + +**API:** + +```bash +aws evodp list-recommendations --endpoint-url $DP_ENDPOINT --region $REGION --max-results 10 +aws evodp delete-recommendation --endpoint-url $DP_ENDPOINT --region $REGION --recommendation-id "$RECOMMENDATION_ID" +``` + +**CLI:** _Not yet implemented._ Use `agentcore recommendations history` for locally saved results. + +--- + +## 2. Configuration Bundles + +### 2.1 Create a Configuration Bundle + +**API:** + +```bash +aws evocp create-configuration-bundle \ + --endpoint-url $CP_ENDPOINT \ + --region $REGION \ + --bundle-name "my_agent_config" \ + --description "Initial agent configuration" \ + --components '{ ... }' \ + --branch-name "mainline" \ + --commit-message "Initial version" \ + --created-by '{ ... }' \ + --client-token "$(uuidgen)" +``` + +**CLI:** + +```bash +# Interactive TUI wizard +agentcore add config-bundle +``` + +**TUI:** `agentcore` → **Add** → **Config Bundle** — prompts for system prompt, model ID, branch name, commit message, +and handles `createdBy` metadata and client token automatically. + +The wizard detects existing bundles and creates a new version with the correct `parentVersionIds` to maintain the +version chain. + +### 2.2 Get a Configuration Bundle Version + +**API:** + +```bash +# Specific version +aws evocp get-configuration-bundle-version \ + --endpoint-url $CP_ENDPOINT --region $REGION \ + --bundle-id "$BUNDLE_ID" --version-id "$VERSION_ID" +``` + +**CLI:** _No standalone get-version command._ Use `agentcore config-bundle versions` to list versions, and +`agentcore config-bundle diff` to compare them. Use `--json` for full version data. 
+ +### 2.3 Update a Configuration Bundle + +**API:** + +```bash +aws evocp update-configuration-bundle \ + --endpoint-url $CP_ENDPOINT \ + --region $REGION \ + --bundle-id "$BUNDLE_ID" \ + --components '{ ... }' \ + --parent-version-ids '["PARENT_VERSION_ID"]' \ + --branch-name "mainline" \ + --commit-message "Improve system prompt" \ + --created-by '{ ... }' \ + --client-token "$(uuidgen)" +``` + +**CLI:** + +```bash +# TUI wizard — creates a new version with parent chain +agentcore add config-bundle +``` + +### 2.4 List Configuration Bundle Versions + +**API:** + +```bash +aws evocp list-configuration-bundle-versions \ + --endpoint-url $CP_ENDPOINT --region $REGION \ + --bundle-id "$BUNDLE_ID" \ + --filter '{"branchName": "mainline"}' +``` + +**CLI:** + +```bash +# List all versions +agentcore config-bundle versions --bundle "my-agent-config" + +# Filter by branch +agentcore config-bundle versions --bundle "my-agent-config" --branch mainline + +# Latest version per branch +agentcore config-bundle versions --bundle "my-agent-config" --latest-per-branch + +# Filter by creator +agentcore config-bundle versions --bundle "my-agent-config" --created-by recommendation + +# JSON output +agentcore config-bundle versions --bundle "my-agent-config" --json +``` + +**CLI with all options:** + +```bash +agentcore config-bundle versions \ + --bundle # Bundle name (required) + --branch # Filter by branch name + --latest-per-branch # Show only the latest version per branch + --created-by # Filter by creator (e.g. 
"user", "recommendation") + --region # AWS region override + --json # Output as JSON +``` + +**TUI:** `agentcore` → **Config Bundle** → **Version History** + +### 2.5 Diff Configuration Bundle Versions + +**CLI:** + +```bash +agentcore config-bundle diff --bundle "my-agent-config" --from "$V1" --to "$V2" +``` + +**CLI with all options:** + +```bash +agentcore config-bundle diff \ + --bundle # Bundle name (required) + --from # Source version ID (required) + --to # Target version ID (required) + --region # AWS region override + --json # Output as JSON +``` + +**TUI:** `agentcore` → **Config Bundle** → **Diff** + +### 2.6 Delete a Configuration Bundle + +**API:** + +```bash +aws evocp delete-configuration-bundle \ + --endpoint-url $CP_ENDPOINT --region $REGION \ + --bundle-id "$BUNDLE_ID" +``` + +**CLI:** + +```bash +# Interactive TUI +agentcore remove config-bundle +``` + +**Note:** The `config-bundle` command also supports the `cb` alias (e.g. `agentcore cb versions ...`). + +--- + +## Quick Reference Table + +| API Operation | CLI Command | TUI | +| ----------------------------------------- | --------------------------------------------------------------------------- | ------------------------- | +| `start-batch-evaluation` | `agentcore run batch-evaluation -a -e ` | Run → Batch Evaluation | +| `get-batch-evaluation` | _(auto-polled by `run batch-evaluation`)_ | — | +| `stop-batch-evaluation` | `agentcore stop batch-evaluation --id ` | — | +| `list-batch-evaluations` | _Not yet implemented_ | — | +| `start-recommendation` (system prompt) | `agentcore run recommendation -t system-prompt -a -e ` | Recommendations → Run | +| `start-recommendation` (tool description) | `agentcore run recommendation -t tool-description -a --tools <...>` | Recommendations → Run | +| `get-recommendation` | _(auto-polled by `run recommendation`)_ | — | +| `list-recommendations` | _Not yet implemented (remote)_ | — | +| `delete-recommendation` | _Not yet implemented_ | — | +| View past 
recommendations | `agentcore recommendations history` | Recommendations → History | +| `create-configuration-bundle` | `agentcore add config-bundle` | Add → Config Bundle | +| `update-configuration-bundle` | `agentcore add config-bundle` _(new version)_ | Add → Config Bundle | +| `get-configuration-bundle-version` | _No standalone command — use `versions --json`_ | — | +| `list-configuration-bundle-versions` | `agentcore config-bundle versions --bundle ` | Config Bundle → Versions | +| Diff versions | `agentcore config-bundle diff --bundle --from --to ` | Config Bundle → Diff | +| `delete-configuration-bundle` | `agentcore remove config-bundle` | Remove → Config Bundle | + +--- + +## Not Yet Implemented in CLI + +| API Operation | Notes | +| ------------------------------------- | --------------------------------------------------- | +| **A/B Tests** (full CRUD) | No API client or CLI commands | +| **Gateway Routing Rules** (CRUD) | No API client — needed for "deploy the winner" step | +| **List Batch Evaluations** (remote) | API client exists internally, no CLI command | +| **List Recommendations** (remote) | API client exists internally, no CLI command | +| **Delete Recommendation** | API client exists, no CLI command | +| **Promote** (recommendation → bundle) | Not implemented | diff --git a/docs/evo-dev-tarball-test-results.md b/docs/evo-dev-tarball-test-results.md new file mode 100644 index 000000000..afe7db48c --- /dev/null +++ b/docs/evo-dev-tarball-test-results.md @@ -0,0 +1,143 @@ +# EVO Dev Tarball Test Results + +**Tarball:** `aws-agentcore-dev-0.8.0-dev-20260413190122.tgz` +**Package:** `@aws/agentcore-dev` +**Binary:** `agentcore-dev` +**Version:** `0.8.0-dev-20260413190122` +**Date:** 2026-04-13 +**Account:** 998846730471 +**Region:** us-east-1 + +## What's Bundled + +- **CLI:** `feat/evo-implementation` + public main sync (includes region fix #818) +- **CDK:** `feat/evo-implementation` + main sync + CDK region fix (PR #145) +- **Python SDK:** 
`bedrock_agentcore-1.6.0.dev20260413` wheel from `feat/evo_main` +- **PR #66:** Wheel bundling support (local SDK wheel used at deploy time) + +--- + +## Test Results Summary + +| Flow | Status | Notes | +|------|--------|-------| +| `agentcore-dev --version` | PASS | `0.8.0-dev-20260413190122` | +| `agentcore-dev --help` | PASS | All commands listed (run, stop, config-bundle, ab-test, recommendations) | +| `agentcore-dev run --help` | PASS | Shows eval, batch-evaluation, recommendation | +| `agentcore-dev config-bundle --help` | PASS | Shows versions, diff | +| `agentcore-dev stop --help` | PASS | Shows ab-test, batch-evaluation, recommendation | +| `agentcore-dev validate` | PASS | | +| `agentcore-dev deploy --target dev --yes` | PASS | Config bundle created, runtime deployed | +| `agentcore-dev status --target dev` | PASS | Shows runtime READY + config bundle deployed | +| `agentcore-dev invoke --runtime --target dev` | PASS | Agent responds correctly | + +### Config Bundles + +| Flow | Status | Notes | +|------|--------|-------| +| `cb versions --bundle ` | PASS | Shows version tree with branch, creator, parent chain | +| `cb versions --bundle --json` | PASS | Full JSON with lineageMetadata | +| `cb alias` (`cb` = `config-bundle`) | PASS | | +| `cb diff --from --to ` | PASS | Shows changed fields with old/new values | +| `cb diff` (identical versions) | PASS | "No differences found" | +| `cb diff` (invalid version ID) | PASS | API error with regex pattern message | +| `cb versions` (nonexistent bundle) | PASS | "not found. Has it been deployed?" 
| + +### Recommendations + +| Flow | Status | Notes | +|------|--------|-------| +| Recommendation with `--bundle-name` + `--bundle-version` + `--system-prompt-json-path` | PASS | Bundle ARN resolves correctly, COMPLETED | +| Recommendation with `--inline` + CloudWatch traces (`--lookback 7`) | PASS | COMPLETED with learned behaviors | +| Recommendation with short-form json path (`systemPrompt`) | PASS | Resolves to full `$.ARN.configuration.systemPrompt` | +| `recommendations history --json` | PASS | Shows past runs with full result data | +| Error: missing `--runtime` | PASS | "--runtime is required" | +| Error: missing `--evaluator` | PASS | "--evaluator is required for system-prompt recommendations" | +| Error: invalid `--type` | PASS | 'Must be one of: system-prompt, tool-description' | +| Error: nonexistent agent | PASS | 'Agent "X" not deployed. Run `agentcore deploy` first.' | + +### Batch Evaluation + +| Flow | Status | Notes | +|------|--------|-------| +| `run batch-evaluation --runtime --evaluator Builtin.GoalSuccessRate` | PASS | COMPLETED, 5 sessions evaluated, avg score 1.0 | +| `evals history --json` | PASS | Returns saved results (empty for on-demand evals in this project) | + +### Region Fix + +| Flow | Status | Notes | +|------|--------|-------| +| Deploy with `aws-targets.json: us-west-2` while `AWS_REGION=us-east-1` | PASS | Stack + runtime correctly in us-west-2 (CDK region fix PR #145) | + +--- + +## Cosmetic Issues + +| Issue | Severity | Description | +|-------|----------|-------------| +| Help text says `Usage: agentcore` not `agentcore-dev` | Low | Commander uses program name from argv[1], but the binary is `agentcore-dev`. Functional, just cosmetic. | +| Update nag shows `npm install -g @aws/agentcore@latest` | Low | Should say `@aws/agentcore-dev` for the dev package. Non-blocking since dev installs are from tarball. 
| + +--- + +## Installation + +```bash +# Install from tarball +npm install -g ~/Downloads/aws-agentcore-dev-0.8.0-dev-20260413190122.tgz + +# Verify +agentcore-dev --version +# 0.8.0-dev-20260413190122 + +# Uninstall +npm uninstall -g @aws/agentcore-dev +``` + +--- + +## Command Reference (agentcore-dev namespace) + +All commands use `agentcore-dev` instead of `agentcore`: + +```bash +# Project lifecycle +agentcore-dev create --name myproject --framework Strands --defaults +agentcore-dev deploy --target dev --yes +agentcore-dev status --target dev +agentcore-dev invoke --runtime <runtime> --target dev --prompt "Hello" +agentcore-dev validate + +# Config bundles +agentcore-dev add config-bundle +agentcore-dev config-bundle versions --bundle <bundle> +agentcore-dev config-bundle diff --bundle <bundle> --from <v1> --to <v2> +agentcore-dev cb versions --bundle <bundle> # alias + +# Recommendations +agentcore-dev run recommendation \ + --runtime <runtime> \ + --evaluator Builtin.GoalSuccessRate \ + --bundle-name <bundle> \ + --bundle-version <version-id> \ + --system-prompt-json-path systemPrompt + +agentcore-dev run recommendation \ + --runtime <runtime> \ + --evaluator Builtin.GoalSuccessRate \ + --inline "You are a helpful assistant." \ + --lookback 7 + +agentcore-dev recommendations history + +# Batch evaluation +agentcore-dev run batch-evaluation \ + --runtime <runtime> \ + --evaluator Builtin.GoalSuccessRate Builtin.Helpfulness + +agentcore-dev stop batch-evaluation --id <batch-evaluation-id> + +# Logs and traces +agentcore-dev logs --runtime <runtime> +agentcore-dev traces list --runtime <runtime> +``` diff --git a/docs/harness.md b/docs/harness.md new file mode 100644 index 000000000..db51d2b7f --- /dev/null +++ b/docs/harness.md @@ -0,0 +1,263 @@ +# Harness + +A **harness** is a managed agent runtime that connects a foundation model to tools, memory, and configuration — without +requiring you to write agent framework code. You define the model, tools, and settings; AgentCore handles the +orchestration. + +Use a harness when you want a quick, config-driven agent.
Use a traditional agent (with `--framework`) when you need +custom code, a specific framework (Strands, LangChain, etc.), or full control over the agent loop. + +## Creating a Harness Project + +```bash +# Minimal — defaults to Bedrock provider, Claude Sonnet +agentcore create --name myharness + +# Specify provider and model +agentcore create --name myharness --model-provider bedrock --model-id global.anthropic.claude-sonnet-4-6 + +# OpenAI provider (requires --api-key-arn) +agentcore create --name myharness --model-provider open_ai --model-id gpt-4o \ + --api-key-arn arn:aws:secretsmanager:us-west-2:123456789012:secret:openai-key + +# Gemini provider +agentcore create --name myharness --model-provider gemini --model-id gemini-2.5-flash \ + --api-key-arn arn:aws:secretsmanager:us-west-2:123456789012:secret:gemini-key + +# Skip auto-created memory +agentcore create --name myharness --no-harness-memory + +# With all optional settings +agentcore create --name myharness \ + --model-provider bedrock \ + --max-iterations 10 \ + --max-tokens 4096 \ + --timeout 120 \ + --truncation-strategy sliding_window \ + --session-storage-mount-path /mnt/data +``` + +### Model Providers + +| Provider | `--model-provider` value | Requires `--api-key-arn` | +| -------- | ------------------------ | ------------------------ | +| Bedrock | `bedrock` | No | +| OpenAI | `open_ai` or `openai` | Yes | +| Gemini | `gemini` | Yes | + +> Aliases `Bedrock`, `OpenAI`, `Gemini`, `Anthropic` (maps to bedrock) are also accepted. + +### Harness vs Agent + +If you pass `--framework`, `--language`, or other agent-specific flags, the CLI creates a traditional agent project +instead. These flags cannot be mixed with harness-only flags (`--model-id`, `--max-iterations`, etc.). + +## Project Structure + +``` +myharness/ + agentcore/ # Config and CDK project + agentcore.json # Project manifest (lists harnesses, memories, etc.) 
+ aws-targets.json # Deployment targets + cdk/ # CDK infrastructure code + app/myharness/ # Harness configuration + harness.json # Harness spec (model, tools, settings) + system-prompt.md # System prompt (editable) +``` + +## Deployment Targets (`aws-targets.json`) + +Before deploying, ensure `aws-targets.json` has at least one target: + +```json +[ + { + "name": "default", + "account": "123456789012", + "region": "us-west-2" + } +] +``` + +Fields: + +- `name` — target name (use `"default"` for the primary target) +- `account` — AWS account ID (string) +- `region` — AWS region + +## Adding a Harness to an Existing Project + +```bash +agentcore add harness --name myharness --model-provider bedrock +agentcore add harness --name myharness --model-provider bedrock --session-storage /mnt/data +agentcore add harness --name myharness --model-provider bedrock --with-invoke-script +``` + +### Custom JWT Auth + +```bash +agentcore add harness --name myharness --model-provider bedrock \ + --authorizer-type CUSTOM_JWT \ + --discovery-url https://example.auth0.com/.well-known/openid-configuration \ + --allowed-audience myapp +``` + +## Tools + +Harnesses support four built-in tool types plus inline functions: + +### Adding Tools + +```bash +# Remote MCP server +agentcore add tool --harness myharness --type remote_mcp --name mytool \ + --url https://mcp-server.example.com/sse + +# Browser tool +agentcore add tool --harness myharness --type agentcore_browser --name browser + +# Code interpreter +agentcore add tool --harness myharness --type agentcore_code_interpreter --name codeinterp + +# Gateway tool (by ARN) +agentcore add tool --harness myharness --type agentcore_gateway --name gwtool \ + --gateway-arn arn:aws:bedrock-agentcore:us-west-2:123456789012:gateway/gw-abc + +# Gateway tool (by project gateway name — resolves ARN from deployed state) +agentcore add tool --harness myharness --type agentcore_gateway --name gwtool \ + --gateway mygateway +``` + +### Removing Tools + 
+```bash +agentcore remove tool --harness myharness --name mytool +``` + +## Session Storage + +Session storage provides a persistent filesystem mount for the harness runtime. Files written to the mount path persist +across invocations within the same session. + +```bash +# Via add harness +agentcore add harness --name myharness --model-provider bedrock --session-storage /mnt/data + +# Via create +agentcore create --name myharness --session-storage-mount-path /mnt/data +``` + +The path must be an absolute path under `/mnt/` (e.g., `/mnt/data`, `/mnt/workspace`). + +**Important:** Only files written to the configured mount path are persistent and visible to `--exec` commands. Files +written to other paths (e.g., `/home`, `/tmp`) may be created in an ephemeral context and will not appear when +inspecting the container via `--exec`. If your tools write files, configure them to use the session storage path. + +## Deploying + +```bash +agentcore deploy # Interactive — prompts for confirmation +agentcore deploy -y # Auto-confirm +agentcore deploy --dry-run # Preview without deploying +agentcore deploy --diff # Show CDK diff +``` + +Deploy creates: + +1. CloudFormation stack (IAM role, memory) +2. Harness resource via AgentCore API + +## Checking Status + +```bash +agentcore status # All resources +agentcore status --type harness # Harness resources only +agentcore status --json # JSON output +``` + +## Invoking + +```bash +# Basic invoke +agentcore invoke --harness myharness "What can you do?" 
+ +# With session continuity +agentcore invoke --harness myharness --session-id <session-id> "Follow up question" + +# Verbose — shows raw streaming events +agentcore invoke --harness myharness --verbose "Hello" + +# JSON output +agentcore invoke --harness myharness --json "Hello" +``` + +### Invoke Overrides + +These flags override harness settings for a single invocation only (they do not persist): + +| Flag | Description | +| ----------------------------- | ------------------------------------- | +| `--model-id <id>` | Use a different model | +| `--system-prompt <text>` | Override the system prompt | +| `--max-iterations <n>` | Override max agent loop iterations | +| `--max-tokens <n>` | Override max tokens per iteration | +| `--harness-timeout <seconds>` | Override execution timeout | +| `--tools <tools>` | Override tools (comma-separated) | +| `--allowed-tools <tools>` | Restrict which tools can be used | +| `--skills <paths>` | Skills to use (comma-separated paths) | +| `--actor-id <id>` | Override memory actor ID | +| `--bearer-token <token>` | Bearer token for CUSTOM_JWT auth | + +## Logs and Traces + +```bash +# View logs +agentcore logs --harness myharness --limit 20 +agentcore logs --harness myharness --since 1h --level error + +# List traces +agentcore traces list --harness myharness +agentcore traces list --harness myharness --since 30m --limit 10 + +# Download a trace +agentcore traces get <trace-id> --harness myharness +agentcore traces get <trace-id> --harness myharness --output ./trace.json +``` + +## Fetching Access Info + +For harnesses with CUSTOM_JWT auth: + +```bash +agentcore fetch access --name myharness --type harness +agentcore fetch access --name myharness --type harness --json +``` + +## Removing a Harness + +```bash +agentcore remove harness --name myharness -y +agentcore deploy # Apply removal to AWS +``` + +## Validating Configuration + +```bash +agentcore validate +``` + +Checks: + +- Harness schema validity (model, tools, settings) +- Cross-references (memory names exist in project) +- Tool configuration completeness + +##
Invoke Script + +Pass `--with-invoke-script` to generate a standalone Python script for invoking the harness outside the CLI: + +```bash +agentcore add harness --name myharness --model-provider bedrock --with-invoke-script +``` + +This creates `app/myharness/invoke.py` which uses `boto3` to invoke the harness directly. diff --git a/docs/superpowers/plans/2026-04-16-harness-tui-cli.md b/docs/superpowers/plans/2026-04-16-harness-tui-cli.md new file mode 100644 index 000000000..6b584ddbd --- /dev/null +++ b/docs/superpowers/plans/2026-04-16-harness-tui-cli.md @@ -0,0 +1,1532 @@ +# Harness TUI + CLI Commander Implementation Plan (Draft 2) + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or +> superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add Harness (LoopyAgent) as a first-class resource in the AgentCore CLI with a **file-per-harness** +architecture — each harness has its own directory (`agentcore/harnesses/<name>/`) with `harness.json` config + +`system-prompt.md` + optional `skills/`. `agentcore.json` holds lightweight `{name, path}` pointers for CLI discovery. + +**Architecture:** Harness config is split from the project spec. `agentcore.json` gets a `harnesses[]` array of +`{name, path}` refs. Each harness directory contains a `harness.json` (full config), `system-prompt.md` (referenced by +file path in JSON), and optional `skills/*.md`. No app code is scaffolded by default — harness is config-only. Long-term +memory (SEMANTIC + SUMMARIZATION) is enabled by default. Deploy reads `harnesses/*/harness.json`, resolves markdown +references, and calls CreateHarness/UpdateHarness imperatively. A new `HarnessPrimitive` extends `BasePrimitive` for CLI +wiring. + +**Tech Stack:** TypeScript, Zod (schema), Ink/React (TUI), Commander.js (CLI) + +**Draft 2 key decisions reflected:** + +1.
System prompts and skills as markdown, not JSON +3. No scaffolded application code by default +4. Harness is additive, not a new default +5. Long-term memory enabled by default + +--- + +## File Structure + +### New Files + +| File | Responsibility | +| ------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------- | +| `src/schema/schemas/harness.ts` | Zod schemas: `HarnessConfigSchema` (per-harness file), `HarnessRefSchema` (project-level pointer), `HarnessToolSchema`, `HarnessNameSchema` | +| `src/cli/primitives/HarnessPrimitive.ts` | `HarnessPrimitive` extending `BasePrimitive` — add/remove lifecycle, reads/writes harness dirs | +| `src/cli/tui/screens/harness/types.ts` | `AddHarnessConfig`, `AddHarnessStep`, UI option constants (providers, tools, models) | +| `src/cli/tui/screens/harness/AddHarnessFlow.tsx` | Multi-step TUI wizard with breadcrumb progress bar | +| `src/cli/tui/screens/harness/useAddHarness.ts` | Hook: creates harness dir, writes harness.json + system-prompt.md, updates agentcore.json ref | +| `src/cli/tui/screens/harness/index.ts` | Barrel export | +| `src/cli/operations/deploy/deploy-harness.ts` | `deployHarness()` — reads harness.json + resolves markdown refs → imperative CreateHarness/UpdateHarness | +| `src/cli/operations/invoke/invoke-harness.ts` | `invokeHarnessStreaming()` — streaming InvokeHarness with inline function tool support | +| `src/lib/harness-io.ts` | `HarnessIO` — read/write harness.json, resolve system-prompt.md, list harness dirs | +| `src/assets/harness/harness.json` | Template harness.json | +| `src/assets/harness/system-prompt.md` | Template system prompt markdown | +| `src/assets/harness/invoke-script/main.py` | Optional invoke script (generated via `--with-invoke-script`) | +| `src/assets/harness/invoke-script/pyproject.toml` | Optional pyproject.toml for invoke script | + +### Modified 
Files + +| File | Change | +| --------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------- | +| `src/schema/schemas/agentcore-project.ts` | Add `harnesses` array of `HarnessRefSchema` (`{name, path}`) to `AgentCoreProjectSpecSchema` | +| `src/schema/schemas/deployed-state.ts` | Add `harnesses` record to `DeployedResourceStateSchema` with `HarnessDeployedStateSchema` | +| `src/schema/index.ts` | Re-export harness types | +| `src/cli/primitives/registry.ts` | Register `harnessPrimitive` singleton | +| `src/cli/tui/screens/add/AddScreen.tsx` | Add "Harness" to `ADD_RESOURCES` list | +| `src/cli/tui/screens/add/AddFlow.tsx` | Add `harness-wizard` flow state, route to `AddHarnessFlow` | +| `src/cli/tui/screens/create/CreateScreen.tsx` | Add template type selection (Agent vs Harness) after name input | +| `src/cli/commands/create/command.tsx` | Add `--template harness` flag | +| `src/cli/commands/create/action.ts` | Add `createHarnessProject()` — scaffolds harness dir, no app/ | +| `src/cli/commands/create/types.ts` | Add `template` to `CreateOptions` | +| `src/cli/commands/invoke/command.tsx` | Add `--harness`, `--raw-events`, `--model-id`, `--tools`, `--max-iterations`, `--timeout`, `--max-tokens`, `--skills` flags | +| `src/cli/tui/screens/invoke/InvokeScreen.tsx` | Show harnesses alongside runtimes; inline function tool approve/deny UI | +| `src/cli/tui/screens/invoke/useInvokeFlow.ts` | Load harness deployed state, call `invokeHarnessStreaming()` | +| `src/cli/tui/screens/remove/RemoveFlow.tsx` | Add harness removal flow + delete harness directory (preserve memory) | +| `src/cli/tui/screens/status/StatusScreen.tsx` | Display harness entries in resource graph | + +### Test Files + +| File | What it tests | +| ------------------------------------------------------- | ---------------------------------------------------------------------------------- | +| 
`src/schema/schemas/__tests__/harness.test.ts` | Zod schema validation for HarnessConfigSchema, HarnessRefSchema, HarnessToolSchema | +| `src/cli/primitives/__tests__/HarnessPrimitive.test.ts` | add/remove lifecycle, directory creation, duplicate detection | +| `src/lib/__tests__/harness-io.test.ts` | HarnessIO read/write, markdown resolution | + +--- + +## Task 1: Harness Zod Schemas (Config + Ref + Deployed State) + +The key architectural change from Draft 1: **two schemas**. `HarnessRefSchema` is the lightweight pointer stored in +`agentcore.json`. `HarnessConfigSchema` is the full config stored in each `harness.json` file. + +**Files:** + +- Create: `src/schema/schemas/harness.ts` +- Modify: `src/schema/schemas/agentcore-project.ts` +- Modify: `src/schema/schemas/deployed-state.ts` +- Modify: `src/schema/index.ts` +- Test: `src/schema/schemas/__tests__/harness.test.ts` + +- [ ] **Step 1: Write failing test for harness schemas** + +Create `src/schema/schemas/__tests__/harness.test.ts`: + +```typescript +import { HarnessConfigSchema, HarnessNameSchema, HarnessRefSchema, HarnessToolSchema } from '../harness'; +import { describe, expect, it } from 'vitest'; + +describe('HarnessNameSchema', () => { + it('accepts valid harness name', () => { + expect(HarnessNameSchema.safeParse('my_loopy_agent').success).toBe(true); + }); + + it('rejects empty name', () => { + expect(HarnessNameSchema.safeParse('').success).toBe(false); + }); + + it('rejects name starting with number', () => { + expect(HarnessNameSchema.safeParse('1agent').success).toBe(false); + }); + + it('rejects name exceeding 48 chars', () => { + expect(HarnessNameSchema.safeParse('a'.repeat(49)).success).toBe(false); + }); +}); + +describe('HarnessRefSchema', () => { + it('accepts valid harness ref', () => { + const result = HarnessRefSchema.safeParse({ + name: 'my_agent', + path: './harnesses/my_agent', + }); + expect(result.success).toBe(true); + }); + + it('rejects ref without path', () => { + const result = 
HarnessRefSchema.safeParse({ name: 'my_agent' }); + expect(result.success).toBe(false); + }); +}); + +describe('HarnessToolSchema', () => { + it('accepts agentcore_browser tool', () => { + const result = HarnessToolSchema.safeParse({ + type: 'agentcore_browser', + name: 'browser', + }); + expect(result.success).toBe(true); + }); + + it('accepts agentcore_code_interpreter tool', () => { + const result = HarnessToolSchema.safeParse({ + type: 'agentcore_code_interpreter', + name: 'code_interpreter', + }); + expect(result.success).toBe(true); + }); + + it('accepts remote_mcp tool with url', () => { + const result = HarnessToolSchema.safeParse({ + type: 'remote_mcp', + name: 'exa', + config: { remoteMcp: { url: 'https://mcp.exa.ai/mcp' } }, + }); + expect(result.success).toBe(true); + }); + + it('accepts agentcore_gateway tool with arn', () => { + const result = HarnessToolSchema.safeParse({ + type: 'agentcore_gateway', + name: 'my_gateway', + config: { agentCoreGateway: { gatewayArn: 'arn:aws:bedrock-agentcore:us-west-2:123:gateway/gw-123' } }, + }); + expect(result.success).toBe(true); + }); + + it('accepts inline_function tool', () => { + const result = HarnessToolSchema.safeParse({ + type: 'inline_function', + name: 'approve_purchase', + config: { + inlineFunction: { + description: 'Request human approval', + inputSchema: { + type: 'object', + properties: { item: { type: 'string' } }, + required: ['item'], + }, + }, + }, + }); + expect(result.success).toBe(true); + }); +}); + +describe('HarnessConfigSchema', () => { + it('accepts minimal harness config', () => { + const result = HarnessConfigSchema.safeParse({ + name: 'my_agent', + model: { + bedrockModelConfig: { modelId: 'us.anthropic.claude-sonnet-4-5-20250514-v1:0' }, + }, + }); + expect(result.success).toBe(true); + }); + + it('accepts systemPrompt as file path string', () => { + const result = HarnessConfigSchema.safeParse({ + name: 'my_agent', + model: { bedrockModelConfig: { modelId: 
'us.anthropic.claude-sonnet-4-5-20250514-v1:0' } }, + systemPrompt: './system-prompt.md', + }); + expect(result.success).toBe(true); + }); + + it('accepts skills array with paths', () => { + const result = HarnessConfigSchema.safeParse({ + name: 'my_agent', + model: { bedrockModelConfig: { modelId: 'us.anthropic.claude-sonnet-4-5-20250514-v1:0' } }, + skills: [{ path: './skills/research' }, { path: '.agents/skills/xlsx' }], + }); + expect(result.success).toBe(true); + }); + + it('accepts fully specified harness config', () => { + const result = HarnessConfigSchema.safeParse({ + name: 'research_agent', + model: { bedrockModelConfig: { modelId: 'us.anthropic.claude-sonnet-4-5-20250514-v1:0' } }, + systemPrompt: './system-prompt.md', + tools: [ + { type: 'agentcore_browser', name: 'browser' }, + { type: 'remote_mcp', name: 'exa', config: { remoteMcp: { url: 'https://mcp.exa.ai/mcp' } } }, + ], + skills: [{ path: './skills/research' }], + memory: { name: 'research_memory' }, + maxIterations: 75, + timeoutSeconds: 3600, + maxTokens: 16384, + allowedTools: ['*'], + truncation: { + strategy: 'sliding_window', + config: { slidingWindow: { messagesCount: 150 } }, + }, + }); + expect(result.success).toBe(true); + }); + + it('rejects harness without model', () => { + const result = HarnessConfigSchema.safeParse({ name: 'my_agent' }); + expect(result.success).toBe(false); + }); + + it('defaults tools to empty array', () => { + const result = HarnessConfigSchema.parse({ + name: 'my_agent', + model: { bedrockModelConfig: { modelId: 'us.anthropic.claude-sonnet-4-5-20250514-v1:0' } }, + }); + expect(result.tools).toEqual([]); + }); +}); +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: +`cd /Volumes/workplace/agentcore/agentcore-gh/private/private-agentcore-cli-staging && npx vitest run src/schema/schemas/__tests__/harness.test.ts` + +Expected: FAIL — module `../harness` does not exist. 
+ +- [ ] **Step 3: Create harness schema file** + +Create `src/schema/schemas/harness.ts`: + +```typescript +import { TagsSchema } from './primitives/tags'; +import { z } from 'zod'; + +// ============================================================================ +// Name Schema (shared by ref and config) +// ============================================================================ + +export const HarnessNameSchema = z + .string() + .min(1, 'Harness name is required') + .max(48) + .regex( + /^[a-zA-Z][a-zA-Z0-9_-]{0,47}$/, + 'Must begin with a letter and contain only alphanumeric characters, underscores, and hyphens (max 48 chars)' + ); + +// ============================================================================ +// Ref Schema — lightweight pointer stored in agentcore.json +// ============================================================================ + +export const HarnessRefSchema = z.object({ + name: HarnessNameSchema, + path: z.string().min(1, 'Path to harness directory is required'), +}); + +export type HarnessRef = z.infer<typeof HarnessRefSchema>; + +// ============================================================================ +// Tool Schema — used inside harness.json +// ============================================================================ + +const RemoteMcpConfigSchema = z.object({ + remoteMcp: z.object({ + url: z.string().url(), + }), +}); + +const AgentCoreGatewayConfigSchema = z.object({ + agentCoreGateway: z.object({ + gatewayArn: z.string().min(1), + }), +}); + +const InlineFunctionConfigSchema = z.object({ + inlineFunction: z.object({ + description: z.string().optional(), + inputSchema: z.record(z.unknown()).optional(), + }), +}); + +export const HarnessToolSchema = z.object({ + type: z.enum([ + 'agentcore_browser', + 'agentcore_code_interpreter', + 'remote_mcp', + 'agentcore_gateway', + 'inline_function', + ]), + name: z.string().min(1), + browserArn: z.string().optional(), + codeInterpreterArn: z.string().optional(), + config:
z.union([RemoteMcpConfigSchema, AgentCoreGatewayConfigSchema, InlineFunctionConfigSchema]).optional(), +}); + +export type HarnessTool = z.infer<typeof HarnessToolSchema>; + +// ============================================================================ +// Model Config Schema +// ============================================================================ + +const HarnessModelConfigSchema = z.object({ + bedrockModelConfig: z.object({ modelId: z.string().min(1) }).optional(), + anthropicModelConfig: z + .object({ modelId: z.string().min(1), apiKeyCredentialProviderArn: z.string().optional() }) + .optional(), + openAIModelConfig: z + .object({ modelId: z.string().min(1), apiKeyCredentialProviderArn: z.string().optional() }) + .optional(), + geminiModelConfig: z + .object({ modelId: z.string().min(1), apiKeyCredentialProviderArn: z.string().optional() }) + .optional(), +}); + +// ============================================================================ +// Skill Schema +// ============================================================================ + +export const HarnessSkillSchema = z.object({ + path: z.string().min(1), +}); + +export type HarnessSkill = z.infer<typeof HarnessSkillSchema>; + +// ============================================================================ +// Truncation Schema +// ============================================================================ + +const TruncationConfigSchema = z.object({ + strategy: z.enum(['sliding_window']), + config: z.object({ + slidingWindow: z.object({ + messagesCount: z.number().int().min(1), + }), + }), +}); + +// ============================================================================ +// Environment Schema +// ============================================================================ + +const HarnessEnvironmentArtifactSchema = z.object({ + containerConfiguration: z.object({ containerUri: z.string().min(1) }).optional(), +}); + +const HarnessEnvironmentSchema = z.object({ + agentCoreRuntimeEnvironment: z + .object({ + executionRoleArn:
z.string().optional(), + networkConfiguration: z.record(z.unknown()).optional(), + filesystemConfigurations: z.array(z.record(z.unknown())).optional(), + }) + .optional(), +}); + +// ============================================================================ +// Config Schema — full config stored in harness.json +// ============================================================================ + +export const HarnessConfigSchema = z.object({ + name: HarnessNameSchema, + model: HarnessModelConfigSchema, + systemPrompt: z.string().optional(), + tools: z.array(HarnessToolSchema).default([]), + skills: z.array(HarnessSkillSchema).optional(), + memory: z.object({ name: z.string().min(1) }).optional(), + maxIterations: z.number().int().min(1).optional(), + timeoutSeconds: z.number().int().min(1).optional(), + maxTokens: z.number().int().min(1).optional(), + allowedTools: z.array(z.string()).optional(), + truncation: TruncationConfigSchema.optional(), + environmentArtifact: HarnessEnvironmentArtifactSchema.optional(), + dockerfile: z.string().optional(), + environment: HarnessEnvironmentSchema.optional(), + tags: TagsSchema.optional(), +}); + +export type HarnessConfig = z.infer<typeof HarnessConfigSchema>; + +// ============================================================================ +// Deployed State Schema +// ============================================================================ + +export const HarnessDeployedStateSchema = z.object({ + harnessId: z.string().min(1), + harnessArn: z.string().min(1), + roleArn: z.string().min(1), + agentRuntimeArn: z.string().optional(), +}); + +export type HarnessDeployedState = z.infer<typeof HarnessDeployedStateSchema>; + +// ============================================================================ +// Constants +// ============================================================================ + +export const HARNESS_TOOL_TYPES = [ + 'agentcore_browser', + 'agentcore_code_interpreter', + 'remote_mcp', + 'agentcore_gateway', + 'inline_function', +] as const; + +export type
HarnessToolType = (typeof HARNESS_TOOL_TYPES)[number]; + +export const DEFAULT_HARNESS_MODEL_IDS: Record = { + bedrock: 'us.anthropic.claude-sonnet-4-6-20250514-v1:0', + anthropic: 'claude-sonnet-4-6-20250514', + openai: 'gpt-4.1', + gemini: 'gemini-2.5-flash', +}; + +export const HARNESS_CONFIG_FILENAME = 'harness.json'; +export const HARNESS_SYSTEM_PROMPT_FILENAME = 'system-prompt.md'; +export const HARNESS_SKILLS_DIR = 'skills'; +export const HARNESSES_DIR = 'harnesses'; +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: +`cd /Volumes/workplace/agentcore/agentcore-gh/private/private-agentcore-cli-staging && npx vitest run src/schema/schemas/__tests__/harness.test.ts` + +Expected: All tests PASS. + +- [ ] **Step 5: Add `harnesses` ref array to AgentCoreProjectSpecSchema** + +Modify `src/schema/schemas/agentcore-project.ts`: + +Add import at top: + +```typescript +import { HarnessRefSchema } from './harness'; +``` + +Add re-exports: + +```typescript +export { + HarnessConfigSchema, + HarnessRefSchema, + HarnessNameSchema, + HarnessToolSchema, + HarnessSkillSchema, +} from './harness'; +export type { + HarnessConfig, + HarnessRef, + HarnessTool, + HarnessSkill, + HarnessToolType, + HarnessDeployedState, +} from './harness'; +export { + HarnessDeployedStateSchema, + DEFAULT_HARNESS_MODEL_IDS, + HARNESS_CONFIG_FILENAME, + HARNESS_SYSTEM_PROMPT_FILENAME, + HARNESSES_DIR, +} from './harness'; +``` + +Add `harnesses` array to the `AgentCoreProjectSpecSchema` `.object({...})`, after `runtimes`: + +```typescript + harnesses: z + .array(HarnessRefSchema) + .default([]) + .superRefine( + uniqueBy( + harness => harness.name, + name => `Duplicate harness name: ${name}` + ) + ), +``` + +Note: This is an array of `HarnessRefSchema` (name + path), NOT the full config. The full config lives in `harness.json` +files. 
+ +- [ ] **Step 6: Add HarnessDeployedState to deployed-state.ts** + +Modify `src/schema/schemas/deployed-state.ts`: + +Add import at top: + +```typescript +import { HarnessDeployedStateSchema } from './harness'; +``` + +Re-export: + +```typescript +export { HarnessDeployedStateSchema } from './harness'; +export type { HarnessDeployedState } from './harness'; +``` + +Add `harnesses` to `DeployedResourceStateSchema` object: + +```typescript + harnesses: z.record(z.string(), HarnessDeployedStateSchema).optional(), +``` + +- [ ] **Step 7: Update schema barrel export** + +Modify `src/schema/index.ts` — add all harness re-exports. Check what's already re-exported from `agentcore-project.ts` +and add any missing ones: + +```typescript +export { + HarnessConfigSchema, + HarnessRefSchema, + HarnessNameSchema, + HarnessToolSchema, + HarnessSkillSchema, + HarnessDeployedStateSchema, + DEFAULT_HARNESS_MODEL_IDS, + HARNESS_CONFIG_FILENAME, + HARNESS_SYSTEM_PROMPT_FILENAME, + HARNESS_SKILLS_DIR, + HARNESSES_DIR, + HARNESS_TOOL_TYPES, +} from './schemas/harness'; +export type { + HarnessConfig, + HarnessRef, + HarnessTool, + HarnessSkill, + HarnessToolType, + HarnessDeployedState, +} from './schemas/harness'; +``` + +- [ ] **Step 8: Run full schema tests + typecheck** + +Run: +`cd /Volumes/workplace/agentcore/agentcore-gh/private/private-agentcore-cli-staging && npx vitest run src/schema/ && npm run typecheck` + +Expected: All pass. + +- [ ] **Step 9: Commit** + +```bash +git add src/schema/schemas/harness.ts src/schema/schemas/__tests__/harness.test.ts src/schema/schemas/agentcore-project.ts src/schema/schemas/deployed-state.ts src/schema/index.ts +git commit -m "$(cat <<'EOF' +feat: add Harness schemas with file-per-harness architecture + +HarnessRefSchema ({name, path}) for agentcore.json discovery. +HarnessConfigSchema (full config) for per-harness harness.json files. +System prompt is a file path string (./system-prompt.md), not inline JSON. +Skills are path references.
HarnessDeployedState for deployed-state.json. +EOF +)" +``` + +--- + +## Task 2: HarnessIO — Read/Write Harness Directories + +This is a new module that handles the file-per-harness I/O: creating harness directories, reading/writing +`harness.json`, resolving `system-prompt.md` content, listing harness dirs. + +**Files:** + +- Create: `src/lib/harness-io.ts` +- Test: `src/lib/__tests__/harness-io.test.ts` + +- [ ] **Step 1: Write failing test for HarnessIO** + +Create `src/lib/__tests__/harness-io.test.ts`: + +```typescript +import { HarnessIO } from '../harness-io'; +import fs from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +describe('HarnessIO', () => { + let tmpDir: string; + let agentcoreDir: string; + + beforeEach(async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'harness-io-test-')); + agentcoreDir = path.join(tmpDir, 'agentcore'); + await fs.mkdir(agentcoreDir, { recursive: true }); + }); + + afterEach(async () => { + await fs.rm(tmpDir, { recursive: true, force: true }); + }); + + it('scaffolds a harness directory with harness.json and system-prompt.md', async () => { + const io = new HarnessIO(agentcoreDir); + await io.scaffoldHarness( + 'my-agent', + { + name: 'my-agent', + model: { bedrockModelConfig: { modelId: 'us.anthropic.claude-sonnet-4-6-20250514-v1:0' } }, + tools: [], + }, + 'You are a helpful assistant.' 
+ ); + + const harnessDir = path.join(agentcoreDir, 'harnesses', 'my-agent'); + const configRaw = await fs.readFile(path.join(harnessDir, 'harness.json'), 'utf-8'); + const config = JSON.parse(configRaw); + expect(config.name).toBe('my-agent'); + expect(config.systemPrompt).toBe('./system-prompt.md'); + + const prompt = await fs.readFile(path.join(harnessDir, 'system-prompt.md'), 'utf-8'); + expect(prompt).toBe('You are a helpful assistant.'); + }); + + it('reads a harness config from disk', async () => { + const io = new HarnessIO(agentcoreDir); + await io.scaffoldHarness( + 'test-agent', + { + name: 'test-agent', + model: { bedrockModelConfig: { modelId: 'test-model' } }, + tools: [], + }, + 'Test prompt' + ); + + const config = await io.readHarnessConfig('test-agent'); + expect(config.name).toBe('test-agent'); + }); + + it('resolves system prompt markdown to text', async () => { + const io = new HarnessIO(agentcoreDir); + await io.scaffoldHarness( + 'test-agent', + { + name: 'test-agent', + model: { bedrockModelConfig: { modelId: 'test-model' } }, + tools: [], + }, + 'My system prompt content' + ); + + const text = await io.resolveSystemPrompt('test-agent'); + expect(text).toBe('My system prompt content'); + }); + + it('lists harness directories', async () => { + const io = new HarnessIO(agentcoreDir); + await io.scaffoldHarness( + 'agent-a', + { + name: 'agent-a', + model: { bedrockModelConfig: { modelId: 'm' } }, + tools: [], + }, + 'prompt a' + ); + await io.scaffoldHarness( + 'agent-b', + { + name: 'agent-b', + model: { bedrockModelConfig: { modelId: 'm' } }, + tools: [], + }, + 'prompt b' + ); + + const names = await io.listHarnesses(); + expect(names.sort()).toEqual(['agent-a', 'agent-b']); + }); + + it('removes a harness directory', async () => { + const io = new HarnessIO(agentcoreDir); + await io.scaffoldHarness( + 'to-remove', + { + name: 'to-remove', + model: { bedrockModelConfig: { modelId: 'm' } }, + tools: [], + }, + 'prompt' + ); + + await 
io.removeHarness('to-remove'); + const names = await io.listHarnesses(); + expect(names).toEqual([]); + }); +}); +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: +`cd /Volumes/workplace/agentcore/agentcore-gh/private/private-agentcore-cli-staging && npx vitest run src/lib/__tests__/harness-io.test.ts` + +Expected: FAIL — module `../harness-io` does not exist. + +- [ ] **Step 3: Implement HarnessIO** + +Create `src/lib/harness-io.ts`: + +```typescript +import type { HarnessConfig } from '../schema'; +import { + HARNESSES_DIR, + HARNESS_CONFIG_FILENAME, + HARNESS_SKILLS_DIR, + HARNESS_SYSTEM_PROMPT_FILENAME, + HarnessConfigSchema, +} from '../schema'; +import fs from 'node:fs/promises'; +import path from 'node:path'; + +export class HarnessIO { + private readonly harnessesDir: string; + + constructor(private readonly agentcoreDir: string) { + this.harnessesDir = path.join(agentcoreDir, HARNESSES_DIR); + } + + async scaffoldHarness(name: string, config: HarnessConfig, systemPromptText: string): Promise<string> { + const harnessDir = path.join(this.harnessesDir, name); + await fs.mkdir(harnessDir, { recursive: true }); + await fs.mkdir(path.join(harnessDir, HARNESS_SKILLS_DIR), { recursive: true }); + + const configWithPromptRef: HarnessConfig = { + ...config, + systemPrompt: `./${HARNESS_SYSTEM_PROMPT_FILENAME}`, + }; + + await fs.writeFile( + path.join(harnessDir, HARNESS_CONFIG_FILENAME), + JSON.stringify(configWithPromptRef, null, 2) + '\n' + ); + + await fs.writeFile(path.join(harnessDir, HARNESS_SYSTEM_PROMPT_FILENAME), systemPromptText); + + return harnessDir; + } + + async readHarnessConfig(name: string): Promise<HarnessConfig> { + const configPath = path.join(this.harnessesDir, name, HARNESS_CONFIG_FILENAME); + const raw = await fs.readFile(configPath, 'utf-8'); + return HarnessConfigSchema.parse(JSON.parse(raw)); + } + + async writeHarnessConfig(name: string, config: HarnessConfig): Promise<void> { + const configPath = path.join(this.harnessesDir, name,
HARNESS_CONFIG_FILENAME); + await fs.writeFile(configPath, JSON.stringify(config, null, 2) + '\n'); + } + + async resolveSystemPrompt(name: string): Promise<string | undefined> { + const config = await this.readHarnessConfig(name); + if (!config.systemPrompt) return undefined; + + const harnessDir = path.join(this.harnessesDir, name); + const promptPath = path.resolve(harnessDir, config.systemPrompt); + try { + return await fs.readFile(promptPath, 'utf-8'); + } catch { + return undefined; + } + } + + async listHarnesses(): Promise<string[]> { + try { + const entries = await fs.readdir(this.harnessesDir, { withFileTypes: true }); + const names: string[] = []; + for (const entry of entries) { + if (!entry.isDirectory()) continue; + const configPath = path.join(this.harnessesDir, entry.name, HARNESS_CONFIG_FILENAME); + try { + await fs.access(configPath); + names.push(entry.name); + } catch { + // Not a valid harness directory + } + } + return names; + } catch { + return []; + } + } + + async removeHarness(name: string): Promise<void> { + const harnessDir = path.join(this.harnessesDir, name); + await fs.rm(harnessDir, { recursive: true, force: true }); + } + + getHarnessDir(name: string): string { + return path.join(this.harnessesDir, name); + } + + getRelativeHarnessPath(name: string): string { + return `./${HARNESSES_DIR}/${name}`; + } +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: +`cd /Volumes/workplace/agentcore/agentcore-gh/private/private-agentcore-cli-staging && npx vitest run src/lib/__tests__/harness-io.test.ts` + +Expected: All tests PASS. + +- [ ] **Step 5: Export from lib barrel** + +Add to `src/lib/index.ts`: + +```typescript +export { HarnessIO } from './harness-io'; +``` + +- [ ] **Step 6: Run typecheck** + +Run: `cd /Volumes/workplace/agentcore/agentcore-gh/private/private-agentcore-cli-staging && npm run typecheck` + +Expected: No errors.
+ +- [ ] **Step 7: Commit** + +```bash +git add src/lib/harness-io.ts src/lib/__tests__/harness-io.test.ts src/lib/index.ts +git commit -m "$(cat <<'EOF' +feat: add HarnessIO for file-per-harness directory management + +Scaffolds agentcore/harnesses/<name>/ with harness.json, system-prompt.md, +and skills/ dir. Reads/writes config, resolves markdown system prompts, +lists/removes harness directories. +EOF +)" +``` + +--- + +## Task 3: HarnessPrimitive — Add/Remove Lifecycle + +Now uses `HarnessIO` to create harness directories and writes `HarnessRef` pointers to `agentcore.json`. Remove deletes +the harness directory but preserves memory resources. + +**Files:** + +- Create: `src/cli/primitives/HarnessPrimitive.ts` +- Modify: `src/cli/primitives/registry.ts` +- Test: `src/cli/primitives/__tests__/HarnessPrimitive.test.ts` + +- [ ] **Step 1: Write failing test** + +Create `src/cli/primitives/__tests__/HarnessPrimitive.test.ts`: + +```typescript +import { HarnessPrimitive } from '../HarnessPrimitive'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('../../../lib', async importOriginal => { + const original = await importOriginal<typeof import('../../../lib')>(); + return { + ...original, + ConfigIO: vi.fn().mockImplementation(() => ({ + readProjectSpec: vi.fn().mockResolvedValue({ + name: 'test', + version: 1, + harnesses: [], + runtimes: [], + memories: [], + credentials: [], + evaluators: [], + onlineEvalConfigs: [], + agentCoreGateways: [], + policyEngines: [], + }), + writeProjectSpec: vi.fn(), + })), + findConfigRoot: vi.fn().mockReturnValue('/mock/agentcore'), + getWorkingDirectory: vi.fn().mockReturnValue('/mock'), + HarnessIO: vi.fn().mockImplementation(() => ({ + scaffoldHarness: vi.fn().mockResolvedValue('/mock/agentcore/harnesses/test'), + removeHarness: vi.fn(), + listHarnesses: vi.fn().mockResolvedValue([]), + getRelativeHarnessPath: vi.fn().mockReturnValue('./harnesses/test'), + })), + }; +}); + +describe('HarnessPrimitive', () => { + let primitive:
HarnessPrimitive; + + beforeEach(() => { + primitive = new HarnessPrimitive(); + }); + + it('has kind "harness"', () => { + expect(primitive.kind).toBe('harness'); + }); + + it('has label "Harness"', () => { + expect(primitive.label).toBe('Harness'); + }); +}); +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: +`cd /Volumes/workplace/agentcore/agentcore-gh/private/private-agentcore-cli-staging && npx vitest run src/cli/primitives/__tests__/HarnessPrimitive.test.ts` + +Expected: FAIL — `HarnessPrimitive` not found. + +- [ ] **Step 3: Create HarnessPrimitive** + +Create `src/cli/primitives/HarnessPrimitive.ts`. Key differences from Draft 1: + +- `add()` creates harness directory via `HarnessIO.scaffoldHarness()`, then adds `HarnessRef` to `agentcore.json` +- `remove()` removes `HarnessRef` from `agentcore.json` AND deletes `agentcore/harnesses//` directory +- Memory resources are preserved on remove +- Default memory is long-term (SEMANTIC + SUMMARIZATION) — when adding a harness with memory enabled, also add a memory + resource to `agentcore.json` if not already present +- `systemPrompt` is written to `system-prompt.md`, referenced by path in `harness.json` +- `registerCommands()` adds `--skills` flag + +The implementation follows `AgentPrimitive` patterns exactly. The `add()` method: + +1. Reads project spec +2. Checks for duplicate harness name +3. Builds `HarnessConfig` from options +4. Calls `harnessIO.scaffoldHarness(name, config, systemPromptText)` to create the directory +5. Adds `{name, path: harnessIO.getRelativeHarnessPath(name)}` to `spec.harnesses` +6. If memory enabled (default), adds a memory resource to `spec.memories` if not present +7. Writes updated project spec + +- [ ] **Step 4: Run test to verify it passes** + +Run: +`cd /Volumes/workplace/agentcore/agentcore-gh/private/private-agentcore-cli-staging && npx vitest run src/cli/primitives/__tests__/HarnessPrimitive.test.ts` + +Expected: PASS. 
+ +- [ ] **Step 5: Register in registry.ts** + +Modify `src/cli/primitives/registry.ts`: + +```typescript +import { HarnessPrimitive } from './HarnessPrimitive'; + +export const harnessPrimitive = new HarnessPrimitive(); +``` + +Add `harnessPrimitive` to `ALL_PRIMITIVES` after `agentPrimitive`. + +- [ ] **Step 6: Run typecheck** + +Run: `cd /Volumes/workplace/agentcore/agentcore-gh/private/private-agentcore-cli-staging && npm run typecheck` + +- [ ] **Step 7: Commit** + +```bash +git add src/cli/primitives/HarnessPrimitive.ts src/cli/primitives/__tests__/HarnessPrimitive.test.ts src/cli/primitives/registry.ts +git commit -m "$(cat <<'EOF' +feat: add HarnessPrimitive with file-per-harness lifecycle + +Creates agentcore/harnesses/<name>/ directory with harness.json + +system-prompt.md. Adds HarnessRef pointer to agentcore.json. Remove +deletes the harness directory but preserves memory resources. Default +memory is long-term (SEMANTIC + SUMMARIZATION). +EOF +)" +``` + +--- + +## Task 4: Add "Harness" to AddScreen + AddFlow + AddHarnessFlow TUI Wizard + +The TUI wizard follows the Draft 2 visual spec with breadcrumb progress (● Name → ○ Model → ...), system prompt step, +tools multi-select with shell/file_operations defaults, MCP sub-flow, advanced config group. + +**Files:** + +- Modify: `src/cli/tui/screens/add/AddScreen.tsx` +- Modify: `src/cli/tui/screens/add/AddFlow.tsx` +- Create: `src/cli/tui/screens/harness/types.ts` +- Create: `src/cli/tui/screens/harness/AddHarnessFlow.tsx` +- Create: `src/cli/tui/screens/harness/useAddHarness.ts` +- Create: `src/cli/tui/screens/harness/index.ts` + +- [ ] **Step 1: Create harness TUI types** + +Create `src/cli/tui/screens/harness/types.ts`.
Key differences from Draft 1: + +- `AddHarnessStep` includes `'systemPrompt'`, `'modelId'`, `'mcpConfig'`, `'gatewayConfig'`, `'advanced'` +- `AddHarnessConfig` has `systemPrompt: string` (the text, not file path — hook writes it to `.md`) +- `HARNESS_MODEL_PROVIDER_OPTIONS` includes Gemini (4 providers) +- `HARNESS_MODEL_OPTIONS` per-provider (e.g., Bedrock: Sonnet 4.6, Sonnet 4.5, Haiku 4.5, Opus 4.5) +- `HARNESS_TOOL_OPTIONS` includes shell + file_operations as defaults, plus Browser, Code Interpreter, Remote MCP, + Gateway +- `HARNESS_MEMORY_OPTIONS` defaults to long-term (longTerm is first/pre-selected) +- `HARNESS_ADVANCED_OPTIONS`: container, VPC, execution limits, memory override, truncation, skills + +```typescript +import type { HarnessToolType } from '../../../../schema'; + +export type AddHarnessStep = + | 'name' + | 'modelProvider' + | 'modelId' + | 'apiKey' + | 'systemPrompt' + | 'tools' + | 'mcpConfig' + | 'gatewayConfig' + | 'memory' + | 'advanced' + | 'confirm'; + +export interface AddHarnessConfig { + name: string; + modelProvider: 'bedrock' | 'anthropic' | 'openai' | 'gemini'; + modelId: string; + apiKey?: string; + systemPrompt: string; + tools: HarnessToolType[]; + mcpServers: Array<{ name: string; url: string }>; + gatewayName?: string; + gatewayArn?: string; + memory: 'none' | 'longTerm'; + containerMode: 'none' | 'dockerfile' | 'uri'; + dockerfile?: string; + containerUri?: string; +} + +export const HARNESS_MODEL_PROVIDER_OPTIONS = [ + { id: 'bedrock', title: 'Bedrock', description: 'AWS-managed models (default)' }, + { id: 'anthropic', title: 'Anthropic', description: 'Direct API' }, + { id: 'openai', title: 'OpenAI', description: 'OpenAI API' }, + { id: 'gemini', title: 'Gemini', description: 'Google Gemini API' }, +] as const; + +export const HARNESS_BEDROCK_MODEL_OPTIONS = [ + { id: 'us.anthropic.claude-sonnet-4-6-20250514-v1:0', title: 'Claude Sonnet 4.6 (recommended)' }, + { id: 'us.anthropic.claude-sonnet-4-5-20250514-v1:0', title: 
'Claude Sonnet 4.5' }, + { id: 'us.anthropic.claude-haiku-4-5-20251001-v1:0', title: 'Claude Haiku 4.5' }, + { id: 'us.anthropic.claude-opus-4-5-20250514-v1:0', title: 'Claude Opus 4.5' }, +] as const; + +export const HARNESS_TOOL_OPTIONS = [ + { id: 'shell', title: 'Shell', description: 'Execute shell commands (default)', defaultEnabled: true }, + { id: 'file_operations', title: 'File operations', description: 'Read/write files (default)', defaultEnabled: true }, + { id: 'agentcore_browser', title: 'AgentCore Browser', description: 'Web browsing & automation' }, + { id: 'agentcore_code_interpreter', title: 'Code Interpreter', description: 'Sandboxed code execution' }, + { id: 'remote_mcp', title: 'Remote MCP Server', description: 'Connect to an MCP server' }, + { id: 'agentcore_gateway', title: 'AgentCore Gateway', description: 'Connect via gateway' }, +] as const; + +export const HARNESS_MEMORY_OPTIONS = [ + { + id: 'longTerm', + title: 'Long-term memory (default)', + description: 'SEMANTIC + SUMMARIZATION, persistent across sessions', + }, + { id: 'none', title: 'No memory', description: 'Stateless conversations' }, +] as const; + +export const ADD_HARNESS_STEP_LABELS: Record = { + name: 'Name', + modelProvider: 'Model', + modelId: 'Model', + apiKey: 'API Key', + systemPrompt: 'Prompt', + tools: 'Tools', + mcpConfig: 'Tools', + gatewayConfig: 'Tools', + memory: 'Memory', + advanced: 'Advanced', + confirm: 'Confirm', +}; + +export const DEFAULT_SYSTEM_PROMPT = 'You are a helpful assistant.'; +``` + +- [ ] **Step 2: Create useAddHarness hook** + +Create `src/cli/tui/screens/harness/useAddHarness.ts`. 
Key differences from Draft 1: + +- Uses `HarnessIO` to scaffold the harness directory (writes `harness.json` + `system-prompt.md`) +- Adds `HarnessRef` (`{name, path}`) to `agentcore.json`, not the full config +- If memory is `'longTerm'`, auto-creates a memory resource (e.g., `"<name>-memory"`) with SEMANTIC + SUMMARIZATION + strategies in `spec.memories` and sets `memory.name` in `harness.json` + +- [ ] **Step 3: Create AddHarnessFlow component** + +Create `src/cli/tui/screens/harness/AddHarnessFlow.tsx`. Key differences from Draft 1: + +- Breadcrumb progress bar: `● Name → ○ Model → ○ Prompt → ○ Tools → ○ Memory → ○ Advanced → ○ Confirm` +- After model provider selection, shows model ID sub-step (e.g., Bedrock model list) +- API key step appears for non-Bedrock providers +- System prompt step: text input with default "You are a helpful assistant." and hint that `system-prompt.md` will be + created +- Tools step: multi-select with Space toggle, shell + file_operations pre-checked +- If Remote MCP Server selected: MCP name + URL sub-flow with "Add another?"
option +- If Gateway selected: select from project gateways or enter ARN +- Memory defaults to Long-term (pre-selected first item) +- Advanced step: checkboxes for optional config (container, VPC, limits, memory override, truncation) +- Confirm step: review panel showing all selections including system prompt file path + +- [ ] **Step 4: Create barrel export** + +Create `src/cli/tui/screens/harness/index.ts`: + +```typescript +export { AddHarnessFlow } from './AddHarnessFlow'; +export { useAddHarness } from './useAddHarness'; +export type { AddHarnessConfig, AddHarnessStep } from './types'; +``` + +- [ ] **Step 5: Add "Harness" to AddScreen** + +Modify `src/cli/tui/screens/add/AddScreen.tsx` — add after `agent`: + +```typescript + { id: 'harness', title: 'Harness', description: 'Managed agent loop (configure model + tools)' }, +``` + +- [ ] **Step 6: Wire harness-wizard into AddFlow** + +Modify `src/cli/tui/screens/add/AddFlow.tsx`: + +- Add `harness-wizard` and `harness-success` to `FlowState` +- Route `'harness'` resource type to `harness-wizard` +- Success screen shows created files: `agentcore/harnesses/<name>/harness.json` and `system-prompt.md` + +- [ ] **Step 7: Run typecheck + lint** + +Run: +`cd /Volumes/workplace/agentcore/agentcore-gh/private/private-agentcore-cli-staging && npm run typecheck && npm run lint` + +- [ ] **Step 8: Commit** + +```bash +git add src/cli/tui/screens/harness/ src/cli/tui/screens/add/AddScreen.tsx src/cli/tui/screens/add/AddFlow.tsx +git commit -m "$(cat <<'EOF' +feat: add Harness TUI wizard with file-per-harness scaffolding + +Breadcrumb progress bar (Name → Model → Prompt → Tools → Memory → +Advanced → Confirm). Creates agentcore/harnesses/<name>/ with +harness.json + system-prompt.md. Long-term memory default. Tools +multi-select with shell + file_operations pre-checked. +EOF +)" +``` + +--- + +## Task 5: Create Command — `--template harness` + +Config-only project scaffolding. No `app/` directory by default.
Optional `--with-invoke-script` flag. + +**Files:** + +- Modify: `src/cli/commands/create/command.tsx` +- Modify: `src/cli/commands/create/action.ts` +- Modify: `src/cli/commands/create/types.ts` +- Modify: `src/cli/tui/screens/create/CreateScreen.tsx` + +- [ ] **Step 1: Add `template` and `withInvokeScript` to CreateOptions** + +Modify `src/cli/commands/create/types.ts`: + +```typescript + template?: 'agent' | 'harness'; + withInvokeScript?: boolean; +``` + +- [ ] **Step 2: Add `--template` and `--with-invoke-script` flags** + +Modify `src/cli/commands/create/command.tsx`: + +```typescript + .option('--template