From 677ab958a1d075cf9bab0a9b1f045d1177cdf4a0 Mon Sep 17 00:00:00 2001 From: Tejas Kashinath Date: Tue, 5 May 2026 15:02:20 -0400 Subject: [PATCH 1/5] test: per-directory coverage thresholds + E2E cleanup hardening Day 5: Coverage gate - Add scripts/check-coverage.mjs with ratchet thresholds per source directory (schema 78%, operations 52%, lib 82%, aws 40%, tui/hooks 14%, tui/components 68%, commands 46%). Target thresholds documented in the report, ratchet set ~5% below current to prevent regression. - Also check PR changed-line coverage (>=50%) and post a report comment. - Switch davelosert/vitest-coverage-report-action to file-coverage-mode: changes so PR comments highlight uncovered new lines. Day 6: E2E cleanup - teardownE2EProject() now retries deploy up to 3x and throws on failure instead of swallowing silently. The remove step logs but does not retry. - Tag CDK stacks with Environment=e2e-test / CreatedAt / CreatedBy when AGENTCORE_E2E_TEST=1 is set. The CLI's e2e helper sets this env var; user projects are unaffected. - Shrink cleanupStaleCredentialProviders() window from 30min -> 5min to catch orphans faster. - byo-custom-jwt.test.ts Cognito cleanup now retries 3x with 5s delay instead of swallowing errors on first try. - Add docs/test-resource-sweeper-spec.md for the infra team to implement a scheduled sweeper (CFN stacks, credential providers, log groups, ECR). --- .github/workflows/build-and-test.yml | 19 +- docs/test-resource-sweeper-spec.md | 115 +++++++++++ e2e-tests/byo-custom-jwt.test.ts | 55 ++++-- e2e-tests/e2e-helper.ts | 45 ++++- scripts/check-coverage.mjs | 281 +++++++++++++++++++++++++++ src/assets/cdk/bin/cdk.ts | 18 +- 6 files changed, 503 insertions(+), 30 deletions(-) create mode 100644 docs/test-resource-sweeper-spec.md create mode 100644 scripts/check-coverage.mjs diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 80b17f987..f083e8cdc 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -134,16 +134,29 @@ jobs: steps: - uses: actions/checkout@v6 + with: + fetch-depth: 0 + - uses: actions/setup-node@v6 + with: + node-version: 20.x + cache: 'npm' - name: Download coverage artifact uses: actions/download-artifact@v8 with: name: coverage-report path: coverage/ - - name: Coverage Report + - name: Coverage Report (PR comment) uses: davelosert/vitest-coverage-report-action@v2 with: json-summary-path: coverage/coverage-summary.json json-final-path: coverage/coverage-final.json vite-config-path: vitest.config.ts - file-coverage-mode: none - coverage-thresholds: '{ "lines": 50, "branches": 50, "functions": 50, "statements": 50 }' + file-coverage-mode: changes + - name: Per-directory coverage gate + env: + BASE_SHA: ${{ github.event.pull_request.base.sha }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} + PR_NUMBER: ${{ github.event.pull_request.number }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} + run: node scripts/check-coverage.mjs diff --git a/docs/test-resource-sweeper-spec.md b/docs/test-resource-sweeper-spec.md new file mode 100644 index 000000000..38dbea4ef --- /dev/null +++ b/docs/test-resource-sweeper-spec.md @@ -0,0 +1,115 @@ +# E2E Test Resource Sweeper — Specification + +## Purpose + +The e2e test suite provisions real AWS resources: CloudFormation stacks, credential providers, CloudWatch log groups, ECR images, and S3 artifacts. When a test crashes, times out, or is cancelled mid-run, its `afterAll` teardown may not execute, leaving orphaned resources behind. + +Orphan cost estimates from the April 2026 audit put sustained leaks at $2.5k-$10k/week depending on which stacks escape (runtimes, memory, credential providers). Teardown is now fatal on failure (PR bundled with this spec), but belt-and-suspenders dictates a scheduled sweeper that catches anything slipping through. + +This document specifies what a daily sweeper must do. The infra team owns implementation. + +## Scope + +The sweeper runs on a schedule (daily at 04:00 UTC recommended) in the same AWS account used by CI. It identifies and deletes resources that meet all three criteria: + +1. Match a well-known e2e naming or tagging pattern. +2. Created more than N hours ago (N=4 recommended — longer than the longest e2e run). +3. Are not part of an active workflow run (check GitHub Actions API for running e2e jobs). + +Dry-run mode must print what would be deleted without acting. Default to dry-run for the first two weeks. + +## Resources to Sweep + +### 1. CloudFormation Stacks + +**Identification:** Stacks tagged with `Environment=e2e-test` (this PR adds the tag via `AGENTCORE_E2E_TEST=1` env var in `src/assets/cdk/bin/cdk.ts`). Also match by stack name prefix `AgentCore-E2e*` as a fallback for older stacks. + +**Action:** `DeleteStack`. Watch for `DELETE_FAILED` and surface to the on-call rotation — some failures (S3 buckets with objects, ENIs held by Lambda) need manual intervention. + +**API calls:** +- `cloudformation:DescribeStacks` (paginate, filter by tag) +- `cloudformation:DeleteStack` +- `cloudformation:DescribeStackEvents` (on DELETE_FAILED for context) + +### 2. API Key Credential Providers + +**Identification:** Providers named with the `E2e` prefix and `createdTime` older than N hours. + +**Action:** `DeleteApiKeyCredentialProvider`. Silent failures acceptable — the CLI already logs these via `cleanupStaleCredentialProviders()`. + +**API calls:** +- `bedrock-agentcore-control:ListApiKeyCredentialProviders` (paginate) +- `bedrock-agentcore-control:DeleteApiKeyCredentialProvider` + +### 3. CloudWatch Log Groups + +**Identification:** Log groups under `/aws/bedrock-agentcore/runtimes/E2e*` and `/aws/codebuild/AgentCore-E2e*`. + +**Action:** `DeleteLogGroup` for groups older than N hours. Alternatively, set a retention policy of 3 days on all matched groups and let CloudWatch expire data. Retention is safer — deletion drops diagnostic context if someone is debugging a test failure. + +**API calls:** +- `logs:DescribeLogGroups` (filter by prefix, paginate) +- `logs:PutRetentionPolicy` (preferred) or `logs:DeleteLogGroup` + +### 4. ECR Repositories + +**Identification:** Repositories tagged with `Environment=e2e-test` or named with the `agentcore-e2e-*` prefix. + +**Action:** Delete images older than N hours. Keep the repository itself — CDK recreates images on every deploy, so repo deletion causes churn. Image cleanup is sufficient. + +**API calls:** +- `ecr:DescribeRepositories` (filter by tag or name) +- `ecr:ListImages` +- `ecr:BatchDeleteImage` + +### 5. S3 (CDK Bootstrap Bucket) + +**Identification:** The bootstrap bucket (`cdk-*-assets-*`) is shared across all deploys in the account. Don't delete the bucket or its tagged objects — CDK uses content-hashed object keys and expects them to persist. + +**Recommendation:** Apply an S3 lifecycle policy to the bootstrap bucket: transition objects to Intelligent-Tiering after 30 days, expire non-current versions after 90 days. Do this once via Terraform/CLI, not via the sweeper. + +## Workflow Structure + +GitHub Actions workflow (`.github/workflows/e2e-sweeper.yml`) with: + +```yaml +on: + schedule: + - cron: '0 4 * * *' # Daily at 04:00 UTC + workflow_dispatch: + inputs: + dry-run: + type: boolean + default: true +``` + +Permissions: use the same OIDC role that e2e tests use, but with delete permissions for the resources above. Store the role ARN in `AWS_E2E_SWEEPER_ROLE_ARN`. + +Steps: + +1. Configure AWS credentials (OIDC). +2. Check for running e2e jobs via `gh api /repos/aws/agentcore-cli/actions/runs?status=in_progress`. If any e2e workflow is running, skip the sweep (or narrow age threshold to 24 hours). +3. Run sweep script (Node or Python) against each resource category. +4. Post a summary to a Slack channel (resource counts deleted per category, failures). +5. On any resource with repeated delete failures (>3 runs in a row), open a GitHub issue. + +## Safety Rails + +- **Hard age floor:** never delete anything younger than 2 hours, even if the script says to. +- **Account allow-list:** the script must fail closed if `AWS_ACCOUNT_ID` is not in the expected list (CI account only). +- **Kill switch:** check for a `SWEEPER_DISABLED` repo variable before running. On-call can flip this if the sweeper misbehaves. +- **Rate limits:** cap deletes per category at 50 per run to avoid runaway behavior. + +## Implementation Order + +1. Start with CloudFormation stack sweeping (highest $ impact). Run in dry-run for one week. +2. Add credential provider sweeping (already scoped by prefix, low risk). +3. Add log group retention policies (set-and-forget, no scheduled action needed). +4. Add ECR image cleanup (low $ impact; deferrable). +5. Enable live deletes after two weeks of clean dry-run output. + +## References + +- CDK stack tagging: `src/assets/cdk/bin/cdk.ts` (tags applied when `AGENTCORE_E2E_TEST=1`) +- Credential provider cleanup: `e2e-tests/e2e-helper.ts#cleanupStaleCredentialProviders` +- E2E teardown: `e2e-tests/e2e-helper.ts#teardownE2EProject` (throws on repeated failure as of this PR) diff --git a/e2e-tests/byo-custom-jwt.test.ts b/e2e-tests/byo-custom-jwt.test.ts index 64e534e20..ddc9ddcaa 100644 --- a/e2e-tests/byo-custom-jwt.test.ts +++ b/e2e-tests/byo-custom-jwt.test.ts @@ -48,7 +48,36 @@ const region = process.env.AWS_REGION ?? 'us-east-1'; * Run the local CLI build without skipping install (needed for deploy). */ function runLocalCLI(args: string[], cwd: string): Promise { - return runCLI(args, cwd, { skipInstall: false }); + return runCLI(args, cwd, { + skipInstall: false, + env: { + AGENTCORE_E2E_TEST: '1', + AGENTCORE_E2E_CREATOR: process.env.AGENTCORE_E2E_CREATOR ?? 'github-actions', + }, + }); +} + +async function deleteCognitoResourceWithRetry( + label: string, + op: () => Promise, + attempts = 3, + delayMs = 5000 +): Promise { + for (let attempt = 1; attempt <= attempts; attempt++) { + try { + await op(); + return; + } catch (err) { + const name = (err as { name?: string }).name ?? 'Unknown'; + const msg = (err as { message?: string }).message ?? String(err); + if (attempt === attempts) { + console.error(`[cognito-cleanup] ${label} failed after ${attempts} attempts: [${name}] ${msg}`); + return; + } + console.warn(`[cognito-cleanup] ${label} attempt ${attempt}/${attempts} failed: [${name}] — retrying`); + await new Promise(resolve => setTimeout(resolve, delayMs)); + } + } } describe.sequential('e2e: BYO agent with CUSTOM_JWT auth', () => { @@ -201,21 +230,15 @@ describe.sequential('e2e: BYO agent with CUSTOM_JWT auth', () => { // ── Delete Cognito resources ── if (userPoolId) { - try { - await cognitoClient.send(new DeleteResourceServerCommand({ UserPoolId: userPoolId, Identifier: 'agentcore' })); - } catch { - /* best-effort */ - } - try { - await cognitoClient.send(new DeleteUserPoolDomainCommand({ UserPoolId: userPoolId, Domain: domainPrefix })); - } catch { - /* best-effort */ - } - try { - await cognitoClient.send(new DeleteUserPoolCommand({ UserPoolId: userPoolId })); - } catch { - /* best-effort */ - } + await deleteCognitoResourceWithRetry('DeleteResourceServer', () => + cognitoClient.send(new DeleteResourceServerCommand({ UserPoolId: userPoolId, Identifier: 'agentcore' })) + ); + await deleteCognitoResourceWithRetry('DeleteUserPoolDomain', () => + cognitoClient.send(new DeleteUserPoolDomainCommand({ UserPoolId: userPoolId, Domain: domainPrefix })) + ); + await deleteCognitoResourceWithRetry('DeleteUserPool', () => + cognitoClient.send(new DeleteUserPoolCommand({ UserPoolId: userPoolId })) + ); } // ── Clean up temp directory ── diff --git a/e2e-tests/e2e-helper.ts b/e2e-tests/e2e-helper.ts index e0f6a51cc..85f889d15 100644 --- a/e2e-tests/e2e-helper.ts +++ b/e2e-tests/e2e-helper.ts @@ -308,7 +308,10 @@ export function createE2ESuite(cfg: E2EConfig) { export { hasAws, baseCanRun }; export function runAgentCoreCLI(args: string[], cwd: string): Promise { - return spawnAndCollect('agentcore', args, cwd); + return spawnAndCollect('agentcore', args, cwd, { + AGENTCORE_E2E_TEST: '1', + AGENTCORE_E2E_CREATOR: process.env.AGENTCORE_E2E_CREATOR ?? 'github-actions', + }); } // TODO: Replace with `agentcore add target` once the CLI command is re-introduced @@ -347,7 +350,7 @@ async function deleteCredentialProvider(client: BedrockAgentCoreControlClient, n * Runs in beforeAll to prevent accumulation from previous runs that * crashed or timed out before their afterAll teardown could execute. */ -export async function cleanupStaleCredentialProviders(maxAgeMs: number = 30 * 60 * 1000): Promise { +export async function cleanupStaleCredentialProviders(maxAgeMs: number = 5 * 60 * 1000): Promise { const region = process.env.AWS_REGION ?? 'us-east-1'; const client = new BedrockAgentCoreControlClient({ region }); const cutoff = new Date(Date.now() - maxAgeMs); @@ -365,17 +368,45 @@ export async function cleanupStaleCredentialProviders(maxAgeMs: number = 30 * 60 } export async function teardownE2EProject(projectPath: string, agentName: string, modelProvider: string): Promise { - await spawnAndCollect('agentcore', ['remove', 'all', '--json'], projectPath); - const result = await spawnAndCollect('agentcore', ['deploy', '--yes', '--json'], projectPath); - if (result.exitCode !== 0) { - console.log('Teardown stdout:', result.stdout); - console.log('Teardown stderr:', result.stderr); + const failures: string[] = []; + + const removeResult = await runAgentCoreCLI(['remove', 'all', '--json'], projectPath); + if (removeResult.exitCode !== 0) { + console.error(`[teardown] remove all failed (exit ${removeResult.exitCode})`); + console.error('[teardown] remove stdout:', removeResult.stdout); + console.error('[teardown] remove stderr:', removeResult.stderr); + failures.push(`remove all: exit ${removeResult.exitCode}`); + } + + const MAX_DEPLOY_ATTEMPTS = 3; + let deploySucceeded = false; + for (let attempt = 1; attempt <= MAX_DEPLOY_ATTEMPTS; attempt++) { + const result = await runAgentCoreCLI(['deploy', '--yes', '--json'], projectPath); + if (result.exitCode === 0) { + deploySucceeded = true; + if (attempt > 1) console.error(`[teardown] deploy succeeded on attempt ${attempt}`); + break; + } + console.error(`[teardown] deploy attempt ${attempt}/${MAX_DEPLOY_ATTEMPTS} failed (exit ${result.exitCode})`); + console.error('[teardown] deploy stdout:', result.stdout); + console.error('[teardown] deploy stderr:', result.stderr); + if (attempt < MAX_DEPLOY_ATTEMPTS) { + await new Promise(resolve => setTimeout(resolve, 15000)); + } } + if (!deploySucceeded) { + failures.push(`deploy teardown failed after ${MAX_DEPLOY_ATTEMPTS} attempts`); + } + if (modelProvider !== 'Bedrock' && agentName) { const region = process.env.AWS_REGION ?? 'us-east-1'; const client = new BedrockAgentCoreControlClient({ region }); await deleteCredentialProvider(client, `${agentName}${modelProvider}`); } + + if (failures.length > 0) { + throw new Error(`E2E teardown failed: ${failures.join('; ')}`); + } } export async function dumpImportDebugInfo( diff --git a/scripts/check-coverage.mjs b/scripts/check-coverage.mjs new file mode 100644 index 000000000..89ec92c20 --- /dev/null +++ b/scripts/check-coverage.mjs @@ -0,0 +1,281 @@ +#!/usr/bin/env node + +/** + * Coverage gate for agentcore-cli PRs. + * + * 1. Reads the vitest json-summary coverage report for overall stats. + * 2. Computes per-directory line coverage and fails if any directory is + * below its ratchet threshold. + * 3. Reads the vitest json report for per-line data, then checks exactly + * which PR-changed lines are covered. Fails if < PR_LINES_THRESHOLD + * of changed production lines are covered. + * 4. Posts a coverage summary comment on the PR (if running in GitHub Actions). + * + * Ratchet thresholds are set ~5% below current coverage to prevent + * regression without demanding immediate improvement. Target thresholds + * (noted next to each entry) represent what the directory *should* + * eventually reach. + */ +import { execSync } from 'child_process'; +import { readFileSync } from 'fs'; + +const PR_LINES_THRESHOLD = 50; + +/** + * Per-directory line coverage thresholds. + * Entries are checked longest-prefix-first so that nested directories take + * precedence over their parents. + */ +// Ratchet thresholds set ~5% below current to catch regressions without +// demanding immediate improvement. Raise when coverage improves. +const DIRECTORY_THRESHOLDS = [ + { prefix: 'src/schema/', threshold: 78, target: 85 }, + { prefix: 'src/cli/operations/', threshold: 52, target: 65 }, + { prefix: 'src/lib/', threshold: 82, target: 85 }, + { prefix: 'src/cli/aws/', threshold: 40, target: 60 }, + { prefix: 'src/cli/tui/hooks/', threshold: 14, target: 55 }, + { prefix: 'src/cli/tui/components/', threshold: 68, target: 75 }, + { prefix: 'src/cli/commands/', threshold: 46, target: 60 }, +]; + +// --------------------------------------------------------------------------- +// 1. Read overall coverage from json-summary +// --------------------------------------------------------------------------- + +const summary = JSON.parse(readFileSync('coverage/coverage-summary.json', 'utf8')); +const overall = summary.total; + +const stmtPct = overall.statements.pct; +const branchPct = overall.branches.pct; +const funcPct = overall.functions.pct; +const linePct = overall.lines.pct; + +// --------------------------------------------------------------------------- +// 2. Compute per-directory coverage from json-summary +// --------------------------------------------------------------------------- + +const cwd = process.cwd(); +const dirStats = DIRECTORY_THRESHOLDS.map(d => ({ ...d, total: 0, covered: 0 })); + +for (const [absPath, fileSummary] of Object.entries(summary)) { + if (absPath === 'total') continue; + const rel = absPath.startsWith(cwd) ? absPath.slice(cwd.length + 1) : absPath; + + // Longest-prefix match: iterate from most specific to least specific. + // DIRECTORY_THRESHOLDS is already ordered with nested paths before parents + // for tui/hooks and tui/components, but we sort by length to be safe. + const match = [...dirStats] + .sort((a, b) => b.prefix.length - a.prefix.length) + .find(d => rel.startsWith(d.prefix)); + if (!match) continue; + + match.total += fileSummary.lines.total; + match.covered += fileSummary.lines.covered; +} + +const dirResults = dirStats.map(d => { + const pct = d.total > 0 ? (d.covered / d.total) * 100 : 100; + const pass = pct >= d.threshold; + return { ...d, pct, pass }; +}); + +const allDirsPass = dirResults.every(d => d.pass); + +// --------------------------------------------------------------------------- +// 3. Read per-line coverage from json report +// --------------------------------------------------------------------------- + +const detail = JSON.parse(readFileSync('coverage/coverage-final.json', 'utf8')); + +const coveredLinesByFile = {}; + +for (const [absPath, fileCov] of Object.entries(detail)) { + const rel = absPath.startsWith(cwd) ? absPath.slice(cwd.length + 1) : absPath; + const covered = new Set(); + + for (const [id, count] of Object.entries(fileCov.s)) { + if (count > 0) { + const loc = fileCov.statementMap[id]; + for (let line = loc.start.line; line <= loc.end.line; line++) { + covered.add(line); + } + } + } + + coveredLinesByFile[rel] = covered; +} + +// --------------------------------------------------------------------------- +// 4. Compute PR-changed-line coverage +// --------------------------------------------------------------------------- + +const baseSha = process.env.BASE_SHA; +const headSha = process.env.HEAD_SHA; + +let prLinesCovered = 0; +let prLinesTotal = 0; +let prLinePct = 100; +const fileDetails = []; + +if (baseSha && headSha) { + const diffOutput = execSync( + `git diff ${baseSha}...${headSha} --unified=0 --diff-filter=AM -- 'src/**/*.ts' 'src/**/*.tsx'`, + { encoding: 'utf8' } + ); + + const changedLines = parseDiffForAddedLines(diffOutput); + + for (const [file, lines] of Object.entries(changedLines)) { + // Skip test files and excluded paths + if ( + file.includes('/__tests__/') || + file.endsWith('.test.ts') || + file.endsWith('.test.tsx') || + file.startsWith('src/assets/') || + file.startsWith('src/test-utils/') || + file.endsWith('.d.ts') + ) { + continue; + } + + const coveredSet = coveredLinesByFile[file]; + if (!coveredSet) continue; + + let fileCovered = 0; + for (const line of lines) { + if (coveredSet.has(line)) fileCovered++; + } + + prLinesTotal += lines.length; + prLinesCovered += fileCovered; + const pct = lines.length > 0 ? ((fileCovered / lines.length) * 100).toFixed(1) : '100.0'; + fileDetails.push({ file, changed: lines.length, covered: fileCovered, pct }); + } + + prLinePct = prLinesTotal > 0 ? (prLinesCovered / prLinesTotal) * 100 : 100; +} + +const prPass = prLinePct >= PR_LINES_THRESHOLD; + +// --------------------------------------------------------------------------- +// 5. Build report +// --------------------------------------------------------------------------- + +const allPass = allDirsPass && prPass; + +let report = `## Test Coverage Report\n\n`; +report += `### Overall\n\n`; +report += `| Metric | Coverage |\n`; +report += `|--------|----------|\n`; +report += `| Statements | ${stmtPct}% |\n`; +report += `| Branches | ${branchPct}% |\n`; +report += `| Functions | ${funcPct}% |\n`; +report += `| Lines | ${linePct}% |\n`; + +report += `\n### Per-Directory Line Coverage\n\n`; +report += `| Directory | Coverage | Ratchet | Target | Status |\n`; +report += `|-----------|----------|---------|--------|--------|\n`; +for (const d of dirResults) { + const totalDisplay = d.total > 0 ? `${d.pct.toFixed(1)}% (${d.covered}/${d.total})` : 'n/a'; + report += `| \`${d.prefix}\` | ${totalDisplay} | ${d.threshold}% | ${d.target}% | ${d.pass ? 'PASS' : 'FAIL'} |\n`; +} + +report += `\n### PR Changed Lines\n\n`; +report += `**${prLinePct.toFixed(1)}%** (${prLinesCovered}/${prLinesTotal}) — threshold ${PR_LINES_THRESHOLD}% — ${prPass ? 'PASS' : 'FAIL'}\n`; + +if (fileDetails.length > 0) { + report += `\n
Changed file coverage\n\n`; + report += `| File | Changed Lines | Covered | Coverage |\n`; + report += `|------|---------------|---------|----------|\n`; + for (const f of fileDetails) { + report += `| \`${f.file}\` | ${f.changed} | ${f.covered} | ${f.pct}% |\n`; + } + report += `\n
\n`; +} + +console.log(report); + +// --------------------------------------------------------------------------- +// 6. Post comment on PR +// --------------------------------------------------------------------------- + +const prNumber = process.env.PR_NUMBER; +const token = process.env.GITHUB_TOKEN; +const repo = process.env.GITHUB_REPOSITORY; + +if (prNumber && token && repo) { + const marker = ''; + const body = `${marker}\n${report}`; + const ghEnv = { ...process.env, GH_TOKEN: token }; + + try { + const commentsRaw = execSync(`gh api repos/${repo}/issues/${prNumber}/comments --paginate`, { + encoding: 'utf8', + env: ghEnv, + }); + const comments = JSON.parse(commentsRaw); + const existing = comments.find(c => c.body && c.body.startsWith(marker)); + + if (existing) { + execSync(`gh api repos/${repo}/issues/comments/${existing.id} -X PATCH --input -`, { + input: JSON.stringify({ body }), + encoding: 'utf8', + env: ghEnv, + }); + } else { + execSync(`gh api repos/${repo}/issues/${prNumber}/comments --input -`, { + input: JSON.stringify({ body }), + encoding: 'utf8', + env: ghEnv, + }); + } + console.log('Coverage comment posted to PR.'); + } catch (e) { + console.warn('Failed to post PR comment:', e.message); + } +} + +// --------------------------------------------------------------------------- +// 7. Exit +// --------------------------------------------------------------------------- + +if (!allPass) { + const failures = []; + for (const d of dirResults) { + if (!d.pass) failures.push(`${d.prefix} ${d.pct.toFixed(1)}% < ${d.threshold}%`); + } + if (!prPass) failures.push(`PR changed-line coverage ${prLinePct.toFixed(1)}% < ${PR_LINES_THRESHOLD}%`); + console.error(`\nCoverage check FAILED:\n - ${failures.join('\n - ')}`); + process.exit(1); +} + +console.log('\nCoverage check PASSED.'); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function parseDiffForAddedLines(diffOutput) { + const result = {}; + let currentFile = null; + + for (const line of diffOutput.split('\n')) { + const fileMatch = line.match(/^\+\+\+ b\/(.+)$/); + if (fileMatch) { + currentFile = fileMatch[1]; + if (!result[currentFile]) result[currentFile] = []; + continue; + } + + const hunkMatch = line.match(/^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@/); + if (hunkMatch && currentFile) { + const start = parseInt(hunkMatch[1], 10); + const count = parseInt(hunkMatch[2] ?? '1', 10); + for (let i = start; i < start + count; i++) { + result[currentFile].push(i); + } + } + } + + return result; +} diff --git a/src/assets/cdk/bin/cdk.ts b/src/assets/cdk/bin/cdk.ts index 7a78b71cd..1969e7c5d 100644 --- a/src/assets/cdk/bin/cdk.ts +++ b/src/assets/cdk/bin/cdk.ts @@ -69,16 +69,26 @@ async function main() { | Record | undefined; + const tags: Record = { + 'agentcore:project-name': spec.name, + 'agentcore:target-name': target.name, + }; + + // Opt-in e2e test tagging via env vars. Used by the CLI's own e2e suite + // so an infra sweeper can identify and delete orphaned test stacks. + if (process.env.AGENTCORE_E2E_TEST === '1') { + tags.Environment = 'e2e-test'; + tags.CreatedAt = new Date().toISOString(); + tags.CreatedBy = process.env.AGENTCORE_E2E_CREATOR ?? 'github-actions'; + } + new AgentCoreStack(app, stackName, { spec, mcpSpec, credentials, env, description: `AgentCore stack for ${spec.name} deployed to ${target.name} (${target.region})`, - tags: { - 'agentcore:project-name': spec.name, - 'agentcore:target-name': target.name, - }, + tags, }); } From 2fffb14c62c601ac9d82f209256cd50b44716908 Mon Sep 17 00:00:00 2001 From: Tejas Kashinath Date: Tue, 5 May 2026 15:14:14 -0400 Subject: [PATCH 2/5] docs: remove test-resource-sweeper-spec --- docs/test-resource-sweeper-spec.md | 115 ----------------------------- 1 file changed, 115 deletions(-) delete mode 100644 docs/test-resource-sweeper-spec.md diff --git a/docs/test-resource-sweeper-spec.md b/docs/test-resource-sweeper-spec.md deleted file mode 100644 index 38dbea4ef..000000000 --- a/docs/test-resource-sweeper-spec.md +++ /dev/null @@ -1,115 +0,0 @@ -# E2E Test Resource Sweeper — Specification - -## Purpose - -The e2e test suite provisions real AWS resources: CloudFormation stacks, credential providers, CloudWatch log groups, ECR images, and S3 artifacts. When a test crashes, times out, or is cancelled mid-run, its `afterAll` teardown may not execute, leaving orphaned resources behind. - -Orphan cost estimates from the April 2026 audit put sustained leaks at $2.5k-$10k/week depending on which stacks escape (runtimes, memory, credential providers). Teardown is now fatal on failure (PR bundled with this spec), but belt-and-suspenders dictates a scheduled sweeper that catches anything slipping through. - -This document specifies what a daily sweeper must do. The infra team owns implementation. - -## Scope - -The sweeper runs on a schedule (daily at 04:00 UTC recommended) in the same AWS account used by CI. It identifies and deletes resources that meet all three criteria: - -1. Match a well-known e2e naming or tagging pattern. -2. Created more than N hours ago (N=4 recommended — longer than the longest e2e run). -3. Are not part of an active workflow run (check GitHub Actions API for running e2e jobs). - -Dry-run mode must print what would be deleted without acting. Default to dry-run for the first two weeks. - -## Resources to Sweep - -### 1. CloudFormation Stacks - -**Identification:** Stacks tagged with `Environment=e2e-test` (this PR adds the tag via `AGENTCORE_E2E_TEST=1` env var in `src/assets/cdk/bin/cdk.ts`). Also match by stack name prefix `AgentCore-E2e*` as a fallback for older stacks. - -**Action:** `DeleteStack`. Watch for `DELETE_FAILED` and surface to the on-call rotation — some failures (S3 buckets with objects, ENIs held by Lambda) need manual intervention. - -**API calls:** -- `cloudformation:DescribeStacks` (paginate, filter by tag) -- `cloudformation:DeleteStack` -- `cloudformation:DescribeStackEvents` (on DELETE_FAILED for context) - -### 2. API Key Credential Providers - -**Identification:** Providers named with the `E2e` prefix and `createdTime` older than N hours. - -**Action:** `DeleteApiKeyCredentialProvider`. Silent failures acceptable — the CLI already logs these via `cleanupStaleCredentialProviders()`. - -**API calls:** -- `bedrock-agentcore-control:ListApiKeyCredentialProviders` (paginate) -- `bedrock-agentcore-control:DeleteApiKeyCredentialProvider` - -### 3. CloudWatch Log Groups - -**Identification:** Log groups under `/aws/bedrock-agentcore/runtimes/E2e*` and `/aws/codebuild/AgentCore-E2e*`. - -**Action:** `DeleteLogGroup` for groups older than N hours. Alternatively, set a retention policy of 3 days on all matched groups and let CloudWatch expire data. Retention is safer — deletion drops diagnostic context if someone is debugging a test failure. - -**API calls:** -- `logs:DescribeLogGroups` (filter by prefix, paginate) -- `logs:PutRetentionPolicy` (preferred) or `logs:DeleteLogGroup` - -### 4. ECR Repositories - -**Identification:** Repositories tagged with `Environment=e2e-test` or named with the `agentcore-e2e-*` prefix. - -**Action:** Delete images older than N hours. Keep the repository itself — CDK recreates images on every deploy, so repo deletion causes churn. Image cleanup is sufficient. - -**API calls:** -- `ecr:DescribeRepositories` (filter by tag or name) -- `ecr:ListImages` -- `ecr:BatchDeleteImage` - -### 5. S3 (CDK Bootstrap Bucket) - -**Identification:** The bootstrap bucket (`cdk-*-assets-*`) is shared across all deploys in the account. Don't delete the bucket or its tagged objects — CDK uses content-hashed object keys and expects them to persist. - -**Recommendation:** Apply an S3 lifecycle policy to the bootstrap bucket: transition objects to Intelligent-Tiering after 30 days, expire non-current versions after 90 days. Do this once via Terraform/CLI, not via the sweeper. - -## Workflow Structure - -GitHub Actions workflow (`.github/workflows/e2e-sweeper.yml`) with: - -```yaml -on: - schedule: - - cron: '0 4 * * *' # Daily at 04:00 UTC - workflow_dispatch: - inputs: - dry-run: - type: boolean - default: true -``` - -Permissions: use the same OIDC role that e2e tests use, but with delete permissions for the resources above. Store the role ARN in `AWS_E2E_SWEEPER_ROLE_ARN`. - -Steps: - -1. Configure AWS credentials (OIDC). -2. Check for running e2e jobs via `gh api /repos/aws/agentcore-cli/actions/runs?status=in_progress`. If any e2e workflow is running, skip the sweep (or narrow age threshold to 24 hours). -3. Run sweep script (Node or Python) against each resource category. -4. Post a summary to a Slack channel (resource counts deleted per category, failures). -5. On any resource with repeated delete failures (>3 runs in a row), open a GitHub issue. - -## Safety Rails - -- **Hard age floor:** never delete anything younger than 2 hours, even if the script says to. -- **Account allow-list:** the script must fail closed if `AWS_ACCOUNT_ID` is not in the expected list (CI account only). -- **Kill switch:** check for a `SWEEPER_DISABLED` repo variable before running. On-call can flip this if the sweeper misbehaves. -- **Rate limits:** cap deletes per category at 50 per run to avoid runaway behavior. - -## Implementation Order - -1. Start with CloudFormation stack sweeping (highest $ impact). Run in dry-run for one week. -2. Add credential provider sweeping (already scoped by prefix, low risk). -3. Add log group retention policies (set-and-forget, no scheduled action needed). -4. Add ECR image cleanup (low $ impact; deferrable). -5. Enable live deletes after two weeks of clean dry-run output. - -## References - -- CDK stack tagging: `src/assets/cdk/bin/cdk.ts` (tags applied when `AGENTCORE_E2E_TEST=1`) -- Credential provider cleanup: `e2e-tests/e2e-helper.ts#cleanupStaleCredentialProviders` -- E2E teardown: `e2e-tests/e2e-helper.ts#teardownE2EProject` (throws on repeated failure as of this PR) From 887161c69f2d01e40504ec3435602fcae581f855 Mon Sep 17 00:00:00 2001 From: Tejas Kashinath Date: Tue, 5 May 2026 15:51:44 -0400 Subject: [PATCH 3/5] fix: address review comments on Days 5-6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. cleanupStaleCredentialProviders default restored to 30 min, now configurable via E2E_STALE_CRED_MAX_AGE_MS env var. 5 min was too aggressive given container builds >5 min. 2. afterAll blocks that call teardownE2EProject now wrap in try/finally so testDir cleanup always runs even when teardown throws. Updated createE2ESuite + 5 standalone suites (ab-test-*, archive-lifecycle, config-bundle-eval-rec, evals-lifecycle). 3. vitest coverage include adds src/**/*.tsx so TUI React files are gated. Also exclude *.test.tsx from the coverage set. Adjusted src/cli/commands/ ratchet from 46% to 34% to reflect the new denominator (was 3424 lines, now 4553 with .tsx included). 4. check-coverage.mjs no longer silently skips changed files missing from coverage-final.json — counts them as 0% covered instead. --- e2e-tests/ab-test-config-bundle.test.ts | 9 ++++++--- e2e-tests/ab-test-target-based.test.ts | 9 ++++++--- e2e-tests/archive-lifecycle.test.ts | 9 ++++++--- e2e-tests/config-bundle-eval-rec.test.ts | 9 ++++++--- e2e-tests/e2e-helper.ts | 13 +++++++++---- e2e-tests/evals-lifecycle.test.ts | 9 ++++++--- scripts/check-coverage.mjs | 11 +++++++---- vitest.config.ts | 3 ++- 8 files changed, 48 insertions(+), 24 deletions(-) diff --git a/e2e-tests/ab-test-config-bundle.test.ts b/e2e-tests/ab-test-config-bundle.test.ts index cec0a9cc0..f0def071e 100644 --- a/e2e-tests/ab-test-config-bundle.test.ts +++ b/e2e-tests/ab-test-config-bundle.test.ts @@ -54,10 +54,13 @@ describe.sequential('e2e: config-bundle AB test lifecycle', () => { }, 300000); afterAll(async () => { - if (projectPath && hasAws) { - await teardownE2EProject(projectPath, agentName, 'Bedrock'); + try { + if (projectPath && hasAws) { + await teardownE2EProject(projectPath, agentName, 'Bedrock'); + } + } finally { + if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); } - if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); }, 600000); const run = (args: string[]) => runAgentCoreCLI(args, projectPath); diff --git a/e2e-tests/ab-test-target-based.test.ts b/e2e-tests/ab-test-target-based.test.ts index 274ee447a..71adc143f 100644 --- a/e2e-tests/ab-test-target-based.test.ts +++ b/e2e-tests/ab-test-target-based.test.ts @@ -55,10 +55,13 @@ describe.sequential('e2e: target-based AB test lifecycle', () => { }, 300000); afterAll(async () => { - if (projectPath && hasAws) { - await teardownE2EProject(projectPath, agentName, 'Bedrock'); + try { + if (projectPath && hasAws) { + await teardownE2EProject(projectPath, agentName, 'Bedrock'); + } + } finally { + if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); } - if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); }, 600000); const run = (args: string[]) => runAgentCoreCLI(args, projectPath); diff --git a/e2e-tests/archive-lifecycle.test.ts b/e2e-tests/archive-lifecycle.test.ts index dbe0a053e..99e10b3e2 100644 --- a/e2e-tests/archive-lifecycle.test.ts +++ b/e2e-tests/archive-lifecycle.test.ts @@ -67,10 +67,13 @@ describe.sequential('e2e: archive command lifecycle', () => { }, 300000); afterAll(async () => { - if (projectPath && hasAws) { - await teardownE2EProject(projectPath, agentName, 'Bedrock'); + try { + if (projectPath && hasAws) { + await teardownE2EProject(projectPath, agentName, 'Bedrock'); + } + } finally { + if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); } - if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); }, 600000); const run = (args: string[]) => runAgentCoreCLI(args, projectPath); diff --git a/e2e-tests/config-bundle-eval-rec.test.ts b/e2e-tests/config-bundle-eval-rec.test.ts index 01e3287bf..396ada46a 100644 --- a/e2e-tests/config-bundle-eval-rec.test.ts +++ b/e2e-tests/config-bundle-eval-rec.test.ts @@ -64,10 +64,13 @@ describe.sequential('e2e: config bundles, batch evaluation, and recommendations' }, 300000); afterAll(async () => { - if (projectPath && hasAws) { - await teardownE2EProject(projectPath, agentName, 'Bedrock'); + try { + if (projectPath && hasAws) { + await teardownE2EProject(projectPath, agentName, 'Bedrock'); + } + } finally { + if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); } - if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); }, 600000); const run = (args: string[]) => runAgentCoreCLI(args, projectPath); diff --git a/e2e-tests/e2e-helper.ts b/e2e-tests/e2e-helper.ts index 85f889d15..099caa16b 100644 --- a/e2e-tests/e2e-helper.ts +++ b/e2e-tests/e2e-helper.ts @@ -96,10 +96,13 @@ export function createE2ESuite(cfg: E2EConfig) { }, 300000); afterAll(async () => { - if (projectPath && hasAws) { - await teardownE2EProject(projectPath, agentName, cfg.modelProvider); + try { + if (projectPath && hasAws) { + await teardownE2EProject(projectPath, agentName, cfg.modelProvider); + } + } finally { + if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); } - if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); }, 600000); // Container builds go through CodeBuild which is slower and more prone to transient failures. @@ -350,7 +353,9 @@ async function deleteCredentialProvider(client: BedrockAgentCoreControlClient, n * Runs in beforeAll to prevent accumulation from previous runs that * crashed or timed out before their afterAll teardown could execute. */ -export async function cleanupStaleCredentialProviders(maxAgeMs: number = 5 * 60 * 1000): Promise { +export async function cleanupStaleCredentialProviders( + maxAgeMs: number = parseInt(process.env.E2E_STALE_CRED_MAX_AGE_MS ?? '', 10) || 30 * 60 * 1000 +): Promise { const region = process.env.AWS_REGION ?? 'us-east-1'; const client = new BedrockAgentCoreControlClient({ region }); const cutoff = new Date(Date.now() - maxAgeMs); diff --git a/e2e-tests/evals-lifecycle.test.ts b/e2e-tests/evals-lifecycle.test.ts index a2f245c74..4ab3fac1d 100644 --- a/e2e-tests/evals-lifecycle.test.ts +++ b/e2e-tests/evals-lifecycle.test.ts @@ -53,10 +53,13 @@ describe.sequential('e2e: evaluations lifecycle', () => { }, 300000); afterAll(async () => { - if (projectPath && hasAws) { - await teardownE2EProject(projectPath, agentName, 'Bedrock'); + try { + if (projectPath && hasAws) { + await teardownE2EProject(projectPath, agentName, 'Bedrock'); + } + } finally { + if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); } - if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); }, 600000); const run = (args: string[]) => runAgentCoreCLI(args, projectPath); diff --git a/scripts/check-coverage.mjs b/scripts/check-coverage.mjs index 89ec92c20..c30955e68 100644 --- a/scripts/check-coverage.mjs +++ b/scripts/check-coverage.mjs @@ -35,7 +35,7 @@ const DIRECTORY_THRESHOLDS = [ { prefix: 'src/cli/aws/', threshold: 40, target: 60 }, { prefix: 'src/cli/tui/hooks/', threshold: 14, target: 55 }, { prefix: 'src/cli/tui/components/', threshold: 68, target: 75 }, - { prefix: 'src/cli/commands/', threshold: 46, target: 60 }, + { prefix: 'src/cli/commands/', threshold: 34, target: 60 }, ]; // --------------------------------------------------------------------------- @@ -139,12 +139,15 @@ if (baseSha && headSha) { } const coveredSet = coveredLinesByFile[file]; - if (!coveredSet) continue; let fileCovered = 0; - for (const line of lines) { - if (coveredSet.has(line)) fileCovered++; + if (coveredSet) { + for (const line of lines) { + if (coveredSet.has(line)) fileCovered++; + } } + // If coveredSet is undefined, the file was never loaded during tests — + // count all changed lines as uncovered rather than skipping silently. prLinesTotal += lines.length; prLinesCovered += fileCovered; diff --git a/vitest.config.ts b/vitest.config.ts index 87efb98d9..f831957ba 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -86,9 +86,10 @@ export default defineConfig({ reporter: ['text', 'text-summary', 'json', 'json-summary', 'html', 'lcov'], reportsDirectory: './coverage', reportOnFailure: true, - include: ['src/**/*.ts'], + include: ['src/**/*.ts', 'src/**/*.tsx'], exclude: [ 'src/**/*.test.ts', + 'src/**/*.test.tsx', 'src/**/__tests__/**', 'src/assets/**', 'src/test-utils/**', From a9c4cf93a632718626df869f7964b66121df0039 Mon Sep 17 00:00:00 2001 From: Tejas Kashinath Date: Tue, 5 May 2026 16:03:49 -0400 Subject: [PATCH 4/5] style: fix prettier formatting in check-coverage.mjs --- scripts/check-coverage.mjs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/check-coverage.mjs b/scripts/check-coverage.mjs index c30955e68..4bca2c261 100644 --- a/scripts/check-coverage.mjs +++ b/scripts/check-coverage.mjs @@ -64,9 +64,7 @@ for (const [absPath, fileSummary] of Object.entries(summary)) { // Longest-prefix match: iterate from most specific to least specific. // DIRECTORY_THRESHOLDS is already ordered with nested paths before parents // for tui/hooks and tui/components, but we sort by length to be safe. - const match = [...dirStats] - .sort((a, b) => b.prefix.length - a.prefix.length) - .find(d => rel.startsWith(d.prefix)); + const match = [...dirStats].sort((a, b) => b.prefix.length - a.prefix.length).find(d => rel.startsWith(d.prefix)); if (!match) continue; match.total += fileSummary.lines.total; From 9c0156f2f963590f85d3619fe243b0cb3bacdb13 Mon Sep 17 00:00:00 2001 From: Tejas Kashinath Date: Tue, 5 May 2026 16:10:06 -0400 Subject: [PATCH 5/5] test: update snapshot for cdk.ts e2e tag injection --- .../__snapshots__/assets.snapshot.test.ts.snap | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap index 0962b1b0e..4dfedc9a6 100644 --- a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap +++ b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap @@ -114,16 +114,26 @@ async function main() { | Record | undefined; + const tags: Record = { + 'agentcore:project-name': spec.name, + 'agentcore:target-name': target.name, + }; + + // Opt-in e2e test tagging via env vars. Used by the CLI's own e2e suite + // so an infra sweeper can identify and delete orphaned test stacks. + if (process.env.AGENTCORE_E2E_TEST === '1') { + tags.Environment = 'e2e-test'; + tags.CreatedAt = new Date().toISOString(); + tags.CreatedBy = process.env.AGENTCORE_E2E_CREATOR ?? 'github-actions'; + } + new AgentCoreStack(app, stackName, { spec, mcpSpec, credentials, env, description: \`AgentCore stack for \${spec.name} deployed to \${target.name} (\${target.region})\`, - tags: { - 'agentcore:project-name': spec.name, - 'agentcore:target-name': target.name, - }, + tags, }); }