Hweinstock · Hweinstock · Apr 29, 2026 · Apr 29, 2026 · Apr 29, 2026 · Apr 29, 2026
diff --git a/.github/harness/Dockerfile b/.github/harness/Dockerfile
@@ -0,0 +1,33 @@
+FROM public.ecr.aws/docker/library/python:3.12-slim
+
+# System dependencies; ca-certificates is listed explicitly because recommended packages are skipped
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    curl \
+    git \
+    jq \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install GitHub CLI from its apt repository (packages verified against the downloaded keyring)
+RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg -o /usr/share/keyrings/githubcli-archive-keyring.gpg \
+    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
+    > /etc/apt/sources.list.d/github-cli.list \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends gh \
+    && rm -rf /var/lib/apt/lists/*
+
+# Tokens are baked into the image at build time. This image must be treated as a
+# secret and stored only in a registry with equivalent access controls.
+ARG CLONE_TOKEN
+ARG GITHUB_TOKEN
+
+# Configure git to use clone token for HTTPS clones; the file embeds the token, so keep it owner-only
+RUN git config --global url."https://${CLONE_TOKEN}@github.com/".insteadOf "https://github.com/" \
+    && chmod 600 /root/.gitconfig
+
+# Persist gh CLI auth so GITHUB_TOKEN doesn't need to be in the environment (file is owner-only)
+RUN mkdir -p /root/.config/gh \
+    && printf '%s\n' "github.com:" "  oauth_token: ${GITHUB_TOKEN}" "  user: agentcore-cli-automation" "  git_protocol: https" > /root/.config/gh/hosts.yml \
+    && chmod 600 /root/.config/gh/hosts.yml
+
+WORKDIR /opt/workspace
diff --git a/.github/harness/README.md b/.github/harness/README.md
@@ -0,0 +1,39 @@
+# Harness Resources
+
+Container and scripts for AI-powered automation via
+[AgentCore Harness](https://docs.aws.amazon.com/bedrock/latest/userguide/agentcore.html).
+
+## Structure
+
+```
+harness/
+├── Dockerfile            # Container image for the harness runtime
+├── harness_review.py     # Invokes the harness to review PRs (boto3 invoke_harness + event stream)
+└── prompts/
+    ├── system.md         # System prompt (workspace context)
+    └── review.md         # PR review task prompt
+```
+
+## Current: PR Reviewer
+
+Reviews pull requests on open/reopen via `.github/workflows/pr-ai-review.yml`.
+
+### Dual-token setup
+
+The Dockerfile takes two build args. Both are written into the image, so treat the built image itself as a secret:
+
+- **`CLONE_TOKEN`** — baked into git config for cloning private repos
+- **`GITHUB_TOKEN`** — baked into `gh` CLI auth for posting PR comments
+
+### Building the container
+
+```bash
+finch build \
+  --build-arg CLONE_TOKEN=<pat-for-cloning> \
+  --build-arg GITHUB_TOKEN=<pat-for-gh-api> \
+  -t pr-reviewer .github/harness/
+```
+
+## Future: Tester
+
+This directory will also house a harness-based test runner.
diff --git a/.github/scripts/python/harness_review.py → .github/harness/harness_review.py b/.github/scripts/python/harness_review.py → .github/harness/harness_review.py
@@ -1,7 +1,7 @@
 """Invoke Bedrock AgentCore Harness to review a GitHub PR.
 
 Reads PR_URL from the environment. Streams harness output to stdout.
-Uses raw HTTP with SigV4 signing — no custom service model needed.
+Uses the boto3 bedrock-agentcore client's invoke_harness API.
 """
 
 import json
@@ -11,11 +11,6 @@
 import uuid
 
 import boto3
-from botocore.auth import SigV4Auth
-from botocore.awsrequest import AWSRequest
-from botocore.eventstream import EventStreamBuffer
-from urllib.parse import quote
-import urllib3
 
 # ANSI color codes
 CYAN = "\033[36m"
@@ -25,7 +20,7 @@
 DIM = "\033[2m"
 RESET = "\033[0m"
 
-SCRIPTS_DIR = os.path.join(os.path.dirname(__file__), "..")
+SCRIPTS_DIR = os.path.dirname(__file__)
 
 
 def read_prompt(filename):
@@ -35,50 +30,37 @@ def read_prompt(filename):
         return f.read()
 
 
-def invoke_harness(harness_arn, body, region):
-    """Send a SigV4-signed request to the harness invoke endpoint. Returns a streaming response.
-
-    InvokeHarness is not in standard boto3, so we call the REST API directly.
-    boto3 is only used to resolve AWS credentials (from env vars, OIDC, etc.)
-    and sign the request with SigV4. The response is an AWS binary event stream.
-    """
-    session = boto3.Session(region_name=region)
-    credentials = session.get_credentials().get_frozen_credentials()
-    url = f"https://bedrock-agentcore.{region}.amazonaws.com/harnesses/invoke?harnessArn={quote(harness_arn, safe='')}"
-    request = AWSRequest(method="POST", url=url, data=body, headers={
-        "Content-Type": "application/json",
-        "Accept": "application/vnd.amazon.eventstream",
-    })
-    SigV4Auth(credentials, "bedrock-agentcore", region).add_auth(request)
-    return urllib3.PoolManager().urlopen(
-        "POST", url, body=body,
-        headers=dict(request.headers),
-        preload_content=False,
-        timeout=urllib3.Timeout(connect=10, read=600),
+def invoke_harness_streaming(harness_arn, session_id, system_prompt, messages, model_id, region):
+    """Call invoke_harness via boto3 and return the event stream."""
+    client = boto3.client("bedrock-agentcore", region_name=region)
+    response = client.invoke_harness(
+        harnessArn=harness_arn,
+        runtimeSessionId=session_id,
+        systemPrompt=[{"text": system_prompt}],
+        messages=messages,
+        model={"bedrockModelConfig": {"modelId": model_id}},
     )
-
-
-def parse_events(http_response):
-    """Yield (event_type, payload) tuples from the harness binary event stream.
-
-    The response arrives as raw bytes in AWS binary event stream format.
-    EventStreamBuffer reassembles complete events from the 4KB chunks,
-    and we decode each event's JSON payload before yielding it.
-    """
-    event_buffer = EventStreamBuffer()
-    for chunk in http_response.stream(4096):
-        event_buffer.add_data(chunk)
-        for event in event_buffer:
-            if event.headers.get(":message-type") == "exception":
-                payload = json.loads(event.payload.decode("utf-8"))
-                print(f"\n{RED}ERROR: {payload}{RESET}", file=sys.stderr)
-                sys.exit(1)
-            event_type = event.headers.get(":event-type", "")
-            if event.payload:
-                yield event_type, json.loads(event.payload.decode("utf-8"))
-
-
-def print_stream(http_response):
+    return response["stream"]
+
+
+def parse_events(event_stream):
+    """Yield (event_type, payload) tuples for recognized boto3 stream events.
+
+    Each event is checked against the known event types in a fixed order; the
+    first key present is yielded with its value. Other events are skipped.
+    """
+    known_types = (
+        "contentBlockStart", "contentBlockDelta", "contentBlockStop",
+        "messageStop", "internalServerException", "runtimeClientError",
+    )
+    for event in event_stream:
+        # Same precedence as an if/elif chain: stop at the first matching key.
+        matched = next((name for name in known_types if name in event), None)
+        if matched is not None:
+            yield matched, event[matched]
+
+
+def print_stream(event_stream):
     """Display harness events with GitHub Actions log groups.
 
     The harness streams events as the agent works:
@@ -112,7 +94,7 @@ def flush_text():
                 print(f"{DIM}{line}{RESET}", flush=True)
             text_buffer = ""
 
-    for event_type, payload in parse_events(http_response):
+    for event_type, payload in parse_events(event_stream):
 
         if event_type == "contentBlockStart":
             start = payload.get("start", {})
@@ -171,6 +153,11 @@ def flush_text():
             print(f"\n{RED}ERROR: {payload}{RESET}", file=sys.stderr)
             sys.exit(1)
 
+        elif event_type == "runtimeClientError":
+            close_group()
+            print(f"\n{RED}ERROR: {payload.get('message', payload)}{RESET}", file=sys.stderr)
+            sys.exit(1)
+
     close_group()
     total = time.time() - start_time
     print(f"\n{GREEN}Review complete.{RESET} {DIM}({iteration} tool calls, {int(total)}s total){RESET}")
@@ -200,18 +187,14 @@ def flush_text():
 SYSTEM_PROMPT = read_prompt("system.md")
 REVIEW_PROMPT = read_prompt("review.md").format(pr_url=PR_URL)
 
-request_body = json.dumps({
-    "runtimeSessionId": SESSION_ID,
-    "systemPrompt": [{"text": SYSTEM_PROMPT}],
-    "messages": [{"role": "user", "content": [{"text": REVIEW_PROMPT}]}],
-    "model": {"bedrockModelConfig": {"modelId": MODEL_ID}},
-})
+messages = [{"role": "user", "content": [{"text": REVIEW_PROMPT}]}]
 
-http_response = invoke_harness(HARNESS_ARN, request_body, REGION)
-
-if http_response.status != 200:
-    error = http_response.read().decode("utf-8")
-    print(f"{RED}ERROR: HTTP {http_response.status}: {error}{RESET}", file=sys.stderr)
+try:
+    event_stream = invoke_harness_streaming(
+        HARNESS_ARN, SESSION_ID, SYSTEM_PROMPT, messages, MODEL_ID, REGION
+    )
+except Exception as e:
+    print(f"{RED}ERROR: Failed to invoke harness: {e}{RESET}", file=sys.stderr)
     sys.exit(1)
 
-print_stream(http_response)
+print_stream(event_stream)
diff --git a/.github/scripts/prompts/review.md → .github/harness/prompts/review.md b/.github/scripts/prompts/review.md → .github/harness/prompts/review.md
diff --git a/.github/scripts/prompts/system.md → .github/harness/prompts/system.md b/.github/scripts/prompts/system.md → .github/harness/prompts/system.md
diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
@@ -87,7 +87,7 @@ jobs:
       - run: npm ci
       - run: npm run build --if-present
       - name: Run unit tests (shard ${{ matrix.shard }})
-        run: npx vitest run --project unit --shard=${{ matrix.shard }} --reporter=blob --reporter=verbose
+        run: npx vitest run --project unit --shard=${{ matrix.shard }} --reporter=blob --reporter=verbose --coverage
       - name: Upload blob report
         if: always()
         uses: actions/upload-artifact@v7
@@ -144,6 +144,6 @@ jobs:
         with:
           json-summary-path: coverage/coverage-summary.json
           json-final-path: coverage/coverage-final.json
-          vite-config-path: vitest.unit.config.ts
+          vite-config-path: vitest.config.ts
           file-coverage-mode: none
           coverage-thresholds: '{ "lines": 50, "branches": 50, "functions": 50, "statements": 50 }'
diff --git a/.github/workflows/e2e-tests-full.yml b/.github/workflows/e2e-tests-full.yml
@@ -27,6 +27,7 @@ jobs:
       fail-fast: false
       matrix:
         cdk-source: [npm, main]
+        shard: ['1/6', '2/6', '3/6', '4/6', '5/6', '6/6']
     steps:
       - uses: actions/checkout@v6
         with:
@@ -70,15 +71,15 @@ jobs:
           CDK_REPO: ${{ secrets.CDK_REPO_NAME }}
       - name: Install CLI globally
         run: npm install -g "$(npm pack | tail -1)"
-      - name: Run E2E tests (${{ matrix.cdk-source }})
+      - name: Run E2E tests (${{ matrix.cdk-source }}, shard ${{ matrix.shard }})
         env:
           AWS_ACCOUNT_ID: ${{ steps.aws.outputs.account_id }}
           AWS_REGION: ${{ inputs.aws_region || 'us-east-1' }}
           ANTHROPIC_API_KEY: ${{ env.E2E_ANTHROPIC_API_KEY }}
           OPENAI_API_KEY: ${{ env.E2E_OPENAI_API_KEY }}
           GEMINI_API_KEY: ${{ env.E2E_GEMINI_API_KEY }}
           CDK_TARBALL: ${{ env.CDK_TARBALL }}
-        run: npm run test:e2e
+        run: npx vitest run --project e2e --shard=${{ matrix.shard }}
   browser-tests:
     runs-on: ubuntu-latest
     environment: e2e-testing

diff --git a/.github/workflows/pr-ai-review.yml b/.github/workflows/pr-ai-review.yml
@@ -139,7 +139,7 @@ jobs:
         env:
           PR_URL: ${{ steps.pr-url.outputs.url }}
           HARNESS_ARN: ${{ secrets.HARNESS_ARN }}
-        run: python .github/scripts/python/harness_review.py
+        run: python .github/harness/harness_review.py
 
       - name: Remove agentcore-harness-reviewing label
         if: always()