Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/maintainer/.version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.19.6
0.25.0
297 changes: 57 additions & 240 deletions .github/workflows/maintainer.yml
Original file line number Diff line number Diff line change
@@ -1,266 +1,83 @@
# Caretaker — thin streaming workflow.
#
# This workflow is the bare minimum needed to invoke the backend.
# Webhooks installed via the caretaker GitHub App are the primary path
# (real-time, event-driven). This workflow exists for two narrower jobs:
#
# 1. Operator-triggered runs — kick a run from the GitHub UI via
# workflow_dispatch (the "Run workflow" button).
# 2. Webhook-miss recovery — sparse cron that reconciles state if a
# delivery dropped. The backend's reconciliation scheduler does
# most of the heavy lifting; this is a redundant safety net.
#
# No checkout, no LLM keys, no Copilot PAT — the runner installs only the
# thin caretaker client; the backend holds everything else. This file
# should change rarely.
#
# Required repo variable:
# CARETAKER_BACKEND_URL — e.g. https://caretaker.example.com
#
# Optional repo variable:
# CARETAKER_OIDC_AUDIENCE — defaults to "caretaker-backend"

name: Caretaker

on:
schedule:
- cron: "0 8 * * *"
pull_request:
types: [opened, synchronize, reopened]
pull_request_review:
types: [submitted]
check_suite:
types: [completed]
issues:
types: [opened, labeled]
issue_comment:
types: [created]
# Sparse cron — webhook-miss recovery only. The backend has its own
# reconciliation scheduler running on a 30-minute interval, so a
# 6-hour cadence here is plenty as a belt-and-suspenders fallback.
- cron: "37 */6 * * *"
workflow_dispatch:
inputs:
mode:
description: "Run mode"
description: "Run mode the backend should execute"
required: false
default: "full"
type: choice
options: [full, pr-only, issue-only, upgrade, dry-run]

# Prevent concurrent caretaker runs so each run sees the up-to-date memory
# store written by the previous run.
options:
- full
- pr-only
- issue-only
- upgrade
- security
- deps
- stale

# Serialise concurrent runs so an operator-triggered run does not stack
# on top of a sparse-cron run. Cancellation off — webhooks are the
# real-time path, this workflow is the safety net.
concurrency:
group: caretaker
cancel-in-progress: false

permissions:
contents: write
issues: write
pull-requests: write
# OIDC: enables the runner to mint a GitHub Actions JWT bound to the
# backend's audience, which the backend exchanges for a per-run
# ingest token. No GITHUB_TOKEN, no PAT, no LLM secrets.
id-token: write
contents: read

jobs:
# Short-circuit comment events that caretaker itself produced. Without this
# filter, every status / readiness / task comment caretaker writes triggers
# another caretaker run via the issue_comment webhook, producing a feedback
# loop. Comments are identified by a caretaker:* HTML-comment marker and
# by known bot logins.
dispatch-guard:
stream:
runs-on: ubuntu-latest
outputs:
should_run: ${{ steps.guard.outputs.should_run }}
timeout-minutes: 15
env:
CARETAKER_BACKEND_URL: ${{ vars.CARETAKER_BACKEND_URL }}
CARETAKER_OIDC_AUDIENCE: ${{ vars.CARETAKER_OIDC_AUDIENCE }}
steps:
- id: guard
uses: actions/github-script@v7
with:
script: |
const ev = context.eventName;
if (ev !== "issue_comment" && ev !== "pull_request_review") {
core.setOutput("should_run", "true");
return;
}
const payload = context.payload || {};
if (ev === "issue_comment") {
const body = payload.comment?.body || "";
if (/<!--\s*caretaker:[a-z0-9:_-]+/i.test(body)) {
core.notice("skip: issue_comment carries caretaker marker");
core.setOutput("should_run", "false");
return;
}
const actor = payload.comment?.user?.login || "";
const botActors = new Set([
"the-care-taker[bot]",
"github-actions[bot]",
"copilot-swe-agent[bot]",
"anthropic-code-agent[bot]",
"copilot-pull-request-reviewer[bot]",
]);
if (botActors.has(actor)) {
core.notice(`skip: bot-authored issue_comment from ${actor}`);
core.setOutput("should_run", "false");
return;
}
}
if (ev === "pull_request_review") {
const reviewer = payload.review?.user?.login || "";
if (reviewer === "copilot-pull-request-reviewer[bot]") {
core.notice(`skip: pull_request_review by ${reviewer}`);
core.setOutput("should_run", "false");
return;
}
}
core.setOutput("should_run", "true");

maintain:
needs: dispatch-guard
if: ${{ needs.dispatch-guard.outputs.should_run == 'true' }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: "pip"

- name: Restore memory store cache
uses: actions/cache@v4
with:
path: .caretaker-memory.db
key: caretaker-memory-${{ github.repository }}
restore-keys: |
caretaker-memory-

- name: Install caretaker
- name: Install caretaker (thin client only)
run: |
VERSION=$(cat .github/maintainer/.version)
pip install "git+https://github.com/ianlintner/caretaker.git@v${VERSION}"
# LiteLLM is the ``llm-multi`` extra from caretaker's
# pyproject.toml; installing it separately keeps the install
# line above compatible with both the pre- and post-v0.8.1
# distribution rename (``caretaker`` vs ``caretaker-github``).
# Harmless when ``executor.foundry.enabled=false`` — no model
# is called — but required when the repo opts into the
# custom coding agent (see docs/custom-coding-agent-plan.md).
pip install "litellm>=1.50,<2"
# The thin client needs only the runs shipper; no agent code,
# no orchestrator, no LLM dependencies. NOTE(review): the install
# below carries no version specifier, so pip resolves whatever
# release is newest at run time; pin it (e.g. "caretaker==<version>")
# if the runner must not run ahead of the backend.
pip install --quiet "caretaker"

# Cheap, offline sanity check — runs before every doctor/run call
# so a broken pin, unparseable config, or missing secret for an
# enabled agent fails loudly with an actionable row instead of
# getting swallowed by a later 403 / import error. See the
# 2026-04-22 audio_engineer outage post-mortem.
- name: Caretaker bootstrap-check
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Preferred: pass GitHub App credentials so caretaker self-mints
# tokens via GitHubAppCredentialsProvider (auto-refresh, no expiry).
# Set CARETAKER_APP_ID (var) + CARETAKER_APP_PRIVATE_KEY (secret)
# and un-comment these three lines:
# CARETAKER_GITHUB_APP_ID: ${{ vars.CARETAKER_APP_ID }}
# CARETAKER_GITHUB_APP_INSTALLATION_ID: ${{ vars.CARETAKER_APP_INSTALLATION_ID }}
# CARETAKER_GITHUB_APP_PRIVATE_KEY: ${{ secrets.CARETAKER_APP_PRIVATE_KEY }}
COPILOT_PAT: ${{ secrets.COPILOT_PAT }}
# Add your LLM provider credentials here. Examples:
# Azure AI Foundry (recommended):
AZURE_AI_API_KEY: ${{ secrets.AZURE_AI_API_KEY }}
AZURE_AI_API_BASE: ${{ secrets.AZURE_AI_API_BASE }}
# Azure OpenAI (classic):
# AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
# AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
# Direct Anthropic (if not using Azure AI Foundry):
# ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
# Fleet registry OAuth2 client_credentials wiring (only consumed
# when fleet_registry.enabled=true in config.yml). Safe to leave
# unset otherwise — caretaker just skips the heartbeat.
OAUTH2_CLIENT_ID: ${{ secrets.OAUTH2_CLIENT_ID }}
OAUTH2_CLIENT_SECRET: ${{ secrets.OAUTH2_CLIENT_SECRET }}
OAUTH2_TOKEN_URL: ${{ vars.OAUTH2_TOKEN_URL }}
OAUTH2_SCOPE: ${{ vars.OAUTH2_SCOPE }}
- name: Stream a backend-executed run
run: |
caretaker doctor \
--config .github/maintainer/config.yml \
--bootstrap-check

- name: Run
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Preferred: GitHub App self-mint credentials (see bootstrap-check above).
# CARETAKER_GITHUB_APP_ID: ${{ vars.CARETAKER_APP_ID }}
# CARETAKER_GITHUB_APP_INSTALLATION_ID: ${{ vars.CARETAKER_APP_INSTALLATION_ID }}
# CARETAKER_GITHUB_APP_PRIVATE_KEY: ${{ secrets.CARETAKER_APP_PRIVATE_KEY }}
# Fine-grained PAT for a real write-capable user or machine user.
# Caretaker uses this for Copilot issue assignment and @copilot comments
# that must not be authored as github-actions[bot].
COPILOT_PAT: ${{ secrets.COPILOT_PAT }}
# LLM provider credentials — add whichever your config uses:
AZURE_AI_API_KEY: ${{ secrets.AZURE_AI_API_KEY }}
AZURE_AI_API_BASE: ${{ secrets.AZURE_AI_API_BASE }}
# AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
# AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
# ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
CARETAKER_EVENT_PAYLOAD: ${{ toJSON(github.event) }}
CARETAKER_RUN_MODE: ${{ github.event.inputs.mode || 'full' }}
CARETAKER_EVENT_TYPE: ${{ github.event_name }}
# Fleet registry OAuth2 client_credentials wiring (only consumed when
# fleet_registry.enabled=true in config.yml).
OAUTH2_CLIENT_ID: ${{ secrets.OAUTH2_CLIENT_ID }}
OAUTH2_CLIENT_SECRET: ${{ secrets.OAUTH2_CLIENT_SECRET }}
OAUTH2_TOKEN_URL: ${{ vars.OAUTH2_TOKEN_URL }}
OAUTH2_SCOPE: ${{ vars.OAUTH2_SCOPE }}
run: |
caretaker run \
--config .github/maintainer/config.yml \
--mode "$CARETAKER_RUN_MODE" \
--event-type "$CARETAKER_EVENT_TYPE" \
--event-payload "$CARETAKER_EVENT_PAYLOAD"

# Upload a JSON snapshot of the memory store so it can be downloaded for
# auditing or used to manually restore a known-good state.
- name: Upload memory store snapshot
if: always()
uses: actions/upload-artifact@v4
with:
name: caretaker-memory-snapshot-${{ github.run_number }}
path: .caretaker-memory-snapshot.json
if-no-files-found: ignore
# Required: upload-artifact v4 excludes dotfiles by default.
include-hidden-files: true
retention-days: 30

# When the caretaker run itself fails, trigger the self-heal agent
self-heal-on-failure:
runs-on: ubuntu-latest
needs: maintain
if: failure()
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- uses: actions/setup-python@v5
with:
python-version: "3.12"

- name: Install caretaker
run: |
VERSION=$(cat .github/maintainer/.version)
pip install "git+https://github.com/ianlintner/caretaker.git@v${VERSION}"
# LiteLLM is the ``llm-multi`` extra from caretaker's
# pyproject.toml; installing it separately keeps the install
# line above compatible with both the pre- and post-v0.8.1
# distribution rename (``caretaker`` vs ``caretaker-github``).
# Harmless when ``executor.foundry.enabled=false`` — no model
# is called — but required when the repo opts into the
# custom coding agent (see docs/custom-coding-agent-plan.md).
pip install "litellm>=1.50,<2"

- name: Self-heal — analyse own failure
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Preferred: GitHub App self-mint credentials.
# CARETAKER_GITHUB_APP_ID: ${{ vars.CARETAKER_APP_ID }}
# CARETAKER_GITHUB_APP_INSTALLATION_ID: ${{ vars.CARETAKER_APP_INSTALLATION_ID }}
# CARETAKER_GITHUB_APP_PRIVATE_KEY: ${{ secrets.CARETAKER_APP_PRIVATE_KEY }}
# Fine-grained PAT for a real write-capable user or machine user.
COPILOT_PAT: ${{ secrets.COPILOT_PAT }}
AZURE_AI_API_KEY: ${{ secrets.AZURE_AI_API_KEY }}
AZURE_AI_API_BASE: ${{ secrets.AZURE_AI_API_BASE }}
# ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
CARETAKER_FAILED_RUN_ID: ${{ github.run_id }}
CARETAKER_EVENT_PAYLOAD: >-
{
"workflow_run": {
"id": ${{ github.run_id }},
"name": "Caretaker",
"conclusion": "failure",
"head_branch": "${{ github.ref_name }}"
}
}
# Fleet registry OAuth2 client_credentials wiring (self-heal also
# emits a heartbeat). Only used when fleet_registry.enabled=true.
OAUTH2_CLIENT_ID: ${{ secrets.OAUTH2_CLIENT_ID }}
OAUTH2_CLIENT_SECRET: ${{ secrets.OAUTH2_CLIENT_SECRET }}
OAUTH2_TOKEN_URL: ${{ vars.OAUTH2_TOKEN_URL }}
OAUTH2_SCOPE: ${{ vars.OAUTH2_SCOPE }}
run: |
caretaker run \
--config .github/maintainer/config.yml \
--mode self-heal \
--event-type workflow_run \
--event-payload "$CARETAKER_EVENT_PAYLOAD"

caretaker stream --mode "${{ inputs.mode || 'full' }}"
Loading