Skip to content

Commit 2a5a449

Browse files
authored
fix(ci): require PR checks to pass (#1461)
1 parent 10af3e6 commit 2a5a449

7 files changed

Lines changed: 299 additions & 29 deletions

File tree

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
name: PR Merge Base
2+
description: Resolve and fetch the merge-base commit needed to diff a copy-pr-bot pull-request/<N> push against the PR base branch.
3+
4+
inputs:
5+
gh_token:
6+
description: GitHub token for PR and compare API calls.
7+
required: true
8+
9+
outputs:
10+
base_sha:
11+
description: Merge-base commit SHA for pull-request/<N> refs, or empty for other refs.
12+
value: ${{ steps.merge-base.outputs.base_sha }}
13+
14+
runs:
15+
using: composite
16+
steps:
17+
- id: merge-base
18+
shell: bash
19+
env:
20+
GH_TOKEN: ${{ inputs.gh_token }}
21+
GH_REPO: ${{ github.repository }}
22+
REF_NAME: ${{ github.ref_name }}
23+
GITHUB_SHA_VALUE: ${{ github.sha }}
24+
run: |
25+
set -euo pipefail
26+
27+
if [[ "$REF_NAME" =~ ^pull-request/([0-9]+)$ ]]; then
28+
pr_number="${BASH_REMATCH[1]}"
29+
base_ref=$(gh pr view "$pr_number" --repo "$GH_REPO" --json baseRefName -q '.baseRefName')
30+
# The mirrored branch is a push ref, so changed-files needs the true merge-base
31+
# to diff the PR head against its base branch.
32+
base_sha=$(gh api "repos/$GH_REPO/compare/$base_ref...$GITHUB_SHA_VALUE" --jq '.merge_base_commit.sha')
33+
git fetch --no-tags --depth=1 origin "$base_sha"
34+
echo "base_sha=$base_sha" >> "$GITHUB_OUTPUT"
35+
else
36+
echo "base_sha=" >> "$GITHUB_OUTPUT"
37+
fi

.github/workflows/e2e-label-help.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name: E2E Label Help
22

3-
# When a `test:e2e` / `test:e2e-gpu` label is applied, post a PR comment
3+
# When a `test:e2e*` label is applied, post a PR comment
44
# telling the maintainer the next manual step. We don't dispatch the workflow
55
# ourselves: a workflow_dispatch-triggered run does not surface in the PR's
66
# Checks tab, so we'd lose in-progress visibility. Instead we point the
@@ -62,7 +62,7 @@ jobs:
6262
workflow_link="[$workflow_name](https://github.com/$GH_REPO/actions/workflows/$workflow_file)"
6363
instructions="Open $workflow_link, find the run for commit \`$short_pr\`, and click **Re-run all jobs** to execute with the label set."
6464
fi
65-
body="Label \`$LABEL_NAME\` applied for \`$short_pr\`. $instructions The \`E2E Gate\` check on this PR will flip green automatically once the run finishes."
65+
body="Label \`$LABEL_NAME\` applied for \`$short_pr\`. $instructions The matching required CI gate status on this PR will flip green automatically once the run finishes."
6666
fi
6767
6868
gh pr comment "$PR_NUMBER" --body "$body"

.github/workflows/helm-lint.yml

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,6 @@ on:
77
push:
88
branches:
99
- "pull-request/[0-9]+"
10-
paths:
11-
- "deploy/helm/**"
12-
- "mise.toml"
13-
- "mise.lock"
14-
- "tasks/helm.toml"
15-
- ".github/workflows/helm-lint.yml"
1610
workflow_dispatch:
1711

1812
env:
@@ -36,23 +30,61 @@ jobs:
3630
outputs:
3731
should_run: ${{ steps.gate.outputs.should_run }}
3832
steps:
39-
- uses: actions/checkout@v6
33+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
4034

4135
- id: gate
4236
uses: ./.github/actions/pr-gate
4337

44-
helm-lint:
45-
name: Helm Lint
38+
helm_changes:
39+
name: Detect Helm changes
4640
needs: pr_metadata
4741
if: needs.pr_metadata.outputs.should_run == 'true'
42+
runs-on: ubuntu-latest
43+
permissions:
44+
contents: read
45+
pull-requests: read
46+
outputs:
47+
should_run: ${{ steps.default.outputs.should_run || steps.changes.outputs.any_changed }}
48+
steps:
49+
- id: default
50+
if: github.event_name != 'push'
51+
shell: bash
52+
run: echo "should_run=true" >> "$GITHUB_OUTPUT"
53+
54+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
55+
if: github.event_name == 'push'
56+
57+
- id: merge-base
58+
if: github.event_name == 'push'
59+
uses: ./.github/actions/pr-merge-base
60+
with:
61+
gh_token: ${{ secrets.GITHUB_TOKEN }}
62+
63+
- id: changes
64+
if: github.event_name == 'push'
65+
uses: tj-actions/changed-files@aa08304bd477b800d468db44fe10f6c61f7f7b11 # v42.1.0
66+
with:
67+
base_sha: ${{ steps.merge-base.outputs.base_sha }}
68+
skip_initial_fetch: ${{ steps.merge-base.outputs.base_sha != '' }}
69+
files: |
70+
deploy/helm/**
71+
mise.toml
72+
mise.lock
73+
tasks/helm.toml
74+
.github/workflows/helm-lint.yml
75+
76+
helm-lint:
77+
name: Helm Lint
78+
needs: [pr_metadata, helm_changes]
79+
if: needs.pr_metadata.outputs.should_run == 'true' && needs.helm_changes.outputs.should_run == 'true'
4880
runs-on: linux-amd64-cpu8
4981
container:
5082
image: ghcr.io/nvidia/openshell/ci:latest
5183
credentials:
5284
username: ${{ github.actor }}
5385
password: ${{ secrets.GITHUB_TOKEN }}
5486
steps:
55-
- uses: actions/checkout@v6
87+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
5688

5789
- name: Install tools
5890
run: mise install --locked
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
name: Required CI Gates
2+
3+
on:
4+
pull_request_target:
5+
types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled]
6+
workflow_run:
7+
workflows:
8+
- Branch Checks
9+
- Branch E2E Checks
10+
- GPU Test
11+
- Branch Kubernetes E2E
12+
- Helm Lint
13+
types: [completed]
14+
15+
permissions:
16+
actions: read
17+
contents: read
18+
pull-requests: read
19+
statuses: write
20+
21+
concurrency:
22+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.event.workflow_run.head_sha || github.run_id }}
23+
cancel-in-progress: true
24+
25+
jobs:
26+
publish:
27+
name: Publish required CI gate statuses
28+
runs-on: ubuntu-latest
29+
steps:
30+
- name: Evaluate required CI gates
31+
env:
32+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
33+
GH_REPO: ${{ github.repository }}
34+
EVENT_NAME: ${{ github.event_name }}
35+
PR_NUMBER_FROM_EVENT: ${{ github.event.pull_request.number }}
36+
PR_HEAD_SHA_FROM_EVENT: ${{ github.event.pull_request.head.sha }}
37+
PR_LABELS_FROM_EVENT: ${{ toJSON(github.event.pull_request.labels.*.name) }}
38+
WORKFLOW_RUN_HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
39+
WORKFLOW_RUN_EVENT: ${{ github.event.workflow_run.event }}
40+
shell: bash
41+
run: |
42+
set -euo pipefail
43+
44+
post_status() {
45+
local context="$1"
46+
local state="$2"
47+
local description="$3"
48+
local target_url="${4:-}"
49+
50+
args=(
51+
--method POST
52+
"repos/$GH_REPO/statuses/$HEAD_SHA"
53+
-f "state=$state"
54+
-f "context=$context"
55+
-f "description=$description"
56+
)
57+
if [ -n "$target_url" ]; then
58+
args+=(-f "target_url=$target_url")
59+
fi
60+
61+
echo "$context: $state - $description"
62+
gh api "${args[@]}" >/dev/null
63+
}
64+
65+
has_label() {
66+
local label="$1"
67+
jq -e --arg label "$label" 'index($label) != null' <<< "$LABELS_JSON" >/dev/null
68+
}
69+
70+
resolve_pull_request_event() {
71+
PR_NUMBER="$PR_NUMBER_FROM_EVENT"
72+
HEAD_SHA="$PR_HEAD_SHA_FROM_EVENT"
73+
LABELS_JSON=$(jq -c . <<< "$PR_LABELS_FROM_EVENT")
74+
}
75+
76+
resolve_workflow_run_event() {
77+
if [ "$WORKFLOW_RUN_EVENT" != "push" ]; then
78+
echo "Ignoring workflow_run from event '$WORKFLOW_RUN_EVENT'."
79+
exit 0
80+
fi
81+
82+
local associated_prs pr
83+
associated_prs=$(gh api "repos/$GH_REPO/commits/$WORKFLOW_RUN_HEAD_SHA/pulls")
84+
pr=$(jq -c 'map(select(.state == "open"))[0] // empty' <<< "$associated_prs")
85+
if [ -z "$pr" ]; then
86+
echo "No open PR associated with $WORKFLOW_RUN_HEAD_SHA; nothing to publish."
87+
exit 0
88+
fi
89+
90+
PR_NUMBER=$(jq -r '.number' <<< "$pr")
91+
pr=$(gh api "repos/$GH_REPO/pulls/$PR_NUMBER")
92+
HEAD_SHA=$(jq -r '.head.sha' <<< "$pr")
93+
LABELS_JSON=$(gh api "repos/$GH_REPO/issues/$PR_NUMBER" --jq '[.labels[].name]')
94+
}
95+
96+
resolve_context() {
97+
if [ "$EVENT_NAME" = "pull_request_target" ]; then
98+
resolve_pull_request_event
99+
elif [ "$EVENT_NAME" = "workflow_run" ]; then
100+
resolve_workflow_run_event
101+
else
102+
echo "Unsupported event '$EVENT_NAME'."
103+
exit 1
104+
fi
105+
106+
PR_URL="https://github.com/$GH_REPO/pull/$PR_NUMBER"
107+
MIRROR_REF="pull-request/$PR_NUMBER"
108+
}
109+
110+
verify_mirror() {
111+
local context="$1"
112+
local mirror_sha
113+
114+
mirror_sha=$(gh api "repos/$GH_REPO/branches/$MIRROR_REF" --jq '.commit.sha' 2>/dev/null || true)
115+
if [ -z "$mirror_sha" ]; then
116+
post_status "$context" pending "Waiting for /ok to test mirror" "$PR_URL"
117+
return 1
118+
fi
119+
120+
if [ "$mirror_sha" != "$HEAD_SHA" ]; then
121+
post_status "$context" pending "Waiting for /ok to test mirror" "$PR_URL"
122+
return 1
123+
fi
124+
125+
return 0
126+
}
127+
128+
evaluate_workflow() {
129+
local context="$1"
130+
local workflow_file="$2"
131+
local workflow_name="$3"
132+
local required_label="${4:-}"
133+
local workflow_url="https://github.com/$GH_REPO/actions/workflows/$workflow_file"
134+
135+
if [ -n "$required_label" ] && ! has_label "$required_label"; then
136+
post_status "$context" success "$required_label not applied" "$PR_URL"
137+
return 0
138+
fi
139+
140+
if ! verify_mirror "$context"; then
141+
return 0
142+
fi
143+
144+
local runs latest run_id status conclusion run_url real_success
145+
runs=$(gh api "repos/$GH_REPO/actions/workflows/$workflow_file/runs?head_sha=$HEAD_SHA&event=push" --jq '.workflow_runs')
146+
latest=$(jq -c --arg branch "$MIRROR_REF" '[.[] | select(.head_branch == $branch)] | sort_by(.created_at) | reverse | .[0] // empty' <<< "$runs")
147+
148+
if [ -z "$latest" ]; then
149+
post_status "$context" pending "Waiting for $workflow_name" "$workflow_url"
150+
return 0
151+
fi
152+
153+
run_id=$(jq -r '.id' <<< "$latest")
154+
status=$(jq -r '.status' <<< "$latest")
155+
conclusion=$(jq -r '.conclusion' <<< "$latest")
156+
run_url=$(jq -r '.html_url' <<< "$latest")
157+
158+
if [ "$status" != "completed" ]; then
159+
post_status "$context" pending "$workflow_name is $status" "$run_url"
160+
return 0
161+
fi
162+
163+
if [ "$conclusion" != "success" ]; then
164+
post_status "$context" failure "$workflow_name concluded $conclusion" "$run_url"
165+
return 0
166+
fi
167+
168+
real_success=$(gh api "repos/$GH_REPO/actions/runs/$run_id/jobs?per_page=100" \
169+
--jq '[.jobs[] | select(.conclusion == "success" and .name != "Resolve PR metadata")] | length')
170+
171+
if [ "$real_success" -lt 1 ]; then
172+
post_status "$context" failure "No real CI jobs ran" "$run_url"
173+
return 0
174+
fi
175+
176+
post_status "$context" success "$workflow_name passed" "$run_url"
177+
}
178+
179+
resolve_context
180+
181+
evaluate_workflow "OpenShell / Branch Checks" "branch-checks.yml" "Branch Checks"
182+
evaluate_workflow "OpenShell / E2E" "branch-e2e.yml" "Branch E2E Checks" "test:e2e"
183+
evaluate_workflow "OpenShell / GPU E2E" "test-gpu.yml" "GPU Test" "test:e2e-gpu"
184+
evaluate_workflow "OpenShell / Kubernetes E2E" "branch-kubernetes-e2e.yml" "Branch Kubernetes E2E" "test:e2e-kubernetes"
185+
evaluate_workflow "OpenShell / Helm Lint" "helm-lint.yml" "Helm Lint"

CI.md

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,15 @@ For local test commands see [TESTING.md](TESTING.md). For PR conventions see [CO
88

99
PR CI that runs on NVIDIA self-hosted runners uses NVIDIA's copy-pr-bot. The bot mirrors trusted PR commits to internal `pull-request/<N>` branches in this repository. The gated workflows trigger on pushes to those branches, not on the original PR.
1010

11-
`Branch Checks` run automatically after copy-pr-bot mirrors the PR. E2E suites are opt-in because they are more expensive and publish temporary images.
11+
`Branch Checks` run automatically after copy-pr-bot mirrors the PR. `Required CI Gates` posts PR-head statuses that verify the mirror exists, is current, and ran the expected push-based workflows. E2E suites are opt-in because they are more expensive and publish temporary images.
1212

13-
Two opt-in labels enable the suites:
13+
Three opt-in labels enable the suites:
1414

1515
- `test:e2e` runs `Branch E2E Checks` (non-GPU E2E)
1616
- `test:e2e-gpu` runs `GPU Test`
17+
- `test:e2e-kubernetes` runs `Branch Kubernetes E2E`
1718

18-
Both are required to merge once the corresponding `E2E Gate` checks are marked required in branch protection.
19+
The GitHub ruleset should require the `OpenShell / ...` statuses published by `Required CI Gates`, not the push-triggered workflow jobs directly.
1920

2021
## Commit signing
2122

@@ -65,11 +66,11 @@ Prerequisites:
6566
Flow:
6667

6768
1. Open the PR. copy-pr-bot mirrors it to `pull-request/<N>` automatically.
68-
2. The mirror push runs `Branch Checks` automatically. The first `Branch E2E Checks` / `GPU Test` run only resolves metadata and skips expensive jobs unless the matching label is already set.
69-
3. A maintainer applies `test:e2e` and/or `test:e2e-gpu`. `E2E Label Help` posts a comment with a link to the existing gated workflow run.
69+
2. The mirror push runs `Branch Checks` automatically. `Required CI Gates` keeps the PR blocked until the mirror exists, matches the PR head SHA, and the required push-based workflow succeeds. The first `Branch E2E Checks` / `GPU Test` / `Branch Kubernetes E2E` run only resolves metadata and skips expensive jobs unless the matching label is already set.
70+
3. A maintainer applies `test:e2e`, `test:e2e-gpu`, and/or `test:e2e-kubernetes`. `E2E Label Help` posts a comment with a link to the existing gated workflow run.
7071
4. The maintainer opens that link and clicks **Re-run all jobs**. This time `pr_metadata` sees the label and the build/E2E jobs run.
71-
5. When the run finishes, the `E2E Gate` check on the PR flips to green automatically.
72-
6. New commits push to the mirror automatically and re-trigger `Branch Checks` plus any labeled E2E/GPU workflows.
72+
5. When the run finishes, the matching `OpenShell / ...` gate status flips to green automatically.
73+
6. New commits push to the mirror automatically and re-trigger `Branch Checks` plus any labeled E2E/GPU/Kubernetes workflows.
7374

7475
### Forked PR
7576

@@ -82,9 +83,9 @@ Flow:
8283

8384
1. Open the PR. The vouch check confirms you are vouched (otherwise the PR is auto-closed).
8485
2. copy-pr-bot does not mirror forks automatically. A maintainer reviews the diff and comments `/ok to test <SHA>` with your latest commit SHA.
85-
3. After `/ok to test`, copy-pr-bot mirrors to `pull-request/<N>`. From here the flow is identical to internal PRs: maintainer applies the label, follows the comment from `E2E Label Help`, and re-runs the workflow.
86+
3. After `/ok to test`, copy-pr-bot mirrors to `pull-request/<N>`. From here the flow is identical to internal PRs: `Required CI Gates` verifies the mirror and required push workflows, and maintainers apply E2E labels when the extra suites are needed.
8687

87-
Important: every new commit you push requires another `/ok to test <new-SHA>` from a maintainer before E2E will run on it. If a label is applied while the mirror is stale, `E2E Label Help` will post a comment explaining what's needed.
88+
Important: every new commit you push requires another `/ok to test <new-SHA>` from a maintainer before push-based CI will run on it. If a label is applied while the mirror is stale, `E2E Label Help` will post a comment explaining what's needed.
8889

8990
## copy-pr-bot
9091

@@ -107,7 +108,23 @@ The bot's full administrator documentation is internal to NVIDIA. The only comma
107108
| `.github/workflows/branch-checks.yml` | Required non-E2E PR checks. Triggers on `push: pull-request/[0-9]+`. |
108109
| `.github/workflows/branch-e2e.yml` | Non-GPU E2E. Triggers on `push: pull-request/[0-9]+`. |
109110
| `.github/workflows/test-gpu.yml` | GPU E2E. Triggers on `push: pull-request/[0-9]+`. |
111+
| `.github/workflows/branch-kubernetes-e2e.yml` | Kubernetes E2E. Triggers on `push: pull-request/[0-9]+`. |
112+
| `.github/workflows/helm-lint.yml` | Helm chart validation. Triggers on `push: pull-request/[0-9]+` and skips lint jobs unless Helm inputs changed. |
110113
| `.github/actions/pr-gate/action.yml` | Composite action that resolves PR metadata and verifies the required label is set. |
114+
| `.github/actions/pr-merge-base/action.yml` | Composite action that resolves and fetches the merge-base commit for `pull-request/<N>` push workflows. |
115+
| `.github/workflows/required-ci-gates.yml` | Posts required PR-head statuses for push-based CI workflows. This is what branch protection should require. |
111116
| `.github/workflows/e2e-gate.yml` | Posts the required `E2E Gate` check on the PR. Re-evaluates after the gated workflow completes. |
112117
| `.github/workflows/e2e-gate-check.yml` | Reusable gate logic shared by E2E and GPU E2E. |
113118
| `.github/workflows/e2e-label-help.yml` | When a `test:e2e*` label is applied, posts a PR comment telling the maintainer the next manual step (re-run an existing workflow run, or `/ok to test <SHA>` to refresh the mirror). |
119+
120+
## Required status contexts
121+
122+
Require these statuses in the branch ruleset for push-based CI:
123+
124+
- `OpenShell / Branch Checks`
125+
- `OpenShell / E2E`
126+
- `OpenShell / GPU E2E`
127+
- `OpenShell / Kubernetes E2E`
128+
- `OpenShell / Helm Lint`
129+
130+
Do not require the underlying push workflow jobs directly. Those jobs only appear after copy-pr-bot mirrors trusted code, so they cannot independently prove that an untrusted or stale PR head was tested.

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,4 +282,4 @@ DCO sign-off is separate from cryptographic commit signing. CI requires signing
282282

283283
## CI
284284

285-
How E2E runs in CI, the `test:e2e` / `test:e2e-gpu` labels, copy-pr-bot, and commit-signing setup are documented in [CI.md](CI.md).
285+
How PR CI runs, the `test:e2e` / `test:e2e-gpu` / `test:e2e-kubernetes` labels, copy-pr-bot, and commit-signing setup are documented in [CI.md](CI.md).

0 commit comments

Comments
 (0)