Skip to content

Commit e3b0c71

Browse files
leofang and claude committed
Add 2-GPU runners: arm64 l4×2 nightly + Windows amd64 special runners
arm64 l4×2 runners are restricted to nightly-only use per the runner team (ARM64 L4 capacity concerns). Add them as nightly-standard entries in ci/test-matrix.yml, with a matching job in ci-nightly.yml, so they run the standard test suite against wheels from the latest successful main CI run.

Windows amd64 2-GPU runners (t4×2 TCC, h100×2 MCDM) are added as special runners in the regular PR CI matrix, mirroring the existing Linux amd64 2-GPU special runners.

Also update the Windows test job name to show GPU count (x2) for multi-GPU entries, matching the Linux job name format.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent ad5001b commit e3b0c71

3 files changed

Lines changed: 34 additions & 4 deletions

File tree

.github/workflows/ci-nightly.yml

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33
# SPDX-License-Identifier: Apache-2.0
44

55
# Nightly CI pipeline that tests optional dependencies (PyTorch, numba-cuda)
6-
# against the latest cuda-python wheels built on main.
6+
# against the latest cuda-python wheels built on main, and runs the standard
7+
# test suite on runners reserved for nightly-only use (e.g. arm64 l4×2).
78
#
89
# This workflow does NOT build wheels — it downloads them from the latest
9-
# successful CI run on main and runs integration tests with optional deps.
10+
# successful CI run on main and runs integration/standard tests.
1011

1112
name: "CI: Nightly optional-deps"
1213

@@ -191,6 +192,26 @@ jobs:
191192
test-mode: nightly-numba-cuda
192193
matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))'
193194

195+
# ── Standard tests on nightly-only runners ──
196+
197+
test-standard-linux-aarch64:
198+
name: "Nightly standard (linux-aarch64)"
199+
if: ${{ github.repository_owner == 'nvidia' }}
200+
needs: find-wheels
201+
permissions:
202+
contents: read
203+
actions: read
204+
secrets: inherit
205+
uses: ./.github/workflows/test-wheel-linux.yml
206+
with:
207+
build-type: nightly
208+
host-platform: linux-aarch64
209+
build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }}
210+
run-id: ${{ needs.find-wheels.outputs.RUN_ID }}
211+
sha: ${{ needs.find-wheels.outputs.HEAD_SHA }}
212+
test-mode: standard
213+
matrix_filter: 'map(select(.MODE == "nightly-standard"))'
214+
194215
# ── Status check ──
195216

196217
checks:
@@ -205,6 +226,7 @@ jobs:
205226
- test-numba-cuda-linux-64
206227
- test-numba-cuda-linux-aarch64
207228
- test-numba-cuda-windows
229+
- test-standard-linux-aarch64
208230
steps:
209231
- name: Exit
210232
run: |
@@ -227,7 +249,9 @@ jobs:
227249
needs.test-numba-cuda-linux-aarch64.result == 'cancelled' ||
228250
needs.test-numba-cuda-linux-aarch64.result == 'failure' ||
229251
needs.test-numba-cuda-windows.result == 'cancelled' ||
230-
needs.test-numba-cuda-windows.result == 'failure' }}; then
252+
needs.test-numba-cuda-windows.result == 'failure' ||
253+
needs.test-standard-linux-aarch64.result == 'cancelled' ||
254+
needs.test-standard-linux-aarch64.result == 'failure' }}; then
231255
exit 1
232256
fi
233257
exit 0

.github/workflows/test-wheel-windows.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ jobs:
8787
echo "MATRIX=${MATRIX}" | tee --append "${GITHUB_OUTPUT}"
8888
8989
test:
90-
name: Python ${{ matrix.PY_VER }}, CUDA ${{ matrix.CUDA_VER }} (${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}), GPU ${{ matrix.GPU }} (${{ matrix.DRIVER_MODE }})${{ matrix.TORCH_VER && format(', {0}', matrix.TORCH_VER) || '' }}${{ matrix.MODE == 'nightly-numba-cuda' && ', latest' || '' }}
90+
name: Python ${{ matrix.PY_VER }}, CUDA ${{ matrix.CUDA_VER }} (${{ (matrix.LOCAL_CTK == '1' && 'local') || 'wheels' }}), GPU ${{ matrix.GPU }}${{ matrix.GPU_COUNT != '1' && format(' (x{0})', matrix.GPU_COUNT) || '' }} (${{ matrix.DRIVER_MODE }})${{ matrix.TORCH_VER && format(', {0}', matrix.TORCH_VER) || '' }}${{ matrix.MODE == 'nightly-numba-cuda' && ', latest' || '' }}
9191
# The build stage could fail but we want the CI to keep moving.
9292
needs: compute-matrix
9393
strategy:

ci/test-matrix.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ linux:
7777
- { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
7878
- { MODE: 'nightly-numba-cuda', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
7979
- { MODE: 'nightly-numba-cuda', ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
80+
# nightly-standard (arm64 l4×2 — nightly-only per runner team request)
81+
- { MODE: 'nightly-standard', ARCH: 'arm64', PY_VER: '3.14', CUDA_VER: '13.2.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest' }
82+
- { MODE: 'nightly-standard', ARCH: 'arm64', PY_VER: '3.14t', CUDA_VER: '13.2.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '2', DRIVER: 'latest' }
8083

8184
windows:
8285
pull-request:
@@ -99,6 +102,9 @@ windows:
99102
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
100103
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
101104
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
105+
# special runners
106+
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.2.1', LOCAL_CTK: '1', GPU: 't4', GPU_COUNT: '2', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
107+
- { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'h100', GPU_COUNT: '2', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
102108
nightly:
103109
# nightly-pytorch
104110
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.11.0', TORCH_CUDA: 'cu126' }

0 commit comments

Comments
 (0)