Skip to content

Commit d296dbb

Browse files
committed
Merge remote-tracking branch 'upstream/main' into pyi-experiment
2 parents 442d212 + 92994e4 commit d296dbb

76 files changed

Lines changed: 7456 additions & 1295 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/actions/fetch_ctk/action.yml

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ inputs:
1414
cuda-components:
1515
description: "A list of the CTK components to install as a comma-separated list. e.g. 'cuda_nvcc,cuda_nvrtc,cuda_cudart'"
1616
required: false
17-
default: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_profiler_api,cuda_cccl,cuda_cupti,libnvjitlink,libcufile,libnvfatbin"
17+
default: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_profiler_api,cuda_cccl,cuda_cupti,libnvjitlink,libcufile,libnvfatbin,libcudla"
1818
cuda-path:
1919
description: "where the CTK components will be installed to, relative to $PWD"
2020
required: false
@@ -27,24 +27,15 @@ runs:
2727
shell: bash --noprofile --norc -xeuo pipefail {0}
2828
run: |
2929
# Pre-process the component list to ensure hash uniqueness
30+
# Use the runtime workspace mount so this also works inside container jobs.
31+
CTK_REDIST_TOOL="${GITHUB_WORKSPACE}/ci/tools/fetch_ctk_redistrib.py"
3032
CTK_CACHE_COMPONENTS=${{ inputs.cuda-components }}
31-
# Conditionally strip out libnvjitlink for CUDA versions < 12
32-
CUDA_MAJOR_VER="$(cut -d '.' -f 1 <<< ${{ inputs.cuda-version }})"
33-
if [[ "$CUDA_MAJOR_VER" -lt 12 ]]; then
34-
CTK_CACHE_COMPONENTS="${CTK_CACHE_COMPONENTS//libnvjitlink/}"
35-
fi
36-
# Conditionally strip out cuda_crt and libnvvm for CUDA versions < 13
37-
CUDA_MAJOR_VER="$(cut -d '.' -f 1 <<< ${{ inputs.cuda-version }})"
38-
if [[ "$CUDA_MAJOR_VER" -lt 13 ]]; then
39-
CTK_CACHE_COMPONENTS="${CTK_CACHE_COMPONENTS//cuda_crt/}"
40-
CTK_CACHE_COMPONENTS="${CTK_CACHE_COMPONENTS//libnvvm/}"
41-
fi
42-
# Conditionally strip out libcufile since it does not support Windows
43-
if [[ "${{ inputs.host-platform }}" == win-* ]]; then
44-
CTK_CACHE_COMPONENTS="${CTK_CACHE_COMPONENTS//libcufile/}"
45-
fi
46-
# Cleanup stray commas after removing components
47-
CTK_CACHE_COMPONENTS="${CTK_CACHE_COMPONENTS//,,/,}"
33+
CTK_JSON_URL="https://developer.download.nvidia.com/compute/cuda/redist/redistrib_${{ inputs.cuda-version }}.json"
34+
CTK_CACHE_COMPONENTS="$(python "$CTK_REDIST_TOOL" filter-components \
35+
--host-platform "${{ inputs.host-platform }}" \
36+
--cuda-version "${{ inputs.cuda-version }}" \
37+
--components "$CTK_CACHE_COMPONENTS" \
38+
--metadata-url "$CTK_JSON_URL")"
4839
4940
HASH=$(echo -n "${CTK_CACHE_COMPONENTS}" | sha256sum | awk '{print $1}')
5041
echo "CTK_CACHE_KEY=mini-ctk-${{ inputs.cuda-version }}-${{ inputs.host-platform }}-$HASH" >> $GITHUB_ENV
@@ -78,19 +69,17 @@ runs:
7869
mkdir $CACHE_TMP_DIR
7970
8071
# The binary archives (redist) are guaranteed to be updated as part of the release posting.
72+
# Use the runtime workspace mount so this also works inside container jobs.
73+
CTK_REDIST_TOOL="${GITHUB_WORKSPACE}/ci/tools/fetch_ctk_redistrib.py"
8174
CTK_BASE_URL="https://developer.download.nvidia.com/compute/cuda/redist/"
8275
CTK_JSON_URL="$CTK_BASE_URL/redistrib_${{ inputs.cuda-version }}.json"
76+
CTK_JSON_FILE="$CACHE_TMP_DIR/redistrib.json"
77+
curl -LSs "$CTK_JSON_URL" -o "$CTK_JSON_FILE"
8378
if [[ "${{ inputs.host-platform }}" == linux* ]]; then
84-
if [[ "${{ inputs.host-platform }}" == "linux-64" ]]; then
85-
CTK_SUBDIR="linux-x86_64"
86-
elif [[ "${{ inputs.host-platform }}" == "linux-aarch64" ]]; then
87-
CTK_SUBDIR="linux-sbsa"
88-
fi
8979
function extract() {
9080
tar -xvf $1 -C $CACHE_TMP_DIR --strip-components=1
9181
}
9282
elif [[ "${{ inputs.host-platform }}" == "win-64" ]]; then
93-
CTK_SUBDIR="windows-x86_64"
9483
function extract() {
9584
_TEMP_DIR_=$(mktemp -d)
9685
unzip $1 -d $_TEMP_DIR_
@@ -106,8 +95,10 @@ runs:
10695
curl -LSs $1 -o $2
10796
}
10897
CTK_COMPONENT=$1
109-
CTK_COMPONENT_REL_PATH="$(curl -s $CTK_JSON_URL |
110-
python -c "import sys, json; print(json.load(sys.stdin)['${CTK_COMPONENT}']['${CTK_SUBDIR}']['relative_path'])")"
98+
CTK_COMPONENT_REL_PATH="$(python "$CTK_REDIST_TOOL" component-relative-path \
99+
--host-platform "${{ inputs.host-platform }}" \
100+
--component "$CTK_COMPONENT" \
101+
--metadata-path "$CTK_JSON_FILE")"
111102
CTK_COMPONENT_URL="${CTK_BASE_URL}/${CTK_COMPONENT_REL_PATH}"
112103
CTK_COMPONENT_COMPONENT_FILENAME="$(basename $CTK_COMPONENT_REL_PATH)"
113104
download $CTK_COMPONENT_URL $CTK_COMPONENT_COMPONENT_FILENAME

.github/actions/sccache-summary/action.yml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ name: sccache summary
66
description: Parse sccache stats JSON and write a summary table to GITHUB_STEP_SUMMARY
77

88
# Inspired by NVIDIA/cccl's prepare-execution-summary.py (PR #3621).
9-
# Only counts C/C++ and CUDA language hits (excludes PTX/CUBIN which are
10-
# not included in sccache's compile_requests counter).
119

1210
inputs:
1311
json-file:
@@ -47,10 +45,11 @@ runs:
4745
with open(json_file) as f:
4846
stats = json.load(f)["stats"]
4947
50-
# compile_requests includes non-compilation calls (linker, etc).
51-
# Use cache_hits + cache_misses as the denominator to match sccache's
52-
# own "Cache hits rate" which only counts actual compilation requests.
53-
counted_languages = {"C/C++", "CUDA"}
48+
# compile_requests only counts top-level nvcc invocations, but each
49+
# invocation spawns sub-tool compilations (cudafe++, cicc, ptxas) that
50+
# sccache tracks under separate language keys. Count all of them so
51+
# the reported rate matches sccache's own "Cache hits rate".
52+
counted_languages = {"C/C++", "CUDA", "CUDA (Device code)", "PTX", "CUBIN"}
5453
hits = sum(
5554
v for k, v in stats.get("cache_hits", {}).get("counts", {}).items()
5655
if k in counted_languages

.github/workflows/build-wheel.yml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -401,11 +401,7 @@ jobs:
401401
402402
OLD_BRANCH=$(yq '.backport_branch' ci/versions.yml)
403403
OLD_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda*-${{ inputs.host-platform }}*"
404-
LATEST_PRIOR_RUN_ID=$(gh run list -b ${OLD_BRANCH} -L 1 -w "ci.yml" -s success -R NVIDIA/cuda-python --json databaseId | jq '.[]| .databaseId')
405-
if [[ "$LATEST_PRIOR_RUN_ID" == "" ]]; then
406-
echo "LATEST_PRIOR_RUN_ID not found!"
407-
exit 1
408-
fi
404+
LATEST_PRIOR_RUN_ID=$(./ci/tools/lookup-run-id --branch "${OLD_BRANCH}" NVIDIA/cuda-python "CI")
409405
410406
gh run download $LATEST_PRIOR_RUN_ID -p ${OLD_BASENAME} -R NVIDIA/cuda-python
411407
rm -rf ${OLD_BASENAME}-tests # exclude cython test artifacts

.github/workflows/ci-nightly.yml

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Nightly CI pipeline that tests optional dependencies (PyTorch, numba-cuda)
6+
# against the latest cuda-python wheels built on main, and runs the standard
7+
# test suite on runners reserved for nightly-only use (e.g. arm64 l4×2).
8+
#
9+
# This workflow does NOT build wheels — it downloads them from the latest
10+
# successful CI run on main and runs integration/standard tests.
11+
12+
name: "CI: Nightly optional-deps"
13+
14+
concurrency:
15+
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
16+
cancel-in-progress: true
17+
18+
on:
19+
schedule:
20+
# 2:17 AM UTC daily, after the midnight main CI build finishes.
21+
# Avoid minute 0 because GitHub documents high scheduled-workflow load
22+
# at the start of every hour, where queued jobs may be delayed or dropped.
23+
- cron: "17 2 * * *"
24+
workflow_dispatch:
25+
inputs:
26+
run-id:
27+
description: >
28+
Override the CI run ID to download artifacts from.
29+
Leave empty to auto-detect the latest successful main run.
30+
type: string
31+
default: ''
32+
33+
jobs:
34+
find-wheels:
35+
runs-on: ubuntu-latest
36+
outputs:
37+
RUN_ID: ${{ steps.find.outputs.run_id }}
38+
HEAD_SHA: ${{ steps.find.outputs.head_sha }}
39+
CUDA_BUILD_VER: ${{ steps.find.outputs.cuda_build_ver }}
40+
steps:
41+
- name: Checkout repository
42+
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
43+
with:
44+
fetch-depth: 1
45+
46+
- name: Find latest successful CI run on main
47+
id: find
48+
env:
49+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
50+
run: |
51+
if [[ -n "${{ inputs.run-id }}" ]]; then
52+
RUN_ID="${{ inputs.run-id }}"
53+
HEAD_SHA=$(gh run view "$RUN_ID" \
54+
-R "${{ github.repository }}" \
55+
--json headSha | jq -r '.headSha')
56+
else
57+
# lookup-run-id --branch --head-sha prints two lines: run_id then head_sha
58+
OUTPUT=$(./ci/tools/lookup-run-id --branch main --head-sha "${{ github.repository }}" "CI")
59+
RUN_ID=$(echo "$OUTPUT" | sed -n '1p')
60+
HEAD_SHA=$(echo "$OUTPUT" | sed -n '2p')
61+
fi
62+
63+
if [[ -z "$HEAD_SHA" || "$HEAD_SHA" == "null" ]]; then
64+
echo "::error::Could not resolve head SHA for CI run $RUN_ID"
65+
exit 1
66+
fi
67+
68+
CUDA_BUILD_VER=$(gh api \
69+
"repos/${{ github.repository }}/contents/ci/versions.yml?ref=$HEAD_SHA" \
70+
--jq '.content' \
71+
| base64 -d \
72+
| yq '.cuda.build.version')
73+
74+
if [[ -z "$CUDA_BUILD_VER" || "$CUDA_BUILD_VER" == "null" ]]; then
75+
echo "::error::Could not resolve CUDA build version from $HEAD_SHA"
76+
exit 1
77+
fi
78+
79+
echo "run_id=$RUN_ID" >> $GITHUB_OUTPUT
80+
echo "head_sha=$HEAD_SHA" >> $GITHUB_OUTPUT
81+
echo "cuda_build_ver=$CUDA_BUILD_VER" >> $GITHUB_OUTPUT
82+
83+
# ── PyTorch interop tests ──
84+
85+
test-pytorch-linux:
86+
name: "Nightly PyTorch (linux-64)"
87+
if: ${{ github.repository_owner == 'nvidia' }}
88+
needs: find-wheels
89+
permissions:
90+
contents: read
91+
actions: read
92+
secrets: inherit
93+
uses: ./.github/workflows/test-wheel-linux.yml
94+
with:
95+
build-type: nightly
96+
host-platform: linux-64
97+
build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }}
98+
run-id: ${{ needs.find-wheels.outputs.RUN_ID }}
99+
sha: ${{ needs.find-wheels.outputs.HEAD_SHA }}
100+
test-mode: nightly-pytorch
101+
matrix_filter: 'map(select(.MODE == "nightly-pytorch"))'
102+
103+
test-pytorch-linux-aarch64:
104+
name: "Nightly PyTorch (linux-aarch64)"
105+
if: ${{ github.repository_owner == 'nvidia' }}
106+
needs: find-wheels
107+
permissions:
108+
contents: read
109+
actions: read
110+
secrets: inherit
111+
uses: ./.github/workflows/test-wheel-linux.yml
112+
with:
113+
build-type: nightly
114+
host-platform: linux-aarch64
115+
build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }}
116+
run-id: ${{ needs.find-wheels.outputs.RUN_ID }}
117+
sha: ${{ needs.find-wheels.outputs.HEAD_SHA }}
118+
test-mode: nightly-pytorch
119+
matrix_filter: 'map(select(.MODE == "nightly-pytorch"))'
120+
121+
test-pytorch-windows:
122+
name: "Nightly PyTorch (win-64)"
123+
if: ${{ github.repository_owner == 'nvidia' }}
124+
needs: find-wheels
125+
permissions:
126+
contents: read
127+
actions: read
128+
secrets: inherit
129+
uses: ./.github/workflows/test-wheel-windows.yml
130+
with:
131+
build-type: nightly
132+
host-platform: win-64
133+
build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }}
134+
run-id: ${{ needs.find-wheels.outputs.RUN_ID }}
135+
sha: ${{ needs.find-wheels.outputs.HEAD_SHA }}
136+
test-mode: nightly-pytorch
137+
matrix_filter: 'map(select(.MODE == "nightly-pytorch"))'
138+
139+
# ── numba-cuda tests ──
140+
141+
test-numba-cuda-linux-64:
142+
name: "Nightly numba-cuda (linux-64)"
143+
if: ${{ github.repository_owner == 'nvidia' }}
144+
needs: find-wheels
145+
permissions:
146+
contents: read
147+
actions: read
148+
secrets: inherit
149+
uses: ./.github/workflows/test-wheel-linux.yml
150+
with:
151+
build-type: nightly
152+
host-platform: linux-64
153+
build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }}
154+
run-id: ${{ needs.find-wheels.outputs.RUN_ID }}
155+
sha: ${{ needs.find-wheels.outputs.HEAD_SHA }}
156+
test-mode: nightly-numba-cuda
157+
matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))'
158+
159+
test-numba-cuda-linux-aarch64:
160+
name: "Nightly numba-cuda (linux-aarch64)"
161+
if: ${{ github.repository_owner == 'nvidia' }}
162+
needs: find-wheels
163+
permissions:
164+
contents: read
165+
actions: read
166+
secrets: inherit
167+
uses: ./.github/workflows/test-wheel-linux.yml
168+
with:
169+
build-type: nightly
170+
host-platform: linux-aarch64
171+
build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }}
172+
run-id: ${{ needs.find-wheels.outputs.RUN_ID }}
173+
sha: ${{ needs.find-wheels.outputs.HEAD_SHA }}
174+
test-mode: nightly-numba-cuda
175+
matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))'
176+
177+
test-numba-cuda-windows:
178+
name: "Nightly numba-cuda (win-64)"
179+
if: ${{ github.repository_owner == 'nvidia' }}
180+
needs: find-wheels
181+
permissions:
182+
contents: read
183+
actions: read
184+
secrets: inherit
185+
uses: ./.github/workflows/test-wheel-windows.yml
186+
with:
187+
build-type: nightly
188+
host-platform: win-64
189+
build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }}
190+
run-id: ${{ needs.find-wheels.outputs.RUN_ID }}
191+
sha: ${{ needs.find-wheels.outputs.HEAD_SHA }}
192+
test-mode: nightly-numba-cuda
193+
matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))'
194+
195+
# ── Standard tests on nightly-only runners ──
196+
197+
test-standard-linux-aarch64:
198+
name: "Nightly standard (linux-aarch64)"
199+
if: ${{ github.repository_owner == 'nvidia' }}
200+
needs: find-wheels
201+
permissions:
202+
contents: read
203+
actions: read
204+
secrets: inherit
205+
uses: ./.github/workflows/test-wheel-linux.yml
206+
with:
207+
build-type: nightly
208+
host-platform: linux-aarch64
209+
build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }}
210+
run-id: ${{ needs.find-wheels.outputs.RUN_ID }}
211+
sha: ${{ needs.find-wheels.outputs.HEAD_SHA }}
212+
test-mode: standard
213+
matrix_filter: 'map(select(.MODE == "nightly-standard"))'
214+
215+
# ── Status check ──
216+
217+
checks:
218+
name: Nightly check status
219+
if: always()
220+
runs-on: ubuntu-latest
221+
needs:
222+
- find-wheels
223+
- test-pytorch-linux
224+
- test-pytorch-linux-aarch64
225+
- test-pytorch-windows
226+
- test-numba-cuda-linux-64
227+
- test-numba-cuda-linux-aarch64
228+
- test-numba-cuda-windows
229+
- test-standard-linux-aarch64
230+
steps:
231+
- name: Exit
232+
run: |
233+
# If any dependency was cancelled or failed, that's a failure.
234+
#
235+
# See ci.yml for the full rationale on why we must use always()
236+
# and explicitly check each result rather than relying on the
237+
# default behaviour.
238+
if ${{ needs.find-wheels.result != 'success' }}; then
239+
exit 1
240+
fi
241+
if ${{ needs.test-pytorch-linux.result == 'cancelled' ||
242+
needs.test-pytorch-linux.result == 'failure' ||
243+
needs.test-pytorch-linux-aarch64.result == 'cancelled' ||
244+
needs.test-pytorch-linux-aarch64.result == 'failure' ||
245+
needs.test-pytorch-windows.result == 'cancelled' ||
246+
needs.test-pytorch-windows.result == 'failure' ||
247+
needs.test-numba-cuda-linux-64.result == 'cancelled' ||
248+
needs.test-numba-cuda-linux-64.result == 'failure' ||
249+
needs.test-numba-cuda-linux-aarch64.result == 'cancelled' ||
250+
needs.test-numba-cuda-linux-aarch64.result == 'failure' ||
251+
needs.test-numba-cuda-windows.result == 'cancelled' ||
252+
needs.test-numba-cuda-windows.result == 'failure' ||
253+
needs.test-standard-linux-aarch64.result == 'cancelled' ||
254+
needs.test-standard-linux-aarch64.result == 'failure' }}; then
255+
exit 1
256+
fi
257+
exit 0

0 commit comments

Comments
 (0)