Skip to content

Commit 2c6a43e

Browse files
authored
Merge branch 'main' into test_green_context_adapt_SM_split_tests_to_topology
2 parents 34ecade + 2957595 commit 2c6a43e

51 files changed

Lines changed: 2638 additions & 160 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/restricted-paths-guard.yml

Lines changed: 77 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,8 @@ jobs:
2424
steps:
2525
- name: Inspect PR author signals for restricted paths
2626
env:
27-
# PR metadata inputs (author_association from event payload is
28-
# unreliable for fork PRs, so we query the collaborator API directly)
27+
# PR metadata inputs (the event payload's author_association can be
28+
# stale for fork PRs, so restricted-path PRs query the live PR API).
2929
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
3030
PR_NUMBER: ${{ github.event.pull_request.number }}
3131
PR_URL: ${{ github.event.pull_request.html_url }}
@@ -42,6 +42,8 @@ jobs:
4242
4343
COLLABORATOR_PERMISSION="not checked"
4444
COLLABORATOR_PERMISSION_API_ERROR=""
45+
AUTHOR_ASSOCIATION="not checked"
46+
AUTHOR_ASSOCIATION_API_ERROR=""
4547
4648
if ! MATCHING_RESTRICTED_PATHS=$(
4749
gh api \
@@ -68,6 +70,7 @@ jobs:
6870
echo ""
6971
echo "- **Error**: Failed to inspect the PR file list."
7072
echo "- **Author**: $PR_AUTHOR"
73+
echo "- **Author association**: $AUTHOR_ASSOCIATION"
7174
echo "- **Collaborator permission**: $COLLABORATOR_PERMISSION"
7275
echo ""
7376
echo "Please update the PR at: $PR_URL"
@@ -88,6 +91,7 @@ jobs:
8891
echo ""
8992
echo "- **Error**: Failed to inspect the current PR labels."
9093
echo "- **Author**: $PR_AUTHOR"
94+
echo "- **Author association**: $AUTHOR_ASSOCIATION"
9195
echo "- **Collaborator permission**: $COLLABORATOR_PERMISSION"
9296
echo ""
9397
echo "Please update the PR at: $PR_URL"
@@ -114,6 +118,13 @@ jobs:
114118
echo '```'
115119
}
116120
121+
write_author_association_api_error() {
122+
echo "- **Author association API error**:"
123+
echo '```text'
124+
printf '%s\n' "$AUTHOR_ASSOCIATION_API_ERROR"
125+
echo '```'
126+
}
127+
117128
post_review_label_comment() {
118129
local comment_body
119130
printf -v comment_body '%s\n\n%s\n' \
@@ -135,46 +146,87 @@ jobs:
135146
COMMENT_ACTION="not needed"
136147
137148
if [ "$TOUCHES_RESTRICTED_PATHS" = "true" ]; then
138-
# Distinguish a legitimate 404 "not a collaborator" response from
139-
# actual API failures. The former is an expected untrusted case;
140-
# the latter fails the workflow so it can be rerun later.
141-
if COLLABORATOR_PERMISSION_RESPONSE=$(
142-
gh api "repos/$REPO/collaborators/$PR_AUTHOR/permission" \
143-
--jq '.permission' 2>&1
149+
if AUTHOR_ASSOCIATION_RESPONSE=$(
150+
gh api "repos/$REPO/pulls/$PR_NUMBER" \
151+
--jq '.author_association // "NONE"' 2>&1
144152
); then
145-
COLLABORATOR_PERMISSION="$COLLABORATOR_PERMISSION_RESPONSE"
146-
elif [[ "$COLLABORATOR_PERMISSION_RESPONSE" == *"(HTTP 404)"* ]]; then
147-
COLLABORATOR_PERMISSION="none"
153+
AUTHOR_ASSOCIATION="$AUTHOR_ASSOCIATION_RESPONSE"
148154
else
149-
COLLABORATOR_PERMISSION="unknown"
150-
COLLABORATOR_PERMISSION_API_ERROR="$COLLABORATOR_PERMISSION_RESPONSE"
151-
echo "::error::Failed to inspect collaborator permission for $PR_AUTHOR."
155+
AUTHOR_ASSOCIATION="unknown"
156+
AUTHOR_ASSOCIATION_API_ERROR="$AUTHOR_ASSOCIATION_RESPONSE"
157+
echo "::error::Failed to inspect live author association for PR #$PR_NUMBER."
152158
{
153159
echo "## Restricted Paths Guard Failed"
154160
echo ""
155-
echo "- **Error**: Failed to inspect collaborator permission."
161+
echo "- **Error**: Failed to inspect live author association."
156162
echo "- **Author**: $PR_AUTHOR"
157-
echo "- **Collaborator permission**: $COLLABORATOR_PERMISSION"
163+
echo "- **Author association**: $AUTHOR_ASSOCIATION"
158164
echo ""
159165
write_matching_restricted_paths
160166
echo ""
161-
write_collaborator_permission_api_error
167+
write_author_association_api_error
162168
echo ""
163-
echo "Please retry this workflow. If the failure persists, inspect the collaborator permission API error above."
169+
echo "Please retry this workflow. If the failure persists, inspect the author association API error above."
164170
} >> "$GITHUB_STEP_SUMMARY"
165171
exit 1
166172
fi
167173
168-
case "$COLLABORATOR_PERMISSION" in
169-
admin|maintain|write|triage|read)
174+
case "$AUTHOR_ASSOCIATION" in
175+
MEMBER|OWNER)
170176
HAS_TRUSTED_SIGNAL=true
171-
LABEL_ACTION="not needed (collaborator permission is a trusted signal)"
172-
TRUSTED_SIGNALS="collaborator_permission:$COLLABORATOR_PERMISSION"
177+
LABEL_ACTION="not needed (live author association is a trusted signal)"
178+
TRUSTED_SIGNALS="author_association:$AUTHOR_ASSOCIATION"
173179
;;
174180
*)
175-
# none: not a trusted signal
181+
# COLLABORATOR can still be too broad for this policy; use the
182+
# collaborator permission API below for repo-level trust.
176183
;;
177184
esac
185+
186+
# Distinguish a legitimate 404 "not a collaborator" response from
187+
# actual API failures. The former is an expected untrusted case;
188+
# the latter fails the workflow so it can be rerun later.
189+
if [ "$HAS_TRUSTED_SIGNAL" = "false" ]; then
190+
if COLLABORATOR_PERMISSION_RESPONSE=$(
191+
gh api "repos/$REPO/collaborators/$PR_AUTHOR/permission" \
192+
--jq '.permission' 2>&1
193+
); then
194+
COLLABORATOR_PERMISSION="$COLLABORATOR_PERMISSION_RESPONSE"
195+
elif [[ "$COLLABORATOR_PERMISSION_RESPONSE" == *"(HTTP 404)"* ]]; then
196+
COLLABORATOR_PERMISSION="none"
197+
else
198+
COLLABORATOR_PERMISSION="unknown"
199+
COLLABORATOR_PERMISSION_API_ERROR="$COLLABORATOR_PERMISSION_RESPONSE"
200+
echo "::error::Failed to inspect collaborator permission for $PR_AUTHOR."
201+
{
202+
echo "## Restricted Paths Guard Failed"
203+
echo ""
204+
echo "- **Error**: Failed to inspect collaborator permission."
205+
echo "- **Author**: $PR_AUTHOR"
206+
echo "- **Author association**: $AUTHOR_ASSOCIATION"
207+
echo "- **Collaborator permission**: $COLLABORATOR_PERMISSION"
208+
echo ""
209+
write_matching_restricted_paths
210+
echo ""
211+
write_collaborator_permission_api_error
212+
echo ""
213+
echo "Please retry this workflow. If the failure persists, inspect the collaborator permission API error above."
214+
} >> "$GITHUB_STEP_SUMMARY"
215+
exit 1
216+
fi
217+
218+
case "$COLLABORATOR_PERMISSION" in
219+
admin|maintain|write|triage)
220+
HAS_TRUSTED_SIGNAL=true
221+
LABEL_ACTION="not needed (collaborator permission is a trusted signal)"
222+
TRUSTED_SIGNALS="collaborator_permission:$COLLABORATOR_PERMISSION"
223+
;;
224+
*)
225+
# read or none: not a trusted signal. In a public repo, read
226+
# can be the effective permission for any GitHub user.
227+
;;
228+
esac
229+
fi
178230
fi
179231
180232
NEEDS_REVIEW_LABEL=false
@@ -197,6 +249,7 @@ jobs:
197249
echo ""
198250
echo "- **Error**: Failed to add the \`$REVIEW_LABEL\` label."
199251
echo "- **Author**: $PR_AUTHOR"
252+
echo "- **Author association**: $AUTHOR_ASSOCIATION"
200253
echo "- **Collaborator permission**: $COLLABORATOR_PERMISSION"
201254
echo ""
202255
write_matching_restricted_paths
@@ -216,6 +269,7 @@ jobs:
216269
echo "## Restricted Paths Guard Completed"
217270
echo ""
218271
echo "- **Author**: $PR_AUTHOR"
272+
echo "- **Author association**: $AUTHOR_ASSOCIATION"
219273
echo "- **Collaborator permission**: $COLLABORATOR_PERMISSION"
220274
echo "- **Touches restricted paths**: $TOUCHES_RESTRICTED_PATHS"
221275
echo "- **Restricted paths**: \`cuda_bindings/\`, \`cuda_python/\`"

ci/tools/setup-sanitizer

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env bash
22

3-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
44
#
55
# SPDX-License-Identifier: Apache-2.0
66

@@ -12,7 +12,15 @@ set -euo pipefail
1212
if [[ "${SETUP_SANITIZER}" == 1 ]]; then
1313
COMPUTE_SANITIZER="${CUDA_HOME}/bin/compute-sanitizer"
1414
COMPUTE_SANITIZER_VERSION=$(${COMPUTE_SANITIZER} --version | grep -Eo "[0-9]{4}\.[0-9]\.[0-9]" | sed -e 's/\.//g')
15-
SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck --error-exitcode=1 --report-api-errors=no"
15+
# --target-processes=application-only: attach the sanitizer to the parent
16+
# pytest process only. Spawned multiprocessing.Process children run without
17+
# the sanitizer. This aims to mitigate a class of CI hangs where child
18+
# processes take an extreme amount of time to spawn (>30 seconds). Test bugs
19+
# triggered by that specific condition are typically uncovered only in CI,
20+
# where they become emergencies and are difficult to debug. The parent
21+
# process is still fully sanitized, which is where most of the interesting
22+
# host-side IPC plumbing runs anyway.
23+
SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=application-only --launch-timeout=0 --tool=memcheck --error-exitcode=1 --report-api-errors=no"
1624
if [[ "$COMPUTE_SANITIZER_VERSION" -ge 202111 ]]; then
1725
SANITIZER_CMD="${SANITIZER_CMD} --padding=32"
1826
fi

0 commit comments

Comments
 (0)