Skip to content

Commit db55339

Browse files
committed
Merge branch 'main' into rluo8→fix/ipc-peer-access-skip
2 parents 55f4997 + 92994e4 commit db55339

10 files changed

Lines changed: 50 additions & 19 deletions

File tree

cuda_bindings/cuda/bindings/_bindings/cydriver.pyx.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,7 @@ cdef int _cuPythonInit() except -1 nogil:
519519
cdef char libPath[260]
520520

521521
with gil, __symbol_lock:
522-
usePTDS = os.getenv('CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM', default=0)
522+
usePTDS = bool(int(os.getenv('CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM', default=0)))
523523

524524
# Load library
525525
libPath[0] = 0

cuda_bindings/cuda/bindings/_bindings/cyruntime.pyx.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ cdef int _cudaPythonInit() except -1 nogil:
1515
global __usePTDS
1616

1717
with gil:
18-
__usePTDS = os.getenv('CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM', default=False)
18+
__usePTDS = bool(int(os.getenv('CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM', default=0)))
1919
__cudaPythonInit = True
2020
return __usePTDS
2121

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
.. SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
.. SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
3+
4+
.. module:: cuda.bindings
5+
6+
``cuda-bindings`` 13.2.1 Release notes
7+
======================================
8+
9+
Bugfixes
10+
--------
11+
12+
* Fixed an issue where per-thread default stream mode would be used whenever the
13+
``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` environment variable was set,
14+
even if it was set to ``0``.

cuda_bindings/tests/nvml/test_device.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from functools import cache
66

7+
import numpy as np
78
import pytest
89

910
from cuda.bindings import nvml
@@ -78,7 +79,7 @@ def test_get_nv_link_supported_bw_modes(all_devices):
7879
assert not hasattr(modes, "total_bw_modes")
7980

8081
for mode in modes.bw_modes:
81-
assert isinstance(mode, int)
82+
assert isinstance(mode, np.uint8)
8283

8384

8485
def test_device_get_pdi(all_devices):

cuda_core/cuda/core/_utils/enum_explanations_helpers.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,11 @@
3535
def _binding_version() -> tuple[int, int, int]:
3636
"""Return the installed ``cuda-bindings`` version, or a conservative old value."""
3737
try:
38-
parts = importlib.metadata.version("cuda-bindings").split(".")[:3]
38+
version = importlib.metadata.version("cuda-bindings")
3939
except importlib.metadata.PackageNotFoundError:
4040
return (0, 0, 0) # For very old versions of cuda-python
41+
42+
parts = version.partition("+")[0].split(".")[:3]
4143
return tuple(int(v) for v in parts)
4244

4345

cuda_core/cuda/core/utils/_program_cache/_file_stream.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -422,11 +422,17 @@ def _path_for_key(self, key: object) -> Path:
422422
k = _as_key_bytes(key)
423423
# Hash the key to a fixed-length identifier so arbitrary-length user
424424
# keys never exceed per-component filename limits (typically 255 on
425-
# ext4 / NTFS). With a 256-bit blake2b digest, the cache relies on
426-
# cryptographic collision resistance for key uniqueness -- two
427-
# distinct keys hashing to the same path is astronomically unlikely
428-
# (~2^-128 with the 32-byte digest in use here).
429-
digest = hashlib.blake2b(k, digest_size=32).hexdigest()
425+
# ext4 / NTFS).
426+
#
427+
# FIPS: must use a FIPS-approved hash algorithm. FIPS-enforcing
428+
# systems can disable non-approved hashlib algorithms (for example
429+
# blake2b) at the OpenSSL level. See #2043.
430+
#
431+
# With a 256-bit SHA-256 digest, the cache relies on collision
432+
# resistance for key uniqueness -- two distinct keys hashing to the
433+
# same path is astronomically unlikely (a collision takes ~2^128 hash
434+
# evaluations to find).
435+
digest = hashlib.sha256(k, usedforsecurity=False).hexdigest()
430436
return self._entries / digest[:2] / digest[2:]
431437

432438
# -- mapping API ---------------------------------------------------------

cuda_core/cuda/core/utils/_program_cache/_keys.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
)
3636

3737
# Bump when the key schema changes in a way that invalidates existing caches.
38-
_KEY_SCHEMA_VERSION = 1
38+
_KEY_SCHEMA_VERSION = 2
3939

4040
_VALID_CODE_TYPES = frozenset({"c++", "ptx", "nvvm"})
4141
_VALID_TARGET_TYPES = frozenset({"ptx", "cubin", "ltoir"})
@@ -768,7 +768,10 @@ def make_program_cache_key(
768768
option_bytes = backend.option_fingerprint(options, target_type)
769769
name_tags = backend.encode_name_expressions(name_expressions)
770770

771-
hasher = hashlib.blake2b(digest_size=32)
771+
# IMPORTANT: Must use a FIPS-approved hash algorithm (SHA-2 family).
772+
# FIPS-enforcing systems can disable non-approved hashlib algorithms
773+
# (for example blake2b) at the OpenSSL level. See #2043.
774+
hasher = hashlib.sha256(usedforsecurity=False)
772775

773776
def _update(label: str, payload: bytes) -> None:
774777
hasher.update(label.encode("ascii"))

cuda_core/tests/test_memory.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1474,11 +1474,11 @@ def test_pinned_mr_numa_id_default_no_ipc(init_cuda):
14741474
device = Device()
14751475
skip_if_pinned_memory_unsupported(device)
14761476

1477-
mr = PinnedMemoryResource(PinnedMemoryResourceOptions())
1477+
mr = create_pinned_memory_resource_or_xfail(PinnedMemoryResourceOptions(), xfail_device=device)
14781478
assert mr.numa_id == -1
14791479
mr.close()
14801480

1481-
mr = PinnedMemoryResource(PinnedMemoryResourceOptions(ipc_enabled=False))
1481+
mr = create_pinned_memory_resource_or_xfail(PinnedMemoryResourceOptions(ipc_enabled=False), xfail_device=device)
14821482
assert mr.numa_id == -1
14831483
mr.close()
14841484

@@ -1497,7 +1497,9 @@ def test_pinned_mr_numa_id_default_with_ipc(init_cuda):
14971497
if expected_numa_id < 0:
14981498
pytest.skip("System does not support NUMA")
14991499

1500-
mr = PinnedMemoryResource(PinnedMemoryResourceOptions(ipc_enabled=True, max_size=POOL_SIZE))
1500+
mr = create_pinned_memory_resource_or_xfail(
1501+
PinnedMemoryResourceOptions(ipc_enabled=True, max_size=POOL_SIZE), xfail_device=device
1502+
)
15011503
assert mr.numa_id == expected_numa_id
15021504
mr.close()
15031505

@@ -1511,7 +1513,7 @@ def test_pinned_mr_numa_id_explicit(init_cuda):
15111513
if host_numa_id < 0:
15121514
pytest.skip("System does not support NUMA")
15131515

1514-
mr = PinnedMemoryResource(PinnedMemoryResourceOptions(numa_id=host_numa_id))
1516+
mr = create_pinned_memory_resource_or_xfail(PinnedMemoryResourceOptions(numa_id=host_numa_id), xfail_device=device)
15151517
assert mr.numa_id == host_numa_id
15161518
mr.close()
15171519

@@ -1520,7 +1522,10 @@ def test_pinned_mr_numa_id_explicit(init_cuda):
15201522
if not supports_ipc_mempool(device):
15211523
pytest.skip("Driver rejects IPC-enabled mempool creation on this platform")
15221524

1523-
mr = PinnedMemoryResource(PinnedMemoryResourceOptions(ipc_enabled=True, numa_id=host_numa_id, max_size=POOL_SIZE))
1525+
mr = create_pinned_memory_resource_or_xfail(
1526+
PinnedMemoryResourceOptions(ipc_enabled=True, numa_id=host_numa_id, max_size=POOL_SIZE),
1527+
xfail_device=device,
1528+
)
15241529
assert mr.numa_id == host_numa_id
15251530
mr.close()
15261531

cuda_core/tests/test_program_cache.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1773,7 +1773,7 @@ def test_filestream_cache_size_cap_counts_tmp_files(tmp_path):
17731773

17741774
def test_filestream_cache_handles_long_keys(tmp_path):
17751775
"""Arbitrary-length keys must not overflow per-component filename limits.
1776-
The filename is a fixed-length 256-bit blake2b digest; key uniqueness
1776+
The filename is a fixed-length 256-bit digest; key uniqueness
17771777
relies on the digest's collision resistance."""
17781778
from cuda.core.utils import FileStreamProgramCache
17791779

scripts/run_tests.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env bash
22

3-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
44
# SPDX-License-Identifier: Apache-2.0
55

66
set -euo pipefail
@@ -133,7 +133,7 @@ status_from_rc() {
133133
run_pytest() {
134134
# Run pytest safely under set -e and return its exit code
135135
set +e
136-
python -m pytest "${PYTEST_FLAGS[@]}" "$@"
136+
CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM=0 python -m pytest "${PYTEST_FLAGS[@]}" "$@"
137137
local rc=$?
138138
set -e
139139
return ${rc}

0 commit comments

Comments
 (0)