Skip to content

Commit 2957595

Browse files
authored
nvbug-6193808: Work around mojibake in nvml.system_get_process_name on WSL (#2118)
* nvbug-6193808: Work around mojibake in nvml.system_get_process_name on WSL * Re-enable test * Move POSIX-only functionality to a separate module * Address comments in the PR
1 parent ae34e4c commit 2957595

7 files changed

Lines changed: 131 additions & 5 deletions

File tree

cuda_bindings/docs/source/release/13.2.0-notes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,4 @@ Known issues
8080
------------
8181

8282
* Updating from older versions (v12.6.2.post1 and below) via ``pip install -U cuda-python`` might not work. Please do a clean re-installation by uninstalling ``pip uninstall -y cuda-python`` followed by installing ``pip install cuda-python``.
83+
* ``nvml.system_get_process_name`` on WSL can return incorrect values. To work around this, set the locale to "C" before calling ``nvml.device_get_compute_running_processes_v3`` (which sets the process names) and before calling ``nvml.system_get_process_name``. ``cuda_core`` does this automatically, but users of the raw NVML API will need to do this manually.

cuda_bindings/docs/source/release/13.3.0-notes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,4 @@ Known issues
3939
------------
4040

4141
* Updating from older versions (v12.6.2.post1 and below) via ``pip install -U cuda-python`` might not work. Please do a clean re-installation by uninstalling ``pip uninstall -y cuda-python`` followed by installing ``pip install cuda-python``.
42+
* ``nvml.system_get_process_name`` on WSL can return incorrect values. To work around this, set the locale to "C" before calling ``nvml.device_get_compute_running_processes_v3`` (which sets the process names) and before calling ``nvml.system_get_process_name``. ``cuda_core`` does this automatically, but users of the raw NVML API will need to do this manually.

cuda_core/build_hooks.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,12 +143,21 @@ def _build_cuda_core(debug=False):
143143
# cuda-bindings not available in editable mode, will use installed version
144144
pass
145145

146+
_posix_only_modules = frozenset(
147+
{
148+
"_utils/_wsl_locale",
149+
}
150+
)
151+
146152
# It seems setuptools' wildcard support has problems for namespace packages,
147153
# so we explicitly spell out all Extension instances.
148154
def module_names():
149155
root_path = os.path.sep.join(["cuda", "core", ""])
150156
for filename in glob.glob(f"{root_path}/**/*.pyx", recursive=True):
151-
yield filename[len(root_path) : -4]
157+
mod = filename[len(root_path) : -4]
158+
if sys.platform == "win32" and mod.replace(os.path.sep, "/") in _posix_only_modules:
159+
continue
160+
yield mod
152161

153162
def get_sources(mod_name):
154163
"""Get source files for a module, including any .cpp files."""
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
6+
# WSL-specific locale guard, used by cuda.core.system.get_process_name() to
7+
# work around a bug in NVML's WSL implementation where nvmlSystemGetProcessName
8+
# returns mojibake when the calling thread is in a non-"C" locale. See
9+
# get_process_name() for the full backstory.
10+
#
11+
# This module is only compiled on Linux (build_hooks.py excludes it on Windows)
12+
# because it uses the POSIX per-thread locale APIs (newlocale/uselocale/
13+
# freelocale), which are not available on MSVC. Callers must guard imports of
14+
# this module, e.g. with a platform check or try/except ImportError.
15+
16+
17+
cdef extern from "locale.h" nogil:
18+
ctypedef void *locale_t
19+
int LC_ALL_MASK
20+
locale_t newlocale(int category_mask, const char *locale, locale_t base)
21+
locale_t uselocale(locale_t newloc)
22+
void freelocale(locale_t locobj)
23+
24+
25+
cdef class c_locale_guard:
    """Context manager that pins the calling thread to the "C" locale.

    Uses POSIX newlocale/uselocale/freelocale so other threads' view of the
    locale is unaffected. Restores the previous thread locale on exit.

    An instance tracks a single activation at a time. Entering an instance
    that is already active raises ``RuntimeError`` instead of silently
    overwriting (and thereby leaking) the locale handles of the first
    activation. Create a new instance for each ``with`` statement.

    Raises
    ------
    RuntimeError
        From ``__enter__`` if the guard is already active, if the "C" locale
        object cannot be created, or if the thread locale cannot be switched.
    """
    cdef locale_t _c_locale
    cdef locale_t _prev_locale
    cdef bint _active

    def __cinit__(self):
        self._c_locale = <locale_t>0
        self._prev_locale = <locale_t>0
        self._active = False

    def __enter__(self):
        if self._active:
            # A second activation would overwrite _c_locale/_prev_locale, so
            # the first locale object could never be freed and the original
            # thread locale could never be restored.
            raise RuntimeError("c_locale_guard is already active")

        self._c_locale = newlocale(LC_ALL_MASK, b"C", <locale_t>0)
        if self._c_locale == <locale_t>0:
            raise RuntimeError("Failed to create C locale")

        self._prev_locale = uselocale(self._c_locale)
        if self._prev_locale == <locale_t>0:
            # uselocale() signals failure with a null handle; the thread
            # locale was not changed, so release the new locale object and
            # report the error rather than running the body unguarded.
            freelocale(self._c_locale)
            self._c_locale = <locale_t>0
            raise RuntimeError("Failed to switch the calling thread to the C locale")

        self._active = True
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self._active:
            # Restore the previous locale first so the "C" locale object is
            # no longer in use by this thread when it is freed.
            uselocale(self._prev_locale)
            freelocale(self._c_locale)
            self._c_locale = <locale_t>0
            self._prev_locale = <locale_t>0
            self._active = False
        # Never suppress an exception raised inside the with-body.
        return False

cuda_core/cuda/core/system/_system.pyx

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,30 @@
1010

1111
CUDA_BINDINGS_NVML_IS_COMPATIBLE: bool
1212

13+
14+
cdef bint _detect_wsl():
    # Report whether the kernel release string read from /proc mentions one of
    # the markers ("microsoft" or "wsl"), compared case-insensitively.
    try:
        with open("/proc/sys/kernel/osrelease") as release_file:
            release = release_file.read().lower()
    except OSError:
        # The release file could not be opened or read: report "not WSL".
        return False
    return any(marker in release for marker in ("microsoft", "wsl"))
21+
22+
23+
cdef bint _IS_WSL = _detect_wsl()
24+
25+
26+
# The WSL locale guard lives in a separate module that is only compiled on
27+
# Linux (build_hooks.py excludes it on Windows), because it relies on POSIX
28+
# per-thread locale APIs that MSVC does not provide. The import is therefore
29+
# attempted only when WSL is detected; otherwise c_locale_guard is None, which
30+
# is safe because get_process_name() only enters the guard when _IS_WSL is set.
31+
if _IS_WSL:
32+
from cuda.core._utils._wsl_locale import c_locale_guard
33+
else:
34+
c_locale_guard = None
35+
36+
1337
try:
1438
from cuda.bindings._version import __version_tuple__ as _BINDINGS_VERSION
1539
except ImportError:
@@ -127,8 +151,37 @@ def get_process_name(pid: int) -> str:
127151
name: str
128152
The process name.
129153
"""
154+
def _get_process_name(pid) -> str:
    # nvmlSystemGetProcessName answers from a per-PID cache that NVML fills
    # while enumerating the processes running on a device. Enumerate the
    # compute processes of every device first so the entry for ``pid`` is
    # cached before the name is read.
    device_count = nvml.device_get_count_v2()
    for index in range(device_count):
        try:
            handle = nvml.device_get_handle_by_index_v2(index)
            nvml.device_get_compute_running_processes_v3(handle)
        except nvml.NvmlError:
            # Best effort: a device that cannot be queried is skipped.
            continue
    return nvml.system_get_process_name(pid)
166+
130167
initialize()
131-
return nvml.system_get_process_name(pid)
168+
if not _IS_WSL:
169+
return _get_process_name(pid)
170+
171+
# WSL workaround: nvmlSystemGetProcessName on WSL takes a wide-char
172+
# conversion path when the calling thread's locale is non-"C". That path
173+
# walks a UTF-16LE source buffer with a 4-byte stride (as if it were
174+
# UTF-32LE) and emits 5-byte UTF-8 sequences that look like garbage
175+
# preceding the trailing basename of /proc/<pid>/exe. CPython's startup
176+
# unconditionally calls setlocale(LC_ALL, ""), so essentially every
177+
# cuda.core caller hits this. The cached entry for the PID is set the
178+
# first time NVML resolves it (typically inside
179+
# nvmlDeviceGetComputeRunningProcesses_v3), so to recover a correct value
180+
# we re-prime the cache under the "C" locale before reading the name.
181+
# c_locale_guard uses POSIX per-thread locale APIs (see _wsl_locale.pyx)
182+
# so other threads' view of the locale is unaffected.
183+
with c_locale_guard(): # no-cython-lint
184+
return _get_process_name(pid)
132185

133186

134187
__all__ = [

cuda_core/docs/source/release/1.1.0-notes.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,12 @@ New features
4343
:attr:`~ManagedBuffer.preferred_location`,
4444
:attr:`~ManagedBuffer.accessed_by`). Locations are expressed via
4545
:class:`Device` or :class:`Host`.
46+
47+
Bug fixes
48+
---------
49+
50+
- On WSL, ``cuda.core.system.get_process_name`` would raise a
51+
``UnicodeDecodeError``. It should now return the correct result.
52+
- Calling ``cuda.core.system.get_process_name`` before querying any device's
53+
``compute_running_processes`` would raise an ``NvmlNotFoundError``. Now it will
54+
correctly return the process name, if it is a GPU-using process.

cuda_core/tests/system/test_system_system.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
except ImportError:
1313
from cuda import cuda as driver
1414

15-
import helpers
16-
1715
from cuda.core import system
1816
from cuda.core._utils.cuda_utils import handle_return
1917

@@ -62,9 +60,11 @@ def test_nvml_version():
6260
assert 0 <= ver_patch[0] <= 99
6361

6462

65-
@pytest.mark.skipif(helpers.IS_WSL, reason="Process names may not be available on WSL")
6663
@skip_if_nvml_unsupported
6764
def test_get_process_name():
65+
for device in system.Device.get_all_devices():
66+
x = device.compute_running_processes
67+
6868
try:
6969
process_name = system.get_process_name(os.getpid())
7070
except system.NotFoundError:

0 commit comments

Comments
 (0)