Skip to content

Commit 22fd00c

Browse files
committed
Move POSIX-only functionality to a separate module
1 parent bfb518e commit 22fd00c

3 files changed

Lines changed: 87 additions & 34 deletions

File tree

cuda_core/build_hooks.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,12 +143,21 @@ def _build_cuda_core(debug=False):
143143
# cuda-bindings not available in editable mode, will use installed version
144144
pass
145145

146+
_posix_only_modules = frozenset(
147+
{
148+
"_utils/_wsl_locale",
149+
}
150+
)
151+
146152
# It seems setuptools' wildcard support has problems for namespace packages,
147153
# so we explicitly spell out all Extension instances.
148154
def module_names():
149155
root_path = os.path.sep.join(["cuda", "core", ""])
150156
for filename in glob.glob(f"{root_path}/**/*.pyx", recursive=True):
151-
yield filename[len(root_path) : -4]
157+
mod = filename[len(root_path) : -4]
158+
if sys.platform == "win32" and mod.replace(os.path.sep, "/") in _posix_only_modules:
159+
continue
160+
yield mod
152161

153162
def get_sources(mod_name):
154163
"""Get source files for a module, including any .cpp files."""
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
6+
# WSL-specific locale guard, used by cuda.core.system.get_process_name() to
7+
# work around a bug in NVML's WSL implementation where nvmlSystemGetProcessName
8+
# returns mojibake when the calling thread is in a non-"C" locale. See
9+
# get_process_name() for the full backstory.
10+
#
11+
# This module is only compiled on Linux (build_hooks.py excludes it on Windows)
12+
# because it uses the POSIX per-thread locale APIs (newlocale/uselocale/
13+
# freelocale), which are not available on MSVC. Callers must not import this
14+
# module unconditionally: import it only on WSL, or guard with try/except ImportError.
15+
16+
17+
cdef extern from "locale.h" nogil:
18+
ctypedef void *locale_t
19+
int LC_ALL_MASK
20+
locale_t newlocale(int category_mask, const char *locale, locale_t base)
21+
locale_t uselocale(locale_t newloc)
22+
void freelocale(locale_t locobj)
23+
24+
25+
cdef class c_locale_guard:
26+
"""Context manager that pins the calling thread to the "C" locale.
27+
28+
Uses POSIX newlocale/uselocale/freelocale so other threads' view of the
29+
locale is unaffected. Restores the previous thread locale on exit.
30+
"""
31+
cdef locale_t _c_locale
32+
cdef locale_t _prev_locale
33+
cdef bint _active
34+
35+
def __cinit__(self):
36+
self._c_locale = <locale_t>0
37+
self._prev_locale = <locale_t>0
38+
self._active = False
39+
40+
def __enter__(self):
41+
self._c_locale = newlocale(LC_ALL_MASK, b"C", <locale_t>0)
42+
if self._c_locale == <locale_t>0:
43+
raise RuntimeError("Failed to create C locale")
44+
self._prev_locale = uselocale(self._c_locale)
45+
self._active = True
46+
return self
47+
48+
def __exit__(self, exc_type, exc_val, exc_tb):
49+
if self._active:
50+
uselocale(self._prev_locale)
51+
freelocale(self._c_locale)
52+
self._active = False
53+
return False

cuda_core/cuda/core/system/_system.pyx

Lines changed: 24 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,6 @@
1111
CUDA_BINDINGS_NVML_IS_COMPATIBLE: bool
1212

1313

14-
# POSIX per-thread locale APIs. We use these (rather than setlocale(3))
15-
# so the WSL workaround in get_process_name() doesn't perturb the locale
16-
# observed by other threads. locale_t is an opaque pointer in glibc.
17-
cdef extern from "locale.h" nogil:
18-
ctypedef void *locale_t
19-
int LC_ALL_MASK
20-
locale_t LC_GLOBAL_LOCALE
21-
locale_t newlocale(int category_mask, const char *locale, locale_t base)
22-
locale_t uselocale(locale_t newloc)
23-
void freelocale(locale_t locobj)
24-
25-
2614
cdef bint _detect_wsl():
2715
try:
2816
with open("/proc/sys/kernel/osrelease") as f:
@@ -34,6 +22,18 @@ cdef bint _detect_wsl():
3422

3523
cdef bint _IS_WSL = _detect_wsl()
3624

25+
26+
# The WSL locale guard lives in a separate module that is only compiled on
27+
# Linux (build_hooks.py excludes it on Windows), because it relies on POSIX
28+
# per-thread locale APIs that MSVC does not provide. We therefore import it
29+
# only when running under WSL; otherwise c_locale_guard is set to None, which
30+
# is safe because get_process_name() only enters the guard when _IS_WSL is True.
31+
if _IS_WSL:
32+
from cuda.core._utils._wsl_locale import c_locale_guard
33+
else:
34+
c_locale_guard = None
35+
36+
3737
try:
3838
from cuda.bindings._version import __version_tuple__ as _BINDINGS_VERSION
3939
except ImportError:
@@ -161,33 +161,24 @@ def get_process_name(pid: int) -> str:
161161
nvml.device_get_compute_running_processes_v3(dev_h)
162162
return nvml.system_get_process_name(pid)
163163

164-
cdef locale_t c_locale
165-
cdef locale_t prev_locale
166-
167164
initialize()
168165
if not _IS_WSL:
169166
return _get_process_name(pid)
170167

171168
# WSL workaround: nvmlSystemGetProcessName on WSL takes a wide-char
172-
# conversion path when the process locale is non-"C". That path walks
173-
# a UTF-16LE source buffer with a 4-byte stride (as if it were UTF-32LE)
174-
# and emits 5-byte UTF-8 sequences that look like garbage preceding the
175-
# trailing basename of /proc/<pid>/exe. CPython's startup unconditionally
176-
# calls setlocale(LC_ALL, ""), so essentially every cuda.core caller hits
177-
# this. The cached entry for the PID is set the first time NVML resolves
178-
# it (typically inside nvmlDeviceGetComputeRunningProcesses_v3), so to
179-
# recover a correct value we re-prime the cache under the "C" locale
180-
# before reading the name. We use the POSIX per-thread locale APIs so
181-
# other threads' view of the locale is unaffected.
182-
c_locale = newlocale(LC_ALL_MASK, b"C", <locale_t>0)
183-
if c_locale == <locale_t>0:
184-
raise RuntimeError("Failed to create C locale")
185-
prev_locale = uselocale(c_locale)
186-
try:
169+
# conversion path when the calling thread's locale is non-"C". That path
170+
# walks a UTF-16LE source buffer with a 4-byte stride (as if it were
171+
# UTF-32LE) and emits 5-byte UTF-8 sequences that look like garbage
172+
# preceding the trailing basename of /proc/<pid>/exe. CPython's startup
173+
# unconditionally calls setlocale(LC_ALL, ""), so essentially every
174+
# cuda.core caller hits this. The cached entry for the PID is set the
175+
# first time NVML resolves it (typically inside
176+
# nvmlDeviceGetComputeRunningProcesses_v3), so to recover a correct value
177+
# we re-prime the cache under the "C" locale before reading the name.
178+
# c_locale_guard uses POSIX per-thread locale APIs (see _wsl_locale.pyx)
179+
# so other threads' view of the locale is unaffected.
180+
with c_locale_guard(): # no-cython-lint
187181
return _get_process_name(pid)
188-
finally:
189-
uselocale(prev_locale)
190-
freelocale(c_locale)
191182

192183

193184
__all__ = [

0 commit comments

Comments
 (0)