1111CUDA_BINDINGS_NVML_IS_COMPATIBLE: bool
1212
1313
14- # POSIX per-thread locale APIs. We use these (rather than setlocale(3))
15- # so the WSL workaround in get_process_name() doesn't perturb the locale
16- # observed by other threads. locale_t is an opaque pointer in glibc.
17- cdef extern from " locale.h" nogil:
18- ctypedef void * locale_t
19- int LC_ALL_MASK
20- locale_t LC_GLOBAL_LOCALE
21- locale_t newlocale(int category_mask, const char * locale, locale_t base)
22- locale_t uselocale(locale_t newloc)
23- void freelocale(locale_t locobj)
24-
25-
2614cdef bint _detect_wsl():
2715 try :
2816 with open (" /proc/sys/kernel/osrelease" ) as f:
@@ -34,6 +22,18 @@ cdef bint _detect_wsl():
3422
3523cdef bint _IS_WSL = _detect_wsl()
3624
25+
26+ # The WSL locale guard lives in a separate module that is only compiled on
27+ # Linux (build_hooks.py excludes it on Windows), because it relies on POSIX
28+ # per-thread locale APIs that MSVC does not provide. The import is only
29+ # attempted when _IS_WSL is true; otherwise c_locale_guard is set to None,
30+ # which is safe because the guard is only entered on the WSL path.
31+ if _IS_WSL:
32+ from cuda.core._utils._wsl_locale import c_locale_guard
33+ else :
34+ c_locale_guard = None
35+
36+
3737try :
3838 from cuda.bindings._version import __version_tuple__ as _BINDINGS_VERSION
3939except ImportError :
@@ -161,33 +161,24 @@ def get_process_name(pid: int) -> str:
161161 nvml.device_get_compute_running_processes_v3(dev_h)
162162 return nvml.system_get_process_name(pid)
163163
164- cdef locale_t c_locale
165- cdef locale_t prev_locale
166-
167164 initialize()
168165 if not _IS_WSL:
169166 return _get_process_name(pid)
170167
171168 # WSL workaround: nvmlSystemGetProcessName on WSL takes a wide-char
172- # conversion path when the process locale is non-"C". That path walks
173- # a UTF-16LE source buffer with a 4-byte stride (as if it were UTF-32LE)
174- # and emits 5-byte UTF-8 sequences that look like garbage preceding the
175- # trailing basename of /proc/<pid>/exe. CPython's startup unconditionally
176- # calls setlocale(LC_ALL, ""), so essentially every cuda.core caller hits
177- # this. The cached entry for the PID is set the first time NVML resolves
178- # it (typically inside nvmlDeviceGetComputeRunningProcesses_v3), so to
179- # recover a correct value we re-prime the cache under the "C" locale
180- # before reading the name. We use the POSIX per-thread locale APIs so
181- # other threads' view of the locale is unaffected.
182- c_locale = newlocale(LC_ALL_MASK, b" C" , < locale_t> 0 )
183- if c_locale == < locale_t> 0 :
184- raise RuntimeError (" Failed to create C locale" )
185- prev_locale = uselocale(c_locale)
186- try :
169+ # conversion path when the calling thread's locale is non-"C". That path
170+ # walks a UTF-16LE source buffer with a 4-byte stride (as if it were
171+ # UTF-32LE) and emits 5-byte UTF-8 sequences that look like garbage
172+ # preceding the trailing basename of /proc/<pid>/exe. CPython's startup
173+ # unconditionally calls setlocale(LC_ALL, ""), so essentially every
174+ # cuda.core caller hits this. The cached entry for the PID is set the
175+ # first time NVML resolves it (typically inside
176+ # nvmlDeviceGetComputeRunningProcesses_v3), so to recover a correct value
177+ # we re-prime the cache under the "C" locale before reading the name.
178+ # c_locale_guard uses POSIX per-thread locale APIs (see _wsl_locale.pyx)
179+ # so other threads' view of the locale is unaffected.
180+ with c_locale_guard(): # no-cython-lint
187181 return _get_process_name(pid)
188- finally :
189- uselocale(prev_locale)
190- freelocale(c_locale)
191182
192183
193184__all__ = [
0 commit comments