|
10 | 10 |
|
11 | 11 | CUDA_BINDINGS_NVML_IS_COMPATIBLE: bool |
12 | 12 |
|
| 13 | + |
cdef bint _detect_wsl():
    """Return True when the running kernel identifies itself as WSL.

    Reads ``/proc/sys/kernel/osrelease`` and searches it, case-insensitively,
    for the substrings "microsoft" or "wsl". Returns False when the file
    cannot be opened or read (any ``OSError``).
    """
    try:
        # Pin the decoding instead of relying on the locale-dependent default,
        # and replace undecodable bytes rather than raising: a decode error is
        # not an OSError and would otherwise escape the handler below.
        with open(
            "/proc/sys/kernel/osrelease", encoding="utf-8", errors="replace"
        ) as f:
            release = f.read().lower()
    except OSError:
        return False
    return "microsoft" in release or "wsl" in release
| 21 | + |
| 22 | + |
# Evaluated once, when this module is imported.
cdef bint _IS_WSL = _detect_wsl()


# c_locale_guard lives in a separate module that is only compiled on Linux
# (build_hooks.py excludes it on Windows), because it relies on POSIX
# per-thread locale APIs that MSVC does not provide. The import is therefore
# attempted only when WSL was detected; in every other case the name stays
# bound to None and must not be called, so users have to check _IS_WSL first.
c_locale_guard = None
if _IS_WSL:
    from cuda.core._utils._wsl_locale import c_locale_guard
| 35 | + |
| 36 | + |
13 | 37 | try: |
14 | 38 | from cuda.bindings._version import __version_tuple__ as _BINDINGS_VERSION |
15 | 39 | except ImportError: |
@@ -127,8 +151,37 @@ def get_process_name(pid: int) -> str: |
127 | 151 | name: str |
128 | 152 | The process name. |
129 | 153 | """ |
| 154 | + def _get_process_name(pid) -> str: |
| 155 | + # NVML caches process names on a per-PID basis when queried via |
| 156 | + # nvmlSystemGetProcessName, and the cache is populated when enumerating |
| 157 | + # running processes on devices. To ensure the name is cached for the |
| 158 | + # requested PID, we walk all devices and query their running processes. |
| 159 | + for i in range(nvml.device_get_count_v2()): |
| 160 | + try: |
| 161 | + dev_h = nvml.device_get_handle_by_index_v2(i) |
| 162 | + nvml.device_get_compute_running_processes_v3(dev_h) |
| 163 | + except nvml.NvmlError: |
| 164 | + continue |
| 165 | + return nvml.system_get_process_name(pid) |
| 166 | + |
130 | 167 | initialize() |
131 | | - return nvml.system_get_process_name(pid) |
| 168 | + if not _IS_WSL: |
| 169 | + return _get_process_name(pid) |
| 170 | + |
| 171 | + # WSL workaround: nvmlSystemGetProcessName on WSL takes a wide-char |
| 172 | + # conversion path when the calling thread's locale is non-"C". That path |
| 173 | + # walks a UTF-16LE source buffer with a 4-byte stride (as if it were |
| 174 | + # UTF-32LE) and emits 5-byte UTF-8 sequences that look like garbage |
| 175 | + # preceding the trailing basename of /proc/<pid>/exe. CPython's startup |
| 176 | + # unconditionally calls setlocale(LC_ALL, ""), so essentially every |
| 177 | + # cuda.core caller hits this. The cached entry for the PID is set the |
| 178 | + # first time NVML resolves it (typically inside |
| 179 | + # nvmlDeviceGetComputeRunningProcesses_v3), so to recover a correct value |
| 180 | + # we re-prime the cache under the "C" locale before reading the name. |
| 181 | + # c_locale_guard uses POSIX per-thread locale APIs (see _wsl_locale.pyx) |
| 182 | + # so other threads' view of the locale is unaffected. |
| 183 | + with c_locale_guard(): # no-cython-lint |
| 184 | + return _get_process_name(pid) |
132 | 185 |
|
133 | 186 |
|
134 | 187 | __all__ = [ |
|
0 commit comments