|
| 1 | + include/mimalloc/prim.h | 18 +++++++++++++++ |
| 2 | + src/init.c | 61 ++++++++++++++++++++++++++++++++++++++++++++----- |
| 3 | + 2 files changed, 73 insertions(+), 6 deletions(-) |
| 4 | + |
| 5 | +diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h |
| 6 | +index 1638235b..5caab9fb 100644 |
| 7 | +--- a/include/mimalloc/prim.h |
| 8 | ++++ b/include/mimalloc/prim.h |
| 9 | +@@ -420,12 +420,30 @@ static inline mi_theap_t* _mi_theap_cached(void) { |
| 10 | + |
| 11 | + extern mi_decl_hidden size_t _mi_theap_default_slot; |
| 12 | + extern mi_decl_hidden size_t _mi_theap_cached_slot; |
| 13 | ++extern mi_decl_hidden bool _mi_theap_use_win_tls_api; |
| 14 | ++extern mi_decl_hidden bool _mi_theap_use_compiler_tls; |
| 15 | ++extern mi_decl_hidden size_t _mi_theap_default_tls_index; |
| 16 | ++extern mi_decl_hidden size_t _mi_theap_cached_tls_index; |
| 17 | ++extern mi_decl_hidden mi_decl_thread mi_theap_t* __mi_theap_default_fallback; |
| 18 | ++extern mi_decl_hidden mi_decl_thread mi_theap_t* __mi_theap_cached_fallback; |
| 19 | + |
| 20 | + static inline mi_theap_t* _mi_theap_default(void) { |
| 21 | ++ if mi_unlikely(_mi_theap_use_compiler_tls) { |
| 22 | ++ return __mi_theap_default_fallback; |
| 23 | ++ } |
| 24 | ++ if mi_unlikely(_mi_theap_use_win_tls_api && _mi_theap_default_tls_index != (size_t)TLS_OUT_OF_INDEXES) { |
| 25 | ++ return (mi_theap_t*)TlsGetValue((DWORD)_mi_theap_default_tls_index); |
| 26 | ++ } |
| 27 | + return (mi_theap_t*)mi_prim_tls_slot(_mi_theap_default_slot); // valid initial "last user slot" so it returns NULL at first leading to slot initialization |
| 28 | + } |
| 29 | + |
| 30 | + static inline mi_theap_t* _mi_theap_cached(void) { |
| 31 | ++ if mi_unlikely(_mi_theap_use_compiler_tls) { |
| 32 | ++ return __mi_theap_cached_fallback; |
| 33 | ++ } |
| 34 | ++ if mi_unlikely(_mi_theap_use_win_tls_api && _mi_theap_cached_tls_index != (size_t)TLS_OUT_OF_INDEXES) { |
| 35 | ++ return (mi_theap_t*)TlsGetValue((DWORD)_mi_theap_cached_tls_index); |
| 36 | ++ } |
| 37 | + return (mi_theap_t*)mi_prim_tls_slot(_mi_theap_cached_slot); |
| 38 | + } |
| 39 | + |
| 40 | +diff --git a/src/init.c b/src/init.c |
| 41 | +index c037c432..c3987b0d 100644 |
| 42 | +--- a/src/init.c |
| 43 | ++++ b/src/init.c |
| 44 | +@@ -696,13 +696,46 @@ mi_decl_cold mi_decl_noinline mi_theap_t* _mi_theap_empty_get(void) { |
| 45 | + mi_decl_hidden size_t _mi_theap_default_slot = MI_TLS_USER_LAST_SLOT; |
| 46 | + mi_decl_hidden size_t _mi_theap_cached_slot = MI_TLS_USER_LAST_SLOT; |
| 47 | + |
| 48 | ++// Fallback if we cannot allocate a TLS index in the first 63 TLS slots. |
| 49 | ++// In that case we use the Win32 TLS APIs (TlsGetValue/TlsSetValue) instead of directly |
| 50 | ++// accessing the TEB TLS user slots. |
| 51 | ++mi_decl_hidden bool _mi_theap_use_win_tls_api = false; |
| 52 | ++mi_decl_hidden size_t _mi_theap_default_tls_index = (size_t)TLS_OUT_OF_INDEXES; |
| 53 | ++mi_decl_hidden size_t _mi_theap_cached_tls_index = (size_t)TLS_OUT_OF_INDEXES; |
| 54 | ++ |
| 55 | ++// Last-resort fallback if the process has exhausted TLS indices (TlsAlloc returns TLS_OUT_OF_INDEXES). |
| 56 | ++// We then store the theap pointers in compiler thread-local storage instead. |
| 57 | ++mi_decl_hidden bool _mi_theap_use_compiler_tls = false; |
| 58 | ++mi_decl_hidden mi_decl_thread mi_theap_t* __mi_theap_default_fallback = (mi_theap_t*)&_mi_theap_empty; |
| 59 | ++mi_decl_hidden mi_decl_thread mi_theap_t* __mi_theap_cached_fallback = (mi_theap_t*)&_mi_theap_empty; |
| 60 | ++ |
| 61 | + mi_decl_cold mi_theap_t* _mi_tls_slots_init(void) { |
| 62 | + static mi_atomic_once_t tls_slots_init; |
| 63 | + if (mi_atomic_once(&tls_slots_init)) { |
| 64 | +- _mi_theap_default_slot = TlsAlloc() + MI_TLS_USER_BASE; |
| 65 | +- _mi_theap_cached_slot = TlsAlloc() + MI_TLS_USER_BASE; |
| 66 | +- if (_mi_theap_cached_slot >= MI_TLS_USER_LAST_SLOT) { |
| 67 | +- _mi_error_message(EFAULT, "unable to allocate fast TLS user slot (0x%zx)\n", _mi_theap_cached_slot); |
| 68 | ++ const DWORD default_index = TlsAlloc(); |
| 69 | ++ const DWORD cached_index = TlsAlloc(); |
| 70 | ++ |
| 71 | ++ // If either TLS index could not be allocated, neither the fast TEB slots nor the Win32 TLS API can be used. |
| 72 | ++ if (default_index == TLS_OUT_OF_INDEXES || cached_index == TLS_OUT_OF_INDEXES) { |
| 73 | ++ // The process has exhausted all TLS indices. Fall back to compiler TLS. |
| 74 | ++ _mi_theap_use_compiler_tls = true; |
| 75 | ++ _mi_theap_use_win_tls_api = false; |
| 76 | ++ // keep fast slots at sentinel values; do not write to TEB slots. |
| 77 | ++ } |
| 78 | ++ // We can only use the fast TEB TLS user slots if the TLS index fits in the first 63 slots. |
| 79 | ++ // Slot 63 is reserved as the initial sentinel value (so a first read returns NULL). |
| 80 | ++ else if (default_index < 63 && cached_index < 63) { |
| 81 | ++ _mi_theap_default_slot = (size_t)default_index + MI_TLS_USER_BASE; |
| 82 | ++ _mi_theap_cached_slot = (size_t)cached_index + MI_TLS_USER_BASE; |
| 83 | ++ _mi_theap_use_win_tls_api = false; |
| 84 | ++ _mi_theap_use_compiler_tls = false; |
| 85 | ++ } |
| 86 | ++ else { |
| 87 | ++ // Fall back to Win32 TLS APIs for correctness in TLS-slot-heavy processes. |
| 88 | ++ _mi_theap_default_tls_index = (size_t)default_index; |
| 89 | ++ _mi_theap_cached_tls_index = (size_t)cached_index; |
| 90 | ++ _mi_theap_use_win_tls_api = true; |
| 91 | ++ _mi_theap_use_compiler_tls = false; |
| 92 | + } |
| 93 | + } |
| 94 | + return (mi_theap_t*)&_mi_theap_empty; |
| 95 | +@@ -732,7 +765,15 @@ void _mi_theap_cached_set(mi_theap_t* theap) { |
| 96 | + mi_prim_tls_slot_set(MI_TLS_MODEL_FIXED_SLOT_CACHED, theap); |
| 97 | + #elif MI_TLS_MODEL_DYNAMIC_WIN32 |
| 98 | + _mi_tls_slots_init(); |
| 99 | +- mi_prim_tls_slot_set(_mi_theap_cached_slot, theap); |
| 100 | ++ if (mi_unlikely(_mi_theap_use_compiler_tls)) { |
| 101 | ++ __mi_theap_cached_fallback = theap; |
| 102 | ++ } |
| 103 | ++ else if (mi_unlikely(_mi_theap_use_win_tls_api)) { |
| 104 | ++ TlsSetValue((DWORD)_mi_theap_cached_tls_index, theap); |
| 105 | ++ } |
| 106 | ++ else { |
| 107 | ++ mi_prim_tls_slot_set(_mi_theap_cached_slot, theap); |
| 108 | ++ } |
| 109 | + #elif MI_TLS_MODEL_DYNAMIC_PTHREADS |
| 110 | + _mi_tls_keys_init(); |
| 111 | + if (_mi_theap_cached_key!=0) pthread_setspecific(_mi_theap_cached_key, theap); |
| 112 | +@@ -748,7 +789,15 @@ void _mi_theap_default_set(mi_theap_t* theap) { |
| 113 | + mi_prim_tls_slot_set(MI_TLS_MODEL_FIXED_SLOT_DEFAULT, theap); |
| 114 | + #elif MI_TLS_MODEL_DYNAMIC_WIN32 |
| 115 | + _mi_tls_slots_init(); |
| 116 | +- mi_prim_tls_slot_set(_mi_theap_default_slot, theap); |
| 117 | ++ if (mi_unlikely(_mi_theap_use_compiler_tls)) { |
| 118 | ++ __mi_theap_default_fallback = theap; |
| 119 | ++ } |
| 120 | ++ else if (mi_unlikely(_mi_theap_use_win_tls_api)) { |
| 121 | ++ TlsSetValue((DWORD)_mi_theap_default_tls_index, theap); |
| 122 | ++ } |
| 123 | ++ else { |
| 124 | ++ mi_prim_tls_slot_set(_mi_theap_default_slot, theap); |
| 125 | ++ } |
| 126 | + #elif MI_TLS_MODEL_DYNAMIC_PTHREADS |
| 127 | + _mi_tls_keys_init(); |
| 128 | + if (_mi_theap_default_key!=0) pthread_setspecific(_mi_theap_default_key, theap); |
0 commit comments