
Commit 801737f

Store full_page_bytes in mi_heap_t.

Should avoid memory contention. Avoid casting *intptr_t to *Py_ssize_t. Include large and huge pages in the count (promote them eagerly to MI_BIN_FULL). Add a comment noting that abandoned pages can be lost from the count (their byte total is never subtracted).
1 parent 14b9696 commit 801737f

11 files changed

Lines changed: 276 additions & 83 deletions
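At its core, the change replaces one shared atomic (on the interpreter-wide abandoned pool) with a per-heap atomic on the hot path: each thread increments and decrements only its own heap's counter, and readers pay for a summation instead. A minimal sketch of the pattern, independent of mimalloc (all names below are illustrative, not from this commit):

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

/* Sketch: one counter per heap, written only by the owning thread's
 * alloc/free path, summed occasionally by a reader. The hot path never
 * contends on a shared counter. (mimalloc additionally cache-aligns the
 * pool-side counter; padding is elided here.) */
typedef struct {
    _Atomic(intptr_t) full_page_bytes;
} heap_t;

static void on_page_to_full(heap_t *heap, intptr_t page_bytes) {
    /* relaxed suffices: only the eventual total matters, not ordering */
    atomic_fetch_add_explicit(&heap->full_page_bytes, page_bytes,
                              memory_order_relaxed);
}

static intptr_t total_full_page_bytes(heap_t *heaps, size_t n) {
    intptr_t total = 0;
    for (size_t i = 0; i < n; i++) {
        total += atomic_load_explicit(&heaps[i].full_page_bytes,
                                      memory_order_relaxed);
    }
    return total;
}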


Include/internal/mimalloc/mimalloc/types.h
Lines changed: 9 additions & 2 deletions

@@ -517,9 +517,11 @@ typedef struct mi_abandoned_pool_s {
   // still be read.
   mi_decl_cache_align _Atomic(size_t) abandoned_readers; // = 0

-  // Total bytes (block_size * capacity) of pages currently in MI_BIN_FULL
-  // state whose pool association is this pool.
+#if MI_FULL_PAGE_BYTES
+  // Bytes (block_size * capacity) of full pages currently abandoned to this
+  // pool.
   mi_decl_cache_align _Atomic(intptr_t) full_page_bytes; // = 0
+#endif
 } mi_abandoned_pool_t;

@@ -592,6 +594,11 @@ struct mi_heap_s {
   uint8_t tag;          // custom identifier for this heap
   uint8_t debug_offset; // number of bytes to preserve when filling freed or uninitialized memory
   bool page_use_qsbr;   // should freeing pages be delayed using QSBR
+#if MI_FULL_PAGE_BYTES
+  // Bytes (block_size * capacity) of pages currently in MI_BIN_FULL state
+  // owned by this heap.
+  _Atomic(intptr_t) full_page_bytes;
+#endif
 };

Include/internal/pycore_gc.h
Lines changed: 2 additions & 0 deletions

@@ -337,6 +337,8 @@ extern int _PyGC_VisitStackRef(union _PyStackRef *ref, visitproc visit, void *ar
 #ifdef Py_GIL_DISABLED
 extern void _PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp,
                                            gcvisitobjects_t callback, void *arg);
+// Estimate of bytes allocated by mimalloc.
+PyAPI_FUNC(Py_ssize_t) _PyGC_GetHeapBytes(PyInterpreterState *interp);
 #endif

 #ifdef __cplusplus
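_PyGC_GetHeapBytes itself is implemented in one of the two changed files not shown on this page. Based on the counters this commit adds, a plausible shape is "sum the per-heap counters, add the abandoned pool's counter". The sketch below is a guess at that shape, not the commit's code; the heap-visiting helper and the abandoned-pool field path are assumptions:

// Hypothetical sketch -- not the commit's actual implementation.
// Assumed: some way to visit every mi_heap_t owned by the interpreter,
// abstracted here as a callback-based helper.
typedef void (*heap_visitor)(mi_heap_t *heap, void *arg);
extern void visit_interp_heaps(PyInterpreterState *interp,
                               heap_visitor v, void *arg);  // assumed helper

static void
sum_heap_bytes(mi_heap_t *heap, void *arg)
{
    // Per-heap counter: pages in MI_BIN_FULL owned by this heap.
    *(intptr_t *)arg += mi_atomic_load_relaxed(&heap->full_page_bytes);
}

Py_ssize_t
_PyGC_GetHeapBytes(PyInterpreterState *interp)
{
    intptr_t bytes = 0;
    visit_interp_heaps(interp, sum_heap_bytes, &bytes);
    // Pages abandoned while full moved their bytes from a heap counter
    // to the interpreter's abandoned-pool counter.
    bytes += mi_atomic_load_relaxed(
        &interp->mimalloc.abandoned_pool.full_page_bytes);  // path assumed
    // Abandoned full pages that die unreclaimed leave a positive residue
    // (the commit message's "lost" bytes), so treat this as an estimate
    // and clamp defensively.
    return bytes < 0 ? 0 : (Py_ssize_t)bytes;
}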

Include/internal/pycore_mimalloc.h
Lines changed: 5 additions & 0 deletions

@@ -36,6 +36,11 @@ typedef enum {
 #  define MI_TSAN 1
 #endif

+#ifdef Py_GIL_DISABLED
+// Track full-page byte totals on each mi_heap_t and mi_abandoned_pool_t.
+#  define MI_FULL_PAGE_BYTES 1
+#endif
+
 #ifdef __cplusplus
 extern "C++" {
 #endif

Lib/test/test_gc.py
Lines changed: 52 additions & 2 deletions

@@ -1271,8 +1271,58 @@ def test():
        assert_python_ok("-c", code_inside_function)


-@unittest.skipUnless(Py_GIL_DISABLED, "requires free-threaded GC")
-@unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
+
+@unittest.skipUnless(Py_GIL_DISABLED, "requires free-threaded GC")
+@unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
+class FreeThreadingTests(unittest.TestCase):
+    # Tests that are specific to the free-threading GC.
+
+    def test_gc_heap_bytes_large_allocs(self):
+        # The free-threaded GC threshold uses _PyGC_GetHeapBytes(), which
+        # sums mimalloc's full_page_bytes counters. Large/huge pages
+        # (>MI_MEDIUM_OBJ_SIZE_MAX, MI_BIN_HUGE) get eagerly promoted to
+        # MI_BIN_FULL by `_mi_malloc_generic` -- without that, mimalloc
+        # would never count these pages, and a cycle holding a large
+        # buffer would not register as memory pressure.
+        gc.collect()
+        baseline = _testinternalcapi.get_gc_heap_bytes()
+        size = 1 << 20  # 1 MiB
+        k = 5
+        data = [bytearray(size) for _ in range(k)]
+        after_alloc = _testinternalcapi.get_gc_heap_bytes()
+        # All k pages should be counted. Page size rounds up the request,
+        # so the increase should be at least k * size.
+        self.assertGreaterEqual(after_alloc - baseline, k * size)
+        del data
+        gc.collect()
+        after_free = _testinternalcapi.get_gc_heap_bytes()
+        # Freeing the lone block in each huge page un-fulls it. Allow some
+        # slop for unrelated allocations triggered by gc.collect().
+        self.assertLess(abs(after_free - baseline), size)
+
+    def test_gc_heap_bytes_many_small_allocs(self):
+        # Filling small pages should also bump the counter. Small/medium
+        # transitions are lazy (only when a page actually becomes full), so
+        # use enough allocations to fill many pages.
+        gc.collect()
+        baseline = _testinternalcapi.get_gc_heap_bytes()
+        n = 100_000
+        objs = [bytes(4) for i in range(n)]
+        after_alloc = _testinternalcapi.get_gc_heap_bytes()
+        print('small after alloc', baseline, after_alloc)
+        self.assertGreater(after_alloc - baseline, 1 << 20)
+        del objs
+        gc.collect()
+        after_free = _testinternalcapi.get_gc_heap_bytes()
+        print('small after free', baseline, after_free)
+        # Should drop substantially once the pages empty out.
+        self.assertLess(after_free - baseline, (after_alloc - baseline) // 2)
+
+    def test_gc_heap_bytes_nonneg(self):
+        # Counter is intptr_t and only increases or decreases via paired
+        # hooks; it must never go negative.
+        self.assertGreaterEqual(_testinternalcapi.get_gc_heap_bytes(), 0)
+
     def test_tuple_untrack_counts(self):
         # This ensures that the free-threaded GC is counting untracked tuples
         # in the "long_lived_total" count. This is required to avoid

Modules/_testinternalcapi.c
Lines changed: 7 additions & 0 deletions

@@ -2636,6 +2636,12 @@ get_long_lived_total(PyObject *self, PyObject *Py_UNUSED(ignored))
     return PyLong_FromInt64(PyInterpreterState_Get()->gc.long_lived_total);
 }

+static PyObject *
+get_gc_heap_bytes(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return PyLong_FromSsize_t(_PyGC_GetHeapBytes(PyInterpreterState_Get()));
+}
+
 #endif

 static PyObject *

@@ -3001,6 +3007,7 @@ static PyMethodDef module_functions[] = {
     {"get_tlbc", get_tlbc, METH_O, NULL},
     {"get_tlbc_id", get_tlbc_id, METH_O, NULL},
     {"get_long_lived_total", get_long_lived_total, METH_NOARGS},
+    {"get_gc_heap_bytes", get_gc_heap_bytes, METH_NOARGS},
 #endif
 #ifdef _Py_TIER2
     {"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS},

Objects/mimalloc/heap.c
Lines changed: 13 additions & 0 deletions

@@ -270,6 +270,11 @@ static void mi_heap_reset_pages(mi_heap_t* heap) {
   _mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages));
   heap->thread_delayed_free = NULL;
   heap->page_count = 0;
+#if MI_FULL_PAGE_BYTES
+  // All pages have been removed (destroyed, or transferred via
+  // mi_heap_absorb, which already moved the bytes to the destination heap).
+  mi_atomic_store_relaxed(&heap->full_page_bytes, (intptr_t)0);
+#endif
 }

 // called from `mi_heap_destroy` and `mi_heap_delete` to free the internal heap resources.

@@ -427,6 +432,14 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
   }
   mi_assert_internal(from->page_count == 0);

+#if MI_FULL_PAGE_BYTES
+  // The page-state hooks didn't fire for these transfers, so move the
+  // full_page_bytes accounting in bulk. mi_heap_reset_pages(from) below
+  // will zero `from->full_page_bytes`.
+  intptr_t bytes = mi_atomic_load_relaxed(&from->full_page_bytes);
+  mi_atomic_addi(&heap->full_page_bytes, bytes);
+#endif
+
   // and do outstanding delayed frees in the `from` heap
   // note: be careful here as the `heap` field in all those pages no longer point to `from`,
   // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a
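Absorption is a bulk transfer that the per-page hooks never see: the pages were already re-queued onto the destination heap, so the counter moves in one shot and the source is zeroed afterwards by mi_heap_reset_pages. A toy single-threaded model of that ordering (plain integers stand in for the atomics; all names are local to the example):

#include <assert.h>
#include <stdint.h>

/* Toy model of mi_heap_absorb's bulk counter transfer. */
typedef struct { intptr_t full_page_bytes; } heap_model;

static void absorb(heap_model *heap, heap_model *from) {
    /* Pages were already re-queued onto `heap` without hooks firing. */
    intptr_t bytes = from->full_page_bytes;  /* mi_atomic_load_relaxed */
    heap->full_page_bytes += bytes;          /* mi_atomic_addi */
    from->full_page_bytes = 0;               /* mi_heap_reset_pages(from) */
}

int main(void) {
    heap_model a = { 0 }, b = { 8192 };
    absorb(&a, &b);
    /* Total is conserved; the source ends at zero. */
    assert(a.full_page_bytes == 8192 && b.full_page_bytes == 0);
    return 0;
}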

Objects/mimalloc/init.c
Lines changed: 4 additions & 1 deletion

@@ -104,7 +104,10 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
   false,
   0,
   0,
-  0
+  0,
+#if MI_FULL_PAGE_BYTES
+  MI_ATOMIC_VAR_INIT(0), // full_page_bytes
+#endif
 };

 #define tld_empty_stats  ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats)))

Objects/mimalloc/page-queue.c
Lines changed: 4 additions & 2 deletions

@@ -151,7 +151,7 @@ static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t*
   uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size));
   mi_assert_internal(bin <= MI_BIN_FULL);
   mi_page_queue_t* pq = &heap->pages[bin];
-  mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size);
+  mi_assert_internal(bin >= MI_BIN_HUGE || page->xblock_size == pq->block_size);
   return pq;
 }

@@ -264,7 +264,9 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
                     (page->xblock_size == to->block_size && mi_page_queue_is_full(from)) ||
                     (page->xblock_size == from->block_size && mi_page_queue_is_full(to)) ||
                     (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) ||
-                    (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to)));
+                    (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to)) ||
+                    (mi_page_queue_is_huge(from) && mi_page_queue_is_full(to)) ||
+                    (mi_page_queue_is_full(from) && mi_page_queue_is_huge(to)));

   mi_heap_t* heap = mi_page_heap(page);
   if (page->prev != NULL) page->prev->next = page->next;

Objects/mimalloc/page.c
Lines changed: 135 additions & 13 deletions

@@ -255,6 +255,78 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
   mi_assert_internal(!force || page->local_free == NULL);
 }

+/* -----------------------------------------------------------
+  Full-page byte accounting (MI_FULL_PAGE_BYTES)
+
+  Maintain `mi_heap_t.full_page_bytes` (bytes of MI_BIN_FULL pages owned by
+  the heap) and `mi_abandoned_pool_t.full_page_bytes` (bytes of MI_BIN_FULL
+  pages currently abandoned to that pool). Page weight is
+  `mi_page_block_size(page) * page->capacity`. Capacity is stable while a
+  page is in the full queue (`mi_page_extend_free` only runs on non-full
+  queues), so inc and dec see the same value.
+
+  State machine:
+    to-full        : heap += size
+    from-full      : heap -= size
+    abandon a full : heap -= size; pool += size
+    reclaim a full : pool -= size; heap += size
+    free a full    : heap -= size
+
+  The in_full bit is unconditionally cleared by `mi_page_queue_remove`, so
+  `_mi_page_abandon` re-sets it after queue_remove to preserve the "this
+  page's bytes were transferred to the pool" marker through abandonment.
+  `_mi_page_reclaim` then routes such pages straight to MI_BIN_FULL, so
+  `mi_page_queue_push` keeps the bit set; a subsequent unfull/free fires
+  the matching dec.
+
+  Large/huge pages (block_size > MI_MEDIUM_OBJ_SIZE_MAX) are 1-block pages
+  in MI_BIN_HUGE; mimalloc never walks that queue on a subsequent alloc, so
+  it would never call `mi_page_to_full` on them. `_mi_malloc_generic`
+  therefore eagerly calls `mi_page_to_full` on a freshly-filled huge page
+  (see the MI_FULL_PAGE_BYTES block at the bottom of that function).
+  Inc/dec then proceed identically to small/medium pages.
+
+  Known minor leak: if a page abandoned-while-full later becomes empty and
+  is then freed, the +size we added on abandon is never subtracted.
+----------------------------------------------------------- */
+
+#if MI_FULL_PAGE_BYTES
+static inline intptr_t mi_page_full_size(mi_page_t* page) {
+  return (intptr_t)(mi_page_block_size(page) * (size_t)page->capacity);
+}
+
+static void mi_page_full_inc(mi_page_t* page) {
+  mi_atomic_addi(&mi_page_heap(page)->full_page_bytes, mi_page_full_size(page));
+}
+
+static void mi_page_full_dec(mi_page_t* page) {
+  mi_atomic_addi(&mi_page_heap(page)->full_page_bytes, -mi_page_full_size(page));
+}
+
+// Called from `_mi_page_abandon` *before* the page's heap pointer is cleared.
+// Transfers the page's bytes from its heap to the pool that will own the
+// abandoned page. No-op if the page is not currently in MI_BIN_FULL.
+static void mi_page_full_abandon(mi_page_t* page) {
+  if (!mi_page_is_in_full(page)) return;
+  intptr_t bytes = mi_page_full_size(page);
+  mi_heap_t* heap = mi_page_heap(page);
+  mi_atomic_addi(&heap->full_page_bytes, -bytes);
+  mi_atomic_addi(&heap->tld->segments.abandoned->full_page_bytes, bytes);
+}
+
+// Called from `_mi_page_reclaim` when a page abandoned-while-full is
+// returning to a heap. in_full=true here means "this page's bytes are
+// currently in the pool counter from abandon". Transfer them: pool -= size,
+// new-heap += size. The caller routes the page directly into MI_BIN_FULL,
+// so the in_full bit (and the matching dec hook on free/unfull) survives.
+static void mi_page_full_reclaim(mi_page_t* page) {
+  if (!mi_page_is_in_full(page)) return;
+  intptr_t bytes = mi_page_full_size(page);
+  mi_heap_t* heap = mi_page_heap(page);
+  mi_atomic_addi(&heap->tld->segments.abandoned->full_page_bytes, -bytes);
+  mi_atomic_addi(&heap->full_page_bytes, bytes);
+}
+#endif // MI_FULL_PAGE_BYTES


 /* -----------------------------------------------------------

@@ -271,8 +343,24 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
   mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
 #endif

-  // TODO: push on full queue immediately if it is full?
-  mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
+  mi_page_queue_t* pq;
+#if MI_FULL_PAGE_BYTES
+  // If the page was abandoned full (in_full preserved as marker), route
+  // it directly to MI_BIN_FULL. Pushing to the size-bucket queue would
+  // rely on a later alloc walking that queue to promote it via
+  // mi_page_to_full -- which happens for small/medium bins but never for
+  // MI_BIN_HUGE, so a reclaimed full huge page would otherwise leave the
+  // pool counter without re-crediting any heap. mi_page_full_reclaim
+  // does the pool-to-heap transfer.
+  if (mi_page_is_in_full(page)) {
+    pq = &heap->pages[MI_BIN_FULL];
+  } else {
+    pq = mi_page_queue(heap, mi_page_block_size(page));
+  }
+  mi_page_full_reclaim(page);
+#else
+  pq = mi_page_queue(heap, mi_page_block_size(page));
+#endif
   mi_page_queue_push(heap, pq, page);
   _PyMem_mi_page_reclaimed(page);
   mi_assert_expensive(_mi_page_is_valid(page));

@@ -360,8 +448,8 @@ void _mi_page_unfull(mi_page_t* page) {
   mi_assert_internal(mi_page_is_in_full(page));
   if (!mi_page_is_in_full(page)) return;

-#ifdef Py_GIL_DISABLED
-  _PyMem_mi_page_full_dec(page);
+#if MI_FULL_PAGE_BYTES
+  mi_page_full_dec(page);
 #endif

   mi_heap_t* heap = mi_page_heap(page);

@@ -378,8 +466,8 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) {
   mi_assert_internal(!mi_page_is_in_full(page));

   if (mi_page_is_in_full(page)) return;
-#ifdef Py_GIL_DISABLED
-  _PyMem_mi_page_full_inc(page);
+#if MI_FULL_PAGE_BYTES
+  mi_page_full_inc(page);
 #endif
   mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page);
   _mi_page_free_collect(page,false);  // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set

@@ -398,6 +486,13 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {

   mi_heap_t* pheap = mi_page_heap(page);

+#if MI_FULL_PAGE_BYTES
+  // Capture in_full while the heap pointer is still valid; transfer the
+  // bytes from the heap counter to the pool counter. Must run before
+  // mi_page_queue_remove, which clears the in_full bit unconditionally.
+  bool was_in_full = mi_page_is_in_full(page);
+  mi_page_full_abandon(page);
+#endif
 #ifdef Py_GIL_DISABLED
   if (page->qsbr_node.next != NULL) {
     // remove from QSBR queue, but keep the goal

@@ -413,6 +508,15 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
   mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE);
   mi_page_set_heap(page, NULL);

+#if MI_FULL_PAGE_BYTES
+  // Preserve the in_full marker through abandonment so `_mi_page_reclaim`'s
+  // `mi_page_full_reclaim` call can transfer the bytes back to the
+  // reclaiming heap. Nothing reads in_full on a heap-less page.
+  if (was_in_full) {
+    mi_page_set_in_full(page, true);
+  }
+#endif
+
 #if (MI_DEBUG>1) && !MI_TRACK_ENABLED
   // check there are no references left..
   for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->keys)) {

@@ -442,12 +546,16 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
 #ifdef Py_GIL_DISABLED
   mi_assert_internal(page->qsbr_goal == 0);
   mi_assert_internal(page->qsbr_node.next == NULL);
-  // Defensive: a full page whose last block is freed locally goes through
+#endif
+#if MI_FULL_PAGE_BYTES
+  // A full page whose last block is freed locally goes through
   // _mi_page_retire -> _PyMem_mi_page_maybe_free -> _mi_page_free without
-  // ever calling _mi_page_unfull, so the per-thread full-page counter must
-  // be decremented here to maintain the invariant.
+  // ever calling _mi_page_unfull, so the heap's full_page_bytes counter
+  // must be decremented here to maintain the invariant. `heap` is non-NULL
+  // for any page reaching _mi_page_free (abandoned pages take the
+  // segment-level cleanup path instead).
   if (mi_page_is_in_full(page)) {
-    _PyMem_mi_page_full_dec(page);
+    mi_page_full_dec(page);
   }
 #endif

@@ -977,14 +1085,28 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment)
   mi_assert_internal(mi_page_block_size(page) >= size);

   // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc)
+  void* p;
   if mi_unlikely(zero && page->xblock_size == 0) {
     // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case.
-    void* p = _mi_page_malloc(heap, page, size, false);
+    p = _mi_page_malloc(heap, page, size, false);
     mi_assert_internal(p != NULL);
     _mi_memzero_aligned(p, mi_page_usable_block_size(page));
-    return p;
   }
   else {
-    return _mi_page_malloc(heap, page, size, zero);
+    p = _mi_page_malloc(heap, page, size, zero);
+  }
+
+#if MI_FULL_PAGE_BYTES
+  // Eagerly promote a freshly-filled huge page (1 block per page, in
+  // MI_BIN_HUGE) to MI_BIN_FULL so its bytes get counted. See the
+  // "Full-page byte accounting" comment block above.
+  if (p != NULL && !mi_page_immediate_available(page)) {
+    mi_page_queue_t* page_pq = mi_page_queue_of(page);
+    if (mi_page_queue_is_huge(page_pq) && !mi_page_is_in_full(page)) {
+      mi_page_to_full(page, page_pq);
+    }
   }
+#endif
+
+  return p;
 }
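The accounting state machine documented at the top of page.c is small enough to check in isolation. The following self-contained model (not mimalloc code; every name is local to the example) replays the transitions for one page and asserts that the counters balance in the normal life cycles, and that the documented abandon-then-die case leaks exactly one page weight into the pool counter:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Model of the MI_FULL_PAGE_BYTES state machine. Plain intptr_t stands
 * in for the atomics; single-threaded, so ordering is not modeled. */
static intptr_t heap_bytes = 0;  /* models mi_heap_t.full_page_bytes */
static intptr_t pool_bytes = 0;  /* models mi_abandoned_pool_t.full_page_bytes */

static void to_full(intptr_t sz)      { heap_bytes += sz; }
static void unfull(intptr_t sz)       { heap_bytes -= sz; }
static void abandon_full(intptr_t sz) { heap_bytes -= sz; pool_bytes += sz; }
static void reclaim_full(intptr_t sz) { pool_bytes -= sz; heap_bytes += sz; }
static void free_full(intptr_t sz)    { heap_bytes -= sz; }

int main(void) {
    const intptr_t sz = 4096;  /* block_size * capacity for some page */

    /* Normal life cycle: fill, then drain; no residue. */
    to_full(sz); unfull(sz);
    assert(heap_bytes == 0 && pool_bytes == 0);

    /* Abandon while full, reclaim elsewhere, then free: still balanced. */
    to_full(sz); abandon_full(sz); reclaim_full(sz); free_full(sz);
    assert(heap_bytes == 0 && pool_bytes == 0);

    /* The documented leak: abandon while full, page dies unreclaimed.
     * The +sz added to the pool on abandon is never subtracted. */
    to_full(sz); abandon_full(sz);
    assert(heap_bytes == 0 && pool_bytes == sz);
    printf("leaked pool residue: %ld bytes\n", (long)pool_bytes);
    return 0;
}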
