From e915425cfc9d1177654fbc3143dce8e7f7c7ebbf Mon Sep 17 00:00:00 2001 From: edalzell Date: Fri, 12 Jun 2026 14:03:23 -0700 Subject: [PATCH 1/5] Single-pass Stache warm() to reduce peak memory Previously warm() cached all M items simultaneously in $this->fileItems while iterating N indexes, holding the full collection in memory throughout. For large collections with rich content (e.g. Bard fields) this could exhaust available memory. The fix inverts the loop: a single pass loads one item at a time, feeds all Value-based indexes simultaneously, then lets the item be GC'd before the next one loads. Peak memory is now proportional to one item plus N small [key => value] accumulator arrays rather than all M items at once. Indexes that don't implement getItemValue() (e.g. Terms/Associations) are unaffected and still update via their own data sources. Adds Index::setItems(array): static to support writing accumulated data back to an index without going through the per-item update path. --- src/Stache/Indexes/Index.php | 7 ++ src/Stache/Stores/Store.php | 26 +++++- tests/Stache/Stores/StoreWarmTest.php | 118 ++++++++++++++++++++++++++ 3 files changed, 147 insertions(+), 4 deletions(-) create mode 100644 tests/Stache/Stores/StoreWarmTest.php diff --git a/src/Stache/Indexes/Index.php b/src/Stache/Indexes/Index.php index 61182a2e005..00126931048 100644 --- a/src/Stache/Indexes/Index.php +++ b/src/Stache/Indexes/Index.php @@ -59,6 +59,13 @@ public function push($value) $this->items[] = $value; } + public function setItems(array $items): static + { + $this->items = $items; + + return $this; + } + public function load() { if ($this->loaded) { diff --git a/src/Stache/Stores/Store.php b/src/Stache/Stores/Store.php index c220b679e1b..5dfc0bfa97a 100644 --- a/src/Stache/Stores/Store.php +++ b/src/Stache/Stores/Store.php @@ -406,12 +406,30 @@ public function clear() public function warm() { - $this->shouldCacheFileItems = true; + $indexes = $this->resolveIndexes(); - 
$this->resolveIndexes()->each->update(); + // Partition: indexes implementing getItemValue() can be built in a single pass. + // Others (e.g. Terms/Associations) query their own data sources independently. + [$valueIndexes, $otherIndexes] = $indexes->partition( + fn ($index) => method_exists($index, 'getItemValue') + ); - $this->shouldCacheFileItems = false; - $this->fileItems = null; + // Single pass: hold one item in memory at a time while feeding all value indexes. + $accumulated = $valueIndexes->map(fn () => [])->all(); + + foreach ($this->paths()->keys() as $key) { + $item = $this->getItem($key); + + foreach ($valueIndexes as $name => $index) { + $accumulated[$name][$key] = $index->getItemValue($item); + } + } + + $valueIndexes->each(function ($index, $name) use ($accumulated) { + $index->setItems($accumulated[$name])->cache(); + }); + + $otherIndexes->each->update(); } public function keys() diff --git a/tests/Stache/Stores/StoreWarmTest.php b/tests/Stache/Stores/StoreWarmTest.php new file mode 100644 index 00000000000..67aaa44cd91 --- /dev/null +++ b/tests/Stache/Stores/StoreWarmTest.php @@ -0,0 +1,118 @@ +withItems([ + 'key-a' => 'Alpha', + 'key-b' => 'Beta', + ]); + + Stache::registerStore($store); + + $store->warm(); + + $this->assertEquals( + ['key-a' => 'Alpha', 'key-b' => 'Beta'], + $store->index('name')->items()->all() + ); + } + + #[Test] + public function it_handles_an_empty_store(): void + { + $store = (new WarmableTestStore)->withItems([]); + + Stache::registerStore($store); + + $store->warm(); + + $this->assertEquals([], $store->index('name')->items()->all()); + } + + #[Test] + public function it_updates_non_value_indexes_via_their_own_update_method(): void + { + $store = (new WarmableTestStore)->withItems([]); + + Stache::registerStore($store); + + $store->warm(); + + $this->assertEquals( + ['static-key' => 'static-value'], + $store->index('static')->items()->all() + ); + } +} + +class WarmableTestStore extends Store +{ + protected array $items 
= []; + + protected $defaultIndexes = []; + + protected $storeIndexes = [ + 'name' => WarmableNameIndex::class, + 'static' => WarmableStaticIndex::class, + ]; + + public function key(): string + { + return 'warmable'; + } + + public function withItems(array $items): static + { + $this->items = $items; + + return $this; + } + + public function paths() + { + return collect(array_fill_keys(array_keys($this->items), '/fake')); + } + + public function getItem($key): string + { + return $this->items[$key] ?? ''; + } + + public function getItemKey($item): string + { + return $item; + } + + public function getItemValues($keys, $valueIndex, $keyIndex): array + { + return []; + } +} + +class WarmableNameIndex extends Value +{ + public function getItemValue($item): string + { + return $item; + } +} + +class WarmableStaticIndex extends Index +{ + public function getItems(): array + { + return ['static-key' => 'static-value']; + } +} From 7ebf93b7edc89d2c9a5d87568ef58be2c8341362 Mon Sep 17 00:00:00 2001 From: edalzell Date: Fri, 12 Jun 2026 14:36:54 -0700 Subject: [PATCH 2/5] Use lazy() in Associations::getItems() to reduce memory Replaces get() with lazy() so entries are chunked rather than all loaded simultaneously. 
--- src/Stache/Indexes/Terms/Associations.php | 2 +- .../Stache/Indexes/Terms/AssociationsTest.php | 88 +++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 tests/Stache/Indexes/Terms/AssociationsTest.php diff --git a/src/Stache/Indexes/Terms/Associations.php b/src/Stache/Indexes/Terms/Associations.php index 1ddef3781e3..059fa390191 100644 --- a/src/Stache/Indexes/Terms/Associations.php +++ b/src/Stache/Indexes/Terms/Associations.php @@ -15,7 +15,7 @@ public function getItems() ->flatMap(function ($collection) use ($handle) { return $collection->queryEntries() ->where($handle, '<>', null) - ->get() + ->lazy() ->flatMap(function ($entry) use ($handle) { return collect($entry->value($handle)) ->map(function ($value) use ($entry) { diff --git a/tests/Stache/Indexes/Terms/AssociationsTest.php b/tests/Stache/Indexes/Terms/AssociationsTest.php new file mode 100644 index 00000000000..fcb2ee76a20 --- /dev/null +++ b/tests/Stache/Indexes/Terms/AssociationsTest.php @@ -0,0 +1,88 @@ +save(); + + Collection::make('blog') + ->sites(['en']) + ->taxonomies(['tags']) + ->save(); + + Entry::make()->id('entry-1')->locale('en')->collection('blog')->slug('one')->data(['tags' => ['alfa', 'bravo']])->save(); + Entry::make()->id('entry-2')->locale('en')->collection('blog')->slug('two')->data(['tags' => ['alfa']])->save(); + Entry::make()->id('entry-3')->locale('en')->collection('blog')->slug('three')->data(['title' => 'No tags'])->save(); + + $associations = Stache::store('terms')->store('tags')->index('associations'); + $associations->update(); + + $items = collect($associations->items()->all()); + + $this->assertCount(3, $items); + + $alfaItems = $items->where('slug', 'alfa')->values(); + $this->assertCount(2, $alfaItems); + $this->assertTrue($alfaItems->pluck('entry')->contains('entry-1')); + $this->assertTrue($alfaItems->pluck('entry')->contains('entry-2')); + + $bravoItems = $items->where('slug', 'bravo')->values(); + $this->assertCount(1, 
$bravoItems); + $this->assertEquals('entry-1', $bravoItems->first()['entry']); + + $items->each(function ($item) { + $this->assertEquals('blog', $item['collection']); + $this->assertEquals('en', $item['site']); + }); + } + + #[Test] + public function it_builds_associations_across_multiple_collections(): void + { + Taxonomy::make('tags')->save(); + + Collection::make('blog')->sites(['en'])->taxonomies(['tags'])->save(); + Collection::make('news')->sites(['en'])->taxonomies(['tags'])->save(); + + Entry::make()->id('blog-1')->locale('en')->collection('blog')->slug('blog-one')->data(['tags' => ['alfa']])->save(); + Entry::make()->id('news-1')->locale('en')->collection('news')->slug('news-one')->data(['tags' => ['alfa']])->save(); + + $associations = Stache::store('terms')->store('tags')->index('associations'); + $associations->update(); + + $alfaItems = collect($associations->items()->all())->where('slug', 'alfa')->values(); + + $this->assertCount(2, $alfaItems); + $this->assertTrue($alfaItems->pluck('collection')->contains('blog')); + $this->assertTrue($alfaItems->pluck('collection')->contains('news')); + } + + #[Test] + public function it_returns_empty_when_no_entries_have_the_taxonomy(): void + { + Taxonomy::make('tags')->save(); + + Collection::make('blog')->sites(['en'])->taxonomies(['tags'])->save(); + + Entry::make()->id('entry-1')->locale('en')->collection('blog')->slug('one')->data(['title' => 'No tags'])->save(); + + $associations = Stache::store('terms')->store('tags')->index('associations'); + $associations->update(); + + $this->assertEmpty($associations->items()->all()); + } +} From f012ebb338ce6fcd5fe42a48a848cd89021d2871 Mon Sep 17 00:00:00 2001 From: edalzell Date: Fri, 12 Jun 2026 14:59:01 -0700 Subject: [PATCH 3/5] Rewrite Associations::getItems() to iterate entries one at a time Previous approach called queryEntries()->get() which invoked getItemsFromFiles() holding all matching Entry objects in memory simultaneously. 
With 3000+ entries having large Bard content, this caused 2.5GB peak memory. The lazy() variant from the previous commit chunked the loading, but entries still accumulated within each chunk, so the peak did not drop. New approach iterates entry paths one at a time via the entries store, explicitly unsets each Entry after extracting the needed values, so PHP can reclaim memory per-entry rather than holding everything at once. --- src/Stache/Indexes/Terms/Associations.php | 54 ++++++++++++++++------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/src/Stache/Indexes/Terms/Associations.php b/src/Stache/Indexes/Terms/Associations.php index 059fa390191..bf563992564 100644 --- a/src/Stache/Indexes/Terms/Associations.php +++ b/src/Stache/Indexes/Terms/Associations.php @@ -2,6 +2,7 @@ namespace Statamic\Stache\Indexes\Terms; +use Statamic\Facades\Stache; use Statamic\Facades\Taxonomy; use Statamic\Stache\Indexes\Index; use Statamic\Support\Str; @@ -10,24 +11,45 @@ class Associations extends Index { public function getItems() { - return Taxonomy::findByHandle($handle = $this->store->childKey()) + $handle = $this->store->childKey(); + + return Taxonomy::findByHandle($handle) ->collections() ->flatMap(function ($collection) use ($handle) { - return $collection->queryEntries() - ->where($handle, '<>', null) - ->lazy() - ->flatMap(function ($entry) use ($handle) { - return collect($entry->value($handle)) - ->map(function ($value) use ($entry) { - return [ - 'value' => $value, - 'slug' => Str::slug($value), - 'entry' => $entry->id(), - 'collection' => $entry->collectionHandle(), - 'site' => $entry->locale(), - ]; - }); - })->all(); + $entriesStore = Stache::store('entries')->store($collection->handle()); + $collectionHandle = $collection->handle(); + $results = []; + + foreach ($entriesStore->paths()->keys() as $key) { + $item = $entriesStore->getItem($key); + + if (! 
$item) { + continue; + } + + $value = $item->value($handle); + + if (empty($value)) { + unset($item); + continue; + } + + $entryId = $item->id(); + $site = $item->locale(); + unset($item); + + foreach ((array) $value as $termValue) { + $results[] = [ + 'value' => $termValue, + 'slug' => Str::slug($termValue), + 'entry' => $entryId, + 'collection' => $collectionHandle, + 'site' => $site, + ]; + } + } + + return $results; })->all(); } From 5a9609700781aceb9b1c782354d2bc703bb0038a Mon Sep 17 00:00:00 2001 From: edalzell Date: Sat, 13 Jun 2026 10:50:34 -0700 Subject: [PATCH 4/5] pint --- src/Stache/Indexes/Terms/Associations.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Stache/Indexes/Terms/Associations.php b/src/Stache/Indexes/Terms/Associations.php index bf563992564..67fba487a52 100644 --- a/src/Stache/Indexes/Terms/Associations.php +++ b/src/Stache/Indexes/Terms/Associations.php @@ -31,6 +31,7 @@ public function getItems() if (empty($value)) { unset($item); + continue; } From f052c4b0bbfd9335b1c2d41c31b09d01a1eb2c8e Mon Sep 17 00:00:00 2001 From: edalzell Date: Sat, 13 Jun 2026 13:00:11 -0700 Subject: [PATCH 5/5] comment to help jason understand --- src/Stache/Indexes/Terms/Associations.php | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/Stache/Indexes/Terms/Associations.php b/src/Stache/Indexes/Terms/Associations.php index 67fba487a52..e4ccf2e5ee9 100644 --- a/src/Stache/Indexes/Terms/Associations.php +++ b/src/Stache/Indexes/Terms/Associations.php @@ -17,9 +17,19 @@ public function getItems() ->collections() ->flatMap(function ($collection) use ($handle) { $entriesStore = Stache::store('entries')->store($collection->handle()); + // Hoist outside the loop to avoid repeated method calls per entry. $collectionHandle = $collection->handle(); $results = []; + // Two earlier approaches both caused excess memory usage: + // 1. queryEntries()->get()->flatMap() — loaded all matching entries at once. + // 2. 
queryEntries()->lazy()->flatMap() — chunked loading, but each Entry + // object was still kept alive for the duration of its flatMap closure, + // so entries accumulated within each chunk. + // With 3000+ entries containing large Bard content, both caused ~2.5 GB peak RSS. + // Iterating paths directly lets us unset each Entry immediately after + // extracting the scalar values we need, so PHP can reclaim memory + // per-entry rather than holding everything until flatMap returns. foreach ($entriesStore->paths()->keys() as $key) { $item = $entriesStore->getItem($key); @@ -30,6 +40,7 @@ public function getItems() $value = $item->value($handle); if (empty($value)) { + // Release the entry object before moving to the next key. unset($item); continue; @@ -37,6 +48,7 @@ public function getItems() $entryId = $item->id(); $site = $item->locale(); + // Release the entry object now that we have all the scalars we need. unset($item); foreach ((array) $value as $termValue) {