From 105e39c7db02eddd22879e11912d9182b9af8758 Mon Sep 17 00:00:00 2001
From: "Phong X. Nguyen"
Date: Wed, 3 Jun 2026 23:16:31 +0000
Subject: [PATCH] Fix LRU RAM cache seen filter never engaging below 100% full

proxy.config.cache.ram_cache.use_seen_filter values above 1 are
documented to turn on the seen filter once the cache is (N-1)/N full
(2 = 50%, 3 = 67%, ... 9 = 90%). The threshold was written as
bytes >= max_bytes * (1 - (1 / N)) with N an int, so 1 / N is integer
division and evaluates to 0 for every N > 1; the test became
bytes >= max_bytes and the filter only engaged when completely full.
A scan could therefore pollute a half-full cache.

Rewrite the comparison as bytes * N >= max_bytes * (N - 1), which is
exact in integer arithmetic and overflow-safe at realistic cache sizes.

Add ram_cache_lru_seen_filter, which fails on the old form (20/20 unseen
keys admitted at 60% full) and passes on the fix (0/20).

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 src/iocore/cache/CacheTest.cc   | 70 +++++++++++++++++++++++++++++++++
 src/iocore/cache/RamCacheLRU.cc |  9 +++--
 2 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/src/iocore/cache/CacheTest.cc b/src/iocore/cache/CacheTest.cc
index cd86df6051e..6a9036391da 100644
--- a/src/iocore/cache/CacheTest.cc
+++ b/src/iocore/cache/CacheTest.cc
@@ -684,3 +684,73 @@ REGRESSION_TEST(ram_cache)(RegressionTest *t, int level, int *pstatus)
     }
   }
 }
+
+// Verifies the tiered LRU seen filter engages at the documented fill level. With
+// proxy.config.cache.ram_cache.use_seen_filter = N (> 1) the filter must turn on once the cache
+// is (N - 1)/N full (N = 2 -> 50%). An integer-division bug (1 / N == 0) made it turn on only at
+// 100% full, so a scan could pollute a half-full cache.
+REGRESSION_TEST(ram_cache_lru_seen_filter)(RegressionTest *t, int level, int *pstatus)
+{
+  if (REGRESSION_TEST_NIGHTLY > level) {
+    *pstatus = REGRESSION_TEST_PASSED;
+    return;
+  }
+  if (cacheProcessor.IsCacheEnabled() != CacheInitState::INITIALIZED) {
+    rprintf(t, "cache not initialized");
+    *pstatus = REGRESSION_TEST_FAILED;
+    return;
+  }
+
+  int const saved_filter = cache_config_ram_cache_use_seen_filter; // restored before the test returns
+  cache_config_ram_cache_use_seen_filter = 2; // engage once the cache is 50% full
+
+  CacheKey key;
+  StripeSM *stripe = theCache->key_to_stripe(&key, "example.com"sv);
+  int64_t cache_size = 1LL << 21; // 2 MB
+  RamCache *cache = new_RamCacheLRU(); // NOTE(review): never deleted in this test -- confirm whether it should be
+  cache->init(cache_size, stripe);
+
+  std::vector<Ptr<IOBufferData>> keep; // holds a reference to every buffer handed to put()
+  auto put = [&](uint64_t n) { // inserts a zero-filled 16K buffer under a key derived from n
+    CryptoHash hash;
+    hash.u64[0] = (n << 32) + n;
+    hash.u64[1] = (n << 32) + n;
+    IOBufferData *d = THREAD_ALLOC(ioDataAllocator, this_thread());
+    d->alloc(BUFFER_SIZE_INDEX_16K);
+    memset(d->data(), 0, d->block_size());
+    keep.push_back(make_ptr(d));
+    cache->put(&hash, d, d->block_size());
+  };
+  auto resident = [&](uint64_t n) -> bool { // reports whether get() returns nonzero for the key derived from n
+    CryptoHash hash;
+    hash.u64[0] = (n << 32) + n;
+    hash.u64[1] = (n << 32) + n;
+    Ptr<IOBufferData> got;
+    return cache->get(&hash, &got);
+  };
+
+  // Fill to ~60% (above the 50% threshold). Put each key twice so it is admitted even once the
+  // filter is active (the first Put records "seen", the second admits).
+  int const obj = BUFFER_SIZE_FOR_INDEX(BUFFER_SIZE_INDEX_16K);
+  int const fill_n = static_cast<int>((cache_size * 6 / 10) / obj);
+  for (int i = 0; i < fill_n; i++) {
+    put(1000 + i);
+    put(1000 + i);
+  }
+
+  // Above the threshold, single-Put (unseen) keys must be filtered, not admitted. Use a batch to
+  // be robust against the occasional seen-filter hash collision.
+  int admitted = 0;
+  for (int i = 0; i < 20; i++) {
+    put(900000 + i);
+    if (resident(900000 + i)) {
+      admitted++;
+    }
+  }
+
+  rprintf(t, "RamCache LRU seen filter: %d/20 single-seen keys admitted at ~60%% full (expect ~0)\n", admitted);
+
+  cache_config_ram_cache_use_seen_filter = saved_filter;
+  keep.clear();
+  *pstatus = (admitted <= 2) ? REGRESSION_TEST_PASSED : REGRESSION_TEST_FAILED; // tolerate up to 2 collisions out of 20
+}
diff --git a/src/iocore/cache/RamCacheLRU.cc b/src/iocore/cache/RamCacheLRU.cc
index ca3d8a384ad..6e392338be5 100644
--- a/src/iocore/cache/RamCacheLRU.cc
+++ b/src/iocore/cache/RamCacheLRU.cc
@@ -190,9 +190,12 @@ RamCacheLRU::put(CryptoHash *key, IOBufferData *data, [[maybe_unused]] uint32_t
   }
   uint32_t i = key->slice32(3) % nbuckets;
   if ((cache_config_ram_cache_use_seen_filter == 1) ||
-      // If proxy.config.cache.ram_cache.use_seen_filter is > 1, and the cache is more than % full, then use the seen filter.
-      // % is calculated based on this setting, with 2 == 50%, 3 == 67%, 4 == 75%, up to 9 == 90%.
-      ((cache_config_ram_cache_use_seen_filter > 1) && (bytes >= max_bytes * (1 - (1 / cache_config_ram_cache_use_seen_filter))))) {
+      // For use_seen_filter > 1, only apply the filter once the cache is more than % full:
+      // 2 == 50%, 3 == 67%, 4 == 75%, up to 9 == 90%. Written as bytes * N >= max_bytes * (N - 1)
+      // rather than bytes >= max_bytes * (1 - 1 / N); the latter evaluates 1 / N in integer
+      // arithmetic (0 for every N > 1), so the filter only ever engaged at 100% full.
+      ((cache_config_ram_cache_use_seen_filter > 1) &&
+       (bytes * cache_config_ram_cache_use_seen_filter >= max_bytes * (cache_config_ram_cache_use_seen_filter - 1)))) {
     uint32_t j = key->slice32(3) % (nbuckets * 2); // The seen filter bucket size is 2x
     if (!seen[j]) {