From 411baa6fc1737f45786a12049969103dc2282f28 Mon Sep 17 00:00:00 2001
From: "Phong X. Nguyen" <phongn@gmail.com>
Date: Wed, 3 Jun 2026 19:19:37 +0000
Subject: [PATCH 1/3] Fix CLFUS RAM cache value metric broken by integer
 division

PR #11733 rewrote the CACHE_VALUE_HITS_SIZE cast so static_cast<float>
wraps the whole quotient, making (hits + 1) / (size + overhead) integer
division. It truncates to 0 for normal object sizes, zeroing the value
metric and collapsing CLFUS to FIFO: no promote-on-hit, no clock second
chance, and no value-based ghost re-admission.

Bind the cast to the numerator to restore floating-point division, and
add the ram_cache_clfus_value regression test as a guard (it fails on
the pre-fix macro and passes after).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/iocore/cache/RamCacheCLFUS.cc | 33 ++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)
diff --git a/src/iocore/cache/RamCacheCLFUS.cc b/src/iocore/cache/RamCacheCLFUS.cc
index 639d2faa33c..08f801057a5 100644
--- a/src/iocore/cache/RamCacheCLFUS.cc
+++ b/src/iocore/cache/RamCacheCLFUS.cc
@@ -31,6 +31,7 @@
 #include "iocore/eventsystem/Tasks.h"
 #include "fastlz/fastlz.h"
 #include "tscore/CryptoHash.h"
+#include "tscore/Regression.h"
 #include <zlib.h>
 #ifdef HAVE_LZMA_H
 #include <lzma.h>
@@ -44,7 +45,7 @@
 // #define CHECK_ACOUNTING 1 // very expensive double checking of all sizes
 
 #define REQUEUE_HITS(_h)              ((_h) ? ((_h) - 1) : 0)
-#define CACHE_VALUE_HITS_SIZE(_h, _s) (static_cast<float>(((_h) + 1) / ((_s) + ENTRY_OVERHEAD)))
+#define CACHE_VALUE_HITS_SIZE(_h, _s) (static_cast<float>((_h) + 1) / ((_s) + ENTRY_OVERHEAD))
 #define CACHE_VALUE(_x)               CACHE_VALUE_HITS_SIZE((_x)->hits, (_x)->size)
 
 #define AVERAGE_VALUE_OVER 100
@@ -792,3 +793,33 @@ new_RamCacheCLFUS()
   RamCacheCLFUS *r = new RamCacheCLFUS;
   return r;
 }
+
+// Regression guard for the CLFUS value metric (cf. PR #11733). The value density has to be
+// computed in floating point: done in integers, (hits + 1) / (size + overhead) truncates to 0
+// for ordinary object sizes, every entry then ranks the same, and CLFUS silently behaves like
+// FIFO (no promote-on-hit, no clock second chance, no value-based ghost re-admission).
+REGRESSION_TEST(ram_cache_clfus_value)(RegressionTest *t, int /* level ATS_UNUSED */, int *pstatus)
+{
+  float const seen_once_16k = CACHE_VALUE_HITS_SIZE(1u, 16384u);   // 16 KiB object, a single hit
+  float const ten_hits_16k  = CACHE_VALUE_HITS_SIZE(10u, 16384u);  // 16 KiB object, ten hits
+  float const hot_16k       = CACHE_VALUE_HITS_SIZE(100u, 16384u); // 16 KiB object, a hundred hits
+  float const ten_hits_1k   = CACHE_VALUE_HITS_SIZE(10u, 1024u);   // 1 KiB object, ten hits
+
+  *pstatus = REGRESSION_TEST_PASSED; // any violated expectation below overrides this
+
+  // A positive fraction is expected; under the integer-division regression it is exactly 0.0f.
+  if (!(0.0f < seen_once_16k)) {
+    rprintf(t, "CLFUS value metric truncated to zero (integer division)\n");
+    *pstatus = REGRESSION_TEST_FAILED;
+  }
+  // Frequency awareness: at equal size, more hits must yield a larger value.
+  if (!(seen_once_16k < hot_16k)) {
+    rprintf(t, "CLFUS value metric does not increase with hits\n");
+    *pstatus = REGRESSION_TEST_FAILED;
+  }
+  // Size awareness (the "by Size" in CLFUS): at equal hits, the smaller object must rank higher.
+  if (!(ten_hits_16k < ten_hits_1k)) {
+    rprintf(t, "CLFUS value metric does not decrease with size\n");
+    *pstatus = REGRESSION_TEST_FAILED;
+  }
+}

From 8e7c7eff62ae66a4bb99f1c2bbf684d1299d49d9 Mon Sep 17 00:00:00 2001
From: "Phong X. Nguyen" <phongn@gmail.com>
Date: Wed, 3 Jun 2026 22:49:41 +0000
Subject: [PATCH 2/3] Make CLFUS RAM cache adapt to a shifting working set

CLFUS could not follow a changing working set: a once-hot object kept
its frequency forever (resident hit counts were never aged) and new
candidates were never admitted (the history/ghost list was emptied as
fast as it filled). On a working-set shift the cache froze on whatever
it had captured -- e.g. a hit rate of 0.125 on the new set vs 1.0 for LRU.

Two complementary changes fix it:

* Admission: _tick() used to free a history entry the moment its aged
  count reached 0, so the ghost list stayed ~empty and a re-requested
  key was forgotten before it could be re-admitted. Keep entries and
  free only to hold the list at its target size.

* Aging: halve all resident hit counts (and _average_value, the
  admission bar, in step) once per turnover, so a cold-but-once-hot
  object's advantage decays and warmer newcomers can take over.

The history list is capped at _objects / HISTORY_DIVISOR (4) rather
than a full cache-worth: ghost entries are ~88 bytes each and
unbudgeted (not counted against ram_cache.size), so a full cache-worth
is a large memory cost for caches of many small objects, and testing
showed a quarter preserves adaptivity.

Adds ram_cache_adaptivity (abrupt shift) and ram_cache_drift (gradual
rolling window) regression tests; both, plus the existing ram_cache,
now show CLFUS tracking the working set like LRU and beating it on
steady-state Zipfian.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/iocore/cache/CacheTest.cc     | 181 ++++++++++++++++++++++++++++++
 src/iocore/cache/RamCacheCLFUS.cc |  62 +++++++---
 2 files changed, 230 insertions(+), 13 deletions(-)

diff --git a/src/iocore/cache/CacheTest.cc b/src/iocore/cache/CacheTest.cc
index cd86df6051e..012dbd78d02 100644
--- a/src/iocore/cache/CacheTest.cc
+++ b/src/iocore/cache/CacheTest.cc
@@ -684,3 +684,184 @@ REGRESSION_TEST(ram_cache)(RegressionTest *t, int level, int *pstatus)
     }
   }
 }
+
+// Result of one working-set-shift run of test_RamCache_adaptivity. Phase 1 warms set A until it
+// fills (most of) the cache; phase 2 sends every reference to a disjoint, equally sized set B.
+// A frequency policy that never ages resident hit counts keeps the now-cold A pinned and starves
+// B (low B hit rate, high A retention); a recency-based or properly aged policy releases A.
+struct RamCacheAdaptResult {
+  double b_hit_rate{0.0}; // hits / accesses on set B, over the measured part of phase 2
+  int    a_retained{0};   // set-A keys still found in the cache after phase 2
+  int    a_total{0};      // number of keys in set A
+};
+
+static RamCacheAdaptResult
+test_RamCache_adaptivity(RamCache *cache, int64_t cache_size, StripeSM *stripe)
+{
+  cache->init(cache_size, stripe);
+
+  int const      obj_bytes    = BUFFER_SIZE_FOR_INDEX(BUFFER_SIZE_INDEX_16K);
+  int const      set_size     = static_cast<int>((cache_size / obj_bytes) * 7 / 8); // each set is ~7/8 of capacity
+  int const      warm_rounds  = 20;                                                 // passes over set A
+  int const      shift_rounds = 26;                                                 // passes over set B
+  uint64_t const first_a      = 1;
+  uint64_t const first_b      = 1000000;
+
+  // Write the 128-bit cache key for logical key k into h (both halves get the same pattern).
+  auto fill_hash = [](CryptoHash &h, uint64_t k) {
+    h.u64[0] = (k << 32) + k;
+    h.u64[1] = (k << 32) + k;
+  };
+
+  // Look k up; on a miss insert a zeroed 16K buffer for it. Returns whether the lookup hit.
+  std::vector<Ptr<IOBufferData>> pinned; // buffers inserted during the current pass
+  auto                           touch = [&](uint64_t k) -> bool {
+    CryptoHash        h;
+    Ptr<IOBufferData> found;
+    fill_hash(h, k);
+    if (cache->get(&h, &found)) {
+      return true;
+    }
+    IOBufferData *buf = THREAD_ALLOC(ioDataAllocator, this_thread());
+    buf->alloc(BUFFER_SIZE_INDEX_16K);
+    memset(buf->data(), 0, buf->block_size());
+    pinned.push_back(make_ptr(buf));
+    cache->put(&h, buf, buf->block_size());
+    return false;
+  };
+
+  for (int pass = 0; pass < warm_rounds; ++pass) { // phase 1: warm set A into the cache
+    for (int n = 0; n < set_size; ++n) {
+      touch(first_a + n);
+    }
+    pinned.clear();
+  }
+
+  int b_seen = 0, b_hit = 0;
+  for (int pass = 0; pass < shift_rounds; ++pass) { // phase 2: reference only set B
+    bool const measured = pass >= shift_rounds / 2; // score only once B has had a chance to establish
+    for (int n = 0; n < set_size; ++n) {
+      bool const was_hit = touch(first_b + n);
+      b_seen += measured ? 1 : 0;
+      b_hit += (measured && was_hit) ? 1 : 0;
+    }
+    pinned.clear();
+  }
+
+  // Probe, without inserting, how many set-A keys are still resident.
+  RamCacheAdaptResult out;
+  out.a_total = set_size;
+  for (int n = 0; n < set_size; ++n) {
+    CryptoHash        h;
+    Ptr<IOBufferData> found;
+    fill_hash(h, first_a + n);
+    out.a_retained += cache->get(&h, &found) ? 1 : 0;
+  }
+  pinned.clear();
+  out.b_hit_rate = b_seen ? static_cast<double>(b_hit) / b_seen : 0.0;
+  return out;
+}
+
+// Nightly-only check that CLFUS follows an abrupt working-set shift; LRU figures are printed for comparison.
+REGRESSION_TEST(ram_cache_adaptivity)(RegressionTest *t, int level, int *pstatus)
+{
+  if (REGRESSION_TEST_NIGHTLY > level) { // below nightly level: report a pass without running
+    *pstatus = REGRESSION_TEST_PASSED;
+    return;
+  }
+  if (cacheProcessor.IsCacheEnabled() != CacheInitState::INITIALIZED) {
+    rprintf(t, "cache not initialized\n");
+    *pstatus = REGRESSION_TEST_FAILED;
+    return;
+  }
+  CacheKey  key;
+  StripeSM *stripe     = theCache->key_to_stripe(&key, "example.com"sv);
+  int64_t   cache_size = 1LL << 21; // 2 MB
+  // NOTE(review): neither cache object is freed here -- presumably fine for a one-shot test; confirm.
+  RamCacheAdaptResult lru   = test_RamCache_adaptivity(new_RamCacheLRU(), cache_size, stripe);
+  RamCacheAdaptResult clfus = test_RamCache_adaptivity(new_RamCacheCLFUS(), cache_size, stripe);
+
+  rprintf(t, "RamCache adaptivity after working-set shift (higher B-hit-rate / lower A-retained is better)\n");
+  rprintf(t, "RamCache LRU   B-hit-rate %.3f  A-retained %d/%d\n", lru.b_hit_rate, lru.a_retained, lru.a_total);
+  rprintf(t, "RamCache CLFUS B-hit-rate %.3f  A-retained %d/%d\n", clfus.b_hit_rate, clfus.a_retained, clfus.a_total);
+
+  // Pass criteria: CLFUS serves the new set B (hit rate >= 0.90) and has released the stale set A (at most a third retained).
+  *pstatus = (clfus.b_hit_rate >= 0.90 && clfus.a_retained <= clfus.a_total / 3) ? REGRESSION_TEST_PASSED : REGRESSION_TEST_FAILED;
+}
+
+// Gradual-drift adaptivity: a rolling working set. Each round accesses a window of keys (a few
+// times each, so they stay hot and get admitted) and slides the window forward by a few keys.
+// Keys that roll off the trailing edge go cold while still carrying high hit counts; a policy
+// that never ages resident counts keeps that stale trailing edge and starves the leading edge.
+// Returns the hit rate on the current window over the second half of the run.
+static double
+test_RamCache_drift(RamCache *cache, int64_t cache_size, StripeSM *stripe)
+{
+  cache->init(cache_size, stripe);
+
+  int const capacity   = static_cast<int>(cache_size / BUFFER_SIZE_FOR_INDEX(BUFFER_SIZE_INDEX_16K));
+  int const window     = capacity * 3 / 4; // keys in the active window (3/4 of capacity)
+  int const touches    = 2;                // times each window key is accessed per round
+  int const step       = 3;                // keys the window advances by each round
+  int const num_rounds = 40;
+
+  std::vector<Ptr<IOBufferData>> pinned; // buffers inserted during the current round
+  auto                           touch = [&](uint64_t k) -> bool { // get k; on a miss insert a zeroed 16K buffer
+    uint64_t const    pattern = (k << 32) + k;
+    CryptoHash        h;
+    Ptr<IOBufferData> found;
+    h.u64[0] = pattern;
+    h.u64[1] = pattern;
+    if (cache->get(&h, &found)) {
+      return true;
+    }
+    IOBufferData *buf = THREAD_ALLOC(ioDataAllocator, this_thread());
+    buf->alloc(BUFFER_SIZE_INDEX_16K);
+    memset(buf->data(), 0, buf->block_size());
+    pinned.push_back(make_ptr(buf));
+    cache->put(&h, buf, buf->block_size());
+    return false;
+  };
+
+  int measured = 0, measured_hits = 0;
+  for (int rnd = 0; rnd < num_rounds; ++rnd) {
+    uint64_t const start   = static_cast<uint64_t>(rnd) * step; // window = [start, start + window)
+    bool const     counted = rnd >= num_rounds / 2;             // only the second half of the run is scored
+    for (int pass = 0; pass < touches; ++pass) {
+      for (int off = 0; off < window; ++off) {
+        bool const was_hit = touch(start + off);
+        measured += counted ? 1 : 0;
+        measured_hits += (counted && was_hit) ? 1 : 0;
+      }
+    }
+    pinned.clear();
+  }
+  return measured ? static_cast<double>(measured_hits) / measured : 0.0;
+}
+
+// Nightly-only check that CLFUS tracks a gradually sliding working set; the LRU figure is printed for comparison.
+REGRESSION_TEST(ram_cache_drift)(RegressionTest *t, int level, int *pstatus)
+{
+  if (REGRESSION_TEST_NIGHTLY > level) { // below nightly level: report a pass without running
+    *pstatus = REGRESSION_TEST_PASSED;
+    return;
+  }
+  if (cacheProcessor.IsCacheEnabled() != CacheInitState::INITIALIZED) {
+    rprintf(t, "cache not initialized\n");
+    *pstatus = REGRESSION_TEST_FAILED;
+    return;
+  }
+  CacheKey  key;
+  StripeSM *stripe     = theCache->key_to_stripe(&key, "example.com"sv);
+  int64_t   cache_size = 1LL << 21; // 2 MB
+  // NOTE(review): neither cache object is freed here -- presumably fine for a one-shot test; confirm.
+  double lru   = test_RamCache_drift(new_RamCacheLRU(), cache_size, stripe);
+  double clfus = test_RamCache_drift(new_RamCacheCLFUS(), cache_size, stripe);
+
+  rprintf(t, "RamCache gradual-drift current-window hit rate (higher is better)\n");
+  rprintf(t, "RamCache LRU   drift-hit-rate %.3f\n", lru);
+  rprintf(t, "RamCache CLFUS drift-hit-rate %.3f\n", clfus);
+
+  // Pass criterion: CLFUS keeps a current-window hit rate of at least 0.80 rather than freezing on the initial cohort.
+  *pstatus = (clfus >= 0.80) ? REGRESSION_TEST_PASSED : REGRESSION_TEST_FAILED;
+}
diff --git a/src/iocore/cache/RamCacheCLFUS.cc b/src/iocore/cache/RamCacheCLFUS.cc
index 08f801057a5..5f9c6becbad 100644
--- a/src/iocore/cache/RamCacheCLFUS.cc
+++ b/src/iocore/cache/RamCacheCLFUS.cc
@@ -40,8 +40,16 @@
 #define REQUIRED_COMPRESSION 0.9 // must get to this size or declared incompressible
 #define REQUIRED_SHRINK      0.8 // must get to this size or keep original buffer (with padding)
 #define HISTORY_HYSTERIA     10  // extra temporary history
-#define ENTRY_OVERHEAD       256 // per-entry overhead to consider when computing cache value/size
-#define LZMA_BASE_MEMLIMIT   (64 * 1024 * 1024)
+// HISTORY_DIVISOR caps the history (ghost) list at ~ _objects / HISTORY_DIVISOR entries. The
+// ghost list only needs to remember recent eviction candidates long enough to be requested
+// again; it does not need a full cache-worth. Each ghost entry is a full RamCacheCLFUSEntry
+// (~88 bytes) and is unbudgeted -- it is not counted against ram_cache.size -- so a full
+// cache-worth is a large memory cost for caches of many small objects. Testing showed a quarter
+// preserves CLFUS's adaptivity to a shifting working set (a half also works; an eighth still
+// works but begins to slip). See doc/developer-guide/cache-architecture/ram-cache.en.rst.
+#define HISTORY_DIVISOR    4
+#define ENTRY_OVERHEAD     256 // per-entry overhead to consider when computing cache value/size
+#define LZMA_BASE_MEMLIMIT (64 * 1024 * 1024)
 // #define CHECK_ACOUNTING 1 // very expensive double checking of all sizes
 
 #define REQUEUE_HITS(_h)              ((_h) ? ((_h) - 1) : 0)
@@ -108,6 +116,7 @@ class RamCacheCLFUS : public RamCache
 
   double  _average_value                        = 0;
   int64_t _history                              = 0;
+  int64_t _age_clock                            = 0; // accesses since the last resident aging pass
   int     _ibuckets                             = 0;
   int     _nbuckets                             = 0;
   DList(RamCacheCLFUSEntry, hash_link) *_bucket = nullptr;
@@ -121,7 +130,8 @@ class RamCacheCLFUS : public RamCache
   void                _move_compressed(RamCacheCLFUSEntry *e);
   RamCacheCLFUSEntry *_destroy(RamCacheCLFUSEntry *e);
   void                _requeue_victims(Que(RamCacheCLFUSEntry, lru_link) & victims);
-  void                _tick(); // move CLOCK on history
+  void                _tick();         // move CLOCK on history
+  void                _age_resident(); // periodically halve resident hit counts so the hot set ages
 };
 
 int64_t
@@ -356,19 +366,21 @@ RamCacheCLFUS::_tick()
   if (!e) {
     return;
   }
+  // Age the oldest history entry and keep it: the history list is a bounded record of recently
+  // evicted/seen keys, so a key requested again soon can be re-admitted cheaply. Previously an
+  // entry was freed the moment its aged count reached 0, which held the list near-empty and
+  // denied that second chance; now we free only to keep the list at its target size.
   e->hits >>= 1;
-  if (e->hits) {
-    e->hits = REQUEUE_HITS(e->hits);
-    this->_lru[1].enqueue(e);
-  } else {
-    goto Lfree;
-  }
-  if (this->_history <= this->_objects + HISTORY_HYSTERIA) {
+  e->hits   = REQUEUE_HITS(e->hits);
+  this->_lru[1].enqueue(e);
+  // Cap the history list well below the resident count: it only needs to remember recent
+  // eviction candidates long enough to be re-requested, and a full cache-worth of ghost entries
+  // is a large, unbudgeted memory cost for caches with many small objects.
+  if (this->_history <= this->_objects / HISTORY_DIVISOR + HISTORY_HYSTERIA) {
     return;
   }
   e = this->_lru[1].dequeue();
-Lfree:
-  if (!e) { // e may be nullptr after e= lru[1].dequeue()
+  if (!e) {
     return;
   }
   e->flag_bits.lru = 0;
@@ -390,6 +402,24 @@ RamCacheCLFUS::_victimize(RamCacheCLFUSEntry *e)
   this->_history++;
 }
 
+// Halve every resident hit count. put() calls this once per turnover so a once-hot object's
+// frequency advantage decays over time; without it CLFUS never ages lru[0] and a stale hot set
+// pins the cache against a shifting working set. Decaying every entry (rather than only the
+// eviction-scan survivors) is what lets _average_value fall and admit the new working set.
+void
+RamCacheCLFUS::_age_resident()
+{
+  // NOTE(review): walks all of _lru[0] inside a single put() -- confirm the stall is OK for very large caches.
+  forl_LL(RamCacheCLFUSEntry, e, this->_lru[0])
+  {
+    e->hits >>= 1;
+  }
+  // Decay the admission bar in step with the values it tracks. _average_value otherwise only changes inside
+  // the eviction loop, which is gated by _average_value itself (put() rejects a re-referenced entry whose value
+  // is below it). Without halving it too, aging is invisible to that gate and a warming working set can never break in.
+  this->_average_value *= 0.5;
+}
+
 void
 RamCacheCLFUS::_move_compressed(RamCacheCLFUSEntry *e)
 {
@@ -596,6 +626,12 @@ RamCacheCLFUS::put(CryptoHash *key, IOBufferData *data, uint32_t len, bool copy,
   if (!this->_max_bytes) {
     return 0;
   }
+  // Age the resident set once per "turnover" (a put for every resident object). This is the
+  // CLOCK aging that lets a cold-but-once-hot working set fall below average and be evicted.
+  if (this->_objects > 0 && ++this->_age_clock >= this->_objects) {
+    this->_age_resident();
+    this->_age_clock = 0;
+  }
   uint32_t            i            = key->slice32(3) % this->_nbuckets;
   RamCacheCLFUSEntry *e            = this->_bucket[i].head;
   uint32_t            size         = copy ? len : data->block_size();
@@ -657,7 +693,7 @@ RamCacheCLFUS::put(CryptoHash *key, IOBufferData *data, uint32_t len, bool copy,
     uint16_t k     = key->slice32(3) >> 16;
     uint16_t kk    = this->_seen[s];
     this->_seen[s] = k;
-    if (this->_history >= this->_objects && kk != k) {
+    if (this->_history >= this->_objects / HISTORY_DIVISOR && kk != k) {
       DDbg(dbg_ctl_ram_cache, "put %X %" PRId64 " size %d UNSEEN", key->slice32(3), auxkey, size);
       return 0;
     }

From d38c09e0dfa654fbd76ee43d00de303424adbfe5 Mon Sep 17 00:00:00 2001
From: "Phong X. Nguyen" <phongn@gmail.com>
Date: Wed, 3 Jun 2026 22:53:57 +0000
Subject: [PATCH 3/3] doc: describe how the CLFUS RAM cache works

The developer guide only sketched CLFUS and its History List section no
longer matched the code. Document the value metric and the floating
admission bar, the cached and history lists and their CLOCK aging
(_tick, _age_resident), how a shifting working set is followed, and the
per-object memory overhead -- including the bounded (HISTORY_DIVISOR)
history list and its unbudgeted ghost entries.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../cache-architecture/ram-cache.en.rst       | 113 ++++++++++++++----
 1 file changed, 89 insertions(+), 24 deletions(-)

diff --git a/doc/developer-guide/cache-architecture/ram-cache.en.rst b/doc/developer-guide/cache-architecture/ram-cache.en.rst
index bb6851e2e2d..0e3b3364a4e 100644
--- a/doc/developer-guide/cache-architecture/ram-cache.en.rst
+++ b/doc/developer-guide/cache-architecture/ram-cache.en.rst
@@ -107,34 +107,99 @@ with the CLOCK rate of the Cached and History Lists.
 Cached List
 ===========
 
-The *Cached List* contains objects actually in memory. The basic operation is
-LRU with new entries inserted into a FIFO queue and hits causing objects to be
-reinserted. The interesting bit comes when an object is being considered for
-insertion. A check is first made against the Object Hash to see if the object
-is in the Cached List or History. Hits result in updating the ``hit`` field and
-reinsertion of the object. History hits result in the ``hit`` field being
-updated and a comparison to see if this object should be kept in memory. The
-comparison is against the least recently used members of the Cache List, and
-is based on a weighted frequency::
-
-   CACHE_VALUE = hits / (size + overhead)
-
-A new object must be enough bytes worth of currently cached objects to cover
-itself. Each time an object is considered for replacement the CLOCK moves
-forward. If the History object has a greater value then it is inserted into the
-Cached List and the replaced objects are removed from memory and their list
-entries are inserted into the History List. If the History object has a lesser
-value it is reinserted into the History List. Objects considered for replacement
-(at least one) but not replaced have their ``hits`` field set to ``0`` and are
-reinserted into the Cached List. This is the CLOCK operation on the Cached List.
+The *Cached List* (``_lru[0]`` in ``RamCacheCLFUS.cc``) holds the objects
+actually resident in memory. New entries are inserted into a FIFO queue and a
+hit reinserts the object at the tail. The interesting work happens when an
+object is considered for insertion (a *Put*, after a read from secondary
+storage). A check is first made against the object hash to see if the object is
+already in the Cached List or the History List.
+
+Each object is ranked by a weighted frequency, its *value*::
+
+   CACHE_VALUE = (hits + 1) / (size + ENTRY_OVERHEAD)
+
+Smaller and more frequently used objects rank higher, which is what is meant by
+least frequently used *by size*. The value of a candidate is compared against
+``_average_value``, an exponential moving average of the value of the objects
+passed over for replacement -- in effect a floating admission bar.
+
+.. note::
+
+   ``CACHE_VALUE`` must be evaluated in floating point. Because ``hits`` is
+   small and ``size`` is large, computing ``(hits + 1) / (size + overhead)`` in
+   integer arithmetic truncates to ``0`` for every normal object, which silently
+   collapses CLFUS to FIFO. This was the regression introduced in GitHub PR
+   #11733; the division is now forced to floating point.
+
+When a *Put* finds the incoming object in the History List, its value is
+compared against the least recently used members of the Cached List. The
+candidate must be worth at least as many bytes of currently cached objects as it
+displaces. Each time an object is considered for replacement the CLOCK advances.
+If the candidate wins it is moved into the Cached List and the objects it
+displaces are removed from memory, their (data-less) list entries moving to the
+History List; if it loses it is returned to the History List. Objects passed
+over for replacement (at least one) have their ``hits`` reduced and are
+reinserted -- this is the CLOCK (second chance) on the Cached List.
+
+Aging the cached list
+---------------------
+
+Frequency counts on resident objects would otherwise only ever grow, so an
+object that was hot days ago keeps winning replacement long after it has gone
+cold, and the cache cannot follow a changing working set. To prevent this, once
+per *turnover* (one *Put* for every resident object) ``_age_resident()`` halves
+every resident ``hits`` count *and* halves ``_average_value`` in the same pass.
+Halving the bar matters: ``_average_value`` is otherwise updated only inside the
+replacement loop, which a low-value candidate never reaches, so without it the
+decayed counts would be invisible to the admission decision and a warming
+working set could never break in.
 
 History List
 ============
 
-Each CLOCK, the least recently used entry in the History List is dequeued and
-if the ``hits`` field is not greater than ``1`` (it was hit at least once in
-the History or Cached List) it is deleted. Otherwise, the ``hits`` is set to
-``0`` and it is requeued on the History List.
+The *History List* (``_lru[1]``) is a bounded record of keys recently evicted
+from, or considered for, the Cached List. Its entries carry no data (the
+``IOBufferData`` pointer is null); they exist so that an object requested again
+soon after eviction can be cheaply re-admitted, and so a newly seen object can
+accumulate enough value to earn admission before it is forgotten.
+
+Each CLOCK tick (``_tick()``, run once per eviction) ages the oldest History
+entry -- halving its ``hits``, then decrementing it (never below ``0``) -- and *keeps* it, freeing entries only to hold the
+list at its target size, ``_objects / HISTORY_DIVISOR + HISTORY_HYSTERIA``. An
+earlier version freed an entry the moment its aged ``hits`` reached ``0``, which
+held the list nearly empty and denied re-requested objects their second chance.
+
+The list is deliberately capped well below a full cache-worth
+(``HISTORY_DIVISOR`` is a compile-time constant, 4): it only needs to remember recent candidates
+long enough to be requested again, and a full cache-worth of history entries is a
+large memory cost (see `Memory overhead`_) for caches holding many small objects.
+
+Following a shifting working set
+================================
+
+The combination of the bounded, persistent History List and resident aging is
+what lets CLFUS track a working set that changes over time. New objects survive
+in history long enough to prove themselves and be admitted, while the frequency
+advantage of the previous working set decays until its members fall below the
+admission bar and are evicted. The ``ram_cache_adaptivity`` (an abrupt change of
+the entire hot set) and ``ram_cache_drift`` (a gradually rolling working set)
+regression tests in ``CacheTest.cc`` exercise this and compare CLFUS against the
+simpler LRU RAM cache.
+
+Memory overhead
+===============
+
+Every object in the Cached List has a resident list entry; this per-object
+overhead (roughly ``ENTRY_OVERHEAD``) is counted against
+``proxy.config.cache.ram_cache.size``. Every object in the History List has a
+list entry too (about 88 bytes), but these are **not** counted against the
+configured size -- they are memory the process uses in addition to it.
+
+Because the overhead is per object, it is largest for a big cache holding many
+small objects. ``HISTORY_DIVISOR`` bounds the History List to roughly
+``_objects / 4`` entries to keep this cost modest: for example a 32 GB cache of
+1 KB objects holds about 32 million resident objects and therefore about 8
+million history entries (~700 MB), rather than ~2.8 GB at a full cache-worth.
 
 Compression and Decompression
 =============================