Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions pkg/compaction/compaction_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,90 @@ func TestSplitIndexForKeep(t *testing.T) {
}
}

// TestSplitIndexForKeep_NeverReturnsZeroForNonEmptyInput pins the
// invariant that SplitIndexForKeep, given a non-empty messages slice,
// always returns a value in [1, len(messages)] — never 0.
//
// A zero return would be harmful downstream: the compactor's
// firstKeptSessionIndex maps the split back to a sess.Messages
// position, and when a prior summary exists, sessIndices[0] refers to
// the prior summary item rather than the start of the prior kept-tail.
// A zero split would land the new FirstKeptEntry on the prior summary
// item, so the next reconstruction would drop the kept conversation
// between the prior FirstKeptEntry and the prior summary index (only
// the synthetic-summary user message from
// session.buildSessionSummaryMessages would survive).
//
// Zero is unreachable in the current implementation:
// lastValidBoundary starts at len(messages) and is only lowered to i
// AFTER the overflow check for iteration i, so a boundary of 0
// recorded at i=0 can never be the overflow-path return value — the
// loop exits and the function falls through to `return len(messages)`.
// Hence the overflow path yields values ≥ 1 and the no-overflow path
// yields len(messages).
func TestSplitIndexForKeep_NeverReturnsZeroForNonEmptyInput(t *testing.T) {
	t.Parallel()

	// mk builds a minimal chat message for the fixtures below.
	mk := func(role chat.MessageRole, content string) chat.Message {
		return chat.Message{Role: role, Content: content}
	}

	tests := []struct {
		name      string
		messages  []chat.Message
		maxTokens int64
	}{
		{
			name:      "single user message that fits",
			messages:  []chat.Message{mk(chat.MessageRoleUser, "hi")},
			maxTokens: 1_000_000,
		},
		{
			name:      "single user message that overflows",
			messages:  []chat.Message{mk(chat.MessageRoleUser, strings.Repeat("x", 10_000))},
			maxTokens: 1,
		},
		{
			name: "first message is user/assistant and fits, rest overflows",
			messages: []chat.Message{
				mk(chat.MessageRoleUser, "u0"),
				mk(chat.MessageRoleAssistant, strings.Repeat("a", 40_000)),
				mk(chat.MessageRoleUser, strings.Repeat("b", 40_000)),
			},
			maxTokens: 5_000,
		},
		{
			name: "every message is user/assistant and everything fits (returns len)",
			messages: []chat.Message{
				mk(chat.MessageRoleUser, "u0"),
				mk(chat.MessageRoleAssistant, "a0"),
				mk(chat.MessageRoleUser, "u1"),
			},
			maxTokens: 1_000_000,
		},
		{
			name: "first message is the synthetic Session Summary user message (the prior-summary case)",
			messages: []chat.Message{
				mk(chat.MessageRoleUser, "Session Summary: "+strings.Repeat("s", 80_000)),
				mk(chat.MessageRoleUser, strings.Repeat("u", 40_000)),
				mk(chat.MessageRoleAssistant, strings.Repeat("a", 40_000)),
			},
			maxTokens: 5_000,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()
			split := SplitIndexForKeep(tt.messages, tt.maxTokens)
			assert.NotZero(t, split, "SplitIndexForKeep must not return 0 for non-empty input (would land FirstKeptEntry on the prior summary item and drop the prior kept-tail)")
			assert.GreaterOrEqual(t, split, 1)
			assert.LessOrEqual(t, split, len(tt.messages))
		})
	}
}

func TestFirstIndexInBudget(t *testing.T) {
t.Parallel()

Expand Down
84 changes: 40 additions & 44 deletions pkg/runtime/compactor/compactor.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,24 +153,35 @@ func RunLLM(ctx context.Context, args LLMArgs) (*Result, error) {
// [maxKeepTokens] window. Used by the runtime when a hook supplies
// its own summary so the kept-tail policy stays consistent across
// the two strategies.
func ComputeFirstKeptEntry(sess *session.Session, a *agent.Agent) int {
return mapToSessionIndex(sess, compaction.SplitIndexForKeep(nonSystemMessages(sess, a), maxKeepTokens))
func ComputeFirstKeptEntry(sess *session.Session) int {
messages, sessIndices := gatherCompactionInput(sess)
return firstKeptSessionIndex(sess, sessIndices, compaction.SplitIndexForKeep(messages, maxKeepTokens))
}

// nonSystemMessages returns the agent-visible messages in sess with
// the system entries filtered out. Both the LLM strategy (via
// [extractMessages]) and the hook-supplied path (via
// [ComputeFirstKeptEntry]) operate on this same shape, which is also
// what [compaction.SplitIndexForKeep] expects.
func nonSystemMessages(sess *session.Session, a *agent.Agent) []chat.Message {
var messages []chat.Message
for _, msg := range sess.GetMessages(a) {
if msg.Role == chat.MessageRoleSystem {
continue
}
messages = append(messages, msg)
// gatherCompactionInput is a thin wrapper around
// [session.Session.CompactionInput] that clears compaction-specific
// fields on the returned chat messages.
//
// Cost is per-message bookkeeping already accumulated into
// sess.TotalCost(); leaving it set would double-count when the
// summarization session reports its own TotalCost back through
// [Result.Cost]. CacheControl pins a provider cache checkpoint
// (Anthropic prompt caching, etc.); pinning it inside the
// summarization sub-call would associate the cache point with the
// throwaway compaction conversation rather than the parent session.
//
// The reconstruction work — surfacing a synthetic "Session Summary"
// message when a prior summary exists, picking the right start index
// past the prior summary, and tracking origin indices in sess.Messages
// — lives on Session itself so it can run under sess.mu.RLock and stay
// race-safe against concurrent AddMessage / ApplyCompaction calls.
func gatherCompactionInput(sess *session.Session) ([]chat.Message, []int) {
messages, sessIndices := sess.CompactionInput()
for i := range messages {
messages[i].Cost = 0
messages[i].CacheControl = false
}
return messages
return messages, sessIndices
}

// extractMessages returns the messages to send to the compaction
Expand All @@ -188,23 +199,11 @@ func nonSystemMessages(sess *session.Session, a *agent.Agent) []chat.Message {
// If the conversation tail itself doesn't fit in
// (contextLimit − MaxSummaryTokens − prompt-overhead), older messages
// are dropped from the front of the to-compact list to make room.
func extractMessages(sess *session.Session, a *agent.Agent, contextLimit int64, additionalPrompt string) ([]chat.Message, int) {
messages := nonSystemMessages(sess, a)
// Clear Cost and CacheControl on our local copy of the conversation.
// Cost is per-message bookkeeping that's already accumulated into
// sess.TotalCost(); leaving it set would double-count when the
// summarization session reports its own TotalCost back through the
// compactor.Result.Cost field. CacheControl pins a provider cache
// checkpoint (Anthropic prompt caching, etc.); pinning it inside the
// summarization sub-call would associate the cache point with the
// throwaway compaction conversation rather than the parent session.
for i := range messages {
messages[i].Cost = 0
messages[i].CacheControl = false
}
func extractMessages(sess *session.Session, _ *agent.Agent, contextLimit int64, additionalPrompt string) ([]chat.Message, int) {
messages, sessIndices := gatherCompactionInput(sess)

splitIdx := compaction.SplitIndexForKeep(messages, maxKeepTokens)
firstKeptEntry := mapToSessionIndex(sess, splitIdx)
firstKeptEntry := firstKeptSessionIndex(sess, sessIndices, splitIdx)
messages = messages[:splitIdx]

systemPromptMessage := chat.Message{
Expand Down Expand Up @@ -238,21 +237,18 @@ func extractMessages(sess *session.Session, a *agent.Agent, contextLimit int64,
return messages, firstKeptEntry
}

// mapToSessionIndex maps an index in the non-system-filtered message
// list (the form [extractMessages] operates on) back to an index in
// sess.Messages. Returns len(sess.Messages) when filteredIdx is past
// the end — i.e. "compact everything; keep nothing of the tail".
func mapToSessionIndex(sess *session.Session, filteredIdx int) int {
count := 0
for i, item := range sess.Messages {
if item.IsMessage() && item.Message.Message.Role != chat.MessageRoleSystem {
if count == filteredIdx {
return i
}
count++
}
// firstKeptSessionIndex translates a split index produced against the
// chat-message list returned by [gatherCompactionInput] back to an
// index in sess.Messages, suitable for the new summary's
// FirstKeptEntry. Out-of-range splits map to len(sess.Messages),
// matching the "compact everything; keep nothing of the tail"
// sentinel that session.buildSessionSummaryMessages handles by
// skipping the conversation loop.
func firstKeptSessionIndex(sess *session.Session, sessIndices []int, splitIdx int) int {
if splitIdx >= len(sessIndices) {
return len(sess.Messages)
}
return len(sess.Messages)
return sessIndices[splitIdx]
}

// toItems wraps a flat slice of chat messages into session items so a
Expand Down
150 changes: 138 additions & 12 deletions pkg/runtime/compactor/compactor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,10 @@ func TestExtractMessages_KeepsRecentMessages(t *testing.T) {
func TestComputeFirstKeptEntry(t *testing.T) {
t.Parallel()

a := agent.New("test", "")

t.Run("empty session returns 0", func(t *testing.T) {
t.Parallel()
sess := session.New()
assert.Equal(t, 0, ComputeFirstKeptEntry(sess, a))
assert.Equal(t, 0, ComputeFirstKeptEntry(sess))
})

t.Run("short conversation: split at end (compact everything)", func(t *testing.T) {
Expand All @@ -194,11 +192,11 @@ func TestComputeFirstKeptEntry(t *testing.T) {
session.NewMessageItem(&session.Message{Message: chat.Message{Role: chat.MessageRoleUser, Content: "hi"}}),
session.NewMessageItem(&session.Message{Message: chat.Message{Role: chat.MessageRoleAssistant, Content: "hello"}}),
}))
assert.Equal(t, len(sess.Messages), ComputeFirstKeptEntry(sess, a))
assert.Equal(t, len(sess.Messages), ComputeFirstKeptEntry(sess))
})
}

func TestMapToSessionIndex(t *testing.T) {
func TestGatherCompactionInput_NoPriorSummary(t *testing.T) {
t.Parallel()

sess := session.New(session.WithMessages([]session.Item{
Expand All @@ -209,13 +207,141 @@ func TestMapToSessionIndex(t *testing.T) {
session.NewMessageItem(&session.Message{Message: chat.Message{Role: chat.MessageRoleUser, Content: "u2"}}),
}))

// Filtered list (no system): [u1, a1, u2] → indices 0,1,2
// Map back to sess.Messages indices: 1, 2, 4
assert.Equal(t, 1, mapToSessionIndex(sess, 0))
assert.Equal(t, 2, mapToSessionIndex(sess, 1))
assert.Equal(t, 4, mapToSessionIndex(sess, 2))
// Past the end: returns len(sess.Messages)
assert.Equal(t, len(sess.Messages), mapToSessionIndex(sess, 3))
messages, sessIndices := gatherCompactionInput(sess)
require.Len(t, messages, 3)
assert.Equal(t, []int{1, 2, 4}, sessIndices)

assert.Equal(t, 1, firstKeptSessionIndex(sess, sessIndices, 0))
assert.Equal(t, 2, firstKeptSessionIndex(sess, sessIndices, 1))
assert.Equal(t, 4, firstKeptSessionIndex(sess, sessIndices, 2))
// Past the end: returns len(sess.Messages) (compact-everything sentinel).
assert.Equal(t, len(sess.Messages), firstKeptSessionIndex(sess, sessIndices, 3))
}

// TestGatherCompactionInput_WithPriorSummary pins the regression where
// an existing summary in the history made the runtime miscompute
// FirstKeptEntry: counting non-system items from index 0 ignores both
// the synthetic "Session Summary" message that surfaces at the head of
// the chat list and the prior summary's start offset, so the kept
// boundary lands far too early in the session.
func TestGatherCompactionInput_WithPriorSummary(t *testing.T) {
	t.Parallel()

	// newMsgItem wraps a role/content pair into a session item so the
	// fixture below stays readable.
	newMsgItem := func(role chat.MessageRole, content string) session.Item {
		return session.NewMessageItem(&session.Message{Message: chat.Message{Role: role, Content: content}})
	}

	// Session shape:
	//   [0..7]  : pre-compaction conversation (already summarized).
	//   [8..9]  : kept tail of the prior compaction (FirstKeptEntry=8).
	//   [10]    : prior summary item.
	//   [11..14]: post-compaction conversation.
	items := []session.Item{
		newMsgItem(chat.MessageRoleUser, "u0"),
		newMsgItem(chat.MessageRoleAssistant, "a0"),
		newMsgItem(chat.MessageRoleUser, "u1"),
		newMsgItem(chat.MessageRoleAssistant, "a1"),
		newMsgItem(chat.MessageRoleUser, "u2"),
		newMsgItem(chat.MessageRoleAssistant, "a2"),
		newMsgItem(chat.MessageRoleUser, "u3"),
		newMsgItem(chat.MessageRoleAssistant, "a3"),
		newMsgItem(chat.MessageRoleUser, "u4-kept"),
		newMsgItem(chat.MessageRoleAssistant, "a4-kept"),
		{Summary: "prior summary", FirstKeptEntry: 8},
		newMsgItem(chat.MessageRoleUser, "u5"),
		newMsgItem(chat.MessageRoleAssistant, "a5"),
		newMsgItem(chat.MessageRoleUser, "u6"),
		newMsgItem(chat.MessageRoleAssistant, "a6"),
	}
	sess := session.New(session.WithMessages(items))

	messages, sessIndices := gatherCompactionInput(sess)

	// Expected filtered list:
	//   [0]: synthetic Session Summary user message (origin: prior summary at idx 10)
	//   [1]: items[8]  (kept-tail user)
	//   [2]: items[9]  (kept-tail assistant)
	//   [3]: items[11] (post-summary user)
	//   [4]: items[12] (post-summary assistant)
	//   [5]: items[13]
	//   [6]: items[14]
	require.Len(t, messages, 7)
	assert.Equal(t, chat.MessageRoleUser, messages[0].Role)
	assert.Contains(t, messages[0].Content, "Session Summary: prior summary")
	assert.Equal(t, []int{10, 8, 9, 11, 12, 13, 14}, sessIndices)

	// A split that keeps the last two messages should map to items[13]
	// (the user message at idx 13), not to items[5] which is what the
	// old count-from-zero implementation produced.
	assert.Equal(t, 13, firstKeptSessionIndex(sess, sessIndices, 5))

	// A split that keeps the entire post-summary tail (everything from
	// items[8] onwards including the prior summary) maps the synthetic
	// message back to its originating summary index so the prior
	// summary item is preserved across the new compaction.
	assert.Equal(t, 10, firstKeptSessionIndex(sess, sessIndices, 0))

	// Out-of-range split: compact everything, keep nothing.
	assert.Equal(t, len(sess.Messages), firstKeptSessionIndex(sess, sessIndices, len(messages)))
}

// TestFirstKeptSessionIndex_SplitZeroOnEmptyInputUsesSafeSentinel
// pins the one path through which splitIdx == 0 can reach
// firstKeptSessionIndex: an empty messages list (only possible for a
// brand-new session with no prior summary). sessIndices is then empty
// too, so the out-of-range branch returns len(sess.Messages) — the
// "compact everything; keep nothing" sentinel that
// session.buildSessionSummaryMessages safely treats as no kept tail.
//
// This is the safety net behind the
// SplitIndexForKeep_NeverReturnsZeroForNonEmptyInput invariant: the
// dangerous combination ("sessIndices[0] = lastSummaryIdx is
// returned, dropping the prior kept-tail in the next reconstruction")
// requires a non-empty sessIndices AND splitIdx==0 — the invariant
// rules that out, and this test pins the empty-input alternative.
func TestFirstKeptSessionIndex_SplitZeroOnEmptyInputUsesSafeSentinel(t *testing.T) {
	t.Parallel()

	sess := session.New()

	// splitIdx==0 with a nil index list is the only legitimate shape
	// here; the out-of-range branch must yield the
	// len(sess.Messages) sentinel.
	assert.Equal(t, len(sess.Messages), firstKeptSessionIndex(sess, nil, 0))
}

// TestGatherCompactionInput_PriorSummaryWithoutFirstKeptEntry covers a
// prior summary that was applied as "compact everything, keep nothing"
// (FirstKeptEntry left at its zero value): iteration must resume
// strictly after the summary item rather than restarting from the top
// of the session.
func TestGatherCompactionInput_PriorSummaryWithoutFirstKeptEntry(t *testing.T) {
	t.Parallel()

	// item wraps a role/content pair into a session message item.
	item := func(role chat.MessageRole, content string) session.Item {
		return session.NewMessageItem(&session.Message{Message: chat.Message{Role: role, Content: content}})
	}

	sess := session.New(session.WithMessages([]session.Item{
		item(chat.MessageRoleUser, "old"),
		item(chat.MessageRoleAssistant, "old-reply"),
		{Summary: "prior summary"},
		item(chat.MessageRoleUser, "new"),
		item(chat.MessageRoleAssistant, "new-reply"),
	}))

	messages, sessIndices := gatherCompactionInput(sess)

	// Only the synthetic-summary message plus the two post-summary
	// items survive: indices 0 and 1 were compacted into the prior
	// summary and FirstKeptEntry is zero, so nothing before the
	// summary item is replayed.
	require.Len(t, messages, 3)
	assert.Equal(t, []int{2, 3, 4}, sessIndices)
}

func TestRunLLM_DoesNotDuplicateSystemPrompt(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/runtime/session_compaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ func summaryFromHook(sess *session.Session, a *agent.Agent, pre *hooks.Result) *
"session_id", sess.ID, "agent", a.Name(), "summary_length", len(pre.Summary))
return &compactor.Result{
Summary: pre.Summary,
FirstKeptEntry: compactor.ComputeFirstKeptEntry(sess, a),
FirstKeptEntry: compactor.ComputeFirstKeptEntry(sess),
// Estimate the summary's token count for session bookkeeping;
// no LLM was called so Cost stays at the zero value.
InputTokens: compaction.EstimateMessageTokens(&chat.Message{
Expand Down
Loading
Loading