Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions pkg/compaction/compaction_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,90 @@ func TestSplitIndexForKeep(t *testing.T) {
}
}

// TestSplitIndexForKeep_NeverReturnsZeroForNonEmptyInput pins the
// invariant that SplitIndexForKeep, given a non-empty messages slice,
// always returns a value in [1, len(messages)] — never 0.
//
// A zero return would be harmful downstream: the compactor's
// firstKeptSessionIndex maps the split back to a sess.Messages
// position, and when a prior summary exists, sessIndices[0] refers to
// the prior summary item rather than the start of the prior kept-tail.
// A zero split would land the new FirstKeptEntry on the prior summary
// item, so the next reconstruction would drop the kept conversation
// between the prior FirstKeptEntry and the prior summary index (only
// the synthetic-summary user message from
// session.buildSessionSummaryMessages would survive).
//
// Zero is unreachable in the current implementation:
// lastValidBoundary starts at len(messages) and is only lowered to i
// AFTER the overflow check for iteration i, so a boundary of 0
// recorded at i=0 can never be the overflow-path return value — the
// loop exits and the function falls through to `return len(messages)`.
// Hence the overflow path yields values ≥ 1 and the no-overflow path
// yields len(messages).
func TestSplitIndexForKeep_NeverReturnsZeroForNonEmptyInput(t *testing.T) {
	t.Parallel()

	// mk builds a minimal chat message for the fixtures below.
	mk := func(role chat.MessageRole, content string) chat.Message {
		return chat.Message{Role: role, Content: content}
	}

	tests := []struct {
		name      string
		messages  []chat.Message
		maxTokens int64
	}{
		{
			name:      "single user message that fits",
			messages:  []chat.Message{mk(chat.MessageRoleUser, "hi")},
			maxTokens: 1_000_000,
		},
		{
			name:      "single user message that overflows",
			messages:  []chat.Message{mk(chat.MessageRoleUser, strings.Repeat("x", 10_000))},
			maxTokens: 1,
		},
		{
			name: "first message is user/assistant and fits, rest overflows",
			messages: []chat.Message{
				mk(chat.MessageRoleUser, "u0"),
				mk(chat.MessageRoleAssistant, strings.Repeat("a", 40_000)),
				mk(chat.MessageRoleUser, strings.Repeat("b", 40_000)),
			},
			maxTokens: 5_000,
		},
		{
			name: "every message is user/assistant and everything fits (returns len)",
			messages: []chat.Message{
				mk(chat.MessageRoleUser, "u0"),
				mk(chat.MessageRoleAssistant, "a0"),
				mk(chat.MessageRoleUser, "u1"),
			},
			maxTokens: 1_000_000,
		},
		{
			name: "first message is the synthetic Session Summary user message (the prior-summary case)",
			messages: []chat.Message{
				mk(chat.MessageRoleUser, "Session Summary: "+strings.Repeat("s", 80_000)),
				mk(chat.MessageRoleUser, strings.Repeat("u", 40_000)),
				mk(chat.MessageRoleAssistant, strings.Repeat("a", 40_000)),
			},
			maxTokens: 5_000,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()
			split := SplitIndexForKeep(tt.messages, tt.maxTokens)
			assert.NotZero(t, split, "SplitIndexForKeep must not return 0 for non-empty input (would land FirstKeptEntry on the prior summary item and drop the prior kept-tail)")
			assert.GreaterOrEqual(t, split, 1)
			assert.LessOrEqual(t, split, len(tt.messages))
		})
	}
}

func TestFirstIndexInBudget(t *testing.T) {
t.Parallel()

Expand Down
84 changes: 40 additions & 44 deletions pkg/runtime/compactor/compactor.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,24 +153,35 @@ func RunLLM(ctx context.Context, args LLMArgs) (*Result, error) {
// [maxKeepTokens] window. Used by the runtime when a hook supplies
// its own summary so the kept-tail policy stays consistent across
// the two strategies.
func ComputeFirstKeptEntry(sess *session.Session, a *agent.Agent) int {
return mapToSessionIndex(sess, compaction.SplitIndexForKeep(nonSystemMessages(sess, a), maxKeepTokens))
func ComputeFirstKeptEntry(sess *session.Session) int {
messages, sessIndices := gatherCompactionInput(sess)
return firstKeptSessionIndex(sess, sessIndices, compaction.SplitIndexForKeep(messages, maxKeepTokens))
}

// nonSystemMessages returns the agent-visible messages in sess with
// the system entries filtered out. Both the LLM strategy (via
// [extractMessages]) and the hook-supplied path (via
// [ComputeFirstKeptEntry]) operate on this same shape, which is also
// what [compaction.SplitIndexForKeep] expects.
func nonSystemMessages(sess *session.Session, a *agent.Agent) []chat.Message {
var messages []chat.Message
for _, msg := range sess.GetMessages(a) {
if msg.Role == chat.MessageRoleSystem {
continue
}
messages = append(messages, msg)
// gatherCompactionInput is a thin wrapper around
// [session.Session.CompactionInput] that clears compaction-specific
// fields on the returned chat messages.
//
// Cost is per-message bookkeeping already accumulated into
// sess.TotalCost(); leaving it set would double-count when the
// summarization session reports its own TotalCost back through
// [Result.Cost]. CacheControl pins a provider cache checkpoint
// (Anthropic prompt caching, etc.); pinning it inside the
// summarization sub-call would associate the cache point with the
// throwaway compaction conversation rather than the parent session.
//
// The reconstruction work — surfacing a synthetic "Session Summary"
// message when a prior summary exists, picking the right start index
// past the prior summary, and tracking origin indices in sess.Messages
// — lives on Session itself so it can run under sess.mu.RLock and stay
// race-safe against concurrent AddMessage / ApplyCompaction calls.
func gatherCompactionInput(sess *session.Session) ([]chat.Message, []int) {
messages, sessIndices := sess.CompactionInput()
for i := range messages {
messages[i].Cost = 0
messages[i].CacheControl = false
}
return messages
return messages, sessIndices
}

// extractMessages returns the messages to send to the compaction
Expand All @@ -188,23 +199,11 @@ func nonSystemMessages(sess *session.Session, a *agent.Agent) []chat.Message {
// If the conversation tail itself doesn't fit in
// (contextLimit − MaxSummaryTokens − prompt-overhead), older messages
// are dropped from the front of the to-compact list to make room.
func extractMessages(sess *session.Session, a *agent.Agent, contextLimit int64, additionalPrompt string) ([]chat.Message, int) {
messages := nonSystemMessages(sess, a)
// Clear Cost and CacheControl on our local copy of the conversation.
// Cost is per-message bookkeeping that's already accumulated into
// sess.TotalCost(); leaving it set would double-count when the
// summarization session reports its own TotalCost back through the
// compactor.Result.Cost field. CacheControl pins a provider cache
// checkpoint (Anthropic prompt caching, etc.); pinning it inside the
// summarization sub-call would associate the cache point with the
// throwaway compaction conversation rather than the parent session.
for i := range messages {
messages[i].Cost = 0
messages[i].CacheControl = false
}
func extractMessages(sess *session.Session, _ *agent.Agent, contextLimit int64, additionalPrompt string) ([]chat.Message, int) {
messages, sessIndices := gatherCompactionInput(sess)

splitIdx := compaction.SplitIndexForKeep(messages, maxKeepTokens)
firstKeptEntry := mapToSessionIndex(sess, splitIdx)
firstKeptEntry := firstKeptSessionIndex(sess, sessIndices, splitIdx)
messages = messages[:splitIdx]

systemPromptMessage := chat.Message{
Expand Down Expand Up @@ -238,21 +237,18 @@ func extractMessages(sess *session.Session, a *agent.Agent, contextLimit int64,
return messages, firstKeptEntry
}

// mapToSessionIndex maps an index in the non-system-filtered message
// list (the form [extractMessages] operates on) back to an index in
// sess.Messages. Returns len(sess.Messages) when filteredIdx is past
// the end — i.e. "compact everything; keep nothing of the tail".
func mapToSessionIndex(sess *session.Session, filteredIdx int) int {
count := 0
for i, item := range sess.Messages {
if item.IsMessage() && item.Message.Message.Role != chat.MessageRoleSystem {
if count == filteredIdx {
return i
}
count++
}
// firstKeptSessionIndex translates a split index produced against the
// chat-message list returned by [gatherCompactionInput] back to an
// index in sess.Messages, suitable for the new summary's
// FirstKeptEntry. Out-of-range splits map to len(sess.Messages),
// matching the "compact everything; keep nothing of the tail"
// sentinel that session.buildSessionSummaryMessages handles by
// skipping the conversation loop.
func firstKeptSessionIndex(sess *session.Session, sessIndices []int, splitIdx int) int {
if splitIdx >= len(sessIndices) {
return len(sess.Messages)
}
return len(sess.Messages)
return sessIndices[splitIdx]
}

// toItems wraps a flat slice of chat messages into session items so a
Expand Down
150 changes: 138 additions & 12 deletions pkg/runtime/compactor/compactor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,10 @@ func TestExtractMessages_KeepsRecentMessages(t *testing.T) {
func TestComputeFirstKeptEntry(t *testing.T) {
t.Parallel()

a := agent.New("test", "")

t.Run("empty session returns 0", func(t *testing.T) {
t.Parallel()
sess := session.New()
assert.Equal(t, 0, ComputeFirstKeptEntry(sess, a))
assert.Equal(t, 0, ComputeFirstKeptEntry(sess))
})

t.Run("short conversation: split at end (compact everything)", func(t *testing.T) {
Expand All @@ -194,11 +192,11 @@ func TestComputeFirstKeptEntry(t *testing.T) {
session.NewMessageItem(&session.Message{Message: chat.Message{Role: chat.MessageRoleUser, Content: "hi"}}),
session.NewMessageItem(&session.Message{Message: chat.Message{Role: chat.MessageRoleAssistant, Content: "hello"}}),
}))
assert.Equal(t, len(sess.Messages), ComputeFirstKeptEntry(sess, a))
assert.Equal(t, len(sess.Messages), ComputeFirstKeptEntry(sess))
})
}

func TestMapToSessionIndex(t *testing.T) {
func TestGatherCompactionInput_NoPriorSummary(t *testing.T) {
t.Parallel()

sess := session.New(session.WithMessages([]session.Item{
Expand All @@ -209,13 +207,141 @@ func TestMapToSessionIndex(t *testing.T) {
session.NewMessageItem(&session.Message{Message: chat.Message{Role: chat.MessageRoleUser, Content: "u2"}}),
}))

// Filtered list (no system): [u1, a1, u2] → indices 0,1,2
// Map back to sess.Messages indices: 1, 2, 4
assert.Equal(t, 1, mapToSessionIndex(sess, 0))
assert.Equal(t, 2, mapToSessionIndex(sess, 1))
assert.Equal(t, 4, mapToSessionIndex(sess, 2))
// Past the end: returns len(sess.Messages)
assert.Equal(t, len(sess.Messages), mapToSessionIndex(sess, 3))
messages, sessIndices := gatherCompactionInput(sess)
require.Len(t, messages, 3)
assert.Equal(t, []int{1, 2, 4}, sessIndices)

assert.Equal(t, 1, firstKeptSessionIndex(sess, sessIndices, 0))
assert.Equal(t, 2, firstKeptSessionIndex(sess, sessIndices, 1))
assert.Equal(t, 4, firstKeptSessionIndex(sess, sessIndices, 2))
// Past the end: returns len(sess.Messages) (compact-everything sentinel).
assert.Equal(t, len(sess.Messages), firstKeptSessionIndex(sess, sessIndices, 3))
}

// TestGatherCompactionInput_WithPriorSummary pins the regression where
// an existing summary in the history made the runtime miscompute
// FirstKeptEntry: counting non-system items from index 0 ignores both
// the synthetic "Session Summary" message that surfaces at the head of
// the chat list and the prior summary's start offset, so the kept
// boundary lands far too early in the session.
func TestGatherCompactionInput_WithPriorSummary(t *testing.T) {
	t.Parallel()

	// newMsgItem wraps a role/content pair into a session item so the
	// fixture below stays readable.
	newMsgItem := func(role chat.MessageRole, content string) session.Item {
		return session.NewMessageItem(&session.Message{Message: chat.Message{Role: role, Content: content}})
	}

	// Session shape:
	//   [0..7]  : pre-compaction conversation (already summarized).
	//   [8..9]  : kept tail of the prior compaction (FirstKeptEntry=8).
	//   [10]    : prior summary item.
	//   [11..14]: post-compaction conversation.
	items := []session.Item{
		newMsgItem(chat.MessageRoleUser, "u0"),
		newMsgItem(chat.MessageRoleAssistant, "a0"),
		newMsgItem(chat.MessageRoleUser, "u1"),
		newMsgItem(chat.MessageRoleAssistant, "a1"),
		newMsgItem(chat.MessageRoleUser, "u2"),
		newMsgItem(chat.MessageRoleAssistant, "a2"),
		newMsgItem(chat.MessageRoleUser, "u3"),
		newMsgItem(chat.MessageRoleAssistant, "a3"),
		newMsgItem(chat.MessageRoleUser, "u4-kept"),
		newMsgItem(chat.MessageRoleAssistant, "a4-kept"),
		{Summary: "prior summary", FirstKeptEntry: 8},
		newMsgItem(chat.MessageRoleUser, "u5"),
		newMsgItem(chat.MessageRoleAssistant, "a5"),
		newMsgItem(chat.MessageRoleUser, "u6"),
		newMsgItem(chat.MessageRoleAssistant, "a6"),
	}
	sess := session.New(session.WithMessages(items))

	messages, sessIndices := gatherCompactionInput(sess)

	// Expected filtered list:
	//   [0]: synthetic Session Summary user message (origin: prior summary at idx 10)
	//   [1]: items[8]  (kept-tail user)
	//   [2]: items[9]  (kept-tail assistant)
	//   [3]: items[11] (post-summary user)
	//   [4]: items[12] (post-summary assistant)
	//   [5]: items[13]
	//   [6]: items[14]
	require.Len(t, messages, 7)
	assert.Equal(t, chat.MessageRoleUser, messages[0].Role)
	assert.Contains(t, messages[0].Content, "Session Summary: prior summary")
	assert.Equal(t, []int{10, 8, 9, 11, 12, 13, 14}, sessIndices)

	// A split that keeps the last two messages should map to items[13]
	// (the user message at idx 13), not to items[5] which is what the
	// old count-from-zero implementation produced.
	assert.Equal(t, 13, firstKeptSessionIndex(sess, sessIndices, 5))

	// A split that keeps the entire post-summary tail (everything from
	// items[8] onwards including the prior summary) maps the synthetic
	// message back to its originating summary index so the prior
	// summary item is preserved across the new compaction.
	assert.Equal(t, 10, firstKeptSessionIndex(sess, sessIndices, 0))

	// Out-of-range split: compact everything, keep nothing.
	assert.Equal(t, len(sess.Messages), firstKeptSessionIndex(sess, sessIndices, len(messages)))
}

// TestFirstKeptSessionIndex_SplitZeroOnEmptyInputUsesSafeSentinel
// pins the one path through which splitIdx == 0 can reach
// firstKeptSessionIndex: an empty messages list (only possible for a
// brand-new session with no prior summary). sessIndices is then empty
// too, so the out-of-range branch returns len(sess.Messages) — the
// "compact everything; keep nothing" sentinel that
// session.buildSessionSummaryMessages safely treats as no kept tail.
//
// This is the safety net behind the
// SplitIndexForKeep_NeverReturnsZeroForNonEmptyInput invariant: the
// dangerous combination ("sessIndices[0] = lastSummaryIdx is
// returned, dropping the prior kept-tail in the next reconstruction")
// requires a non-empty sessIndices AND splitIdx==0 — the invariant
// rules that out, and this test pins the empty-input alternative.
func TestFirstKeptSessionIndex_SplitZeroOnEmptyInputUsesSafeSentinel(t *testing.T) {
	t.Parallel()

	sess := session.New()

	// splitIdx==0 with a nil index list is the only legitimate shape
	// here; the out-of-range branch must yield the
	// len(sess.Messages) sentinel.
	assert.Equal(t, len(sess.Messages), firstKeptSessionIndex(sess, nil, 0))
}

// TestGatherCompactionInput_PriorSummaryWithoutFirstKeptEntry covers a
// prior summary that was applied as "compact everything, keep nothing"
// (FirstKeptEntry left at its zero value): iteration must resume
// strictly after the summary item rather than restarting from the top
// of the session.
func TestGatherCompactionInput_PriorSummaryWithoutFirstKeptEntry(t *testing.T) {
	t.Parallel()

	// item wraps a role/content pair into a session message item.
	item := func(role chat.MessageRole, content string) session.Item {
		return session.NewMessageItem(&session.Message{Message: chat.Message{Role: role, Content: content}})
	}

	sess := session.New(session.WithMessages([]session.Item{
		item(chat.MessageRoleUser, "old"),
		item(chat.MessageRoleAssistant, "old-reply"),
		{Summary: "prior summary"},
		item(chat.MessageRoleUser, "new"),
		item(chat.MessageRoleAssistant, "new-reply"),
	}))

	messages, sessIndices := gatherCompactionInput(sess)

	// Only the synthetic-summary message plus the two post-summary
	// items survive: indices 0 and 1 were compacted into the prior
	// summary and FirstKeptEntry is zero, so nothing before the
	// summary item is replayed.
	require.Len(t, messages, 3)
	assert.Equal(t, []int{2, 3, 4}, sessIndices)
}

func TestRunLLM_DoesNotDuplicateSystemPrompt(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/runtime/session_compaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ func summaryFromHook(sess *session.Session, a *agent.Agent, pre *hooks.Result) *
"session_id", sess.ID, "agent", a.Name(), "summary_length", len(pre.Summary))
return &compactor.Result{
Summary: pre.Summary,
FirstKeptEntry: compactor.ComputeFirstKeptEntry(sess, a),
FirstKeptEntry: compactor.ComputeFirstKeptEntry(sess),
// Estimate the summary's token count for session bookkeeping;
// no LLM was called so Cost stays at the zero value.
InputTokens: compaction.EstimateMessageTokens(&chat.Message{
Expand Down
Loading
Loading