From 1d61cdb8c33ee5095db655f9e574cc254fb5cfd0 Mon Sep 17 00:00:00 2001 From: ScriptedAlchemy Date: Mon, 29 Jun 2026 06:49:21 +0000 Subject: [PATCH 1/6] feat: add project registry CLI and aggregate LCM status --- src/cli.rs | 37 ++++++ src/main.rs | 6 + src/project_cmd.rs | 187 ++++++++++++++++++++++++++ src/sessions/lcm/gc.rs | 6 +- src/sessions/lcm/query.rs | 212 +++++++++++++++++++++++++++++- tests/cli_help_test.rs | 4 + tests/cli_non_interactive_test.rs | 94 +++++++++++++ tests/mcp_handler_test.rs | 95 ++++++++++++- 8 files changed, 632 insertions(+), 9 deletions(-) create mode 100644 src/project_cmd.rs diff --git a/src/cli.rs b/src/cli.rs index bdd34824..e5c99d9b 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -361,6 +361,11 @@ pub enum Commands { #[command(subcommand)] action: SessionsAction, }, + /// Inspect registered TraceDecay projects from the global registry + Projects { + #[command(subcommand)] + action: ProjectsAction, + }, /// Manage multi-branch indexing Branch { #[command(subcommand)] @@ -395,6 +400,38 @@ pub enum Commands { }, } +#[derive(Subcommand)] +pub enum ProjectsAction { + /// List registered projects + List { + /// Maximum projects to show + #[arg(long, default_value_t = 25)] + limit: usize, + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// Search registered projects by id, path, alias, remote, or branch + Search { + /// Query text + query: String, + /// Maximum projects to show + #[arg(long, default_value_t = 25)] + limit: usize, + /// Output as JSON + #[arg(long)] + json: bool, + }, + /// Show registry context for one project id or path + Context { + /// Project id, root path, or registered alias + selector: String, + /// Output as JSON + #[arg(long)] + json: bool, + }, +} + #[derive(Subcommand)] pub enum LspAction { /// List supported language servers, availability, and install hints diff --git a/src/main.rs b/src/main.rs index 06dd81e3..a76f7900 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,6 +13,7 @@ mod cost_cmd; mod global; mod hook_cmd; mod lsp_cmd; +mod project_cmd; mod sessions_cmd; mod status_cmd; mod tool_command; @@ -834,6 +835,9 @@ async fn dispatch_command(command: Commands) -> tracedecay::errors::Result<()> { Commands::Sessions { action } => { sessions_cmd::handle_sessions_action(action).await?; } + Commands::Projects { action } => { + project_cmd::handle_projects_action(action).await?; + } Commands::Branch { action } => { commands::handle_branch_action(action).await?; } @@ -904,6 +908,7 @@ fn should_skip_startup_maintenance(command: &Commands) -> bool { | Commands::Lsp { .. } | Commands::Doctor { .. } | Commands::Migrate { .. } + | Commands::Projects { .. } | Commands::HookPreToolUse | Commands::HookPromptSubmit | Commands::HookStop @@ -968,6 +973,7 @@ fn should_skip_agent_install_maintenance(command: &Commands) -> bool { | Commands::Lsp { .. } | Commands::Doctor { .. } | Commands::Migrate { .. } + | Commands::Projects { .. } | Commands::Tool { .. } | Commands::Daemon { .. } ) diff --git a/src/project_cmd.rs b/src/project_cmd.rs new file mode 100644 index 00000000..1d5d1ab0 --- /dev/null +++ b/src/project_cmd.rs @@ -0,0 +1,187 @@ +use std::path::Path; + +use serde_json::json; +use tracedecay::errors::{Result, TraceDecayError}; +use tracedecay::global_db::{CodeProjectRecord, GlobalDb, ProjectRegistryContext}; + +use crate::cli::ProjectsAction; + +const MAX_LIMIT: usize = 1_000; + +pub(crate) async fn handle_projects_action(action: ProjectsAction) -> Result<()> { + let db = GlobalDb::open() + .await + .ok_or_else(|| TraceDecayError::Config { + message: + "no TraceDecay global registry found; run `tracedecay init` in a project first" + .to_string(), + })?; + + match action { + ProjectsAction::List { limit, json } => { + let limit = bounded_limit(limit); + let projects = db.list_code_projects(limit).await; + print_projects("registered projects", projects, limit, json)?; + } + ProjectsAction::Search { query, limit, json } => { + let limit = bounded_limit(limit); + let projects = db.search_code_projects(&query, limit).await; + if json { + println!( + "{}", + serde_json::to_string_pretty(&json!({ + "query": query, + "limit": limit, + "projects": projects, + }))? + ); + } else { + print_project_table(&format!("projects matching \"{query}\""), &projects); + } + } + ProjectsAction::Context { selector, json } => { + let context = project_context(&db, &selector).await.ok_or_else(|| { + TraceDecayError::Config { + message: format!( + "registered project not found for '{selector}'; try `tracedecay projects search {selector}`" + ), + } + })?; + print_project_context(&context, json)?; + } + } + Ok(()) +} + +fn bounded_limit(limit: usize) -> usize { + limit.clamp(1, MAX_LIMIT) +} + +async fn project_context(db: &GlobalDb, selector: &str) -> Option { + if let Some(context) = db.project_registry_context_by_id(selector).await { + return Some(context); + } + db.project_registry_context_by_alias(Path::new(selector)) + .await +} + +fn print_projects( + label: &str, + projects: Vec, + limit: usize, + json_output: bool, +) -> Result<()> { + if json_output { + println!( + "{}", + serde_json::to_string_pretty(&json!({ + "limit": limit, + "projects": projects, + }))? + ); + } else { + print_project_table(label, &projects); + } + Ok(()) +} + +fn print_project_table(label: &str, projects: &[CodeProjectRecord]) { + if projects.is_empty() { + println!("No {label} found."); + return; + } + + let id_width = projects + .iter() + .map(|project| project.project_id.len()) + .max() + .unwrap_or("project_id".len()) + .max("project_id".len()); + let branch_width = projects + .iter() + .map(|project| { + project + .default_branch + .as_deref() + .unwrap_or("-") + .chars() + .count() + }) + .max() + .unwrap_or("branch".len()) + .max("branch".len()); + + println!("Found {} {label}:", projects.len()); + println!(); + println!( + " {id: Result<()> { + if json_output { + println!("{}", serde_json::to_string_pretty(context)?); + return Ok(()); + } + + let project = &context.project; + println!("Project: {}", project.project_id); + println!("root: {}", project.display_root); + if let Some(branch) = &project.default_branch { + println!("default branch: {branch}"); + } + if let Some(remote) = &project.git_remote_url { + println!("remote: {remote}"); + } + if let Some(git_common_dir) = &project.git_common_dir { + println!("git common dir: {git_common_dir}"); + } + println!("last seen: {}", project.last_seen_at); + + if !context.aliases.is_empty() { + println!(); + println!("Aliases:"); + for alias in &context.aliases { + println!(" {}", alias.alias_path); + } + } + + if !context.stores.is_empty() { + println!(); + println!("Stores:"); + for store_context in &context.stores { + let store = &store_context.store; + println!( + " {} [{} / {}] {}", + store.store_id, store.store_kind, store.storage_mode, store.store_relpath + ); + for scope in &store_context.graph_scopes { + println!( + " scope {} branch={} db={} writable={}", + scope.graph_scope_id, scope.branch_name, scope.db_relpath, scope.writable + ); + } + for artifact in &store_context.artifacts { + let size = artifact + .size_bytes + .map(|bytes| bytes.to_string()) + .unwrap_or_else(|| "-".to_string()); + println!( + " artifact {} path={} size={}", + artifact.artifact_kind, artifact.relpath, size + ); + } + } + } + Ok(()) +} diff --git a/src/sessions/lcm/gc.rs b/src/sessions/lcm/gc.rs index 80ed58e9..9b08ce34 100644 --- a/src/sessions/lcm/gc.rs +++ b/src/sessions/lcm/gc.rs @@ -147,7 +147,8 @@ pub async fn referenced_payload_refs( .query( "SELECT storage_kind, payload_ref, content, snippet_text, index_text, metadata_json FROM lcm_raw_messages - WHERE provider = ?1 AND (?2 IS NULL OR session_id = ?2)", + WHERE (?1 = 'all' OR provider = ?1) + AND (?2 IS NULL OR session_id = ?2)", params![provider, util::opt_text(session_id)], ) .await?; @@ -282,7 +283,8 @@ pub async fn payload_metadata_refs_for_scope( .query( "SELECT payload_ref FROM lcm_external_payloads - WHERE provider = ?1 AND (?2 IS NULL OR session_id = ?2)", + WHERE (?1 = 'all' OR provider = ?1) + AND (?2 IS NULL OR session_id = ?2)", params![provider, util::opt_text(session_id)], ) .await?; diff --git a/src/sessions/lcm/query.rs b/src/sessions/lcm/query.rs index 64424ecf..24c60bab 100644 --- a/src/sessions/lcm/query.rs +++ b/src/sessions/lcm/query.rs @@ -500,7 +500,24 @@ pub(crate) async fn status( if !lcm_table_exists(conn, "lcm_raw_messages").await? { return Ok(empty_status(schema_version, gc_config)); } + if provider == "all" { + return aggregate_provider_status(conn, storage_root, session_id, deep, gc_config).await; + } + + status_for_provider(conn, storage_root, provider, session_id, deep, gc_config).await +} +async fn status_for_provider( + conn: &Connection, + storage_root: &Path, + provider: &str, + session_id: Option<&str>, + deep: bool, + gc_config: &LcmGcConfig, +) -> Result { + let schema_version = schema::schema_version(conn) + .await + .unwrap_or(LCM_SCHEMA_VERSION); let payload_health = payload_health_detail( conn, storage_root, @@ -561,6 +578,187 @@ pub(crate) async fn status( }) } +async fn aggregate_provider_status( + conn: &Connection, + storage_root: &Path, + session_id: Option<&str>, + deep: bool, + gc_config: &LcmGcConfig, +) -> Result { + let schema_version = schema::schema_version(conn) + .await + .unwrap_or(LCM_SCHEMA_VERSION); + let providers = lcm_status_providers(conn, session_id).await?; + if providers.is_empty() { + return Ok(empty_status(schema_version, gc_config)); + } + + let mut aggregate = empty_status(schema_version, gc_config); + for provider in providers { + let status = + status_for_provider(conn, storage_root, &provider, session_id, deep, gc_config).await?; + merge_lcm_status(&mut aggregate, status); + } + let payload_health = + payload_health_detail(conn, storage_root, "all", session_id, deep, 20, gc_config).await?; + aggregate.external_payload_count = payload_health.payload.externalized_count; + aggregate.missing_payload_count = payload_health.payload.missing_count; + aggregate.unreferenced_payload_count = payload_health.payload.unreferenced_count; + aggregate.payload = payload_health.payload; + aggregate.payload_gc = payload_health.payload_gc; + aggregate.storage_scope = Some("project_local".to_string()); + aggregate.dag.compression_ratio = python_round_ratio_to_tenths( + aggregate.dag.total_source_tokens, + aggregate.dag.total_tokens, + ); + aggregate.redaction.enabled = aggregate.redaction.lossy_records > 0; + Ok(aggregate) +} + +async fn lcm_status_providers( + conn: &Connection, + session_id: Option<&str>, +) -> Result, LcmError> { + let mut rows = conn + .query( + "SELECT DISTINCT provider + FROM ( + SELECT provider, session_id FROM lcm_raw_messages + UNION + SELECT provider, session_id FROM lcm_summary_nodes + UNION + SELECT provider, session_id FROM lcm_external_payloads + UNION + SELECT provider, current_session_id AS session_id FROM lcm_lifecycle_state + ) + WHERE (?1 IS NULL OR session_id = ?1) + ORDER BY provider", + params![util::opt_text(session_id)], + ) + .await?; + let mut providers = Vec::new(); + while let Some(row) = rows.next().await? { + providers.push(row.get(0)?); + } + Ok(providers) +} + +fn merge_lcm_status(target: &mut LcmStatus, source: LcmStatus) { + target.raw_message_count += source.raw_message_count; + target.summary_node_count += source.summary_node_count; + target.external_payload_count += source.external_payload_count; + target.missing_payload_count += source.missing_payload_count; + target.unreferenced_payload_count += source.unreferenced_payload_count; + target.maintenance_debt_count += source.maintenance_debt_count; + target.store.messages += source.store.messages; + target.store.estimated_tokens += source.store.estimated_tokens; + target.dag.total_nodes += source.dag.total_nodes; + target.dag.total_tokens += source.dag.total_tokens; + target.dag.total_source_tokens += source.dag.total_source_tokens; + for (depth, source_depth) in source.dag.depths { + let target_depth = target + .dag + .depths + .entry(depth) + .or_insert_with(|| LcmDagDepthStatus { + count: 0, + tokens: 0, + source_tokens: 0, + }); + target_depth.count += source_depth.count; + target_depth.tokens += source_depth.tokens; + target_depth.source_tokens += source_depth.source_tokens; + } + merge_payload_status(&mut target.payload, source.payload); + merge_payload_gc_status(&mut target.payload_gc, source.payload_gc); + target.lifecycle.lifecycle_state_count += source.lifecycle.lifecycle_state_count; + target.lifecycle.frontier_count += source.lifecycle.frontier_count; + target.lifecycle.maintenance_debt_count += source.lifecycle.maintenance_debt_count; + target.redaction.lossy_records += source.redaction.lossy_records; + target.redaction.legacy_truncated_count += source.redaction.legacy_truncated_count; +} + +fn merge_payload_status(target: &mut LcmPayloadStatus, source: LcmPayloadStatus) { + target.externalized_count += source.externalized_count; + target.missing_count += source.missing_count; + target.unreferenced_count += source.unreferenced_count; + target.placeholder_ref_count += source.placeholder_ref_count; + target.missing_placeholder_metadata_count += source.missing_placeholder_metadata_count; + target.missing_placeholder_file_count += source.missing_placeholder_file_count; + target.gc_candidate_count += source.gc_candidate_count; + target.root_contained &= source.root_contained; + target.orphan_file_count += source.orphan_file_count; + target.tombstoned_count += source.tombstoned_count; + target.referenced_count += source.referenced_count; + target.total_bytes += source.total_bytes; + target.referenced_bytes += source.referenced_bytes; + target.orphan_file_bytes += source.orphan_file_bytes; + target.reclaimable_bytes += source.reclaimable_bytes; + target.reclaimable_bytes_after_grace += source.reclaimable_bytes_after_grace; + target.integrity_mismatch_count = match ( + target.integrity_mismatch_count, + source.integrity_mismatch_count, + ) { + (Some(left), Some(right)) => Some(left + right), + (Some(value), None) | (None, Some(value)) => Some(value), + (None, None) => None, + }; +} + +fn merge_payload_gc_status(target: &mut LcmPayloadGcStatus, source: LcmPayloadGcStatus) { + target.last_gc_at = max_option_i64(target.last_gc_at, source.last_gc_at); + target.last_gc_duration_ms = + max_option_u64(target.last_gc_duration_ms, source.last_gc_duration_ms); + if target.last_gc_status.as_deref() != Some("failed") { + target.last_gc_status = source.last_gc_status.or(target.last_gc_status.take()); + } + target.last_gc_error = source.last_gc_error.or(target.last_gc_error.take()); + target.last_reaped_refs = sum_option_i64(target.last_reaped_refs, source.last_reaped_refs); + target.last_reaped_bytes = sum_option_u64(target.last_reaped_bytes, source.last_reaped_bytes); + target.next_run_eligible_at = + min_option_i64(target.next_run_eligible_at, source.next_run_eligible_at); +} + +fn max_option_i64(left: Option, right: Option) -> Option { + match (left, right) { + (Some(left), Some(right)) => Some(left.max(right)), + (Some(value), None) | (None, Some(value)) => Some(value), + (None, None) => None, + } +} + +fn min_option_i64(left: Option, right: Option) -> Option { + match (left, right) { + (Some(left), Some(right)) => Some(left.min(right)), + (Some(value), None) | (None, Some(value)) => Some(value), + (None, None) => None, + } +} + +fn sum_option_i64(left: Option, right: Option) -> Option { + match (left, right) { + (Some(left), Some(right)) => Some(left + right), + (Some(value), None) | (None, Some(value)) => Some(value), + (None, None) => None, + } +} + +fn max_option_u64(left: Option, right: Option) -> Option { + match (left, right) { + (Some(left), Some(right)) => Some(left.max(right)), + (Some(value), None) | (None, Some(value)) => Some(value), + (None, None) => None, + } +} + +fn sum_option_u64(left: Option, right: Option) -> Option { + match (left, right) { + (Some(left), Some(right)) => Some(left + right), + (Some(value), None) | (None, Some(value)) => Some(value), + (None, None) => None, + } +} + async fn lcm_table_exists(conn: &Connection, table_name: &str) -> Result { Ok(util::fetch_i64( conn, @@ -1940,7 +2138,8 @@ async fn payload_byte_counts_for_scope( .query( "SELECT payload_ref, byte_count FROM lcm_external_payloads - WHERE provider = ?1 AND (?2 IS NULL OR session_id = ?2)", + WHERE (?1 = 'all' OR provider = ?1) + AND (?2 IS NULL OR session_id = ?2)", params![provider, util::opt_text(session_id)], ) .await?; @@ -1962,7 +2161,8 @@ async fn payload_ref_locations_for_scope( .query( "SELECT store_id, message_id, session_id, storage_kind, payload_ref, content, snippet_text, index_text, metadata_json FROM lcm_raw_messages - WHERE provider = ?1 AND (?2 IS NULL OR session_id = ?2)", + WHERE (?1 = 'all' OR provider = ?1) + AND (?2 IS NULL OR session_id = ?2)", params![provider, util::opt_text(session_id)], ) .await?; @@ -2097,7 +2297,7 @@ async fn tombstoned_count( .query( "SELECT COUNT(*) FROM lcm_raw_messages - WHERE provider = ?1 + WHERE (?1 = 'all' OR provider = ?1) AND (?2 IS NULL OR session_id = ?2) AND ( content LIKE ?3 COLLATE NOCASE @@ -2177,7 +2377,8 @@ async fn metadata_refs_for_scope( .query( "SELECT payload_ref FROM lcm_external_payloads - WHERE provider = ?1 AND (?2 IS NULL OR session_id = ?2)", + WHERE (?1 = 'all' OR provider = ?1) + AND (?2 IS NULL OR session_id = ?2)", params![provider, util::opt_text(session_id)], ) .await?; @@ -2204,12 +2405,13 @@ async fn placeholder_refs_for_scope( let sql = format!( "SELECT content, snippet_text, index_text, metadata_json FROM lcm_raw_messages - WHERE provider = ? + WHERE (? = 'all' OR provider = ?) AND (? IS NULL OR session_id = ?) AND ({placeholder_predicates})" ); let session_value = util::opt_text(session_id); let mut values = vec![ + Value::Text(provider.to_string()), Value::Text(provider.to_string()), session_value.clone(), session_value, diff --git a/tests/cli_help_test.rs b/tests/cli_help_test.rs index 9b915500..c899bf0f 100644 --- a/tests/cli_help_test.rs +++ b/tests/cli_help_test.rs @@ -50,6 +50,7 @@ fn top_level_subcommands_accept_help() { "gain", "monitor", "sessions", + "projects", "branch", "memory", "automation", @@ -69,6 +70,9 @@ fn nested_subcommands_accept_help() { &["daemon", "install-service", "--help"], &["sessions", "ingest", "--help"], &["sessions", "search", "--help"], + &["projects", "list", "--help"], + &["projects", "search", "--help"], + &["projects", "context", "--help"], &["branch", "list", "--help"], &["branch", "add", "--help"], &["memory", "status", "--help"], diff --git a/tests/cli_non_interactive_test.rs b/tests/cli_non_interactive_test.rs index cb335755..73a7f101 100644 --- a/tests/cli_non_interactive_test.rs +++ b/tests/cli_non_interactive_test.rs @@ -1054,6 +1054,100 @@ async fn list_all_reports_profile_sharded_store_without_stale_label() { ); } +#[tokio::test] +async fn projects_list_json_reads_global_registry() { + let home = TempDir::new().unwrap(); + let project = TempDir::new().unwrap(); + let db = GlobalDb::open_at(&profile_root(home.path()).join("global.db")) + .await + .unwrap(); + register_profile_sharded_store(&db, project.path(), "proj_cli").await; + drop(db); + + let mut command = tracedecay_command(home.path(), project.path()); + command.args(["projects", "list", "--json"]); + let output = run_with_timeout(command, cli_timeout()); + + assert!( + output.status.success(), + "projects list --json should succeed\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let payload: serde_json::Value = serde_json::from_slice(&output.stdout).unwrap(); + assert_eq!(payload["projects"][0]["project_id"], "proj_cli"); + assert_eq!(payload["projects"][0]["default_branch"], "main"); +} + +#[tokio::test] +async fn projects_search_text_matches_registered_alias() { + let home = TempDir::new().unwrap(); + let project = TempDir::new().unwrap(); + let db = GlobalDb::open_at(&profile_root(home.path()).join("global.db")) + .await + .unwrap(); + register_profile_sharded_store(&db, project.path(), "proj_cli").await; + drop(db); + + let mut command = tracedecay_command(home.path(), project.path()); + command.args(["projects", "search", "proj_cli"]); + let output = run_with_timeout(command, cli_timeout()); + + assert!( + output.status.success(), + "projects search should succeed\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("proj_cli") && stdout.contains("main"), + "search output should include project id and branch\nstdout:\n{stdout}" + ); +} + +#[tokio::test] +async fn projects_context_resolves_project_id_and_path() { + let home = TempDir::new().unwrap(); + let project = TempDir::new().unwrap(); + let db = GlobalDb::open_at(&profile_root(home.path()).join("global.db")) + .await + .unwrap(); + register_profile_sharded_store(&db, project.path(), "proj_cli").await; + drop(db); + + let mut by_id = tracedecay_command(home.path(), project.path()); + by_id.args(["projects", "context", "proj_cli", "--json"]); + let by_id_output = run_with_timeout(by_id, cli_timeout()); + assert!( + by_id_output.status.success(), + "projects context by id should succeed\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&by_id_output.stdout), + String::from_utf8_lossy(&by_id_output.stderr) + ); + let by_id_payload: serde_json::Value = serde_json::from_slice(&by_id_output.stdout).unwrap(); + assert_eq!(by_id_payload["project"]["project_id"], "proj_cli"); + assert_eq!( + by_id_payload["stores"][0]["store"]["storage_mode"], + "profile_sharded" + ); + + let mut by_path = tracedecay_command(home.path(), project.path()); + by_path.args(["projects", "context", project.path().to_str().unwrap()]); + let by_path_output = run_with_timeout(by_path, cli_timeout()); + assert!( + by_path_output.status.success(), + "projects context by path should succeed\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&by_path_output.stdout), + String::from_utf8_lossy(&by_path_output.stderr) + ); + let stdout = String::from_utf8_lossy(&by_path_output.stdout); + assert!( + stdout.contains("Project: proj_cli") && stdout.contains("profile_sharded"), + "path context output should include project and store\nstdout:\n{stdout}" + ); +} + #[tokio::test] async fn wipe_all_removes_profile_sharded_store_and_global_row() { let home = TempDir::new().unwrap(); diff --git a/tests/mcp_handler_test.rs b/tests/mcp_handler_test.rs index d64a412d..d0174496 100644 --- a/tests/mcp_handler_test.rs +++ b/tests/mcp_handler_test.rs @@ -6884,11 +6884,23 @@ async fn seed_lcm_tool_result_message( message_id: &str, text: impl Into, ordinal: i64, +) { + seed_lcm_tool_result_message_for_provider(cg, "cursor", session_id, message_id, text, ordinal) + .await; +} + +async fn seed_lcm_tool_result_message_for_provider( + cg: &TraceDecay, + provider: &str, + session_id: &str, + message_id: &str, + text: impl Into, + ordinal: i64, ) { let db = open_active_project_session_db(cg).await; assert!( db.upsert_session(&SessionRecord { - provider: "cursor".to_string(), + provider: provider.to_string(), session_id: session_id.to_string(), project_key: cg.project_root().to_string_lossy().to_string(), project_path: cg.project_root().to_string_lossy().to_string(), @@ -6906,7 +6918,7 @@ async fn seed_lcm_tool_result_message( ); assert!( db.upsert_session_message(&SessionMessageRecord { - provider: "cursor".to_string(), + provider: provider.to_string(), message_id: message_id.to_string(), session_id: session_id.to_string(), role: "tool".to_string(), @@ -12578,6 +12590,85 @@ async fn lcm_status_reports_dag_store_and_config_diagnostics_over_mcp() { assert_eq!(lcm["config"]["compression_boundary_cooldown_seconds"], 60); } +#[tokio::test] +async fn lcm_status_all_provider_aggregates_provider_counts() { + let (cg, _dir) = setup_project().await; + seed_lcm_session_message_for_provider( + &cg, + "cursor", + "cursor-session", + "cursor-msg", + "alpha beta", + 1, + ) + .await; + seed_lcm_session_message_for_provider( + &cg, + "codex", + "codex-session", + "codex-msg", + "gamma delta epsilon", + 2, + ) + .await; + + let result = handle_tool_call( + &cg, + "tracedecay_lcm_status", + json!({"provider": "all"}), + None, + None, + ) + .await + .unwrap(); + let payload: Value = serde_json::from_str(extract_text(&result.value)).unwrap(); + + assert_eq!(payload["status"], "ok"); + assert_eq!(payload["provider"], "all"); + assert_eq!(payload["lcm"]["raw_message_count"], 2); + assert_eq!(payload["lcm"]["store"]["messages"], 2); + assert_eq!(payload["lcm"]["store"]["estimated_tokens"], 5); +} + +#[tokio::test] +async fn lcm_status_all_provider_counts_payload_health_once() { + let (cg, _dir) = setup_project().await; + seed_lcm_tool_result_message_for_provider( + &cg, + "cursor", + "lcm-status-all-payload-cursor", + "lcm-status-all-payload-cursor-message", + format!("cursor payload\n{}", "cursor-body ".repeat(30_000)), + 1, + ) + .await; + seed_lcm_tool_result_message_for_provider( + &cg, + "codex", + "lcm-status-all-payload-codex", + "lcm-status-all-payload-codex-message", + format!("codex payload\n{}", "codex-body ".repeat(30_000)), + 2, + ) + .await; + + let result = handle_tool_call( + &cg, + "tracedecay_lcm_status", + json!({"provider": "all"}), + None, + None, + ) + .await + .unwrap(); + let payload: Value = serde_json::from_str(extract_text(&result.value)).unwrap(); + + assert_eq!(payload["status"], "ok"); + assert_eq!(payload["lcm"]["payload"]["externalized_count"], 2); + assert_eq!(payload["lcm"]["payload"]["orphan_file_count"], 0); + assert_eq!(payload["lcm"]["payload"]["missing_count"], 0); +} + // Repeated LCM tool calls in one process must reuse the per-process // "schema already ensured" flag instead of re-opening the project DB with a // full DDL ensure each time. Observable via the version gate: after the From d51c379a6720408c9a5c5c09b73bf3eb65aa4f02 Mon Sep 17 00:00:00 2001 From: ScriptedAlchemy Date: Mon, 29 Jun 2026 07:01:56 +0000 Subject: [PATCH 2/6] feat: add read-only transcript search mode --- src/mcp/tools/definitions.rs | 6 ++- src/mcp/tools/handlers/session.rs | 9 +++- tests/mcp_handler_test.rs | 78 +++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 2 deletions(-) diff --git a/src/mcp/tools/definitions.rs b/src/mcp/tools/definitions.rs index 3d9a970b..dc61bbc7 100644 --- a/src/mcp/tools/definitions.rs +++ b/src/mcp/tools/definitions.rs @@ -2276,7 +2276,7 @@ fn def_message_search() -> ToolDefinition { def( "tracedecay_message_search", "Message Search", - "Search ingested transcript messages across all supported providers by default. Every search first catches up all supported provider adapters for the selected project; pass provider only when explicitly scoping results to one provider.", + "Search ingested transcript messages across all supported providers by default. Every search first catches up all supported provider adapters for the selected project unless catch_up is false; pass provider only when explicitly scoping results to one provider.", json!({ "type": "object", "properties": { @@ -2297,6 +2297,10 @@ fn def_message_search() -> ToolDefinition { "type": "boolean", "description": "Whether to include child subagent sessions in results (default: true)." }, + "catch_up": { + "type": "boolean", + "description": "Whether to ingest/catch up local provider transcripts before searching (default: true). Set false for strictly read-only audits of already-ingested messages." + }, "parent_session_id": { "type": "string", "description": "Optional parent session id filter. Primarily useful with scope=subagents_only." diff --git a/src/mcp/tools/handlers/session.rs b/src/mcp/tools/handlers/session.rs index b341bb69..69d8e210 100644 --- a/src/mcp/tools/handlers/session.rs +++ b/src/mcp/tools/handlers/session.rs @@ -1399,6 +1399,10 @@ pub(super) async fn handle_message_search( .get("include_subagents") .and_then(Value::as_bool) .unwrap_or(true); + let catch_up = args + .get("catch_up") + .and_then(Value::as_bool) + .unwrap_or(true); let mut scope = parse_message_search_scope(&args)?; if !include_subagents && matches!(scope, SessionSearchScope::All) { scope = SessionSearchScope::ParentsOnly; @@ -1439,7 +1443,9 @@ pub(super) async fn handle_message_search( }), )); }; - let _ = crate::sessions::ingest_global_sources(&db, &target_root).await; + if catch_up { + let _ = crate::sessions::ingest_global_sources(&db, &target_root).await; + } let results = if let Some(provider) = requested_provider { db.search_session_messages_filtered( provider, @@ -1471,6 +1477,7 @@ pub(super) async fn handle_message_search( "project_key": project_key, "parent_session_id": parent_session_id, "include_subagents": include_subagents, + "catch_up": catch_up, "scope": match scope { SessionSearchScope::All => "all", SessionSearchScope::ParentsOnly => "parents_only", diff --git a/tests/mcp_handler_test.rs b/tests/mcp_handler_test.rs index d0174496..eb94b42f 100644 --- a/tests/mcp_handler_test.rs +++ b/tests/mcp_handler_test.rs @@ -6729,6 +6729,84 @@ async fn message_search_catches_up_provider_transcripts_before_querying() { assert!(providers.contains("cursor")); } +#[tokio::test] +async fn message_search_can_skip_catch_up_for_read_only_audits() { + let (cg, _dir) = setup_project().await; + let home = cg.project_root().join("home"); + let project = cg.project_root().to_path_buf(); + let project_text = project.to_string_lossy(); + + let codex_dir = home.join(".codex/sessions/2026/01/01"); + fs::create_dir_all(&codex_dir).unwrap(); + fs::write( + codex_dir.join("rollout-2026-01-01T00-00-00-codex-readonly.jsonl"), + format!( + "{}\n{}\n", + json!({ + "timestamp": "2026-01-01T00:00:00.000Z", + "type": "session_meta", + "payload": {"id": "codex-readonly", "cwd": project_text} + }), + json!({ + "timestamp": "2026-01-01T00:00:01.000Z", + "type": "event_msg", + "payload": { + "type": "user_message", + "message": "Read only transcript catchup should not import this." + } + }) + ), + ) + .unwrap(); + + let read_only_result = handle_tool_call( + &cg, + "tracedecay_message_search", + json!({ + "query": "read only transcript catchup", + "provider": "codex", + "catch_up": false, + "limit": 5 + }), + None, + None, + ) + .await + .unwrap(); + let read_only = extract_json(&read_only_result.value); + assert_eq!(read_only["status"], "ok"); + assert_eq!(read_only["catch_up"], false); + assert_eq!(read_only["count"], 0); + + let db = open_active_project_session_db(&cg).await; + let skipped = db + .search_session_messages("codex", None, "read only transcript catchup", 10) + .await; + assert!( + skipped.is_empty(), + "catch_up=false must not ingest provider transcripts" + ); + + let catch_up_result = handle_tool_call( + &cg, + "tracedecay_message_search", + json!({ + "query": "read only transcript catchup", + "provider": "codex", + "limit": 5 + }), + None, + None, + ) + .await + .unwrap(); + let catch_up = extract_json(&catch_up_result.value); + assert_eq!(catch_up["status"], "ok"); + assert_eq!(catch_up["catch_up"], true); + assert_eq!(catch_up["count"], 1); + assert_eq!(catch_up["results"][0]["message"]["provider"], "codex"); +} + #[tokio::test] async fn message_search_reads_profile_sharded_session_db() { let _guard = GLOBAL_DB_ENV_LOCK.lock().await; From 9bddc4b32bbdf145d16472a9cd016dee3f16510d Mon Sep 17 00:00:00 2001 From: ScriptedAlchemy Date: Mon, 29 Jun 2026 07:07:06 +0000 Subject: [PATCH 3/6] fix: search all providers for skill writer evidence --- src/automation/runner.rs | 11 ++-- tests/automation_skill_writer_runner_test.rs | 54 ++++++++++++++++++++ 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/src/automation/runner.rs b/src/automation/runner.rs index 2ab233cf..bf1392de 100644 --- a/src/automation/runner.rs +++ b/src/automation/runner.rs @@ -108,7 +108,7 @@ pub struct SkillWriterAutomationOptions { pub trigger: AutomationTrigger, #[serde(default, skip_serializing_if = "Option::is_none")] pub run_id: Option, - #[serde(default = "default_session_provider")] + #[serde(default = "default_skill_writer_provider")] pub provider: String, #[serde(default = "default_skill_writer_query")] pub query: String, @@ -123,7 +123,7 @@ impl Default for SkillWriterAutomationOptions { Self { trigger: AutomationTrigger::ManualCli, run_id: None, - provider: default_session_provider(), + provider: default_skill_writer_provider(), query: default_skill_writer_query(), evidence_limit: default_skill_writer_evidence_limit(), profile_root: None, @@ -500,7 +500,8 @@ async fn build_skill_writer_evidence( Some(path) => path, None => crate::storage::default_profile_root()?, }; - let provider = normalized_non_empty(&options.provider).unwrap_or_else(default_session_provider); + let provider = + normalized_non_empty(&options.provider).unwrap_or_else(default_skill_writer_provider); let query = normalized_non_empty(&options.query).unwrap_or_else(default_skill_writer_query); let evidence_limit = options.evidence_limit.clamp(1, 50); @@ -703,6 +704,10 @@ fn default_session_provider() -> String { "cursor".to_string() } +fn default_skill_writer_provider() -> String { + "all".to_string() +} + fn default_lcm_storage_scope() -> String { "project_local".to_string() } diff --git a/tests/automation_skill_writer_runner_test.rs b/tests/automation_skill_writer_runner_test.rs index bcc7c5d7..02b7ea01 100644 --- a/tests/automation_skill_writer_runner_test.rs +++ b/tests/automation_skill_writer_runner_test.rs @@ -35,6 +35,60 @@ async fn skill_writer_runner_skips_when_task_is_disabled() { ); } +#[tokio::test] +async fn skill_writer_default_provider_searches_all_providers() { + let temp = tempdir().unwrap(); + let profile_root = temp.path().join("profile"); + let cg = init_project(temp.path()).await; + let db = GlobalDb::open_at(&cg.store_layout().sessions_db_path) + .await + .expect("session db open"); + seed_session_message_in_db( + &db, + cg.project_root(), + SeedSessionMessage { + provider: "codex", + session_id: "skill-writer-codex-default", + message_id: "skill-writer-codex-default-message-001", + role: "assistant", + timestamp: 1_715_000_001, + text: "Codex workflow correction repeated skill tool pattern evidence should be found by the default skill writer provider.", + source: None, + }, + ) + .await; + let backend = SkillJsonBackend::new(json!({"skills": []})); + let config = AutomationConfig { + enabled: true, + backend: AutomationBackend::CodexAppServer, + host_mode: AutomationHostMode::Standalone, + tasks: AutomationTaskSet { + skill_writer: AutomationTaskConfig { + enabled: true, + schedule: Some("manual".to_string()), + ..AutomationTaskConfig::default() + }, + ..AutomationTaskSet::default() + }, + ..AutomationConfig::default() + }; + + let run = run_skill_writer_with_backend( + &cg, + &config, + &backend, + SkillWriterAutomationOptions { + profile_root: Some(profile_root), + ..SkillWriterAutomationOptions::default() + }, + ) + .await + .unwrap(); + + assert_eq!(backend.calls(), 1); + assert_eq!(run.ledger_record.status, AutomationRunStatus::Succeeded); +} + #[tokio::test] async fn skill_writer_runner_creates_pending_skill_drafts_for_approval() { let temp = tempdir().unwrap(); From f2aa1175bf8ed228d4e9a5fae45d0a0f10c1fbba Mon Sep 17 00:00:00 2001 From: ScriptedAlchemy Date: Mon, 29 Jun 2026 07:08:42 +0000 Subject: [PATCH 4/6] fix: handle hash queries in lcm grep --- src/sessions/lcm/query.rs | 4 ++-- tests/session_lcm_query_test.rs | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/sessions/lcm/query.rs b/src/sessions/lcm/query.rs index 24c60bab..ed4b96cd 100644 --- a/src/sessions/lcm/query.rs +++ b/src/sessions/lcm/query.rs @@ -32,7 +32,7 @@ const PLACEHOLDER_PREFIXES: [&str; 5] = [ ]; const PLACEHOLDER_TEXT_COLUMNS: [&str; 4] = ["content", "snippet_text", "index_text", "metadata_json"]; -const TERM_SEPARATORS: [char; 3] = ['-', ':', '/']; +const TERM_SEPARATORS: [char; 4] = ['-', ':', '/', '#']; const RAW_GREP_RECENCY_EXPR: &str = "COALESCE(r.timestamp, r.store_id)"; const SUMMARY_GREP_RECENCY_EXPR: &str = "COALESCE(n.source_time_end, n.source_time_start, n.created_at)"; @@ -2707,7 +2707,7 @@ fn sanitize_fts5_query(query: &str) -> String { fn is_fts5_special_char(ch: char) -> bool { matches!( ch, - '"' | '(' | ')' | '*' | '^' | '-' | ':' | '{' | '}' | '.' + '"' | '(' | ')' | '*' | '^' | '-' | ':' | '{' | '}' | '.' | '#' ) } diff --git a/tests/session_lcm_query_test.rs b/tests/session_lcm_query_test.rs index f87f8992..22e1e63e 100644 --- a/tests/session_lcm_query_test.rs +++ b/tests/session_lcm_query_test.rs @@ -416,6 +416,38 @@ async fn grep_like_fallback_recalls_infix_slash_query_matches() { .any(|hit| hit.snippet.to_ascii_lowercase().contains("srcfoo"))); } +#[tokio::test] +async fn grep_like_fallback_handles_hash_separator_queries() { + let tmp = TempDir::new().unwrap(); + let db = open_lcm_db(&tmp).await; + let store_ids = insert_raw_messages( + &db, + "cursor", + "session-1", + &["the log references issue#123 inside a Cursor transcript".to_string()], + ) + .await; + + let hits = db + .lcm_grep(LcmGrepRequest { + provider: "cursor".into(), + query: "issue#123".into(), + scope: LcmScope::Session, + session_id: Some("session-1".into()), + include_summaries: false, + limit: 10, + sort: LcmGrepSort::Recency, + source: None, + role: None, + start_time: None, + end_time: None, + }) + .await + .expect("hash separator grep should not produce an FTS syntax error"); + + assert!(hits.iter().any(|hit| hit.store_id == Some(store_ids[0]))); +} + #[tokio::test] async fn grep_quotes_reserved_operator_looking_query_text() { let tmp = TempDir::new().unwrap(); From 494faea0965bb70e85bc17946b31206a6ee1d77a Mon Sep 17 00:00:00 2001 From: ScriptedAlchemy Date: Mon, 29 Jun 2026 07:24:50 +0000 Subject: [PATCH 5/6] feat: include project memory in context --- src/mcp/tools/definitions.rs | 3 +- src/mcp/tools/handlers/graph.rs | 42 ++++++++- src/memory/retrieval.rs | 39 +++++++-- src/tracedecay.rs | 24 ++++++ tests/mcp_handler_test.rs | 147 ++++++++++++++++++++++++++++++++ 5 files changed, 246 insertions(+), 9 deletions(-) diff --git a/src/mcp/tools/definitions.rs b/src/mcp/tools/definitions.rs index dc61bbc7..67ffed46 100644 --- a/src/mcp/tools/definitions.rs +++ b/src/mcp/tools/definitions.rs @@ -199,7 +199,8 @@ pub fn explore_call_budget(total_nodes: u64) -> u8 { pub fn context_description(node_count: u64, budget: u8) -> String { format!( "Build an AI-ready context for a task description. Returns relevant symbols, \ - relationships, and optionally code snippets.\n\n\ + relationships, up to three untracked project memory matches when available, \ + and optionally code snippets.\n\n\ CALL BUDGET: {budget} calls maximum for this project ({node_count} nodes). \ Stop after {budget} calls. If the question is not fully answered, synthesise \ from what you have — do not exceed the budget." diff --git a/src/mcp/tools/handlers/graph.rs b/src/mcp/tools/handlers/graph.rs index 189c2d72..b9ed7388 100644 --- a/src/mcp/tools/handlers/graph.rs +++ b/src/mcp/tools/handlers/graph.rs @@ -9,10 +9,13 @@ use serde_json::{json, Value}; use crate::context::format_context_as_markdown; use crate::errors::{Result, TraceDecayError}; +use crate::memory::types::{FactSearchResult, SearchFactsRequest}; use crate::path_tree::format_compact_path_list; use crate::tracedecay::TraceDecay; use crate::types::{BuildContextOptions, EdgeKind, Node, NodeKind, TaskContext, Visibility}; +const CONTEXT_MEMORY_MATCH_LIMIT: usize = 3; + use super::super::render::{self, Md}; use super::super::ToolResult; use super::support::{ @@ -164,6 +167,10 @@ pub(super) async fn handle_context( let options = build_context_options(&args, scope_prefix); let context = cg.build_context(task, &options).await?; + let (memory_matches, memory_matches_error) = match context_memory_matches(cg, task).await { + Ok(matches) => (matches, None), + Err(err) => (Vec::new(), Some(err.to_string())), + }; let touched_files = unique_file_paths( context .subgraph @@ -178,6 +185,19 @@ pub(super) async fn handle_context( ), ); let mut output = format_context_as_markdown(&context); + if !memory_matches.is_empty() { + let _ = writeln!(output, "\n### Memory Matches"); + for hit in &memory_matches { + let fact = &hit.fact; + let _ = writeln!( + output, + "- fact_id={} category={} trust={:.2} score={:.3}: {}", + fact.fact_id, fact.category, fact.trust_score, hit.score, fact.content + ); + } + } else if let Some(err) = &memory_matches_error { + let _ = writeln!(output, "\n### Memory Matches\nUnavailable: {err}"); + } if let Some(hint) = cg.index_coverage_hint(context.subgraph.nodes.len()) { let _ = writeln!( output, @@ -201,7 +221,16 @@ pub(super) async fn handle_context( ); } - let value = serde_json::to_value(&context).unwrap_or_else(|_| json!({})); + let mut value = serde_json::to_value(&context).unwrap_or_else(|_| json!({})); + if let Some(object) = value.as_object_mut() { + object.insert( + "memory_matches".to_string(), + serde_json::to_value(&memory_matches).unwrap_or_else(|_| json!([])), + ); + if let Some(err) = memory_matches_error { + object.insert("memory_matches_error".to_string(), json!(err)); + } + } Ok(rendered_tool_result( cg, &args, @@ -211,6 +240,17 @@ pub(super) async fn handle_context( )) } +async fn context_memory_matches(cg: &TraceDecay, task: &str) -> Result> { + cg.search_facts_untracked(SearchFactsRequest { + query: task.to_string(), + category: None, + limit: Some(CONTEXT_MEMORY_MATCH_LIMIT), + min_trust: Some(0.5), + include_why: false, + }) + .await +} + fn build_context_options(args: &Value, scope_prefix: Option<&str>) -> BuildContextOptions { let max_nodes = args .get("max_nodes") diff --git a/src/memory/retrieval.rs b/src/memory/retrieval.rs index 20f22485..1b9f0faa 100644 --- a/src/memory/retrieval.rs +++ b/src/memory/retrieval.rs @@ -39,6 +39,29 @@ impl<'a> FactRetriever<'a> { category: Option, min_trust: Option, limit: usize, + ) -> Result> { + self.search_with_tracking(query, category, min_trust, limit, true) + .await + } + + pub async fn search_untracked( + &self, + query: &str, + category: Option, + min_trust: Option, + limit: usize, + ) -> Result> { + self.search_with_tracking(query, category, min_trust, limit, false) + .await + } + + async fn search_with_tracking( + &self, + query: &str, + category: Option, + min_trust: Option, + limit: usize, + track_recalls: bool, ) -> Result> { let min_trust = min_trust.unwrap_or(DEFAULT_MIN_TRUST); let limit = normalized_limit(limit); @@ -135,13 +158,15 @@ impl<'a> FactRetriever<'a> { }); results.truncate(limit); - // Access tracking for the facts actually RETURNED to the caller — - // candidates scanned and dropped above never count, and the other - // retrieval modes (probe/list/related/reason) deliberately do not - // bump access_count. Batched single UPDATE, fire-and-forget: a - // tracking failure must never fail the search itself. - let returned_ids: Vec = results.iter().map(|result| result.fact.fact_id).collect(); - let _ = self.store.record_fact_recalls(&returned_ids).await; + if track_recalls { + // Access tracking for the facts actually RETURNED to the caller — + // candidates scanned and dropped above never count, and the other + // retrieval modes (probe/list/related/reason) deliberately do not + // bump access_count. Batched single UPDATE, fire-and-forget: a + // tracking failure must never fail the search itself. + let returned_ids: Vec = results.iter().map(|result| result.fact.fact_id).collect(); + let _ = self.store.record_fact_recalls(&returned_ids).await; + } Ok(results) } diff --git a/src/tracedecay.rs b/src/tracedecay.rs index 015c40f3..b2905afc 100644 --- a/src/tracedecay.rs +++ b/src/tracedecay.rs @@ -4368,6 +4368,30 @@ impl TraceDecay { Ok(results) } + /// Search facts without updating recall/access counters. This is for + /// background enrichment surfaces such as `tracedecay_context`, where a + /// memory match is supporting context rather than an explicit recall. + pub async fn search_facts_untracked( + &self, + request: SearchFactsRequest, + ) -> Result> { + let db = self.open_project_store_db().await?; + let mut results = FactRetriever::new(db.conn()) + .search_untracked( + &request.query, + request.category, + request.min_trust, + request.limit.unwrap_or(DEFAULT_FACT_LIMIT), + ) + .await?; + if !request.include_why { + for result in &mut results { + result.why = None; + } + } + Ok(results) + } + pub async fn probe_entity( &self, entity: &str, diff --git a/tests/mcp_handler_test.rs b/tests/mcp_handler_test.rs index eb94b42f..d5458e35 100644 --- a/tests/mcp_handler_test.rs +++ b/tests/mcp_handler_test.rs @@ -2035,6 +2035,153 @@ async fn test_context() { assert!(!text.is_empty()); } +#[tokio::test] +async fn context_includes_matching_memory_facts() { + let (cg, _dir) = setup_project().await; + let added = handle_tool_call( + &cg, + "tracedecay_fact_store", + json!({ + "action": "add", + "content": "Helper function reviews should check durable memory before broad file search.", + "category": "decision", + "entity": "helper function", + "tags": ["context", "memory"], + "trust": 0.91, + "source": "mcp-context-test" + }), + None, + None, + ) + .await + .unwrap(); + let added: Value = serde_json::from_str(extract_text(&added.value)).unwrap(); + let fact_id = added["fact"]["fact_id"].as_i64().unwrap(); + let before_context = cg.get_fact(fact_id).await.unwrap().unwrap(); + + let markdown_result = handle_tool_call( + &cg, + "tracedecay_context", + json!({"task": "helper function durable memory review"}), + None, + None, + ) + .await + .unwrap(); + let markdown = extract_text(&markdown_result.value); + assert!(markdown.contains("### Memory Matches")); + assert!(markdown.contains(&format!("fact_id={fact_id}"))); + assert!(markdown.contains("Helper function reviews should check durable memory")); + + let json_result = handle_tool_call( + &cg, + "tracedecay_context", + json!({"task": "helper function durable memory review", "format": "json"}), + None, + None, + ) + .await + .unwrap(); + let payload: Value = serde_json::from_str(extract_text(&json_result.value)).unwrap(); + assert!(payload["memory_matches"] + .as_array() + .is_some_and(|matches| matches + .iter() + .any(|hit| hit["fact"]["fact_id"].as_i64() == Some(fact_id)))); + + let after_context = cg.get_fact(fact_id).await.unwrap().unwrap(); + assert_eq!( + after_context.retrieval_count, before_context.retrieval_count, + "context memory enrichment should not count as an explicit memory retrieval" + ); + assert_eq!( + after_context.access_count, before_context.access_count, + "context memory enrichment should not count as an explicit memory recall" + ); +} + +#[tokio::test] +async fn context_memory_matches_use_project_store_when_serving_branch_db() { + fn git(project: &Path, args: &[&str]) { + let output = Command::new("git") + .args(args) + .current_dir(project) + .output() + .unwrap_or_else(|err| panic!("git {args:?} failed to spawn: {err}")); + assert!( + output.status.success(), + "git {args:?} failed\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + } + + let _guard = GLOBAL_DB_ENV_LOCK.lock().await; + let dir = test_temp_dir(); + let project = dir.path().join("repo"); + let home = dir.path().join("home"); + let _home_guard = HomeEnvGuard::set(&home); + let _global_db_guard = GlobalDbEnvGuard::set(&home.join(".tracedecay/global.db")); + + fs::create_dir_all(project.join("src")).unwrap(); + fs::write(project.join("src/lib.rs"), "pub fn f() -> u32 { 1 }\n").unwrap(); + git(&project, &["init"]); + git(&project, &["config", "user.email", "test@test.com"]); + git(&project, &["config", "user.name", "Test"]); + git(&project, &["add", "."]); + git(&project, &["commit", "-m", "initial"]); + git(&project, &["branch", "-M", "main"]); + + let cg = TestTraceDecay::new(TraceDecay::init(&project).await.unwrap()); + index_all_retrying_sync_lock(&cg).await; + git(&project, &["checkout", "-b", "feature"]); + let cg = TestTraceDecay::new(TraceDecay::open(&project).await.unwrap()); + assert_ne!( + cg.db_path(), + cg.store_layout().graph_db_path, + "test must serve a branch DB distinct from the shared project store" + ); + + let added = handle_tool_call( + &cg, + "tracedecay_fact_store", + json!({ + "action": "add", + "content": "Branch context recall must read project-scoped memory facts", + "category": "project", + "entity": "Branch context recall", + "trust": 0.91 + }), + None, + None, + ) + .await + .unwrap(); + let added: Value = serde_json::from_str(extract_text(&added.value)).unwrap(); + let fact_id = added["fact"]["fact_id"] + .as_i64() + .expect("fact_store add should return numeric id"); + + let result = handle_tool_call( + &cg, + "tracedecay_context", + json!({"task": "branch context recall project-scoped memory", "format": "json"}), + None, + None, + ) + .await + .unwrap(); + let payload: Value = serde_json::from_str(extract_text(&result.value)).unwrap(); + assert!( + payload["memory_matches"] + .as_array() + .is_some_and(|matches| matches + .iter() + .any(|hit| hit["fact"]["fact_id"].as_i64() == Some(fact_id))), + "context memory matches must come from the shared project memory store" + ); +} + // --------------------------------------------------------------------------- // 3. tracedecay_callers // --------------------------------------------------------------------------- From 7c5a0ade6a2b828d324484b367593a060387daae Mon Sep 17 00:00:00 2001 From: ScriptedAlchemy Date: Mon, 29 Jun 2026 07:29:36 +0000 Subject: [PATCH 6/6] feat: add context memory controls --- src/mcp/tools/definitions.rs | 12 +++++ src/mcp/tools/handlers/graph.rs | 74 ++++++++++++++++++++++--- tests/mcp_handler_test.rs | 95 +++++++++++++++++++++++++++++++++ 3 files changed, 174 insertions(+), 7 deletions(-) diff --git a/src/mcp/tools/definitions.rs b/src/mcp/tools/definitions.rs index 67ffed46..0fd47762 100644 --- a/src/mcp/tools/definitions.rs +++ b/src/mcp/tools/definitions.rs @@ -716,6 +716,18 @@ fn def_context() -> ToolDefinition { "max_per_file": { "type": "number", "description": "Maximum symbols from a single file in results. Prevents one large file from dominating (default: max_nodes/3, minimum 3)" + }, + "include_memory": { + "type": "boolean", + "description": "When true, include up to memory_limit matching project memory facts as a separate context lane (default: true)" + }, + "memory_limit": { + "type": "number", + "description": "Maximum memory facts to include when include_memory is true (default: 3, max: 10)" + }, + "memory_min_trust": { + "type": "number", + "description": "Minimum trust score for memory facts included in context (default: 0.5)" } })), "required": ["task"] diff --git a/src/mcp/tools/handlers/graph.rs b/src/mcp/tools/handlers/graph.rs index b9ed7388..c1877918 100644 --- a/src/mcp/tools/handlers/graph.rs +++ b/src/mcp/tools/handlers/graph.rs @@ -15,6 +15,8 @@ use crate::tracedecay::TraceDecay; use crate::types::{BuildContextOptions, EdgeKind, Node, NodeKind, TaskContext, Visibility}; const CONTEXT_MEMORY_MATCH_LIMIT: usize = 3; +const CONTEXT_MEMORY_MATCH_LIMIT_MAX: usize = 10; +const CONTEXT_MEMORY_SNIPPET_CHARS: usize = 240; use super::super::render::{self, Md}; use super::super::ToolResult; @@ -167,9 +169,14 @@ pub(super) async fn handle_context( let options = build_context_options(&args, scope_prefix); let context = cg.build_context(task, &options).await?; - let (memory_matches, memory_matches_error) = match context_memory_matches(cg, task).await { - Ok(matches) => (matches, None), - Err(err) => (Vec::new(), Some(err.to_string())), + let memory_options = context_memory_options(&args); + let (memory_matches, memory_matches_error) = if memory_options.include_memory { + match context_memory_matches(cg, task, &memory_options).await { + Ok(matches) => (matches, None), + Err(err) => (Vec::new(), Some(err.to_string())), + } + } else { + (Vec::new(), None) }; let touched_files = unique_file_paths( context @@ -192,7 +199,11 @@ pub(super) async fn handle_context( let _ = writeln!( output, "- fact_id={} category={} trust={:.2} score={:.3}: {}", - fact.fact_id, fact.category, fact.trust_score, hit.score, fact.content + fact.fact_id, + fact.category, + fact.trust_score, + hit.score, + compact_memory_content(&fact.content) ); } } else if let Some(err) = &memory_matches_error { @@ -240,17 +251,66 @@ pub(super) async fn handle_context( )) } -async fn context_memory_matches(cg: &TraceDecay, task: &str) -> Result> { +struct ContextMemoryOptions { + include_memory: bool, + limit: usize, + min_trust: f64, +} + +fn context_memory_options(args: &Value) -> ContextMemoryOptions { + let include_memory = args + .get("include_memory") + .and_then(Value::as_bool) + .unwrap_or(true); + let limit = args + .get("memory_limit") + .and_then(Value::as_u64) + .map(|value| value as usize) + .unwrap_or(CONTEXT_MEMORY_MATCH_LIMIT) + .clamp(1, CONTEXT_MEMORY_MATCH_LIMIT_MAX); + let min_trust = args + .get("memory_min_trust") + .and_then(Value::as_f64) + .unwrap_or(0.5) + .clamp(0.0, 1.0); + ContextMemoryOptions { + include_memory, + limit, + min_trust, + } +} + +async fn context_memory_matches( + cg: &TraceDecay, + task: &str, + options: &ContextMemoryOptions, +) -> Result> { cg.search_facts_untracked(SearchFactsRequest { query: task.to_string(), category: None, - limit: Some(CONTEXT_MEMORY_MATCH_LIMIT), - min_trust: Some(0.5), + limit: Some(options.limit), + min_trust: Some(options.min_trust), include_why: false, }) .await } +fn compact_memory_content(content: &str) -> String { + let mut snippet = String::new(); + let mut truncated = false; + for (idx, ch) in content.chars().enumerate() { + if idx >= CONTEXT_MEMORY_SNIPPET_CHARS { + truncated = true; + break; + } + snippet.push(ch); + } + if truncated { + snippet.push_str("..."); + } + snippet +} + fn build_context_options(args: &Value, scope_prefix: Option<&str>) -> BuildContextOptions { let max_nodes = args .get("max_nodes") diff --git a/tests/mcp_handler_test.rs b/tests/mcp_handler_test.rs index d5458e35..f5253bf1 100644 --- a/tests/mcp_handler_test.rs +++ b/tests/mcp_handler_test.rs @@ -2100,6 +2100,101 @@ async fn context_includes_matching_memory_facts() { ); } +#[tokio::test] +async fn context_memory_controls_filter_disable_and_compact_markdown() { + let (cg, _dir) = setup_project().await; + let long_content = format!("Long memory control fact {}", "x".repeat(320)); + handle_tool_call( + &cg, + "tracedecay_fact_store", + json!({ + "action": "add", + "content": long_content, + "category": "decision", + "entity": "long memory control", + "tags": ["context-memory-controls"], + "trust": 0.92, + "source": "mcp-context-test" + }), + None, + None, + ) + .await + .unwrap(); + handle_tool_call( + &cg, + "tracedecay_fact_store", + json!({ + "action": "add", + "content": "Low trust memory control fact should stay filtered.", + "category": "decision", + "entity": "low trust memory control", + "tags": ["context-memory-controls"], + "trust": 0.2, + "source": "mcp-context-test" + }), + None, + None, + ) + .await + .unwrap(); + + let disabled = handle_tool_call( + &cg, + "tracedecay_context", + json!({ + "task": "long memory control fact", + "format": "json", + "include_memory": false + }), + None, + None, + ) + .await + .unwrap(); + let disabled_payload: Value = serde_json::from_str(extract_text(&disabled.value)).unwrap(); + assert_eq!( + disabled_payload["memory_matches"].as_array().map(Vec::len), + Some(0) + ); + + let filtered = handle_tool_call( + &cg, + "tracedecay_context", + json!({ + "task": "low trust memory control fact", + "format": "json", + "memory_min_trust": 0.9 + }), + None, + None, + ) + .await + .unwrap(); + let filtered_payload: Value = serde_json::from_str(extract_text(&filtered.value)).unwrap(); + assert!(!filtered_payload["memory_matches"] + .as_array() + .unwrap() + .iter() + .any(|hit| hit["fact"]["content"] + .as_str() + .is_some_and(|content| content.contains("Low trust memory control")))); + + let markdown = handle_tool_call( + &cg, + "tracedecay_context", + json!({"task": "long memory control fact", "memory_limit": 1}), + None, + None, + ) + .await + .unwrap(); + let text = extract_text(&markdown.value); + assert!(text.contains("Long memory control fact")); + assert!(text.contains("...")); + assert!(!text.contains(&"x".repeat(300))); +} + #[tokio::test] async fn context_memory_matches_use_project_store_when_serving_branch_db() { fn git(project: &Path, args: &[&str]) {