From 8cfd4d786155b4c59db131764bd8429c35a38899 Mon Sep 17 00:00:00 2001 From: Mike Date: Wed, 3 Jun 2026 12:37:13 -0600 Subject: [PATCH] Fix Gemini CLI JSONL session parsing --- src/analyzers/gemini_cli.rs | 53 ++++++++++++++++----- src/analyzers/tests/gemini_cli.rs | 78 +++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+), 11 deletions(-) diff --git a/src/analyzers/gemini_cli.rs b/src/analyzers/gemini_cli.rs index 239f307..7555db2 100644 --- a/src/analyzers/gemini_cli.rs +++ b/src/analyzers/gemini_cli.rs @@ -265,6 +265,12 @@ fn is_gemini_cli_chat_path(path: &Path) -> bool { .any(|ancestor| ancestor.file_name().is_some_and(|name| name == "chats")) } +fn is_internal_session_context(content: Option<&GeminiCliContent>) -> bool { + content + .map(GeminiCliContent::as_text) + .is_some_and(|text| text.trim_start().starts_with("")) +} + fn messages_from_session( file_path: &Path, messages: Vec, @@ -282,6 +288,10 @@ fn messages_from_session( timestamp, content, } => { + if is_internal_session_context(content.as_ref()) { + continue; + } + if fallback_session_name.is_none() { let text_str = content .as_ref() @@ -368,6 +378,26 @@ fn parse_json_session_file(file_path: &Path) -> Result> Ok(messages_from_session(file_path, session.messages)) } +fn upsert_jsonl_message( + message_order: &mut Vec, + latest_messages: &mut HashMap, + message: GeminiCliMessage, +) { + let id = match &message { + GeminiCliMessage::User { id, .. } + | GeminiCliMessage::Gemini { id, .. } + | GeminiCliMessage::System { id, .. } + | GeminiCliMessage::Error { id, .. } + | GeminiCliMessage::Info { id, .. } + | GeminiCliMessage::Warning { id, .. } => id.clone(), + }; + + if !latest_messages.contains_key(&id) { + message_order.push(id.clone()); + } + latest_messages.insert(id, message); +} + fn parse_jsonl_session_file(file_path: &Path) -> Result> { let content = std::fs::read_to_string(file_path)?; let mut message_order = Vec::new(); @@ -377,7 +407,16 @@ fn parse_jsonl_session_file(file_path: &Path) -> Result let mut line_bytes = line.as_bytes().to_vec(); let value: simd_json::OwnedValue = simd_json::from_slice(&mut line_bytes)?; - if value.get("$set").is_some() { + if let Some(messages) = value + .get("$set") + .and_then(|set| set.get("messages")) + .and_then(|messages| messages.as_array()) + { + for message_value in messages { + let mut message_bytes = simd_json::to_vec(message_value)?; + let message: GeminiCliMessage = simd_json::from_slice(&mut message_bytes)?; + upsert_jsonl_message(&mut message_order, &mut latest_messages, message); + } continue; } @@ -385,18 +424,9 @@ fn parse_jsonl_session_file(file_path: &Path) -> Result continue; } - let id = match value.get("id").and_then(|v| v.as_str()) { - Some(id) => id.to_string(), - None => continue, - }; - let mut message_bytes = line.as_bytes().to_vec(); let message: GeminiCliMessage = simd_json::from_slice(&mut message_bytes)?; - - if !latest_messages.contains_key(&id) { - message_order.push(id.clone()); - } - latest_messages.insert(id, message); + upsert_jsonl_message(&mut message_order, &mut latest_messages, message); } let messages = message_order @@ -419,6 +449,7 @@ impl Analyzer for GeminiCliAnalyzer { if let Some(home_dir) = dirs::home_dir() { let home_str = home_dir.to_string_lossy(); patterns.push(format!("{home_str}/.gemini/tmp/*/chats/*.json")); + patterns.push(format!("{home_str}/.gemini/tmp/*/chats/*.jsonl")); } patterns diff --git a/src/analyzers/tests/gemini_cli.rs b/src/analyzers/tests/gemini_cli.rs index f749f38..c909a16 100644 --- a/src/analyzers/tests/gemini_cli.rs +++ b/src/analyzers/tests/gemini_cli.rs @@ -398,6 +398,84 @@ async fn test_gemini_cli_warning_messages_are_ignored() { assert_eq!(messages[1].role, crate::types::MessageRole::Assistant); } +#[test] +fn test_gemini_cli_glob_patterns_include_jsonl() { + let analyzer = GeminiCliAnalyzer::new(); + let patterns = analyzer.get_data_glob_patterns().join("\n"); + + assert!(patterns.contains("*.json")); + assert!(patterns.contains("*.jsonl")); +} + +#[tokio::test] +async fn test_gemini_cli_jsonl_set_messages_snapshot() { + let dir = tempdir().unwrap(); + let session_dir = dir + .path() + .join("tmp") + .join("project-jsonl-set") + .join("chats"); + std::fs::create_dir_all(&session_dir).unwrap(); + let session_path = session_dir.join("session.jsonl"); + let jsonl_content = r#"{"sessionId":"sess-jsonl-set","projectHash":"proj-hash","startTime":"2026-06-03T18:12:29.005Z","lastUpdated":"2026-06-03T18:12:29.005Z","kind":"main"} +{"$set":{"messages":[{"id":"u-1","timestamp":"2026-06-03T18:12:35.384Z","type":"user","content":[{"text":"hello from the snapshot"}]},{"id":"g-1","timestamp":"2026-06-03T18:12:38.058Z","type":"gemini","content":"Hi there","thoughts":[],"tokens":{"input":17256,"output":26,"cached":0,"thoughts":133,"tool":0,"total":17415},"model":"gemini-3-flash-preview"}],"lastUpdated":"2026-06-03T18:12:38.058Z"}} +{"$set":{"lastUpdated":"2026-06-03T18:12:38.058Z"}} +"#; + let mut file = File::create(&session_path).unwrap(); + file.write_all(jsonl_content.as_bytes()).unwrap(); + + let analyzer = GeminiCliAnalyzer::new(); + let source = crate::analyzer::DataSource { path: session_path }; + let messages = analyzer + .parse_source(&source) + .expect("jsonl $set.messages snapshots should parse successfully"); + + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].role, crate::types::MessageRole::User); + assert_eq!( + messages[0].session_name.as_deref(), + Some("hello from the snapshot") + ); + + let assistant = messages + .iter() + .find(|m| m.role == crate::types::MessageRole::Assistant) + .unwrap(); + assert_eq!(assistant.stats.input_tokens, 17256); + assert_eq!(assistant.stats.output_tokens, 26); + assert_eq!(assistant.stats.reasoning_tokens, 133); +} + +#[tokio::test] +async fn test_gemini_cli_jsonl_internal_session_context_is_ignored() { + let dir = tempdir().unwrap(); + let session_dir = dir + .path() + .join("tmp") + .join("project-jsonl-context") + .join("chats"); + std::fs::create_dir_all(&session_dir).unwrap(); + let session_path = session_dir.join("session.jsonl"); + let jsonl_content = r#"{"sessionId":"sess-jsonl-context","projectHash":"proj-hash","startTime":"2026-06-03T18:12:29.005Z","lastUpdated":"2026-06-03T18:12:29.005Z","kind":"main"} +{"$set":{"messages":[{"id":"context","timestamp":"2026-06-03T18:12:29.005Z","type":"user","content":[{"text":"\nThis is the Gemini CLI.\n"}]},{"id":"u-1","timestamp":"2026-06-03T18:12:35.384Z","type":"user","content":[{"text":"actual user prompt"}]},{"id":"g-1","timestamp":"2026-06-03T18:12:38.058Z","type":"gemini","content":"Hi there","thoughts":[],"tokens":{"input":10,"output":3,"cached":0,"thoughts":1,"tool":0,"total":14},"model":"gemini-3-flash-preview"}],"lastUpdated":"2026-06-03T18:12:38.058Z"}} +"#; + let mut file = File::create(&session_path).unwrap(); + file.write_all(jsonl_content.as_bytes()).unwrap(); + + let analyzer = GeminiCliAnalyzer::new(); + let source = crate::analyzer::DataSource { path: session_path }; + let messages = analyzer + .parse_source(&source) + .expect("jsonl sessions should parse successfully"); + + assert_eq!(messages.len(), 2); + assert_eq!(messages[0].role, crate::types::MessageRole::User); + assert_eq!( + messages[0].session_name.as_deref(), + Some("actual user prompt") + ); +} + #[tokio::test] async fn test_gemini_cli_jsonl_latest_message_version_wins() { let dir = tempdir().unwrap();