Skip to content

Commit a6fe223

Browse files
committed
feat(fs_write): fuzzy str_replace with 3-strategy fallback chain
The current str_replace implementation uses exact byte matching only. When the model's old_str has minor differences from the file (indentation drift, whitespace, or small context edits), the match fails and the model either retries wastefully or falls back to destructive shell commands. Implement str_replace_fuzzy() with a 3-strategy fallback chain inspired by opencode and cline's diff-apply approaches: 1. Exact match — unchanged behaviour for the common case 2. Line-trimmed match — compares lines after trim(), then replaces using byte offsets into the original content. Handles indentation drift. 3. Block-anchor match — uses first+last line as anchors, scores middle lines with Levenshtein similarity, picks the best candidate above a 0.6 threshold. Handles minor edits in surrounding context lines. Key correctness fixes over naive implementation: - Strategies 2 and 3 return byte ranges (start, end) instead of the matched substring, so replacement is always at the correct position even if the matched text appears elsewhere in the file. - block_anchor_match skips candidates where first == last to avoid false positives on symmetric blocks (e.g. closing braces). - similarity_score respects the actual content window bounds (ci < end) to avoid scoring lines outside the candidate window. - levenshtein uses O(n) rolling-row space instead of O(m*n) matrix. - similarity_score uses char count (not byte len) for the denominator to handle multi-byte UTF-8 correctly. 10 tests cover: exact match, ambiguous rejection, indentation drift, minor middle-line edits, correct-position replacement when matched text appears elsewhere, symmetric anchor rejection, Levenshtein correctness, line-trimmed indentation preservation, and ambiguity rejection.
1 parent e14ea18 commit a6fe223

1 file changed

Lines changed: 278 additions & 9 deletions

File tree

crates/chat-cli/src/cli/chat/tools/fs_write.rs

Lines changed: 278 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,6 @@ impl FsWrite {
133133
},
134134
FsWrite::StrReplace { old_str, new_str, .. } => {
135135
let file = os.fs.read_to_string(&path).await?;
136-
let matches = file.match_indices(old_str).collect::<Vec<_>>();
137136
queue!(
138137
output,
139138
style::Print("Updating: "),
@@ -142,14 +141,8 @@ impl FsWrite {
142141
StyledText::reset(),
143142
style::Print("\n"),
144143
)?;
145-
match matches.len() {
146-
0 => return Err(eyre!("no occurrences of \"{old_str}\" were found")),
147-
1 => {
148-
let file = file.replacen(old_str, new_str, 1);
149-
os.fs.write(&path, file).await?;
150-
},
151-
x => return Err(eyre!("{x} occurrences of old_str were found when only 1 is expected")),
152-
}
144+
let updated = str_replace_fuzzy(&file, old_str, new_str)?;
145+
os.fs.write(&path, updated).await?;
153146
},
154147
FsWrite::Insert {
155148
insert_line, new_str, ..
@@ -858,6 +851,189 @@ fn syntect_to_crossterm_color(syntect: syntect::highlighting::Color) -> style::C
858851
}
859852
}
860853

854+
/// Attempts to replace `old_str` with `new_str` in `content` using a fallback chain:
855+
///
856+
/// 1. **Exact match** — fastest, most precise.
857+
/// 2. **Line-trimmed match** — matches lines after stripping leading/trailing whitespace,
858+
/// then replaces the original (indented) text. Handles indentation drift.
859+
/// 3. **Block-anchor match** — matches by first+last line as anchors, uses Levenshtein
860+
/// similarity on middle lines to find the best candidate. Handles minor edits in context.
861+
///
862+
/// Returns an error if no strategy finds exactly one unambiguous match.
863+
fn str_replace_fuzzy(content: &str, old_str: &str, new_str: &str) -> eyre::Result<String> {
864+
// Strategy 1: exact match
865+
let exact_count = content.match_indices(old_str).count();
866+
match exact_count {
867+
1 => return Ok(content.replacen(old_str, new_str, 1)),
868+
x if x > 1 => {
869+
return Err(eyre::eyre!(
870+
"{x} occurrences of old_str were found when only 1 is expected — \
871+
add more surrounding context to old_str to make it unique"
872+
))
873+
},
874+
_ => {},
875+
}
876+
877+
// Strategies 2 & 3: fuzzy — both return a byte range to splice at
878+
let range = line_trimmed_match(content, old_str)
879+
.or_else(|| block_anchor_match(content, old_str));
880+
881+
if let Some((start, end)) = range {
882+
return Ok(format!("{}{}{}", &content[..start], new_str, &content[end..]));
883+
}
884+
885+
Err(eyre::eyre!(
886+
"no occurrences of the provided old_str were found (tried exact, \
887+
line-trimmed, and block-anchor matching) — use fs_read to read the \
888+
current file content and retry str_replace with the exact text. \
889+
Do NOT fall back to shell commands like sed."
890+
))
891+
}
892+
893+
/// Strips leading and trailing empty lines from a split-by-newline vec.
894+
fn strip_empty_boundary_lines(mut lines: Vec<&str>) -> Vec<&str> {
895+
while lines.last().map(|l: &&str| l.trim().is_empty()).unwrap_or(false) {
896+
lines.pop();
897+
}
898+
while lines.first().map(|l: &&str| l.trim().is_empty()).unwrap_or(false) {
899+
lines.remove(0);
900+
}
901+
lines
902+
}
903+
904+
/// Builds a prefix-sum table of byte offsets for lines split by `\n`.
905+
/// `offsets[i]` = byte offset of the start of line `i` in the original string.
906+
/// `offsets[lines.len()]` = one past the last byte (i.e. content.len() + 1 conceptually).
907+
fn build_line_offsets(lines: &[&str]) -> Vec<usize> {
908+
let mut offsets = Vec::with_capacity(lines.len() + 1);
909+
offsets.push(0usize);
910+
for line in lines {
911+
offsets.push(offsets.last().unwrap() + line.len() + 1); // +1 for '\n'
912+
}
913+
offsets
914+
}
915+
916+
/// Matches `find` against `content` by comparing trimmed lines.
917+
/// Returns the byte range `(start, end)` in `content` if exactly one match is found.
918+
fn line_trimmed_match(content: &str, find: &str) -> Option<(usize, usize)> {
919+
let content_lines: Vec<&str> = content.split('\n').collect();
920+
let search_lines = strip_empty_boundary_lines(find.split('\n').collect());
921+
922+
if search_lines.is_empty() {
923+
return None;
924+
}
925+
926+
let offsets = build_line_offsets(&content_lines);
927+
928+
let mut matches: Vec<(usize, usize)> = Vec::new();
929+
'outer: for i in 0..=content_lines.len().saturating_sub(search_lines.len()) {
930+
for (j, search_line) in search_lines.iter().enumerate() {
931+
if content_lines[i + j].trim() != search_line.trim() {
932+
continue 'outer;
933+
}
934+
}
935+
let start = offsets[i];
936+
let end = offsets[i + search_lines.len()].saturating_sub(1).min(content.len());
937+
matches.push((start, end));
938+
}
939+
940+
if matches.len() == 1 { Some(matches[0]) } else { None }
941+
}
942+
943+
/// Levenshtein distance between two strings (char-level, O(min(m,n)) space).
944+
/// `a` is placed in the row dimension (longer), `b` in the column (shorter).
945+
fn levenshtein(a: &str, b: &str) -> usize {
946+
let a: Vec<char> = a.chars().collect();
947+
let b: Vec<char> = b.chars().collect();
948+
// Ensure `a` is the longer string so `b` (columns) is the smaller allocation
949+
let (a, b) = if a.len() >= b.len() { (a, b) } else { (b, a) };
950+
let (m, n) = (a.len(), b.len());
951+
let mut prev: Vec<usize> = (0..=n).collect();
952+
let mut curr = vec![0usize; n + 1];
953+
for i in 1..=m {
954+
curr[0] = i;
955+
for j in 1..=n {
956+
curr[j] = if a[i - 1] == b[j - 1] {
957+
prev[j - 1]
958+
} else {
959+
1 + prev[j].min(curr[j - 1]).min(prev[j - 1])
960+
};
961+
}
962+
std::mem::swap(&mut prev, &mut curr);
963+
}
964+
prev[n]
965+
}
966+
967+
const SIMILARITY_THRESHOLD: f64 = 0.6;
968+
969+
/// Matches `find` against `content` using first+last line as anchors and Levenshtein
970+
/// similarity on middle lines. Returns the byte range `(start, end)` in `content` if
971+
/// similarity exceeds the threshold and the match is unambiguous.
972+
fn block_anchor_match(content: &str, find: &str) -> Option<(usize, usize)> {
973+
let content_lines: Vec<&str> = content.split('\n').collect();
974+
let search_lines = strip_empty_boundary_lines(find.split('\n').collect());
975+
976+
// Need at least 2 distinct lines for anchor matching
977+
if search_lines.len() < 2 {
978+
return None;
979+
}
980+
981+
let first = search_lines[0].trim();
982+
let last = search_lines[search_lines.len() - 1].trim();
983+
984+
// Symmetric anchors (e.g. `}` / `}`) produce too many false positives
985+
if first == last {
986+
return None;
987+
}
988+
989+
// Build offsets once — reused for both scoring and final byte range
990+
let offsets = build_line_offsets(&content_lines);
991+
992+
// Collect candidate windows where first and last anchor lines match
993+
let mut candidates: Vec<(usize, usize, f64)> = Vec::new();
994+
for i in 0..content_lines.len() {
995+
if content_lines[i].trim() != first { continue; }
996+
for j in (i + 1)..content_lines.len() {
997+
if content_lines[j].trim() == last {
998+
let score = similarity_score(&content_lines, i, j, &search_lines);
999+
candidates.push((i, j, score));
1000+
break;
1001+
}
1002+
}
1003+
}
1004+
1005+
// Pick the single best candidate above the threshold
1006+
let best = candidates
1007+
.into_iter()
1008+
.filter(|&(_, _, s)| s >= SIMILARITY_THRESHOLD)
1009+
.max_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal))?;
1010+
1011+
let start = offsets[best.0];
1012+
let end = offsets[best.1 + 1].saturating_sub(1).min(content.len());
1013+
Some((start, end))
1014+
}
1015+
1016+
/// Average Levenshtein similarity of middle lines between `search_lines` and the
1017+
/// corresponding window `content_lines[start..=end]`.
1018+
fn similarity_score(content_lines: &[&str], start: usize, end: usize, search_lines: &[&str]) -> f64 {
1019+
let middle_count = search_lines.len().saturating_sub(2);
1020+
if middle_count == 0 { return 1.0; }
1021+
1022+
let mut total = 0.0;
1023+
let mut counted = 0;
1024+
for k in 1..search_lines.len().saturating_sub(1) {
1025+
let ci = start + k;
1026+
if ci >= end { break; }
1027+
let a = content_lines[ci].trim();
1028+
let b = search_lines[k].trim();
1029+
let max_len = a.chars().count().max(b.chars().count());
1030+
if max_len == 0 { total += 1.0; counted += 1; continue; }
1031+
total += 1.0 - levenshtein(a, b) as f64 / max_len as f64;
1032+
counted += 1;
1033+
}
1034+
if counted == 0 { 1.0 } else { total / counted as f64 }
1035+
}
1036+
8611037
#[cfg(test)]
8621038
mod tests {
8631039
use std::collections::HashMap;
@@ -870,6 +1046,99 @@ mod tests {
8701046
setup_test_directory,
8711047
};
8721048

1049+
// ── str_replace_fuzzy tests ──────────────────────────────────────────────
1050+
1051+
#[test]
1052+
fn fuzzy_exact_match() {
1053+
let content = "fn foo() {\n let x = 1;\n}\n";
1054+
let result = str_replace_fuzzy(content, "let x = 1;", "let x = 42;").unwrap();
1055+
assert_eq!(result, "fn foo() {\n let x = 42;\n}\n");
1056+
}
1057+
1058+
#[test]
1059+
fn fuzzy_exact_match_fails_on_ambiguous() {
1060+
let content = "let x = 1;\nlet x = 1;\n";
1061+
assert!(str_replace_fuzzy(content, "let x = 1;", "let x = 2;").is_err());
1062+
}
1063+
1064+
#[test]
1065+
fn fuzzy_line_trimmed_handles_indentation_drift() {
1066+
// old_str has different indentation than the file
1067+
let content = "fn foo() {\n let x = 1;\n let y = 2;\n}\n";
1068+
let old_str = "let x = 1;\nlet y = 2;"; // no indentation
1069+
let result = str_replace_fuzzy(content, old_str, "let x = 10;\nlet y = 20;").unwrap();
1070+
assert!(result.contains("let x = 10;"));
1071+
assert!(result.contains("let y = 20;"));
1072+
}
1073+
1074+
#[test]
1075+
fn fuzzy_block_anchor_handles_minor_middle_edits() {
1076+
// Middle line has a minor typo vs what's in the file
1077+
let content = "fn calculate() {\n let result = a + b;\n return result;\n}\n";
1078+
// old_str has slightly different middle line
1079+
let old_str = "fn calculate() {\n let result = a + b; // sum\n return result;\n}";
1080+
let result = str_replace_fuzzy(content, old_str, "fn calculate() {\n return a + b;\n}");
1081+
// Should find a match via block anchor (first+last line match)
1082+
assert!(result.is_ok(), "block anchor should match: {:?}", result);
1083+
}
1084+
1085+
#[test]
1086+
fn fuzzy_returns_error_when_no_strategy_matches() {
1087+
let content = "fn foo() {}\n";
1088+
let result = str_replace_fuzzy(content, "fn bar() {}", "fn baz() {}");
1089+
assert!(result.is_err());
1090+
let msg = result.unwrap_err().to_string();
1091+
assert!(msg.contains("fs_read"), "error should mention fs_read: {msg}");
1092+
assert!(msg.contains("sed"), "error should warn against sed: {msg}");
1093+
}
1094+
1095+
#[test]
1096+
fn fuzzy_replaces_correct_occurrence_when_matched_text_appears_elsewhere() {
1097+
// The fuzzy-matched substring also appears earlier in the file.
1098+
// We must replace the matched position, not the first occurrence.
1099+
let content = " let x = 1;\nfn foo() {\n let x = 1;\n let y = 2;\n}\n";
1100+
// old_str with no indentation — line-trimmed will match the block inside fn foo
1101+
let old_str = "let x = 1;\nlet y = 2;";
1102+
let result = str_replace_fuzzy(content, old_str, "let x = 10;\nlet y = 20;").unwrap();
1103+
// The standalone "let x = 1;" at the top must be untouched
1104+
assert!(result.starts_with(" let x = 1;\n"), "first occurrence must be untouched");
1105+
assert!(result.contains("let x = 10;"), "matched block must be replaced");
1106+
}
1107+
1108+
#[test]
1109+
fn block_anchor_skips_symmetric_first_last_lines() {
1110+
// first == last — should not produce false positive via block anchor
1111+
let content = "}\n}\n";
1112+
let find = "}\n}";
1113+
// block_anchor_match should return None because first == last
1114+
assert!(block_anchor_match(content, find).is_none());
1115+
}
1116+
1117+
#[test]
1118+
fn levenshtein_space_optimised_matches_naive() {
1119+
// Verify the O(n) space implementation gives correct results
1120+
assert_eq!(levenshtein("", "abc"), 3);
1121+
assert_eq!(levenshtein("abc", ""), 3);
1122+
assert_eq!(levenshtein("saturday", "sunday"), 3);
1123+
}
1124+
1125+
#[test]
1126+
fn line_trimmed_match_finds_indented_block() {
1127+
let content = "class Foo {\n void bar() {\n int x = 1;\n }\n}\n";
1128+
let find = "void bar() {\n int x = 1;\n}";
1129+
let matched = line_trimmed_match(content, find);
1130+
assert!(matched.is_some(), "should find indented block");
1131+
let (start, end) = matched.unwrap();
1132+
assert!(content[start..end].contains(" void bar()"), "should preserve original indentation");
1133+
}
1134+
1135+
#[test]
1136+
fn line_trimmed_match_returns_none_on_ambiguous() {
1137+
let content = " foo()\n foo()\n";
1138+
let find = "foo()";
1139+
assert!(line_trimmed_match(content, find).is_none());
1140+
}
1141+
8731142
#[test]
8741143
fn test_fs_write_deserialize() {
8751144
let path = "/my-file";

0 commit comments

Comments
 (0)