@@ -133,7 +133,6 @@ impl FsWrite {
133133 } ,
134134 FsWrite :: StrReplace { old_str, new_str, .. } => {
135135 let file = os. fs . read_to_string ( & path) . await ?;
136- let matches = file. match_indices ( old_str) . collect :: < Vec < _ > > ( ) ;
137136 queue ! (
138137 output,
139138 style:: Print ( "Updating: " ) ,
@@ -142,14 +141,8 @@ impl FsWrite {
142141 StyledText :: reset( ) ,
143142 style:: Print ( "\n " ) ,
144143 ) ?;
145- match matches. len ( ) {
146- 0 => return Err ( eyre ! ( "no occurrences of \" {old_str}\" were found" ) ) ,
147- 1 => {
148- let file = file. replacen ( old_str, new_str, 1 ) ;
149- os. fs . write ( & path, file) . await ?;
150- } ,
151- x => return Err ( eyre ! ( "{x} occurrences of old_str were found when only 1 is expected" ) ) ,
152- }
144+ let updated = str_replace_fuzzy ( & file, old_str, new_str) ?;
145+ os. fs . write ( & path, updated) . await ?;
153146 } ,
154147 FsWrite :: Insert {
155148 insert_line, new_str, ..
@@ -858,6 +851,189 @@ fn syntect_to_crossterm_color(syntect: syntect::highlighting::Color) -> style::C
858851 }
859852}
860853
854+ /// Attempts to replace `old_str` with `new_str` in `content` using a fallback chain:
855+ ///
856+ /// 1. **Exact match** — fastest, most precise.
857+ /// 2. **Line-trimmed match** — matches lines after stripping leading/trailing whitespace,
858+ /// then replaces the original (indented) text. Handles indentation drift.
859+ /// 3. **Block-anchor match** — matches by first+last line as anchors, uses Levenshtein
860+ /// similarity on middle lines to find the best candidate. Handles minor edits in context.
861+ ///
862+ /// Returns an error if no strategy finds exactly one unambiguous match.
863+ fn str_replace_fuzzy ( content : & str , old_str : & str , new_str : & str ) -> eyre:: Result < String > {
864+ // Strategy 1: exact match
865+ let exact_count = content. match_indices ( old_str) . count ( ) ;
866+ match exact_count {
867+ 1 => return Ok ( content. replacen ( old_str, new_str, 1 ) ) ,
868+ x if x > 1 => {
869+ return Err ( eyre:: eyre!(
870+ "{x} occurrences of old_str were found when only 1 is expected — \
871+ add more surrounding context to old_str to make it unique"
872+ ) )
873+ } ,
874+ _ => { } ,
875+ }
876+
877+ // Strategies 2 & 3: fuzzy — both return a byte range to splice at
878+ let range = line_trimmed_match ( content, old_str)
879+ . or_else ( || block_anchor_match ( content, old_str) ) ;
880+
881+ if let Some ( ( start, end) ) = range {
882+ return Ok ( format ! ( "{}{}{}" , & content[ ..start] , new_str, & content[ end..] ) ) ;
883+ }
884+
885+ Err ( eyre:: eyre!(
886+ "no occurrences of the provided old_str were found (tried exact, \
887+ line-trimmed, and block-anchor matching) — use fs_read to read the \
888+ current file content and retry str_replace with the exact text. \
889+ Do NOT fall back to shell commands like sed."
890+ ) )
891+ }
892+
/// Removes blank lines from both ends of `lines`, leaving interior blank lines untouched.
///
/// A line counts as blank when it is empty after `str::trim`. If every line is blank the
/// result is empty.
fn strip_empty_boundary_lines(mut lines: Vec<&str>) -> Vec<&str> {
    // Drop the trailing blank run: keep everything up to and including the last
    // non-blank line (or nothing if there is none).
    let kept = lines
        .iter()
        .rposition(|line| !line.trim().is_empty())
        .map_or(0, |idx| idx + 1);
    lines.truncate(kept);

    // Drop the leading blank run with a single shift instead of repeated `remove(0)`,
    // which would move the whole tail once per removed line.
    let leading = lines
        .iter()
        .position(|line| !line.trim().is_empty())
        .unwrap_or(0);
    lines.drain(..leading);

    lines
}
903+
/// Computes the starting byte offset of every line, given the pieces of a string that was
/// split on `'\n'`.
///
/// The result has `lines.len() + 1` entries: `offsets[i]` is where line `i` starts, counting
/// one separator byte after each line. The final entry is the offset just after a
/// separator following the last line, i.e. the joined length plus one.
fn build_line_offsets(lines: &[&str]) -> Vec<usize> {
    std::iter::once(0usize)
        .chain(lines.iter().scan(0usize, |cursor, line| {
            // Advance past this line and the '\n' that followed it.
            *cursor += line.len() + 1;
            Some(*cursor)
        }))
        .collect()
}
915+
/// Locates `find` inside `content` by comparing lines with surrounding whitespace removed.
///
/// Blank lines at the start and end of `find` are ignored; interior blank lines must match
/// a blank line in `content`. A match is a run of consecutive `content` lines whose trimmed
/// text equals the trimmed lines of `find`, in order.
///
/// Returns the byte range `(start, end)` of the matched lines in `content`: `start` is the
/// first byte of the first matched line (including its indentation) and `end` is the end of
/// the last matched line, excluding its `'\n'`. Returns `None` when `find` contains only
/// blank lines, when `find` has more lines than `content`, or when the number of matching
/// runs is not exactly one.
fn line_trimmed_match(content: &str, find: &str) -> Option<(usize, usize)> {
    // Trim every search line once, then cut the blank lines off both ends.
    // `?` bails out when `find` consists solely of blank lines.
    let wanted: Vec<&str> = find.split('\n').map(str::trim).collect();
    let first = wanted.iter().position(|line| !line.is_empty())?;
    let last = wanted.iter().rposition(|line| !line.is_empty())?;
    let wanted = &wanted[first..=last];

    // For every content line record (start byte, end byte, trimmed text).
    let mut cursor = 0usize;
    let lines: Vec<(usize, usize, &str)> = content
        .split('\n')
        .map(|line| {
            let start = cursor;
            let end = start + line.len();
            cursor = end + 1; // skip the '\n' separator
            (start, end, line.trim())
        })
        .collect();

    // `windows` yields nothing when `wanted` is longer than `lines`, so an over-long
    // search block simply produces no match instead of indexing out of bounds.
    let mut hits = lines.windows(wanted.len()).filter(|window| {
        window
            .iter()
            .zip(wanted)
            .all(|(&(_, _, text), &target)| text == target)
    });

    let hit = hits.next()?;
    if hits.next().is_some() {
        // More than one run matches: ambiguous.
        return None;
    }
    Some((hit[0].0, hit[hit.len() - 1].1))
}
942+
/// Computes the Levenshtein edit distance between `a` and `b`, counted in `char`s.
///
/// Uses a single rolling row of the dynamic-programming table. The shorter input is laid
/// out along the row so the working buffer has `min(len) + 1` entries.
fn levenshtein(a: &str, b: &str) -> usize {
    let left: Vec<char> = a.chars().collect();
    let right: Vec<char> = b.chars().collect();
    // `outer` drives the rows (longer input), `inner` the columns (shorter input).
    let (outer, inner) = if left.len() >= right.len() {
        (&left, &right)
    } else {
        (&right, &left)
    };

    // row[j] holds the distance between the processed prefix of `outer` and inner[..j].
    let mut row: Vec<usize> = (0..=inner.len()).collect();
    for (i, &oc) in outer.iter().enumerate() {
        // `diag` carries the previous row's value one column to the left.
        let mut diag = row[0];
        row[0] = i + 1;
        for (j, &ic) in inner.iter().enumerate() {
            let above = row[j + 1];
            row[j + 1] = if oc == ic {
                diag
            } else {
                1 + above.min(row[j]).min(diag)
            };
            diag = above;
        }
    }
    row[inner.len()]
}
966+
967+ const SIMILARITY_THRESHOLD : f64 = 0.6 ;
968+
969+ /// Matches `find` against `content` using first+last line as anchors and Levenshtein
970+ /// similarity on middle lines. Returns the byte range `(start, end)` in `content` if
971+ /// similarity exceeds the threshold and the match is unambiguous.
972+ fn block_anchor_match ( content : & str , find : & str ) -> Option < ( usize , usize ) > {
973+ let content_lines: Vec < & str > = content. split ( '\n' ) . collect ( ) ;
974+ let search_lines = strip_empty_boundary_lines ( find. split ( '\n' ) . collect ( ) ) ;
975+
976+ // Need at least 2 distinct lines for anchor matching
977+ if search_lines. len ( ) < 2 {
978+ return None ;
979+ }
980+
981+ let first = search_lines[ 0 ] . trim ( ) ;
982+ let last = search_lines[ search_lines. len ( ) - 1 ] . trim ( ) ;
983+
984+ // Symmetric anchors (e.g. `}` / `}`) produce too many false positives
985+ if first == last {
986+ return None ;
987+ }
988+
989+ // Build offsets once — reused for both scoring and final byte range
990+ let offsets = build_line_offsets ( & content_lines) ;
991+
992+ // Collect candidate windows where first and last anchor lines match
993+ let mut candidates: Vec < ( usize , usize , f64 ) > = Vec :: new ( ) ;
994+ for i in 0 ..content_lines. len ( ) {
995+ if content_lines[ i] . trim ( ) != first { continue ; }
996+ for j in ( i + 1 ) ..content_lines. len ( ) {
997+ if content_lines[ j] . trim ( ) == last {
998+ let score = similarity_score ( & content_lines, i, j, & search_lines) ;
999+ candidates. push ( ( i, j, score) ) ;
1000+ break ;
1001+ }
1002+ }
1003+ }
1004+
1005+ // Pick the single best candidate above the threshold
1006+ let best = candidates
1007+ . into_iter ( )
1008+ . filter ( |& ( _, _, s) | s >= SIMILARITY_THRESHOLD )
1009+ . max_by ( |a, b| a. 2 . partial_cmp ( & b. 2 ) . unwrap_or ( std:: cmp:: Ordering :: Equal ) ) ?;
1010+
1011+ let start = offsets[ best. 0 ] ;
1012+ let end = offsets[ best. 1 + 1 ] . saturating_sub ( 1 ) . min ( content. len ( ) ) ;
1013+ Some ( ( start, end) )
1014+ }
1015+
1016+ /// Average Levenshtein similarity of middle lines between `search_lines` and the
1017+ /// corresponding window `content_lines[start..=end]`.
1018+ fn similarity_score ( content_lines : & [ & str ] , start : usize , end : usize , search_lines : & [ & str ] ) -> f64 {
1019+ let middle_count = search_lines. len ( ) . saturating_sub ( 2 ) ;
1020+ if middle_count == 0 { return 1.0 ; }
1021+
1022+ let mut total = 0.0 ;
1023+ let mut counted = 0 ;
1024+ for k in 1 ..search_lines. len ( ) . saturating_sub ( 1 ) {
1025+ let ci = start + k;
1026+ if ci >= end { break ; }
1027+ let a = content_lines[ ci] . trim ( ) ;
1028+ let b = search_lines[ k] . trim ( ) ;
1029+ let max_len = a. chars ( ) . count ( ) . max ( b. chars ( ) . count ( ) ) ;
1030+ if max_len == 0 { total += 1.0 ; counted += 1 ; continue ; }
1031+ total += 1.0 - levenshtein ( a, b) as f64 / max_len as f64 ;
1032+ counted += 1 ;
1033+ }
1034+ if counted == 0 { 1.0 } else { total / counted as f64 }
1035+ }
1036+
8611037#[ cfg( test) ]
8621038mod tests {
8631039 use std:: collections:: HashMap ;
@@ -870,6 +1046,99 @@ mod tests {
8701046 setup_test_directory,
8711047 } ;
8721048
1049+ // ── str_replace_fuzzy tests ──────────────────────────────────────────────
1050+
1051+ #[ test]
1052+ fn fuzzy_exact_match ( ) {
1053+ let content = "fn foo() {\n let x = 1;\n }\n " ;
1054+ let result = str_replace_fuzzy ( content, "let x = 1;" , "let x = 42;" ) . unwrap ( ) ;
1055+ assert_eq ! ( result, "fn foo() {\n let x = 42;\n }\n " ) ;
1056+ }
1057+
1058+ #[ test]
1059+ fn fuzzy_exact_match_fails_on_ambiguous ( ) {
1060+ let content = "let x = 1;\n let x = 1;\n " ;
1061+ assert ! ( str_replace_fuzzy( content, "let x = 1;" , "let x = 2;" ) . is_err( ) ) ;
1062+ }
1063+
1064+ #[ test]
1065+ fn fuzzy_line_trimmed_handles_indentation_drift ( ) {
1066+ // old_str has different indentation than the file
1067+ let content = "fn foo() {\n let x = 1;\n let y = 2;\n }\n " ;
1068+ let old_str = "let x = 1;\n let y = 2;" ; // no indentation
1069+ let result = str_replace_fuzzy ( content, old_str, "let x = 10;\n let y = 20;" ) . unwrap ( ) ;
1070+ assert ! ( result. contains( "let x = 10;" ) ) ;
1071+ assert ! ( result. contains( "let y = 20;" ) ) ;
1072+ }
1073+
1074+ #[ test]
1075+ fn fuzzy_block_anchor_handles_minor_middle_edits ( ) {
1076+ // Middle line has a minor typo vs what's in the file
1077+ let content = "fn calculate() {\n let result = a + b;\n return result;\n }\n " ;
1078+ // old_str has slightly different middle line
1079+ let old_str = "fn calculate() {\n let result = a + b; // sum\n return result;\n }" ;
1080+ let result = str_replace_fuzzy ( content, old_str, "fn calculate() {\n return a + b;\n }" ) ;
1081+ // Should find a match via block anchor (first+last line match)
1082+ assert ! ( result. is_ok( ) , "block anchor should match: {:?}" , result) ;
1083+ }
1084+
1085+ #[ test]
1086+ fn fuzzy_returns_error_when_no_strategy_matches ( ) {
1087+ let content = "fn foo() {}\n " ;
1088+ let result = str_replace_fuzzy ( content, "fn bar() {}" , "fn baz() {}" ) ;
1089+ assert ! ( result. is_err( ) ) ;
1090+ let msg = result. unwrap_err ( ) . to_string ( ) ;
1091+ assert ! ( msg. contains( "fs_read" ) , "error should mention fs_read: {msg}" ) ;
1092+ assert ! ( msg. contains( "sed" ) , "error should warn against sed: {msg}" ) ;
1093+ }
1094+
1095+ #[ test]
1096+ fn fuzzy_replaces_correct_occurrence_when_matched_text_appears_elsewhere ( ) {
1097+ // The fuzzy-matched substring also appears earlier in the file.
1098+ // We must replace the matched position, not the first occurrence.
1099+ let content = " let x = 1;\n fn foo() {\n let x = 1;\n let y = 2;\n }\n " ;
1100+ // old_str with no indentation — line-trimmed will match the block inside fn foo
1101+ let old_str = "let x = 1;\n let y = 2;" ;
1102+ let result = str_replace_fuzzy ( content, old_str, "let x = 10;\n let y = 20;" ) . unwrap ( ) ;
1103+ // The standalone "let x = 1;" at the top must be untouched
1104+ assert ! ( result. starts_with( " let x = 1;\n " ) , "first occurrence must be untouched" ) ;
1105+ assert ! ( result. contains( "let x = 10;" ) , "matched block must be replaced" ) ;
1106+ }
1107+
1108+ #[ test]
1109+ fn block_anchor_skips_symmetric_first_last_lines ( ) {
1110+ // first == last — should not produce false positive via block anchor
1111+ let content = "}\n }\n " ;
1112+ let find = "}\n }" ;
1113+ // block_anchor_match should return None because first == last
1114+ assert ! ( block_anchor_match( content, find) . is_none( ) ) ;
1115+ }
1116+
1117+ #[ test]
1118+ fn levenshtein_space_optimised_matches_naive ( ) {
1119+ // Verify the O(n) space implementation gives correct results
1120+ assert_eq ! ( levenshtein( "" , "abc" ) , 3 ) ;
1121+ assert_eq ! ( levenshtein( "abc" , "" ) , 3 ) ;
1122+ assert_eq ! ( levenshtein( "saturday" , "sunday" ) , 3 ) ;
1123+ }
1124+
1125+ #[ test]
1126+ fn line_trimmed_match_finds_indented_block ( ) {
1127+ let content = "class Foo {\n void bar() {\n int x = 1;\n }\n }\n " ;
1128+ let find = "void bar() {\n int x = 1;\n }" ;
1129+ let matched = line_trimmed_match ( content, find) ;
1130+ assert ! ( matched. is_some( ) , "should find indented block" ) ;
1131+ let ( start, end) = matched. unwrap ( ) ;
1132+ assert ! ( content[ start..end] . contains( " void bar()" ) , "should preserve original indentation" ) ;
1133+ }
1134+
1135+ #[ test]
1136+ fn line_trimmed_match_returns_none_on_ambiguous ( ) {
1137+ let content = " foo()\n foo()\n " ;
1138+ let find = "foo()" ;
1139+ assert ! ( line_trimmed_match( content, find) . is_none( ) ) ;
1140+ }
1141+
8731142 #[ test]
8741143 fn test_fs_write_deserialize ( ) {
8751144 let path = "/my-file" ;
0 commit comments