Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/main/java/org/apache/commons/lang3/StringUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -3002,6 +3002,12 @@ public static int indexOfDifference(final CharSequence... css) {
break;
}
}
if (firstDiff > 0 && Character.isLowSurrogate(css[0].charAt(firstDiff))
&& Character.isHighSurrogate(css[0].charAt(firstDiff - 1))) {
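// the difference splits a surrogate pair whose high half is common; report the start of the
// pair so getCommonPrefix never slices it in half and leaves a stray high surrogate.
// NOTE(review): only css[0] is inspected for the low surrogate here, whereas the two-argument
// overload checks both inputs; confirm whether the other array elements should be checked too.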
firstDiff--;
}
if (firstDiff == -1 && shortestStrLen != longestStrLen) {
// we compared all of the characters up to the length of the
// shortest string and didn't find a match, but the string lengths
Expand Down Expand Up @@ -3048,6 +3054,12 @@ public static int indexOfDifference(final CharSequence cs1, final CharSequence c
break;
}
}
if (i > 0 && i < cs1.length() && i < cs2.length() && Character.isHighSurrogate(cs1.charAt(i - 1))
&& (Character.isLowSurrogate(cs1.charAt(i)) || Character.isLowSurrogate(cs2.charAt(i)))) {
// the difference splits a surrogate pair whose high half is common; report the start of the
// pair so difference does not return a string that begins with a stray low surrogate.
i--;
}
if (i < cs2.length() || i < cs1.length()) {
return i;
}
Expand Down
20 changes: 20 additions & 0 deletions src/test/java/org/apache/commons/lang3/StringUtilsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,11 @@ void testDifference_StringString() {
assertEquals("robot", StringUtils.difference("i am a machine", "i am a robot"));
assertEquals("", StringUtils.difference("abc", "abc"));
assertEquals("you are a robot", StringUtils.difference("i am a robot", "you are a robot"));
// 0x10400 and 0x10401 share the same high surrogate; the difference must not begin with a lone low surrogate
final String cp10400 = new String(Character.toChars(0x10400));
final String cp10401 = new String(Character.toChars(0x10401));
assertEquals(cp10401, StringUtils.difference(cp10400, cp10401));
assertEquals("Y", StringUtils.difference(cp10400 + "X", cp10400 + "Y"));
}

@Test
Expand All @@ -563,6 +568,11 @@ void testDifferenceAt_StringArray() {
assertEquals(0, StringUtils.indexOfDifference("abcde", "xyz"));
assertEquals(0, StringUtils.indexOfDifference("xyz", "abcde"));
assertEquals(7, StringUtils.indexOfDifference("i am a machine", "i am a robot"));
// a difference that falls inside a shared surrogate pair is reported at the start of the pair, not mid-pair
final String cp10400 = new String(Character.toChars(0x10400));
final String cp10401 = new String(Character.toChars(0x10401));
assertEquals(0, StringUtils.indexOfDifference(new String[] {cp10400, cp10401}));
assertEquals(2, StringUtils.indexOfDifference(new String[] {cp10400 + "X", cp10400 + "Y"}));
}

@Test
Expand All @@ -576,6 +586,11 @@ void testDifferenceAt_StringString() {
assertEquals(7, StringUtils.indexOfDifference("i am a machine", "i am a robot"));
assertEquals(-1, StringUtils.indexOfDifference("foo", "foo"));
assertEquals(0, StringUtils.indexOfDifference("i am a robot", "you are a robot"));
// a difference that falls inside a shared surrogate pair is reported at the start of the pair, not mid-pair
final String cp10400 = new String(Character.toChars(0x10400));
final String cp10401 = new String(Character.toChars(0x10401));
assertEquals(0, StringUtils.indexOfDifference(cp10400, cp10401));
assertEquals(2, StringUtils.indexOfDifference(cp10400 + "X", cp10400 + "Y"));
}

/**
Expand Down Expand Up @@ -679,6 +694,11 @@ void testGetCommonPrefix_StringArray() {
assertEquals("", StringUtils.getCommonPrefix("abcde", "xyz"));
assertEquals("", StringUtils.getCommonPrefix("xyz", "abcde"));
assertEquals("i am a ", StringUtils.getCommonPrefix("i am a machine", "i am a robot"));
// 0x10400 and 0x10401 share the high surrogate but differ; the common prefix must not be a lone high surrogate
final String cp10400 = new String(Character.toChars(0x10400));
final String cp10401 = new String(Character.toChars(0x10401));
assertEquals("", StringUtils.getCommonPrefix(cp10400, cp10401));
assertEquals(cp10400, StringUtils.getCommonPrefix(cp10400 + "X", cp10400 + "Y"));
}

@Test
Expand Down
Loading