From ef9eff9064d3d1ae024930d1a7792f77b4f9751e Mon Sep 17 00:00:00 2001 From: Karl Meakin Date: Sun, 3 May 2026 21:16:44 +0100 Subject: [PATCH] Further optimize `SliceIndex` impl for `Range` We can shave a further 2 `icmp`s by inlining `is_char_boundary` and rearranging the checks. --- library/core/src/str/traits.rs | 93 ++++++++++-------------- tests/codegen-llvm/str-range-indexing.rs | 13 ++-- 2 files changed, 46 insertions(+), 60 deletions(-) diff --git a/library/core/src/str/traits.rs b/library/core/src/str/traits.rs index 3b5cec22b69ea..21d1a5b907aa8 100644 --- a/library/core/src/str/traits.rs +++ b/library/core/src/str/traits.rs @@ -118,6 +118,35 @@ unsafe impl const SliceIndex for ops::RangeFull { } } +/// Check that a range is in bounds for slicing a string. +/// If this returns true, it is safe to call `slice.get_unchecked(range)` or +/// `slice.get_unchecked_mut(range)`. +#[inline(always)] +const fn check_range(slice: &str, range: crate::range::Range) -> bool { + let crate::range::Range { start, end } = range; + let bytes = slice.as_bytes(); + + if start > end || end > slice.len() { + return false; + } + + if start == slice.len() { + // If `start == slice.len()`, then `end == slice.len()` must also be true. + return true; + } + + // SAFETY: + // `start > end || end > slice.len()` is false, so `start <= end <= slice.len()` is true. + // `start == slice.len()` is false, so `start < slice.len()` is also true. + // + // No need to check for `end == 0`, because if `end == 0` is true then `start == slice.len()` + // would also be true, which is already handled above. + unsafe { + (start == 0 || bytes.as_ptr().add(start).read().is_utf8_char_boundary()) + && (end == slice.len() || bytes.as_ptr().add(end).read().is_utf8_char_boundary()) + } +} + /// Implements substring slicing with syntax `&self[begin .. end]` or `&mut /// self[begin .. end]`. /// @@ -159,30 +188,11 @@ unsafe impl const SliceIndex for ops::Range { type Output = str; #[inline] fn get(self, slice: &str) -> Option<&Self::Output> { - if self.start <= self.end - && slice.is_char_boundary(self.start) - && slice.is_char_boundary(self.end) - { - // SAFETY: just checked that `start` and `end` are on a char boundary, - // and we are passing in a safe reference, so the return value will also be one. - // We also checked char boundaries, so this is valid UTF-8. - Some(unsafe { &*self.get_unchecked(slice) }) - } else { - None - } + range::Range::from(self).get(slice) } #[inline] fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> { - if self.start <= self.end - && slice.is_char_boundary(self.start) - && slice.is_char_boundary(self.end) - { - // SAFETY: just checked that `start` and `end` are on a char boundary. - // We know the pointer is unique because we got it from `slice`. - Some(unsafe { &mut *self.get_unchecked_mut(slice) }) - } else { - None - } + range::Range::from(self).get_mut(slice) } #[inline] #[track_caller] @@ -235,26 +245,11 @@ unsafe impl const SliceIndex for ops::Range { } #[inline] fn index(self, slice: &str) -> &Self::Output { - let (start, end) = (self.start, self.end); - match self.get(slice) { - Some(s) => s, - None => super::slice_error_fail(slice, start, end), - } + range::Range::from(self).index(slice) } #[inline] fn index_mut(self, slice: &mut str) -> &mut Self::Output { - // is_char_boundary checks that the index is in [0, .len()] - // cannot reuse `get` as above, because of NLL trouble - if self.start <= self.end - && slice.is_char_boundary(self.start) - && slice.is_char_boundary(self.end) - { - // SAFETY: just checked that `start` and `end` are on a char boundary, - // and we are passing in a safe reference, so the return value will also be one. - unsafe { &mut *self.get_unchecked_mut(slice) } - } else { - super::slice_error_fail(slice, self.start, self.end) - } + range::Range::from(self).index_mut(slice) } } @@ -264,11 +259,8 @@ unsafe impl const SliceIndex for range::Range { type Output = str; #[inline] fn get(self, slice: &str) -> Option<&Self::Output> { - if self.start <= self.end - && slice.is_char_boundary(self.start) - && slice.is_char_boundary(self.end) - { - // SAFETY: just checked that `start` and `end` are on a char boundary, + if check_range(slice, self) { + // SAFETY: just checked that `self` is in bounds, // and we are passing in a safe reference, so the return value will also be one. // We also checked char boundaries, so this is valid UTF-8. Some(unsafe { &*self.get_unchecked(slice) }) @@ -278,11 +270,8 @@ unsafe impl const SliceIndex for range::Range { } #[inline] fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> { - if self.start <= self.end - && slice.is_char_boundary(self.start) - && slice.is_char_boundary(self.end) - { - // SAFETY: just checked that `start` and `end` are on a char boundary. + if check_range(slice, self) { + // SAFETY: just checked that `self` is in bounds. // We know the pointer is unique because we got it from `slice`. Some(unsafe { &mut *self.get_unchecked_mut(slice) }) } else { @@ -348,13 +337,9 @@ unsafe impl const SliceIndex for range::Range { } #[inline] fn index_mut(self, slice: &mut str) -> &mut Self::Output { - // is_char_boundary checks that the index is in [0, .len()] // cannot reuse `get` as above, because of NLL trouble - if self.start <= self.end - && slice.is_char_boundary(self.start) - && slice.is_char_boundary(self.end) - { - // SAFETY: just checked that `start` and `end` are on a char boundary, + if check_range(slice, self) { + // SAFETY: just checked that `self` is in bounds, // and we are passing in a safe reference, so the return value will also be one. unsafe { &mut *self.get_unchecked_mut(slice) } } else { diff --git a/tests/codegen-llvm/str-range-indexing.rs b/tests/codegen-llvm/str-range-indexing.rs index 5fa8e0dd17d3c..57fd886f9fc71 100644 --- a/tests/codegen-llvm/str-range-indexing.rs +++ b/tests/codegen-llvm/str-range-indexing.rs @@ -18,18 +18,19 @@ macro_rules! tests { }; } -// 9 comparisons required: -// start <= end -// && (start == 0 || (start >= len && start == len) || bytes[start] >= -0x40) -// && (end == 0 || (end >= len && end == len) || bytes[end] >= -0x40) +// 7 comparisons required: +// start <= end && end <= len +// && (start == len || +// ( (start == 0 || bytes[start] >= -0x40) +// && (end == len || bytes[end] >= -0x40))) // CHECK-LABEL: @get_range -// CHECK-COUNT-9: %{{.+}} = icmp +// CHECK-COUNT-7: %{{.+}} = icmp // CHECK-NOT: %{{.+}} = icmp // CHECK: ret // CHECK-LABEL: @index_range -// CHECK-COUNT-9: %{{.+}} = icmp +// CHECK-COUNT-7: %{{.+}} = icmp // CHECK-NOT: %{{.+}} = icmp // CHECK: ret tests!(Range, get_range, index_range);