1010use core::borrow::{Borrow, BorrowMut};
1111use core::iter::FusedIterator;
1212use core::mem;
13+ use core::mem::MaybeUninit;
1314use core::ptr;
1415use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher};
1516use core::unicode::conversions;
@@ -367,14 +368,9 @@ impl str {
367368 without modifying the original"]
368369 #[stable(feature = "unicode_case_mapping", since = "1.2.0")]
369370 pub fn to_lowercase(&self) -> String {
370- let out = convert_while_ascii(self.as_bytes() , u8::to_ascii_lowercase);
371+ let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_lowercase);
371372
372- // Safety: we know this is a valid char boundary since
373- // out.len() is only progressed if ascii bytes are found
374- let rest = unsafe { self.get_unchecked(out.len()..) };
375-
376- // Safety: We have written only valid ASCII to our vec
377- let mut s = unsafe { String::from_utf8_unchecked(out) };
373+ let prefix_len = s.len();
378374
379375 for (i, c) in rest.char_indices() {
380376 if c == 'Σ' {
@@ -383,8 +379,7 @@ impl str {
383379 // in `SpecialCasing.txt`,
384380 // so hard-code it rather than have a generic "condition" mechanism.
385381 // See https://github.com/rust-lang/rust/issues/26035
386- let out_len = self.len() - rest.len();
387- let sigma_lowercase = map_uppercase_sigma(&self, i + out_len);
382+ let sigma_lowercase = map_uppercase_sigma(self, prefix_len + i);
388383 s.push(sigma_lowercase);
389384 } else {
390385 match conversions::to_lower(c) {
@@ -460,14 +455,7 @@ impl str {
460455 without modifying the original"]
461456 #[stable(feature = "unicode_case_mapping", since = "1.2.0")]
462457 pub fn to_uppercase(&self) -> String {
463- let out = convert_while_ascii(self.as_bytes(), u8::to_ascii_uppercase);
464-
465- // Safety: we know this is a valid char boundary since
466- // out.len() is only progressed if ascii bytes are found
467- let rest = unsafe { self.get_unchecked(out.len()..) };
468-
469- // Safety: We have written only valid ASCII to our vec
470- let mut s = unsafe { String::from_utf8_unchecked(out) };
458+ let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_uppercase);
471459
472460 for c in rest.chars() {
473461 match conversions::to_upper(c) {
@@ -616,50 +604,83 @@ pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box<str> {
616604 unsafe { Box::from_raw(Box::into_raw(v) as *mut str) }
617605}
618606
619- /// Converts the bytes while the bytes are still ascii.
607+ /// Converts leading ascii bytes in `s` by calling the `convert` function.
608+ ///
620609/// For better average performance, this happens in chunks of 16 bytes.
621- /// Returns a vec with the converted bytes.
610+ ///
611+ /// Returns a tuple of the converted prefix and the remainder starting from
612+ /// the first non-ascii character.
622613#[inline]
623614#[cfg(not(test))]
624615#[cfg(not(no_global_oom_handling))]
fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
    // Contract: splits `s` at its first non-ASCII byte. Every byte of the ASCII
    // prefix is passed through `convert` and collected into a new `String`; the
    // untouched remainder (empty if `s` is all ASCII) is returned as a `&str`.
    //
    // `convert` must map ASCII bytes to ASCII bytes, otherwise the returned
    // `String` would not be valid UTF-8. The callers pass
    // `u8::to_ascii_lowercase` / `u8::to_ascii_uppercase`, which satisfy this.

    // Process the input in chunks of 16 bytes to enable auto-vectorization.
    // Previously the chunk size depended on the size of `usize`,
    // but on 32-bit platforms with sse or neon 16 bytes is also the better choice.
    // The only downside on other platforms would be a bit more loop-unrolling.
    const N: usize = 16;

    let mut slice = s.as_bytes();
    let mut out = Vec::with_capacity(slice.len());
    // `out_slice` always views the not-yet-written part of the output buffer and
    // is advanced in lock-step with `slice`, so it is never shorter than `slice`.
    let mut out_slice = out.spare_capacity_mut();

    let mut ascii_prefix_len = 0_usize;
    let mut is_ascii = [false; N];

    while slice.len() >= N {
        // SAFETY: checked in loop condition
        let chunk = unsafe { slice.get_unchecked(..N) };
        // SAFETY: out_slice has at least the same length as the input slice
        // and gets sliced with the same offsets
        let out_chunk = unsafe { out_slice.get_unchecked_mut(..N) };

        for j in 0..N {
            is_ascii[j] = chunk[j] <= 127;
        }

        // Auto-vectorization for this check is a bit fragile, sum and comparing against the chunk
        // size gives the best result, specifically a pmovmsk instruction on x86.
        // There is a codegen test in `issue-123712-str-to-lower-autovectorization.rs` which should
        // be updated when this method is changed.
        // See also https://github.com/llvm/llvm-project/issues/96395
        if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N {
            // The chunk contains a non-ASCII byte; the byte-wise loop below
            // converts whatever ASCII bytes precede it.
            break;
        }

        // perform the case conversion on N bytes at once
        for j in 0..N {
            out_chunk[j].write(convert(&chunk[j]));
        }

        ascii_prefix_len += N;
        // SAFETY: `slice.len() >= N` was checked in the loop condition
        slice = unsafe { slice.get_unchecked(N..) };
        // SAFETY: `out_slice` is at least as long as `slice` was before advancing
        out_slice = unsafe { out_slice.get_unchecked_mut(N..) };
    }

    // handle the remainder as individual bytes
    while let [byte, tail @ ..] = slice {
        if !byte.is_ascii() {
            break;
        }
        // SAFETY: `slice` is non-empty here and `out_slice` is at least as long
        // as `slice`, so index 0 is in bounds
        unsafe {
            out_slice.get_unchecked_mut(0).write(convert(byte));
        }
        ascii_prefix_len += 1;
        slice = tail;
        // SAFETY: `out_slice` was non-empty (see above), so `1..` is in bounds
        out_slice = unsafe { out_slice.get_unchecked_mut(1..) };
    }

    unsafe {
        // SAFETY: ascii_prefix_len bytes have been initialized above
        out.set_len(ascii_prefix_len);

        // SAFETY: We have written only valid ascii to the output vec
        // (relies on `convert` mapping ASCII to ASCII, see the contract above)
        let ascii_string = String::from_utf8_unchecked(out);

        // SAFETY: we know this is a valid char boundary
        // since we only skipped over leading ascii bytes
        let rest = core::str::from_utf8_unchecked(slice);

        (ascii_string, rest)
    }
}
0 commit comments