@@ -348,6 +348,40 @@ pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
348348// Impl for char
349349/////////////////////////////////////////////////////////////////////////////
350350
/// The number of bytes a `char` occupies when encoded as UTF-8.
///
/// Each discriminant equals the byte length it names, so `size as usize`
/// yields the encoded length directly. Because the value can only be in
/// `1..=4`, `Utf8Size::index` can access a `[u8; 4]` without a bounds check.
#[derive(Clone, Copy, Debug)]
enum Utf8Size {
    // Values are byte lengths, NOT indexes: callers cast this enum to `usize`
    // and use the result as the encoded length of the needle.
    One = 1,
    Two = 2,
    Three = 3,
    Four = 4,
}

impl Utf8Size {
    /// Converts a byte length into a `Utf8Size`.
    ///
    /// Returns `None` unless `size` is in `1..=4`.
    fn new(size: usize) -> Option<Self> {
        match size {
            1 => Some(Self::One),
            2 => Some(Self::Two),
            3 => Some(Self::Three),
            4 => Some(Self::Four),
            _ => None,
        }
    }

    /// Converts a byte length into a `Utf8Size` without validating it.
    ///
    /// # Safety
    ///
    /// `size` must be more than `0` and less than `5`.
    unsafe fn new_unchecked(size: usize) -> Self {
        // SAFETY: the caller guarantees `size` is in `1..=4`, so `new` returns `Some`.
        unsafe { Self::new(size).unwrap_unchecked() }
    }

    /// Returns a reference to the last byte of an encoding of this length
    /// that is stored at the start of `arr`.
    fn index(self, arr: &[u8; 4]) -> &u8 {
        // SAFETY: the discriminant is in `1..=4`, so `self as usize - 1` is in
        // `0..=3`, which is always in bounds for a four-element array.
        unsafe { arr.get_unchecked(self as usize - 1) }
    }
}
384+
351385/// Associated type for `<char as Pattern<'a>>::Searcher`.
352386#[ derive( Clone , Debug ) ]
353387pub struct CharSearcher < ' a > {
@@ -368,9 +402,8 @@ pub struct CharSearcher<'a> {
368402 /// The character being searched for
369403 needle : char ,
370404
371- // safety invariant: `utf8_size` must be less than 5
372405 /// The number of bytes `needle` takes up when encoded in utf8.
373- utf8_size : usize ,
406+ utf8_size : Utf8Size ,
374407 /// A utf8 encoded copy of the `needle`
375408 utf8_encoded : [ u8 ; 4 ] ,
376409}
@@ -413,8 +446,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
413446 // get the haystack after the last character found
414447 let bytes = self . haystack . as_bytes ( ) . get ( self . finger ..self . finger_back ) ?;
415448 // the last byte of the utf8 encoded needle
416- // SAFETY: we have an invariant that `utf8_size < 5`
417- let last_byte = unsafe { * self . utf8_encoded . get_unchecked ( self . utf8_size - 1 ) } ;
449+ let last_byte = * self . utf8_size . index ( & self . utf8_encoded ) ;
418450 if let Some ( index) = memchr:: memchr ( last_byte, bytes) {
419451 // The new finger is the index of the byte we found,
420452 // plus one, since we memchr'd for the last byte of the character.
@@ -434,10 +466,12 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
434466 // find something. When we find something the `finger` will be set
435467 // to a UTF8 boundary.
436468 self . finger += index + 1 ;
437- if self . finger >= self . utf8_size {
438- let found_char = self . finger - self . utf8_size ;
469+
470+ let utf8_size = self . utf8_size as usize ;
471+ if self . finger >= utf8_size {
472+ let found_char = self . finger - utf8_size;
439473 if let Some ( slice) = self . haystack . as_bytes ( ) . get ( found_char..self . finger ) {
440- if slice == & self . utf8_encoded [ 0 ..self . utf8_size ] {
474+ if slice == & self . utf8_encoded [ 0 ..utf8_size] {
441475 return Some ( ( found_char, self . finger ) ) ;
442476 }
443477 }
@@ -481,8 +515,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
481515 // get the haystack up to but not including the last character searched
482516 let bytes = haystack. get ( self . finger ..self . finger_back ) ?;
483517 // the last byte of the utf8 encoded needle
484- // SAFETY: we have an invariant that `utf8_size < 5`
485- let last_byte = unsafe { * self . utf8_encoded . get_unchecked ( self . utf8_size - 1 ) } ;
518+ let last_byte = * self . utf8_size . index ( & self . utf8_encoded ) ;
486519 if let Some ( index) = memchr:: memrchr ( last_byte, bytes) {
487520 // we searched a slice that was offset by self.finger,
488521 // add self.finger to recoup the original index
@@ -493,14 +526,15 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
493526 // char in the paradigm of reverse iteration). For
494527 // multibyte chars we need to skip down by the number of more
495528 // bytes they have than ASCII
496- let shift = self . utf8_size - 1 ;
529+ let utf8_size = self . utf8_size as usize ;
530+ let shift = utf8_size - 1 ;
497531 if index >= shift {
498532 let found_char = index - shift;
499- if let Some ( slice) = haystack. get ( found_char..( found_char + self . utf8_size ) ) {
500- if slice == & self . utf8_encoded [ 0 ..self . utf8_size ] {
533+ if let Some ( slice) = haystack. get ( found_char..( found_char + utf8_size) ) {
534+ if slice == & self . utf8_encoded [ 0 ..utf8_size] {
501535 // move finger to before the character found (i.e., at its start index)
502536 self . finger_back = found_char;
503- return Some ( ( self . finger_back , self . finger_back + self . utf8_size ) ) ;
537+ return Some ( ( self . finger_back , self . finger_back + utf8_size) ) ;
504538 }
505539 }
506540 }
@@ -543,6 +577,9 @@ impl<'a> Pattern<'a> for char {
543577 fn into_searcher ( self , haystack : & ' a str ) -> Self :: Searcher {
544578 let mut utf8_encoded = [ 0 ; 4 ] ;
545579 let utf8_size = self . encode_utf8 ( & mut utf8_encoded) . len ( ) ;
580+
581+ // SAFETY: `encode_utf8` always writes between 1 and 4 bytes, so `utf8_size` is more than 0 and less than 5
582+ let utf8_size = unsafe { Utf8Size :: new_unchecked ( utf8_size) } ;
546583 CharSearcher {
547584 haystack,
548585 finger : 0 ,
0 commit comments