@@ -68,6 +68,7 @@ private static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(by
6868 // We scan the len bytes starting at buf, possibly going back howFarBack bytes, to find the end of
6969 // a valid UTF-8 sequence. We return buf + len if the buffer is valid; otherwise we return a
7070 // pointer to the first invalid byte.
71+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
7172 private unsafe static byte * SimpleRewindAndValidateWithErrors ( int howFarBack , byte * buf , int len )
7273 {
7374 int extraLen = 0 ;
@@ -90,7 +91,6 @@ private static (int utfAdjust, int scalarAdjust) GetFinalScalarUtfAdjustments(by
9091 {
9192 return buf - howFarBack ;
9293 }
93-
9494 int pos = 0 ;
9595 int nextPos ;
9696 uint codePoint = 0 ;
@@ -598,7 +598,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
598598 }
599599 else
600600 {
601- invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( processedLength - 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
601+ invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
602602 }
603603 if ( invalidBytePointer < pInputBuffer + processedLength )
604604 {
@@ -624,16 +624,17 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
624624
625625
626626 // We may still have an error.
627- if ( processedLength < inputLength || ! Sse42 . TestZ ( prevIncomplete , prevIncomplete ) )
627+ bool hasIncompete = ! Sse42 . TestZ ( prevIncomplete , prevIncomplete ) ;
628+ if ( processedLength < inputLength || hasIncompete )
628629 {
629630 byte * invalidBytePointer ;
630- if ( processedLength == 0 )
631+ if ( processedLength == 0 || ! hasIncompete )
631632 {
632633 invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 0 , pInputBuffer + processedLength , inputLength - processedLength ) ;
633634 }
634635 else
635636 {
636- invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( processedLength - 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
637+ invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
637638
638639 }
639640 if ( invalidBytePointer != pInputBuffer + inputLength )
@@ -813,7 +814,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
813814 if ( ! Avx2 . TestZ ( prevIncomplete , prevIncomplete ) )
814815 {
815816 int off = processedLength >= 3 ? processedLength - 3 : processedLength ;
816- byte * invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 16 - 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
817+ byte * invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 32 - 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
817818 // So the code is correct up to invalidBytePointer
818819 if ( invalidBytePointer < pInputBuffer + processedLength )
819820 {
@@ -877,7 +878,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
877878 }
878879 else
879880 {
880- invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( processedLength - 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
881+ invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
881882 }
882883 if ( invalidBytePointer < pInputBuffer + processedLength )
883884 {
@@ -899,17 +900,17 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
899900 }
900901 }
901902 // We may still have an error.
902- if ( processedLength < inputLength || ! Avx2 . TestZ ( prevIncomplete , prevIncomplete ) )
903+ bool hasIncompete = ! Avx2 . TestZ ( prevIncomplete , prevIncomplete ) ;
904+ if ( processedLength < inputLength || hasIncompete )
903905 {
904906 byte * invalidBytePointer ;
905- if ( processedLength == 0 )
907+ if ( processedLength == 0 || ! hasIncompete )
906908 {
907909 invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 0 , pInputBuffer + processedLength , inputLength - processedLength ) ;
908910 }
909911 else
910912 {
911- invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( processedLength - 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
912-
913+ invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
913914 }
914915 if ( invalidBytePointer != pInputBuffer + inputLength )
915916 {
@@ -1215,7 +1216,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
12151216 }
12161217 else
12171218 {
1218- invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( processedLength - 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
1219+ invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
12191220 }
12201221 if ( invalidBytePointer < pInputBuffer + processedLength )
12211222 {
@@ -1237,16 +1238,17 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
12371238 }
12381239 }
12391240 // We may still have an error.
1240- if ( processedLength < inputLength || Avx512BW . CompareGreaterThan ( prevIncomplete , Vector512 < byte > . Zero ) . ExtractMostSignificantBits ( ) != 0 )
1241+ bool hasIncompete = Avx512BW . CompareGreaterThan ( prevIncomplete , Vector512 < byte > . Zero ) . ExtractMostSignificantBits ( ) != 0 ;
1242+ if ( processedLength < inputLength || hasIncompete )
12411243 {
12421244 byte * invalidBytePointer ;
1243- if ( processedLength == 0 )
1245+ if ( processedLength == 0 || ! hasIncompete )
12441246 {
12451247 invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 0 , pInputBuffer + processedLength , inputLength - processedLength ) ;
12461248 }
12471249 else
12481250 {
1249- invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( processedLength - 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
1251+ invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
12501252
12511253 }
12521254 if ( invalidBytePointer != pInputBuffer + inputLength )
@@ -1360,8 +1362,9 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
13601362 {
13611363
13621364 Vector128 < byte > currentBlock = AdvSimd . LoadVector128 ( pInputBuffer + processedLength ) ;
1363-
1364- if ( AdvSimd . Arm64 . MaxAcross ( currentBlock ) . ToScalar ( ) <= 127 )
1365+ if ( AdvSimd . Arm64 . MaxAcross ( Vector128 . AsUInt32 ( AdvSimd . And ( currentBlock , v80 ) ) ) . ToScalar ( ) == 0 )
1366+ // We could do it with (AdvSimd.Arm64.MaxAcross(currentBlock).ToScalar() <= 127), but that is slower on some
1367+ // hardware.
13651368 {
13661369 // We have an ASCII block, no need to process it, but
13671370 // we need to check if the previous block was incomplete.
@@ -1431,7 +1434,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
14311434 }
14321435 else
14331436 {
1434- invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( processedLength - 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
1437+ invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
14351438 }
14361439 if ( invalidBytePointer < pInputBuffer + processedLength )
14371440 {
@@ -1457,18 +1460,17 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
14571460 n4 += negn4add ;
14581461 }
14591462 }
1460-
1461- // We may still have an error.
1462- if ( processedLength < inputLength || AdvSimd . Arm64 . MaxAcross ( prevIncomplete ) . ToScalar ( ) != 0 )
1463+ bool hasIncompete = AdvSimd . Arm64 . MaxAcross ( Vector128 . AsUInt32 ( prevIncomplete ) ) . ToScalar ( ) != 0 ;
1464+ if ( processedLength < inputLength || hasIncompete )
14631465 {
14641466 byte * invalidBytePointer ;
1465- if ( processedLength == 0 )
1467+ if ( processedLength == 0 || ! hasIncompete )
14661468 {
14671469 invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 0 , pInputBuffer + processedLength , inputLength - processedLength ) ;
14681470 }
14691471 else
14701472 {
1471- invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( processedLength - 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
1473+ invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
14721474 }
14731475 if ( invalidBytePointer != pInputBuffer + inputLength )
14741476 {
@@ -1497,6 +1499,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
14971499 return GetPointerToFirstInvalidByteScalar ( pInputBuffer + processedLength , inputLength - processedLength , out utf16CodeUnitCountAdjustment , out scalarCountAdjustment ) ;
14981500 }
14991501
1502+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
15001503 private static unsafe void removeCounters ( byte * start , byte * end , ref int n4 , ref int contbytes )
15011504 {
15021505 for ( byte * p = start ; p < end ; p ++ )
@@ -1512,6 +1515,7 @@ private static unsafe void removeCounters(byte* start, byte* end, ref int n4, re
15121515 }
15131516 }
15141517
1518+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
15151519 private static unsafe void addCounters ( byte * start , byte * end , ref int n4 , ref int contbytes )
15161520 {
15171521 for ( byte * p = start ; p < end ; p ++ )
0 commit comments