@@ -131,7 +131,72 @@ static size_t srchr4(unsigned int* str, unsigned int ch, size_t len){
131131 return 0 ;
132132}
133133
134- #if defined(__SSE2__ ) || EMU_AVX
134+ #if C_AVX2 || EMU_AVX2
135+
/*
 * Converts the ASCII uppercase letters 'A'..'Z' in the first `len` bytes of
 * `str` to lowercase, in place. All other bytes are left untouched by the
 * vector paths, including bytes >= 0x80.
 *
 * The buffer is processed in 32-byte AVX2 chunks, then 16-byte SSE2 chunks,
 * and the remaining (< 16) bytes are handled one at a time with tolower().
 *
 * str: buffer to modify; does not need to be aligned or NUL-terminated.
 * len: number of bytes to process.
 */
void StringToLower(char *str, size_t len) {
    while (len >= 32) {
        __m256i sv = _mm256_loadu_si256((__m256i *)str);
        /*
         * Signed range check in one compare: subtracting ('A' + 128) maps
         * 'A'..'Z' onto the lowest signed 8-bit values (-128..-103); every
         * other byte wraps to something greater than -128 + 25.
         * The bias must be 128 (half the 8-bit range) for both vector widths;
         * it depends on the lane width, not on the register width.
         */
        __m256i rangeshift = _mm256_sub_epi8(sv, _mm256_set1_epi8((char)('A' + 128)));
        /* 0xFF in every lane that is NOT an uppercase letter. */
        __m256i nomodify = _mm256_cmpgt_epi8(rangeshift, _mm256_set1_epi8(-128 + 25));
        /* Flip bit 0x20 (c ^ ' ') only in the lanes holding 'A'..'Z'. */
        _mm256_storeu_si256((__m256i *)str,
                            _mm256_xor_si256(sv, _mm256_andnot_si256(nomodify, _mm256_set1_epi8(0x20))));
        len -= 32;
        str += 32;
    }
    while (len >= 16) {
        __m128i sv = _mm_loadu_si128((__m128i *)str);
        /* Same biased range check as above, on 16 lanes. */
        __m128i rangeshift = _mm_sub_epi8(sv, _mm_set1_epi8((char)('A' + 128)));
        __m128i nomodify = _mm_cmpgt_epi8(rangeshift, _mm_set1_epi8(-128 + 25));
        _mm_storeu_si128((__m128i *)str,
                         _mm_xor_si128(sv, _mm_andnot_si128(nomodify, _mm_set1_epi8(0x20))));
        len -= 16;
        str += 16;
    }
    while (len-- > 0) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        *str = (char)tolower((unsigned char)*str);
        ++str;
    }
}
165+
/*
 * Converts the ASCII lowercase letters 'a'..'z' in the first `len` bytes of
 * `str` to uppercase, in place. All other bytes are left untouched by the
 * vector paths, including bytes >= 0x80.
 *
 * The buffer is processed in 32-byte AVX2 chunks, then 16-byte SSE2 chunks,
 * and the remaining (< 16) bytes are handled one at a time with toupper().
 *
 * str: buffer to modify; does not need to be aligned or NUL-terminated.
 * len: number of bytes to process.
 */
void StringToUpper(char *str, size_t len) {
    while (len >= 32) {
        // Unaligned load.
        __m256i r0 = _mm256_loadu_si256((__m256i *)str);

        // maskaz contains 0x00 where the byte is between 'a' and 'z', 0xff otherwise.
        // AVX2 only provides a signed greater-than compare, so "r0 < 'a'" is
        // expressed as "'a' > r0". Bytes >= 0x80 are negative as signed values
        // and therefore land in the "less than 'a'" set.
        __m256i below_a = _mm256_cmpgt_epi8(_mm256_set1_epi8('a'), r0);
        __m256i above_z = _mm256_cmpgt_epi8(r0, _mm256_set1_epi8('z'));
        __m256i maskaz = _mm256_or_si256(below_a, above_z);

        // Clear bit 0x20 only for lowercase characters:
        // result = r0 & ~(~maskaz & 0x20).
        _mm256_storeu_si256((__m256i *)str,
                            _mm256_andnot_si256(_mm256_andnot_si256(maskaz, _mm256_set1_epi8(0x20)), r0));
        len -= 32;
        str += 32;
    }
    while (len >= 16) {
        // Unaligned load.
        __m128i r0 = _mm_loadu_si128((__m128i *)str);

        // maskaz contains 0x00 where the byte is between 'a' and 'z', 0xff otherwise.
        __m128i maskaz = _mm_or_si128(_mm_cmplt_epi8(r0, _mm_set1_epi8('a')),
                                      _mm_cmpgt_epi8(r0, _mm_set1_epi8('z')));

        // Clear bit 0x20 only for lowercase characters.
        _mm_storeu_si128((__m128i *)str,
                         _mm_andnot_si128(_mm_andnot_si128(maskaz, _mm_set1_epi8(0x20)), r0));
        len -= 16;
        str += 16;
    }
    while (len-- > 0) {
        // <ctype.h> functions require a value representable as unsigned char.
        *str = (char)toupper((unsigned char)*str);
        ++str;
    }
}
199+ #elif defined(__SSE2__ ) || EMU_AVX
135200
136201/* A SIMD function for SSE2 which changes all uppercase ASCII digits to lowercase. */
137202void StringToLower (char * str ,size_t len ){
0 commit comments