Skip to content

Commit 7d76490

Browse files
committed
tolower/toupper avx2
1 parent 107a0ad commit 7d76490

File tree

1 file changed

+66
-1
lines changed

1 file changed

+66
-1
lines changed

jsrc/str.c

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,72 @@ static size_t srchr4(unsigned int* str, unsigned int ch, size_t len){
131131
return 0;
132132
}
133133

134-
#if defined(__SSE2__) || EMU_AVX
134+
#if C_AVX2 || EMU_AVX2
135+
136+
/*
 * StringToLower: convert the uppercase ASCII letters 'A'..'Z' in the len bytes
 * starting at str to lowercase, in place.
 *
 *   str  buffer to modify; need not be aligned (unaligned loads/stores are used)
 *   len  number of bytes to process; 0 is allowed and leaves the buffer untouched
 *
 * The buffer is processed 32 bytes at a time with AVX2, then 16 bytes at a time
 * with SSE2, and the remaining (fewer than 16) bytes go through tolower().
 * The vector loops leave every byte outside 'A'..'Z' unchanged, including
 * bytes >= 0x80.
 */
void StringToLower(char *str,size_t len){
 while (len >= 32) {
  __m256i sv = _mm256_loadu_si256((__m256i*)str);
  /* Lanes are 8 bits wide even in a 256-bit register, so the bias is 128 (not 256):
     subtracting 'A'+128 maps 'A'..'Z' onto the lowest signed values -128..-103. */
  __m256i rangeshift = _mm256_sub_epi8(sv, _mm256_set1_epi8((char)('A'+128)));
  /* Every lane above -128+25 held something other than an uppercase letter. */
  __m256i nomodify = _mm256_cmpgt_epi8(rangeshift, _mm256_set1_epi8(-128 + 25));
  /* Flip the 0x20 case bit only in lanes where nomodify is clear. */
  _mm256_storeu_si256((__m256i*)str, _mm256_xor_si256(sv, _mm256_andnot_si256(nomodify, _mm256_set1_epi8(0x20))));
  len -= 32;
  str += 32;
 }
 while (len >= 16) {
  __m128i sv = _mm_loadu_si128((__m128i*)str);
  /* Same range trick as above, on a 128-bit register. */
  __m128i rangeshift = _mm_sub_epi8(sv, _mm_set1_epi8((char)('A'+128)));
  __m128i nomodify = _mm_cmpgt_epi8(rangeshift, _mm_set1_epi8(-128 + 25));
  _mm_storeu_si128((__m128i*)str, _mm_xor_si128(sv, _mm_andnot_si128(nomodify, _mm_set1_epi8(0x20))));
  len -= 16;
  str += 16;
 }
 while (len-- > 0) {
  /* <ctype.h> functions require an unsigned char value (or EOF); a negative char is undefined. */
  *str = (char)tolower((unsigned char)*str);
  ++str;
 }
}
165+
166+
/*
 * StringToUpper: convert the lowercase ASCII letters 'a'..'z' in the len bytes
 * starting at str to uppercase, in place.
 *
 *   str  buffer to modify; need not be aligned (unaligned loads/stores are used)
 *   len  number of bytes to process; 0 is allowed and leaves the buffer untouched
 *
 * The buffer is processed 32 bytes at a time with AVX2, then 16 bytes at a time
 * with SSE2, and the remaining (fewer than 16) bytes go through toupper().
 * The vector loops leave every byte outside 'a'..'z' unchanged; bytes >= 0x80
 * compare as negative and therefore fall below 'a'.
 */
void StringToUpper(char *str,size_t len){
 while (len >= 32) {
  /* Unaligned load. */
  __m256i r0 = _mm256_loadu_si256((__m256i*)str);
  /* maskaz is 0x00 in lanes holding 'a'..'z' and 0xff elsewhere.  AVX2 provides only a
     signed greater-than byte compare, so "r0 < 'a'" is written as "'a' > r0". */
  __m256i maskaz = _mm256_or_si256(_mm256_cmpgt_epi8(_mm256_set1_epi8('a'), r0), _mm256_cmpgt_epi8(r0, _mm256_set1_epi8('z')));
  /* Clear the 0x20 case bit only in the lowercase lanes. */
  _mm256_storeu_si256((__m256i*)str, _mm256_andnot_si256(_mm256_andnot_si256(maskaz, _mm256_set1_epi8(0x20)), r0));
  len -= 32;
  str += 32;
 }
 while (len >= 16) {
  /* Unaligned load. */
  __m128i r0 = _mm_loadu_si128((__m128i*)str);
  /* Same mask as above, on a 128-bit register. */
  __m128i maskaz = _mm_or_si128(_mm_cmplt_epi8(r0, _mm_set1_epi8('a')), _mm_cmpgt_epi8(r0, _mm_set1_epi8('z')));
  _mm_storeu_si128((__m128i*)str, _mm_andnot_si128(_mm_andnot_si128(maskaz, _mm_set1_epi8(0x20)), r0));
  len -= 16;
  str += 16;
 }
 while (len-- > 0) {
  /* <ctype.h> functions require an unsigned char value (or EOF); a negative char is undefined. */
  *str = (char)toupper((unsigned char)*str);
  ++str;
 }
}
199+
#elif defined(__SSE2__) || EMU_AVX
135200

136201
/* A SIMD function for SSE2 which changes all uppercase ASCII letters to lowercase. */
137202
void StringToLower(char *str,size_t len){

0 commit comments

Comments
 (0)