Skip to content

Commit d9b0258

Browse files
author
Your Name
committed
1
1 parent 1cfacc8 commit d9b0258

File tree

9 files changed

+335
-74
lines changed

9 files changed

+335
-74
lines changed

utf8/test.out.txt

Whitespace-only changes.

utf8/valid.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import (
99
type Validation byte
1010

1111
const (
12-
Invalid = 0
12+
Invalid = 0b00
1313
UTF8 = 0b01
1414
ASCII = 0b10 | UTF8
1515
)

utf8/valid_amd64.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

utf8/valid_arm64.go

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

utf8/valid_arm64.s

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
// TODO: license
2+
3+
//go:build !purego
4+
5+
#include "textflag.h"
6+
7+
// func validateNEON(p []byte) byte
//
// validateNEON checks p for UTF-8 well-formedness 16 bytes at a time. It
// stores 1 in the result when every check passes (or p is empty) and 0 as
// soon as a block fails a check.
//
// Register use: R10 = read cursor, R11 = bytes remaining, V3 = previous
// 16-byte block, V4 = current block, V30 = all zero.
//
// NOTE(review): 1 equals Validation's UTF8 bit. No path in this routine
// stores the ASCII bit, even when only the ASCII fast path ran — confirm
// that callers do not expect ASCII to be reported from here.
// NOTE(review): the tail code loads the 16 bytes that end at p+len(p), so
// for 0 < len(p) < 16 it reads memory located before p. The Go wrapper only
// dispatches here for len(p) >= 32; that guard must stay in place.
8+
TEXT ·validateNEON(SB),NOSPLIT,$0-25
9+
MOVD p_base+0(FP), R10
10+
MOVD p_len+8(FP), R11
11+
CBZ R11, valid
12+
CMP $16, R11
13+
BLT small
14+
15+
// V0 = 0x80 in every byte lane; used to test the high bit of each input byte.
VMOVQ $0x8080808080808080, $0x8080808080808080, V0
16+
17+
// ASCII fast path: consume whole 16-byte blocks while no byte has bit 7 set.
ascii_loop:
18+
CMP $16, R11
19+
BLT small
20+
21+
VLD1 (R10), [V1.B16]
22+
// A lane of V2 is non-zero where (byte & 0x80) != 0.
VCMTST V1.B16, V0.B16, V2.B16
23+
VMOV V2.D[0], R2
24+
VMOV V2.D[1], R3
25+
ORR R2, R3, R2
26+
// The cursor is not advanced before leaving, so the block that contains
// the first non-ASCII byte is re-read by aligned_loop.
CBNZ R2, stop_ascii
27+
28+
ADD $16, R10
29+
SUB $16, R11
30+
B ascii_loop
31+
32+
// Load the constants for the general check.
// V11, V13 and V15 are 16-entry tables indexed by a nibble via VTBL below.
// NOTE(review): they look like the three error-class tables of the
// lookup-based UTF-8 validation algorithm — confirm against the reference
// the values were taken from.
// NOTE(review): V14, V16, V17 and V19 are loaded but never read in this
// routine.
stop_ascii:
33+
VMOVQ $0x0202020202020202, $0x4915012180808080, V11
34+
VMOVQ $0xcbcbcb8b8383a3e7, $0xcbcbdbcbcbcbcbcb, V13
35+
VMOVQ $0x0101010101010101, $0x01010101babaaee6, V15
36+
VMOVQ $0x0F0F0F0F0F0F0F0F, $0x0F0F0F0F0F0F0F0F, V18
37+
VMOVQ $0x0707070707070707, $0x0707070707070707, V12
38+
VMOVQ $0xFFFFFFFFFFFFFFFF, $0xFFFFFFFFFFFFFFFF, V14
39+
VMOVQ $0x7F7F7F7F7F7F7F7F, $0x7F7F7F7F7F7F7F7F, V16
40+
VMOVQ $0xDFDFDFDFDFDFDFDF, $0xDFDFDFDFDFDFDFDF, V17
41+
VMOVQ $0x0808080808080808, $0x0808080808080808, V19
42+
VMOVQ $0x8080808080808080, $0x8080808080808080, V20
43+
VMOVQ $0x0000000000000000, $0x0000000000000000, V30
44+
// V3 (previous block) starts as zero.
VMOVQ $0x0000000000000000, $0x0000000000000000, V3
45+
46+
// General path: validate one full 16-byte block per iteration.
aligned_loop:
47+
// Load the current block and post-increment the cursor by 16.
VLD1.P 16(R10), [V4.B16]
48+
// V5 = byte window spanning V3 and V4 at offset 15 (presumably the input
// shifted back by one byte: last byte of V3, then the first 15 of V4).
VEXT $15, V4.B16, V3.B16, V5.B16
49+
// Three table lookups: high nibble of V5, low nibble of V5, high nibble of V4.
VUSHR $4, V5.B16, V6.B16
50+
VTBL V6.B16, [V11.B16], V6.B16
51+
VAND V5.B16, V18.B16, V7.B16
52+
VTBL V7.B16, [V13.B16], V7.B16
53+
VUSHR $4, V4.B16, V8.B16
54+
VTBL V8.B16, [V15.B16], V8.B16
55+
// V10 = AND of the three lookup results.
VAND V6.B16, V7.B16, V9.B16
56+
VAND V9.B16, V8.B16, V10.B16
57+
// V6 = lanes where (window at offset 14) >> 5 == 7, i.e. top three bits set.
VEXT $14, V4.B16, V3.B16, V5.B16
58+
VUSHR $5, V5.B16, V6.B16
59+
VCMEQ V12.B16, V6.B16, V6.B16
60+
// V9 = lanes where (window at offset 13) >> 4 == 0x0F, i.e. top four bits set.
VEXT $13, V4.B16, V3.B16, V5.B16
61+
VUSHR $4, V5.B16, V9.B16
62+
VCMEQ V18.B16, V9.B16, V9.B16
63+
VORR V6.B16, V9.B16, V9.B16
64+
// Reduce the combined mask to 0x80 per lane and compare it with V10 by
// subtraction; any non-zero lane means the block is rejected.
VAND V9.B16, V20.B16, V9.B16
65+
VSUB V9.B16, V10.B16, V9.B16
66+
VMOV V9.D[0], R1
67+
VMOV V9.D[1], R2
68+
ORR R1, R2, R1
69+
CBNZ R1, no_valid
70+
// The current block becomes the previous block for the next iteration.
VMOV V4.B16, V3.B16
71+
SUB $16, R11, R11
72+
CMP $16, R11
73+
74+
BGE aligned_loop
75+
76+
// Fewer than 16 bytes remain; the constants are already loaded.
B small_no_const
77+
78+
// Entry for inputs that reached the tail without going through stop_ascii:
// load the same constants as stop_ascii and zero V3.
small:
79+
VMOVQ $0x0202020202020202, $0x4915012180808080, V11
80+
VMOVQ $0xcbcbcb8b8383a3e7, $0xcbcbdbcbcbcbcbcb, V13
81+
VMOVQ $0x0101010101010101, $0x01010101babaaee6, V15
82+
VMOVQ $0x0F0F0F0F0F0F0F0F, $0x0F0F0F0F0F0F0F0F, V18
83+
VMOVQ $0x0707070707070707, $0x0707070707070707, V12
84+
VMOVQ $0xFFFFFFFFFFFFFFFF, $0xFFFFFFFFFFFFFFFF, V14
85+
VMOVQ $0x7F7F7F7F7F7F7F7F, $0x7F7F7F7F7F7F7F7F, V16
86+
VMOVQ $0xDFDFDFDFDFDFDFDF, $0xDFDFDFDFDFDFDFDF, V17
87+
VMOVQ $0x0808080808080808, $0x0808080808080808, V19
88+
VMOVQ $0x8080808080808080, $0x8080808080808080, V20
89+
VMOVQ $0x0000000000000000, $0x0000000000000000, V30
90+
VMOVQ $0x0000000000000000, $0x0000000000000000, V3
91+
92+
// Tail: R11 (0..15) bytes remain.
small_no_const:
93+
94+
// Move the cursor to (cursor + remaining - 16) and load the 16 bytes that
// end exactly at the end of the input.
SUB $16, R10, R10
95+
ADD R11, R10, R10
96+
VLD1.P 16(R10), [V4.B16]
97+
98+
// Computed branch into shift_table: entry index = R11, 4 bytes per entry
// (one B instruction each).
ADR shift_table, R2
99+
MOVW R11, R3
100+
LSL $2, R3
101+
ADD R3, R2
102+
B (R2)
103+
104+
105+
// One branch per possible remaining-byte count. The order and size of these
// entries must not change without updating the index computation above.
shift_table:
106+
B do_shift_0
107+
B do_shift_1
108+
B do_shift_2
109+
B do_shift_3
110+
B do_shift_4
111+
B do_shift_5
112+
B do_shift_6
113+
B do_shift_7
114+
B do_shift_8
115+
B do_shift_9
116+
B do_shift_10
117+
B do_shift_11
118+
B do_shift_12
119+
B do_shift_13
120+
B do_shift_14
121+
B do_shift_15
122+
123+
// do_shift_0: nothing remains; V4 is replaced by 0x61 in every lane.
do_shift_0:
124+
VMOVQ $0x6161616161616161, $0x6161616161616161, V4
125+
B end_swith
126+
// do_shift_N (N = 1..15): VEXT by 16-N over V4 and the zero vector V30
// (presumably leaves the last N loaded bytes at the front of V4, followed by
// zeros).
do_shift_1:
127+
VEXT $15, V30.B16, V4.B16, V4.B16
128+
B end_swith
129+
do_shift_2:
130+
VEXT $14, V30.B16, V4.B16, V4.B16
131+
B end_swith
132+
do_shift_3:
133+
VEXT $13, V30.B16, V4.B16, V4.B16
134+
B end_swith
135+
do_shift_4:
136+
VEXT $12, V30.B16, V4.B16, V4.B16
137+
B end_swith
138+
do_shift_5:
139+
VEXT $11, V30.B16, V4.B16, V4.B16
140+
B end_swith
141+
do_shift_6:
142+
VEXT $10, V30.B16, V4.B16, V4.B16
143+
B end_swith
144+
do_shift_7:
145+
VEXT $9, V30.B16, V4.B16, V4.B16
146+
B end_swith
147+
do_shift_8:
148+
VEXT $8, V30.B16, V4.B16, V4.B16
149+
B end_swith
150+
do_shift_9:
151+
VEXT $7, V30.B16, V4.B16, V4.B16
152+
B end_swith
153+
do_shift_10:
154+
VEXT $6, V30.B16, V4.B16, V4.B16
155+
B end_swith
156+
do_shift_11:
157+
VEXT $5, V30.B16, V4.B16, V4.B16
158+
B end_swith
159+
do_shift_12:
160+
VEXT $4, V30.B16, V4.B16, V4.B16
161+
B end_swith
162+
do_shift_13:
163+
VEXT $3, V30.B16, V4.B16, V4.B16
164+
B end_swith
165+
do_shift_14:
166+
VEXT $2, V30.B16, V4.B16, V4.B16
167+
B end_swith
168+
do_shift_15:
169+
VEXT $1, V30.B16, V4.B16, V4.B16
170+
B end_swith
171+
172+
// Final block: the same instruction sequence as the body of aligned_loop,
// applied to the shifted tail in V4 with V3 as the previous block.
end_swith:
173+
VEXT $15, V4.B16, V3.B16, V5.B16
174+
VUSHR $4, V5.B16, V6.B16
175+
VTBL V6.B16, [V11.B16], V6.B16
176+
VAND V5.B16, V18.B16, V7.B16
177+
VTBL V7.B16, [V13.B16], V7.B16
178+
VUSHR $4, V4.B16, V8.B16
179+
VTBL V8.B16, [V15.B16], V8.B16
180+
VAND V6.B16, V7.B16, V9.B16
181+
VAND V9.B16, V8.B16, V10.B16
182+
183+
VEXT $14, V4.B16, V3.B16, V5.B16
184+
VUSHR $5, V5.B16, V6.B16
185+
VCMEQ V12.B16, V6.B16, V6.B16
186+
187+
VEXT $13, V4.B16, V3.B16, V5.B16
188+
VUSHR $4, V5.B16, V9.B16
189+
VCMEQ V18.B16, V9.B16, V9.B16
190+
VORR V6.B16, V9.B16, V9.B16
191+
192+
VAND V9.B16, V20.B16, V9.B16
193+
VSUB V9.B16, V10.B16, V9.B16
194+
VMOV V9.D[0], R1
195+
VMOV V9.D[1], R2
196+
ORR R1, R2, R1
197+
CBNZ R1, no_valid
198+
199+
// Success: store 1. The result slot is a single byte (frame $0-25), so it is
// written with MOVB.
valid:
200+
MOVD $1, R0
201+
MOVB R0, ret+24(FP)
202+
RET
203+
204+
// Failure: store 0.
no_valid:
205+
MOVD $0, R0
206+
MOVB R0, ret+24(FP)
207+
RET
208+
209+
210+
// NOTE(review): end_7, end_R11 and ret7 are not branched to from anywhere in
// this routine and cannot be reached by fall-through (each follows a RET).
// They look like leftover debugging exits — confirm and remove.
end_7:
211+
MOVD $7, R0
212+
MOVB R0, ret+24(FP)
213+
RET
214+
215+
end_R11:
216+
MOVD R11, R0
217+
MOVB R0, ret+24(FP)
218+
RET
219+
220+
221+
ret7:
222+
MOVD $7, R0
223+
MOVB R0, ret+24(FP) // stores 7 (not 0) as the result
224+
RET
225+
///////////////////////////
226+

utf8/valid_default.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
//go:build purego || !amd64
2-
// +build purego !amd64
1+
//go:build purego || !(amd64 || arm64)
2+
// +build purego !amd64,!arm64
33

44
package utf8
55

utf8/valid_support_amd64.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
//go:build !purego
2-
// +build !purego
1+
//go:build !purego
2+
// +build !purego
33

44
package utf8
55

utf8/valid_support_arm64.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// The _arm64 file name suffix already limits this file to GOARCH=arm64, so the
// constraint only has to exclude purego builds.
//go:build !purego
2+
// +build !purego
3+
4+
package utf8
5+
6+
import (
7+
"github.com/segmentio/asm/cpu"
8+
"github.com/segmentio/asm/cpu/arm64"
9+
)
10+
11+
// noNEON is true when the CPU does not report the ASIMD feature; it is
// evaluated once at package initialisation.
var noNEON = !cpu.ARM64.Has(arm64.ASIMD)
12+
13+
// Validate is a more precise version of Valid that also indicates whether the
14+
// input was valid ASCII.
//
// Inputs shorter than 32 bytes, and every input when ASIMD is unavailable,
// are handled by validate. Everything else goes to validateNEON, whose byte
// result is converted to a Validation unchanged.
15+
func Validate(p []byte) Validation {
16+
if noNEON || len(p) < 32 {
17+
return validate(p)
18+
}
19+
r := validateNEON(p)
20+
return Validation(r)
21+
}

0 commit comments

Comments
 (0)