@@ -46,7 +46,6 @@ func genExamples(current string, ranges []byteRange) []string {
4646}
4747
4848func TestValid (t * testing.T ) {
49-
5049 var examples = []string {
5150 // Tests copied from the stdlib
5251 "" ,
@@ -57,85 +56,87 @@ func TestValid(t *testing.T) {
5756 "брэд-ЛГТМ" ,
5857 "☺☻☹" ,
5958
60- // // overlong
61- // "\xE0\x80",
62- // // unfinished continuation
63- // "aa\xE2",
59+ // overlong
60+ "\xE0 \x80 " ,
61+ // unfinished continuation
62+ "aa\xE2 " ,
6463
65- // string([]byte{66, 250}),
64+ string ([]byte {66 , 250 }),
6665
67- // string([]byte{66, 250, 67}),
66+ string ([]byte {66 , 250 , 67 }),
6867
69- // "a\uFFFDb",
68+ "a\uFFFD b" ,
7069
71- // "\xF4\x8F\xBF\xBF", // U+10FFFF
70+ "\xF4 \x8F \xBF \xBF " , // U+10FFFF
7271
73- // "\xF4\x90\x80\x80", // U+10FFFF+1; out of range
74- // "\xF7\xBF\xBF\xBF", // 0x1FFFFF; out of range
72+ "\xF4 \x90 \x80 \x80 " , // U+10FFFF+1; out of range
73+ "\xF7 \xBF \xBF \xBF " , // 0x1FFFFF; out of range
7574
76- // "\xFB\xBF\xBF\xBF\xBF", // 0x3FFFFFF; out of range
75+ "\xFB \xBF \xBF \xBF \xBF " , // 0x3FFFFFF; out of range
7776
78- // "\xc0\x80", // U+0000 encoded in two bytes: incorrect
79- // "\xed\xa0\x80", // U+D800 high surrogate (sic)
80- // "\xed\xbf\xbf", // U+DFFF low surrogate (sic)
77+ "\xc0 \x80 " , // U+0000 encoded in two bytes: incorrect
78+ "\xed \xa0 \x80 " , // U+D800 high surrogate (sic)
79+ "\xed \xbf \xbf " , // U+DFFF low surrogate (sic)
8180
82- // // valid at boundary
83- // strings.Repeat("a", 32+28) + "☺☻☹",
84- // strings.Repeat("a", 32+29) + "☺☻☹",
85- // strings.Repeat("a", 32+30) + "☺☻☹",
86- // strings.Repeat("a", 32+31) + "☺☻☹",
87- // // invalid at boundary
88- // strings.Repeat("a", 32+31) + "\xE2a",
81+ // valid at boundary
82+ strings .Repeat ("a" , 32 + 28 ) + "☺☻☹" ,
83+ strings .Repeat ("a" , 32 + 29 ) + "☺☻☹" ,
84+ strings .Repeat ("a" , 32 + 30 ) + "☺☻☹" ,
85+ strings .Repeat ("a" , 32 + 31 ) + "☺☻☹" ,
86+ // invalid at boundary
87+ strings .Repeat ("a" , 32 + 31 ) + "\xE2 a" ,
8988
90- // // same inputs as benchmarks
91- // "0123456789",
92- // "日本語日本語日本語日",
93- // "\xF4\x8F\xBF\xBF",
89+ // same inputs as benchmarks
90+ "0123456789" ,
91+ "日本語日本語日本語日" ,
92+ "\xF4 \x8F \xBF \xBF " ,
9493
95- // // bugs found with fuzzing
96- // "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\xc60",
97- // "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\xc300",
98- // "߀0000000000000000000000000000訨",
99- // "0000000000000000000000000000000˂00000000000000000000000000000000",
94+ // bugs found with fuzzing
95+ "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\xc6 0" ,
96+ "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000\xc3 00" ,
97+ "߀0000000000000000000000000000訨" ,
98+ "0000000000000000000000000000000˂00000000000000000000000000000000" ,
10099 }
101100
102- // any := byteRange{0, 0xFF}
103- // ascii := byteRange{0, 0x7F}
104- // cont := byteRange{0x80, 0xBF}
101+ any := byteRange {0 , 0xFF }
102+ ascii := byteRange {0 , 0x7F }
103+ cont := byteRange {0x80 , 0xBF }
105104
106105 rangesToTest := [][]byteRange {
107- // {one(0x20), ascii, ascii, ascii},
108-
109- // // 2-byte sequences
110- // {one(0xC2)},
111- // {one(0xC2), ascii},
112- // {one(0xC2), cont},
113- // {one(0xC2), {0xC0, 0xFF}},
114- // {one(0xC2), cont, cont},
115- // {one(0xC2), cont, cont, cont},
116-
117- // // 3-byte sequences
118- // {one(0xE1)},
119- // {one(0xE1), cont},
120- // {one(0xE1), cont, cont},
121- // {one(0xE1), cont, cont, ascii},
122- // {one(0xE1), cont, ascii},
123- // {one(0xE1), cont, cont, cont},
124-
125- // // 4-byte sequences
126- // {one(0xF1)},
127- // {one(0xF1), cont},
128- // {one(0xF1), cont, cont},
129- // {one(0xF1), cont, cont, cont},
130- // {one(0xF1), cont, cont, ascii},
131- // {one(0xF1), cont, cont, cont, ascii},
132-
133- // // overlong
134- // {{0xC0, 0xC1}, any},
135- // {{0xC0, 0xC1}, any, any},
136- // {{0xC0, 0xC1}, any, any, any},
137- // {one(0xE0), {0x0, 0x9F}, cont},
138- // {one(0xE0), {0xA0, 0xBF}, cont},
106+ {one (0x20 ), ascii , ascii , ascii },
107+
108+ {one (0x04 ), ascii , ascii , ascii },
109+
110+ // 2-byte sequences
111+ {one (0xC2 )},
112+ {one (0xC2 ), ascii },
113+ {one (0xC2 ), cont },
114+ {one (0xC2 ), {0xC0 , 0xFF }},
115+ {one (0xC2 ), cont , cont },
116+ {one (0xC2 ), cont , cont , cont },
117+
118+ // 3-byte sequences
119+ {one (0xE1 )},
120+ {one (0xE1 ), cont },
121+ {one (0xE1 ), cont , cont },
122+ {one (0xE1 ), cont , cont , ascii },
123+ {one (0xE1 ), cont , ascii },
124+ {one (0xE1 ), cont , cont , cont },
125+
126+ // 4-byte sequences
127+ {one (0xF1 )},
128+ {one (0xF1 ), cont },
129+ {one (0xF1 ), cont , cont },
130+ {one (0xF1 ), cont , cont , cont },
131+ {one (0xF1 ), cont , cont , ascii },
132+ {one (0xF1 ), cont , cont , cont , ascii },
133+
134+ // overlong
135+ {{0xC0 , 0xC1 }, any },
136+ {{0xC0 , 0xC1 }, any , any },
137+ {{0xC0 , 0xC1 }, any , any , any },
138+ {one (0xE0 ), {0x0 , 0x9F }, cont },
139+ {one (0xE0 ), {0xA0 , 0xBF }, cont },
139140 }
140141
141142 for _ , r := range rangesToTest {
@@ -165,7 +166,7 @@ func TestValid(t *testing.T) {
165166
166167 t .Run ("boundary-" + tt , func (t * testing.T ) {
167168 size := 32 - len (tt )
168- prefix := strings .Repeat ("q " , size )
169+ prefix := strings .Repeat ("a " , size )
169170 b := []byte (prefix + tt )
170171 check (t , b )
171172 })
@@ -195,7 +196,6 @@ func TestValid(t *testing.T) {
195196}
196197
197198func TestValidPageBoundary (t * testing.T ) {
198-
199199 buf , err := buffer .New (64 )
200200 if err != nil {
201201 t .Fatal (err )
@@ -233,8 +233,7 @@ func check(t *testing.T, b []byte) {
233233 if err != nil {
234234 panic (err )
235235 }
236- fmt .Println ("qwe\t Valid(b)" , Valid (b ))
237- fmt .Println ("qwe\t utf8.Valid(b)" , utf8 .Valid (b ))
236+
238237 t .Errorf ("Valid(%q) = %v; want %v" , string (b ), ! expected , expected )
239238 }
240239
@@ -246,9 +245,7 @@ func check(t *testing.T, b []byte) {
246245
247246 expected = ascii .Valid (b )
248247 if v .IsASCII () != expected {
249- // t.Errorf("qwe\tValid(b) %q", ascii.Valid(b))
250- t .Errorf ("qwe\t ascii.Valid(b) %v" , ascii .Valid (b ))
251- t .Errorf ("qwe\t ascii.Valid(b) %v" , Valid (b ))
248+ t .Errorf ("STRING(%q): %v" , b , string (b ))
252249 t .Errorf ("Validate(%q) ascii valid: %v; want %v" , string (b ), ! expected , expected )
253250 }
254251}
@@ -259,7 +256,7 @@ var someutf8 = []byte("\xF4\x8F\xBF\xBF")
259256
260257func BenchmarkValid (b * testing.B ) {
261258 impls := map [string ]func ([]byte ) bool {
262- "SIMD" : Valid ,
259+ "AVX" : Valid ,
263260 "Stdlib" : utf8 .Valid ,
264261 }
265262
0 commit comments