Skip to content

Commit f759b1c

Browse files
committed
fix: support UTF-8 strings and add some test cases:
1. column number calculation now supports lines containing UTF-8 strings 2. add some line and column number test cases 3. add the utf8_str_len function 4. error objects have correct columnNumber and lineNumber properties
1 parent 975477e commit f759b1c

File tree

8 files changed

+7530
-7230
lines changed

8 files changed

+7530
-7230
lines changed

include/quickjs/cutils.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ static inline void dbuf_set_error(DynBuf *s)
277277

278278
int unicode_to_utf8(uint8_t *buf, unsigned int c);
279279
int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp);
280+
int utf8_str_len(const uint8_t *p_start, const uint8_t *p_end);
280281

281282
static inline int from_hex(int c)
282283
{

qjscalc.c

Lines changed: 5250 additions & 5232 deletions
Large diffs are not rendered by default.

repl.c

Lines changed: 1978 additions & 1981 deletions
Large diffs are not rendered by default.

src/core/parser.c

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,15 @@ static void __attribute((unused)) dump_token(JSParseState *s,
129129
}
130130
}
131131

132+
/* Return the running column count for the parser state `s`.
 *
 * When column_last_ptr lies beyond column_ptr, the bytes between the two
 * are measured with utf8_str_len(), the result is added to
 * column_num_count, and column_ptr is moved up to column_last_ptr so the
 * same bytes are not measured again on the next call. Otherwise the
 * stored count is returned unchanged.
 */
static int calc_column_position(JSParseState *s) {
    const uint8_t *counted_up_to = s->column_ptr;
    const uint8_t *target = s->column_last_ptr;

    if (target <= counted_up_to) {
        /* nothing new to measure */
        return s->column_num_count;
    }

    s->column_num_count += utf8_str_len(counted_up_to, target);
    s->column_ptr = target;
    return s->column_num_count;
}
140+
132141
int __attribute__((format(printf, 2, 3))) js_parse_error(JSParseState *s, const char *fmt, ...)
133142
{
134143
JSContext *ctx = s->ctx;
@@ -142,7 +151,7 @@ int __attribute__((format(printf, 2, 3))) js_parse_error(JSParseState *s, const
142151
if (s->cur_func && s->cur_func->backtrace_barrier)
143152
backtrace_flags = JS_BACKTRACE_FLAG_SINGLE_LEVEL;
144153

145-
int column_num = s->column_last_ptr - s->column_ptr;
154+
int column_num = calc_column_position(s);
146155
build_backtrace(ctx, ctx->rt->current_exception,
147156
s->filename, s->line_num,
148157
column_num < 0 ? -1 : column_num,
@@ -183,7 +192,7 @@ static __exception int js_parse_template_part(JSParseState *s, const uint8_t *p)
183192
{
184193
uint32_t c;
185194
StringBuffer b_s, *b = &b_s;
186-
s->token.column_num = s->column_last_ptr - s->column_ptr;
195+
s->token.column_num = calc_column_position(s);
187196

188197
/* p points to the first byte of the template part */
189198
if (string_buffer_init(s->ctx, b, 32))
@@ -216,8 +225,8 @@ static __exception int js_parse_template_part(JSParseState *s, const uint8_t *p)
216225
}
217226
if (c == '\n') {
218227
s->line_num++;
219-
s->column_last_ptr = p;
220-
s->column_ptr = p;
228+
s->column_ptr = s->column_last_ptr = p;
229+
s->column_num_count = 0;
221230
} else if (c >= 0x80) {
222231
const uint8_t *p_next;
223232
c = unicode_from_utf8(p - 1, UTF8_CHAR_LEN_MAX, &p_next);
@@ -251,7 +260,7 @@ static __exception int js_parse_string(JSParseState *s, int sep,
251260
int ret;
252261
uint32_t c;
253262
StringBuffer b_s, *b = &b_s;
254-
s->token.column_num = s->column_last_ptr - s->column_ptr;
263+
s->token.column_num = calc_column_position(s);
255264

256265
/* string */
257266
if (string_buffer_init(s->ctx, b, 32))
@@ -309,8 +318,8 @@ static __exception int js_parse_string(JSParseState *s, int sep,
309318
p++;
310319
if (sep != '`') {
311320
s->line_num++;
312-
s->column_last_ptr = p;
313-
s->column_ptr = p;
321+
s->column_ptr = s->column_last_ptr = p;
322+
s->column_num_count = 0;
314323
}
315324
continue;
316325
default:
@@ -567,7 +576,6 @@ static JSAtom parse_ident(JSParseState *s, const uint8_t **pp,
567576
return atom;
568577
}
569578

570-
571579
static __exception int next_token(JSParseState *s)
572580
{
573581
const uint8_t *p;
@@ -619,6 +627,7 @@ static __exception int next_token(JSParseState *s)
619627
s->got_lf = TRUE;
620628
s->line_num++;
621629
s->column_ptr = p;
630+
s->column_num_count = 0;
622631
goto redo;
623632
case '\f':
624633
case '\v':
@@ -643,6 +652,7 @@ static __exception int next_token(JSParseState *s)
643652
s->line_num++;
644653
s->got_lf = TRUE; /* considered as LF for ASI */
645654
s->column_ptr = ++p;
655+
s->column_num_count = 0;
646656
} else if (*p == '\r') {
647657
s->got_lf = TRUE; /* considered as LF for ASI */
648658
p++;
@@ -1077,8 +1087,8 @@ static __exception int next_token(JSParseState *s)
10771087
}
10781088

10791089
s->buf_ptr = p;
1080-
if(!s->token.column_num && s->column_last_ptr > s->column_ptr) {
1081-
s->token.column_num = s->column_last_ptr - s->column_ptr;
1090+
if (!s->token.column_num) {
1091+
s->token.column_num = calc_column_position(s);
10821092
}
10831093

10841094
// dump_token(s, &s->token);
@@ -1170,6 +1180,7 @@ __exception int json_next_token(JSParseState *s)
11701180
p++;
11711181
s->line_num++;
11721182
s->column_ptr = p;
1183+
s->column_num_count = 0;
11731184
goto redo;
11741185
case '\f':
11751186
case '\v':
@@ -1202,6 +1213,7 @@ __exception int json_next_token(JSParseState *s)
12021213
if (*p == '\n') {
12031214
s->line_num++;
12041215
s->column_ptr = ++p;
1216+
s->column_num_count = 0;
12051217
} else if (*p == '\r') {
12061218
p++;
12071219
} else if (*p >= 0x80) {
@@ -1312,8 +1324,8 @@ __exception int json_next_token(JSParseState *s)
13121324
}
13131325

13141326
s->buf_ptr = p;
1315-
if(!s->token.column_num && s->column_last_ptr > s->column_ptr){
1316-
s->token.column_num = s->column_last_ptr - s->column_ptr;
1327+
if (!s->token.column_num) {
1328+
s->token.column_num = calc_column_position(s);
13171329
}
13181330

13191331
// dump_token(s, &s->token);
@@ -2354,16 +2366,20 @@ static int __exception js_parse_property_name(JSParseState *s,
23542366
typedef struct JSParsePos {
23552367
int last_line_num;
23562368
int line_num;
2357-
int column_num;
23582369
BOOL got_lf;
23592370
const uint8_t *ptr;
2371+
const uint8_t *column_ptr;
2372+
const uint8_t *column_last_ptr;
2373+
int column_num_count;
23602374
} JSParsePos;
23612375

23622376
static int js_parse_get_pos(JSParseState *s, JSParsePos *sp)
23632377
{
23642378
sp->last_line_num = s->last_line_num;
23652379
sp->line_num = s->token.line_num;
2366-
sp->column_num = s->token.column_num;
2380+
sp->column_ptr = s->column_ptr;
2381+
sp->column_last_ptr = s->column_last_ptr;
2382+
sp->column_num_count = s->column_num_count;
23672383
sp->ptr = s->token.ptr;
23682384
sp->got_lf = s->got_lf;
23692385
return 0;
@@ -2373,7 +2389,9 @@ static __exception int js_parse_seek_token(JSParseState *s, const JSParsePos *sp
23732389
{
23742390
s->token.line_num = sp->last_line_num;
23752391
s->line_num = sp->line_num;
2376-
s->column_last_ptr = sp->ptr;
2392+
s->column_ptr = sp->column_ptr;
2393+
s->column_last_ptr = sp->column_last_ptr;
2394+
s->column_num_count = sp->column_num_count;
23772395
s->buf_ptr = sp->ptr;
23782396
s->got_lf = sp->got_lf;
23792397
return next_token(s);
@@ -12259,6 +12277,8 @@ void js_parse_init(JSContext *ctx, JSParseState *s,
1225912277
s->filename = filename;
1226012278
s->line_num = 1;
1226112279
s->column_ptr = (const uint8_t*)input;
12280+
s->column_last_ptr = s->column_ptr;
12281+
s->column_num_count = 0;
1226212282
s->buf_ptr = (const uint8_t *)input;
1226312283
s->buf_end = s->buf_ptr + input_len;
1226412284
s->token.val = ' ';

src/core/parser.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,7 @@ typedef struct JSParseState {
368368
int line_num; /* line number of current offset */
369369
const uint8_t *column_ptr; /* column head pointer on every line */
370370
const uint8_t *column_last_ptr;
371+
int column_num_count;
371372
const char *filename;
372373
JSToken token;
373374
BOOL got_lf; /* true if got line feed before the current token */

src/core/runtime.c

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1137,15 +1137,19 @@ void build_backtrace(JSContext* ctx, JSValueConst error_obj, const char* filenam
11371137
const char* str1;
11381138
JSObject* p;
11391139
BOOL backtrace_barrier;
1140+
int latest_line_num = -1;
1141+
int latest_column_num = -1;
11401142

11411143
js_dbuf_init(ctx, &dbuf);
11421144
if (filename) {
11431145
dbuf_printf(&dbuf, " at %s", filename);
11441146
if (line_num != -1) {
1147+
latest_line_num = line_num;
11451148
dbuf_printf(&dbuf, ":%d", line_num);
11461149
}
11471150

11481151
if (column_num != -1) {
1152+
latest_column_num = column_num;
11491153
dbuf_printf(&dbuf, ":%d", column_num);
11501154
}
11511155

@@ -1186,6 +1190,14 @@ void build_backtrace(JSContext* ctx, JSValueConst error_obj, const char* filenam
11861190
column_num += 1;
11871191
}
11881192

1193+
if (latest_line_num == -1) {
1194+
latest_line_num = line_num;
1195+
}
1196+
1197+
if (latest_column_num == -1) {
1198+
latest_column_num = column_num;
1199+
}
1200+
11891201
atom_str = JS_AtomToCString(ctx, b->debug.filename);
11901202
dbuf_printf(&dbuf, " (%s", atom_str ? atom_str : "<null>");
11911203
JS_FreeCString(ctx, atom_str);
@@ -1220,9 +1232,22 @@ void build_backtrace(JSContext* ctx, JSValueConst error_obj, const char* filenam
12201232
JS_DefinePropertyValue(ctx, error_obj, JS_ATOM_stack, str, JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE);
12211233

12221234
if (line_num != -1) {
1223-
JS_DefinePropertyValue(ctx, error_obj, JS_ATOM_lineNumber, JS_NewInt32(ctx, line_num), JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE);
1235+
JS_DefinePropertyValue(
1236+
ctx,
1237+
error_obj,
1238+
JS_ATOM_lineNumber,
1239+
JS_NewInt32(ctx, latest_line_num),
1240+
JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE
1241+
);
1242+
12241243
if (column_num != -1) {
1225-
JS_DefinePropertyValue(ctx, error_obj, JS_ATOM_columnNumber, JS_NewInt32(ctx, column_num), JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE);
1244+
JS_DefinePropertyValue(
1245+
ctx,
1246+
error_obj,
1247+
JS_ATOM_columnNumber,
1248+
JS_NewInt32(ctx, latest_column_num),
1249+
JS_PROP_WRITABLE | JS_PROP_CONFIGURABLE
1250+
);
12261251
}
12271252
}
12281253
}

src/cutils.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,18 @@ int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp)
303303
return c;
304304
}
305305

306+
int utf8_str_len(const uint8_t *p_start, const uint8_t *p_end) {
307+
int count = 0;
308+
while (p_start < p_end) {
309+
if (!unicode_from_utf8(p_start, UTF8_CHAR_LEN_MAX, &p_start)) {
310+
break;
311+
}
312+
count += 1;
313+
}
314+
315+
return count;
316+
}
317+
306318
#if 0
307319

308320
#if defined(EMSCRIPTEN) || defined(__ANDROID__)

0 commit comments

Comments
 (0)