Skip to content

Commit a626bea

Browse files
committed
fix(parser): implement unicode escape sequence parsing for round-trip support
- Add \uXXXX parsing (4-digit hex unicode) in parseString()
- Add \xXX parsing (2-digit hex ascii) in parseString()
- Refactor parseString() to use explicit j increment for clarity
- Ensures parse(serialize(doc)) works with unicode characters
- Maintains symmetry between parsing and serialization

Fixes P0-2: Round-trip now works correctly with unicode
1 parent 7311836 commit a626bea

File tree

3 files changed

+899
-682
lines changed

3 files changed

+899
-682
lines changed

cli/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@
3838
},
3939
"dependencies": {
4040
"ajv": "^8.12.0",
41-
"omniscript-parser": "^1.1.0",
42-
"omniscript-converters": "^1.1.0"
41+
"omniscript-parser": "workspace:*",
42+
"omniscript-converters": "^1.0.0"
4343
},
4444
"devDependencies": {
4545
"@types/node": "^22.15.33",

parser/src/parser.ts

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,40 +110,78 @@ function parseString(str: string, i: number): { value: string; index: number } {
110110
switch (nextChar) {
111111
case '"':
112112
out += '"';
113+
j += 2;
113114
break;
114115
case '\\':
115116
out += '\\';
117+
j += 2;
116118
break;
117119
case 'n':
118120
out += '\n';
121+
j += 2;
119122
break;
120123
case 't':
121124
out += '\t';
125+
j += 2;
122126
break;
123127
case 'r':
124128
out += '\r';
129+
j += 2;
125130
break;
126131
case 'b':
127132
out += '\b';
133+
j += 2;
128134
break;
129135
case 'f':
130136
out += '\f';
137+
j += 2;
131138
break;
132139
case 'v':
133140
out += '\v';
141+
j += 2;
134142
break;
135143
case '0':
136144
out += '\0';
145+
j += 2;
137146
break;
138147
case '/':
139148
out += '/';
149+
j += 2;
150+
break;
151+
case 'u':
152+
// Parse \uXXXX (4-digit hex unicode)
153+
if (j + 5 < str.length) {
154+
const hex = str.slice(j + 2, j + 6);
155+
if (/^[0-9A-Fa-f]{4}$/.test(hex)) {
156+
out += String.fromCharCode(parseInt(hex, 16));
157+
j += 6;
158+
break;
159+
}
160+
}
161+
// Invalid \u sequence, preserve as-is
162+
out += str[j] + (nextChar || '');
163+
j += 2;
164+
break;
165+
case 'x':
166+
// Parse \xXX (2-digit hex ascii)
167+
if (j + 3 < str.length) {
168+
const hex = str.slice(j + 2, j + 4);
169+
if (/^[0-9A-Fa-f]{2}$/.test(hex)) {
170+
out += String.fromCharCode(parseInt(hex, 16));
171+
j += 4;
172+
break;
173+
}
174+
}
175+
// Invalid \x sequence, preserve as-is
176+
out += str[j] + (nextChar || '');
177+
j += 2;
140178
break;
141179
default:
142180
// For unknown escape sequences, preserve the backslash and character
143181
out += str[j] + (nextChar || '');
182+
j += 2;
144183
break;
145184
}
146-
j += 2;
147185
} else {
148186
out += str[j];
149187
j++;

0 commit comments

Comments
 (0)