fix(parser): implement unicode escape sequence parsing for round-trip support

alpha912 · alpha912 · commit a626bea92927 · 2025-10-16T08:11:37.000Z
- Add \uXXXX parsing (4-digit hex unicode) in parseString()
- Add \xXX parsing (2-digit hex escape, U+0000–U+00FF) in parseString()
- Refactor parseString() to advance j inside each case, since escapes now consume 2, 4, or 6 characters
- Ensures parse(serialize(doc)) works with unicode characters
- Maintains symmetry between parsing and serialization

Fixes P0-2: Round-trip now works correctly with unicode
diff --git a/cli/package.json b/cli/package.json
@@ -38,8 +38,8 @@
   },
   "dependencies": {
     "ajv": "^8.12.0",
-    "omniscript-parser": "^1.1.0",
-    "omniscript-converters": "^1.1.0"
+    "omniscript-parser": "workspace:*",
+    "omniscript-converters": "^1.0.0"
   },
   "devDependencies": {
     "@types/node": "^22.15.33",
diff --git a/parser/src/parser.ts b/parser/src/parser.ts
@@ -110,40 +110,78 @@ function parseString(str: string, i: number): { value: string; index: number } {
       switch (nextChar) {
         case '"':
           out += '"';
+          j += 2;
           break;
         case '\\':
           out += '\\';
+          j += 2;
           break;
         case 'n':
           out += '\n';
+          j += 2;
           break;
         case 't':
           out += '\t';
+          j += 2;
           break;
         case 'r':
           out += '\r';
+          j += 2;
           break;
         case 'b':
           out += '\b';
+          j += 2;
           break;
         case 'f':
           out += '\f';
+          j += 2;
           break;
         case 'v':
           out += '\v';
+          j += 2;
           break;
         case '0':
           out += '\0';
+          j += 2;
           break;
         case '/':
           out += '/';
+          j += 2;
+          break;
+        case 'u':
+          // Parse \uXXXX (4-digit hex unicode)
+          if (j + 5 < str.length) {
+            const hex = str.slice(j + 2, j + 6);
+            if (/^[0-9A-Fa-f]{4}$/.test(hex)) {
+              out += String.fromCharCode(parseInt(hex, 16));
+              j += 6;
+              break;
+            }
+          }
+          // Invalid \u sequence, preserve as-is
+          out += str[j] + (nextChar || '');
+          j += 2;
+          break;
+        case 'x':
+          // Parse \xXX (2-digit hex ascii)
+          if (j + 3 < str.length) {
+            const hex = str.slice(j + 2, j + 4);
+            if (/^[0-9A-Fa-f]{2}$/.test(hex)) {
+              out += String.fromCharCode(parseInt(hex, 16));
+              j += 4;
+              break;
+            }
+          }
+          // Invalid \x sequence, preserve as-is
+          out += str[j] + (nextChar || '');
+          j += 2;
           break;
         default:
           // For unknown escape sequences, preserve the backslash and character
           out += str[j] + (nextChar || '');
+          j += 2;
           break;
       }
-      j += 2;
     } else {
       out += str[j];
       j++;
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml