diff --git a/src/Parser.mo b/src/Parser.mo
index 41fe186..9c80e05 100644
--- a/src/Parser.mo
+++ b/src/Parser.mo
@@ -50,32 +50,33 @@ module {
 
   private func parseObject() : Result.Result {
     advance();
-    var fields : [(Text, Types.Json)] = [];
+    // Collect members in a growable Buffer; Array.append re-copied the array per member.
+    let fields = Buffer.Buffer<(Text, Types.Json)>(8);
 
     switch (current()) {
       case (?#endObject) {
         advance();
-        #ok(#object_(fields));
+        #ok(#object_(Buffer.toArray(fields)));
       };
       case (?#string(_)) {
         switch (parseMember()) {
           case (#err(e)) { #err(e) };
           case (#ok(field)) {
-            fields := [(field.0, field.1)];
+            fields.add(field);
             loop {
               switch (current()) {
                 case (?#valueSeperator) {
                   advance();
                   switch (parseMember()) {
                     case (#ok(next)) {
-                      fields := Array.append(fields, [(next.0, next.1)]);
+                      fields.add(next);
                     };
                     case (#err(e)) { return #err(e) };
                   };
                 };
                 case (?#endObject) {
                   advance();
-                  return #ok(#object_(fields));
+                  return #ok(#object_(Buffer.toArray(fields)));
                 };
                 case (null) { return #err(#unexpectedEOF) };
                 case (_) {
@@ -114,12 +115,13 @@ module {
 
   private func parseArray() : Result.Result {
     advance();
-    var elements : [Types.Json] = [];
+    // Collect elements in a growable Buffer; Array.append re-copied the array per element.
+    let elements = Buffer.Buffer<Types.Json>(16);
 
     switch (current()) {
       case (?#endArray) {
         advance();
-        #ok(#array(elements));
+        #ok(#array(Buffer.toArray(elements)));
       };
       case (null) {
         #err(#unexpectedEOF);
@@ -128,21 +130,21 @@ module {
         switch (parseValue()) {
           case (#err(e)) { #err(e) };
           case (#ok(value)) {
-            elements := [value];
+            elements.add(value);
             loop {
               switch (current()) {
                 case (?#valueSeperator) {
                   advance();
                   switch (parseValue()) {
                     case (#ok(next)) {
-                      elements := Array.append(elements, [next]);
+                      elements.add(next);
                     };
                     case (#err(e)) { return #err(e) };
                   };
                 };
                 case (?#endArray) {
                   advance();
-                  return #ok(#array(elements));
+                  return #ok(#array(Buffer.toArray(elements)));
                 };
                 case (null) { return #err(#unexpectedEOF) };
                 case (_) {
diff --git a/src/Types.mo b/src/Types.mo
index 3a2e038..78261f7 100644
--- a/src/Types.mo
+++ b/src/Types.mo
@@ -6,6 +6,8 @@ import Float "mo:base/Float";
 import Bool "mo:base/Bool";
 import Iter "mo:base/Iter";
 import Array "mo:base/Array";
+import Buffer "mo:base/Buffer";
+import Nat32 "mo:base/Nat32";
 
 module {
   public type Path = Text;
@@ -132,6 +134,59 @@ module {
     let arr = Text.toArray(t);
     arr[i];
   };
+  // Formats the low 16 bits of `n` as exactly four lowercase hex digits,
+  // zero-padded on the left (0x1F -> "001f"). Higher bits are ignored.
+  func to4DigitHex(n: Nat32) : Text {
+    // Convert the digit table once instead of on every loop iteration.
+    let hex_chars = Text.toArray("0123456789abcdef");
+    var s = "";
+    var i = n;
+    var counter : Nat = 0;
+
+    while (counter < 4) {
+      // Get the last 4 bits to find the hex character index.
+      let index = Nat32.toNat(i & 0xF);
+      // Prepend the character to build the string in the correct order.
+      s := Text.fromChar(hex_chars[index]) # s;
+      // Shift bits for the next character.
+      i >>= 4;
+      // Increment the counter.
+      counter += 1;
+    };
+    return s;
+  };
+  // Escapes `s` so it can be placed between double quotes in JSON output.
+  // `"` and `\` are backslash-escaped; newline, carriage return, tab, backspace
+  // and form feed use their short escapes; any other character below U+0020
+  // becomes \u00XX; every other character is copied through unchanged.
+  public func escape(s: Text) : Text {
+    // One output piece per input character, so size the buffer to match.
+    let buf = Buffer.Buffer<Text>(s.size());
+    for (c in s.chars()) {
+      switch (c) {
+        case ('\"') { buf.add("\\\"") };
+        case ('\\') { buf.add("\\\\") };
+        case ('\n') { buf.add("\\n") };
+        case ('\r') { buf.add("\\r") };
+        case ('\t') { buf.add("\\t") };
+        // Note: Motoko Char doesn't have literals for \b and \f,
+        // so we handle them in the default case via their code points.
+        case _ {
+          let code = Char.toNat32(c);
+          if (code == 0x8) { // Backspace
+            buf.add("\\b");
+          } else if (code == 0xC) { // Form feed
+            buf.add("\\f");
+          } else if (code < 32) { // Other control characters (U+0000 to U+001F)
+            buf.add("\\u" # to4DigitHex(code));
+          } else { // A regular, non-special character.
+            buf.add(Text.fromChar(c));
+          };
+        };
+      };
+    };
+    return Text.join("", buf.vals());
+  };
   public func toText(json : Json) : Text {
     switch (json) {
       case (#object_(entries)) {
@@ -156,7 +211,7 @@ module {
         };
         result # "]";
       };
-      case (#string(text)) { "\"" # text # "\"" };
+      case (#string(text)) { "\"" # escape(text) # "\"" };
       case (#number(#int(n))) { Int.toText(n) };
       case (#number(#float(n))) { Float.format(#exact, n) };
       case (#bool(b)) { Bool.toText(b) };
diff --git a/test/lib.test.mo b/test/lib.test.mo
index 60c217e..ccb9f48 100644
--- a/test/lib.test.mo
+++ b/test/lib.test.mo
@@ -308,3 +308,110 @@ test(
     };
   },
 );
+
+test(
+  "stringify - special character escaping (Comprehensive)",
+  func() {
+    type TestCase = {
+      name: Text;
+      value : Json.Json;
+      expectedText : Text;
+    };
+
+    // A comprehensive list of test cases covering the JSON spec.
+    let testCases : [TestCase] = [
+      // --- Basic Required Escapes ---
+      {
+        name = "String with quotes";
+        value = #string("hello \"world\"");
+        expectedText = "\"hello \\\"world\\\"\"";
+      },
+      {
+        name = "String with backslash";
+        value = #string("C:\\Users\\");
+        expectedText = "\"C:\\\\Users\\\\\"";
+      },
+      {
+        name = "String with newline";
+        value = #string("line1\nline2");
+        expectedText = "\"line1\\nline2\"";
+      },
+      {
+        name = "String with carriage return";
+        value = #string("line1\rline2");
+        expectedText = "\"line1\\rline2\"";
+      },
+      {
+        name = "String with tab";
+        value = #string("col1\tcol2");
+        expectedText = "\"col1\\tcol2\"";
+      },
+      {
+        // Motoko uses \u{...} for unicode literals. 0x8 is backspace.
+        name = "String with backspace (\\b)";
+        value = #string("a\u{8}b");
+        expectedText = "\"a\\bb\"";
+      },
+      {
+        // 0xC is form feed.
+        name = "String with form feed (\\f)";
+        value = #string("a\u{c}b");
+        expectedText = "\"a\\fb\"";
+      },
+
+      // --- Control Character Escapes (\uXXXX) ---
+      {
+        // U+0000 (null character) must be escaped.
+        name = "Control character NULL (U+0000)";
+        value = #string("\u{0}");
+        expectedText = "\"\\u0000\"";
+      },
+      {
+        // U+001F (unit separator) is the last control character.
+        name = "Control character Unit Separator (U+001F)";
+        value = #string("\u{1f}");
+        expectedText = "\"\\u001f\"";
+      },
+
+      // --- Edge Cases and Combinations ---
+      {
+        name = "Empty string";
+        value = #string("");
+        expectedText = "\"\"";
+      },
+      {
+        name = "String containing only a quote";
+        value = #string("\"");
+        expectedText = "\"\\\"\"";
+      },
+      {
+        name = "String containing only a backslash";
+        value = #string("\\");
+        expectedText = "\"\\\\\"";
+      },
+      {
+        name = "The exact problem case: a string that is a JSON object";
+        value = #string("{\"key\":\"value\"}");
+        expectedText = "\"{\\\"key\\\":\\\"value\\\"}\"";
+      },
+      {
+        name = "A mix of all special characters";
+        value = #string("key:\"val\"\n\t\\path/\u{1}end");
+        expectedText = "\"key:\\\"val\\\"\\n\\t\\\\path/\\u0001end\"";
+      }
+    ];
+
+    for (testCase in testCases.vals()) {
+      let result = Json.stringify(testCase.value, null);
+
+      if (result != testCase.expectedText) {
+        Debug.trap(
+          "stringify test case '" # testCase.name # "' failed\nInput: "
+          # debug_show (testCase.value)
+          # "\nExpected: " # testCase.expectedText
+          # "\nActual: " # result
+        );
+      };
+    };
+  },
+);
\ No newline at end of file