@@ -129,6 +129,26 @@ proc parseHook*(s: string, i: var int, v: var SomeFloat) =
129129 i += chars
130130 v = f
131131
proc parseUnicodeEscape(s: string, i: var int): int =
  ## Decodes a `\uXXXX` escape in `s` and returns its code point.
  ##
  ## On entry `i` indexes the `u` of the escape. On exit `i` indexes the last
  ## hex digit that was consumed.
  ##
  ## A high surrogate (0xD800..0xDBFF) must be followed immediately by a
  ## second `\uXXXX` escape holding a low surrogate (0xDC00..0xDFFF); the two
  ## are combined into one code point at or above 0x10000. A high surrogate
  ## without such a partner, or input that ends before the escape is complete,
  ## is reported via `error`. A low surrogate that appears first is returned
  ## unchanged.
  inc i
  # Guard the slice below so truncated input is reported instead of indexing
  # past the end of `s`.
  if i + 4 > s.len:
    error("Expected 4 hex digits after \\u.", i)
  result = parseHexInt(s[i ..< i + 4])
  i += 3
  # Deal with UTF-16 surrogates. Most of the time strings are encoded as UTF-8,
  # but some APIs reply with UTF-16 surrogate pairs that need to be dealt with.
  if (result and 0xfc00) == 0xd800:
    inc i
    if i >= s.len or s[i] != '\\':
      error("Found an Orphan Surrogate.", i)
    inc i
    if i >= s.len or s[i] != 'u':
      error("Found an Orphan Surrogate.", i)
    inc i
    if i + 4 > s.len:
      error("Found an Orphan Surrogate.", i)
    let nextRune = parseHexInt(s[i ..< i + 4])
    i += 3
    if (nextRune and 0xfc00) == 0xdc00:
      result = 0x10000 + (((result - 0xd800) shl 10) or (nextRune - 0xdc00))
    else:
      # The second escape is not a low surrogate, so the first one is orphaned.
      # Previously the second escape was consumed and silently dropped.
      error("Found an Orphan Surrogate.", i)
151+
132152proc parseStringSlow (s: string , i: var int , v: var string ) =
133153 while i < s.len:
134154 let c = s[i]
@@ -146,10 +166,7 @@ proc parseStringSlow(s: string, i: var int, v: var string) =
146166 of 'r' : v.add '\r '
147167 of 't' : v.add '\t '
148168 of 'u' :
149- inc i
150- let u = parseHexInt (s[i ..< i + 4 ])
151- i += 3
152- v.add (Rune (u).toUTF8 ())
169+ v.add (Rune (parseUnicodeEscape (s, i)).toUTF8 ())
153170 else :
154171 v.add (c)
155172 else :
@@ -173,10 +190,7 @@ proc parseStringFast(s: string, i: var int, v: var string) =
173190 let c = s[j]
174191 case c
175192 of 'u' :
176- inc j
177- let u = parseHexInt (s[j ..< j + 4 ])
178- j += 3
179- ll += Rune (u).toUTF8 ().len
193+ ll += Rune (parseUnicodeEscape (s, j)).toUTF8 ().len
180194 else :
181195 inc ll
182196 else :
@@ -207,10 +221,7 @@ proc parseStringFast(s: string, i: var int, v: var string) =
207221 of 'r' : ss.add '\r '
208222 of 't' : ss.add '\t '
209223 of 'u' :
210- inc i
211- let u = parseHexInt (s[i ..< i + 4 ])
212- i += 3
213- for c in Rune (u).toUTF8 ():
224+ for c in Rune (parseUnicodeEscape (s, i)).toUTF8 ():
214225 ss.add (c)
215226 else :
216227 ss.add (c)
0 commit comments