Skip to content

Commit 955d2f3

Browse files
Update libstrings.js
1 parent 69f05d1 commit 955d2f3

File tree

1 file changed

+76
-11
lines changed

1 file changed

+76
-11
lines changed

src/lib/libstrings.js

Lines changed: 76 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,20 @@
1313
#endif
1414

1515
addToLibrary({
16-
// TextEncoder constructor defaults to UTF-8
17-
#if TEXTENCODER == 2
18-
$UTF8Encoder: "new TextEncoder()",
19-
#else
20-
$UTF8Decoder: "typeof TextEncoder != 'undefined' ? new TextEncoder() : undefined",
21-
#endif
22-
2316
// Shared UTF-8 decoder instance. The TextDecoder constructor defaults to UTF-8,
// so no encoding label is passed. With TEXTDECODER == 2 the instance is created
// unconditionally; otherwise TextDecoder is feature-tested first and this
// symbol evaluates to `undefined` when it is missing.
2417
#if TEXTDECODER == 2
2518
$UTF8Decoder: "new TextDecoder()",
2619
#else
2720
$UTF8Decoder: "typeof TextDecoder != 'undefined' ? new TextDecoder() : undefined",
2821
#endif
2922

23+
// Shared UTF-8 encoder instance. The TextEncoder constructor defaults to UTF-8,
// so no arguments are passed. With TEXTENCODER == 2 the instance is created
// unconditionally; otherwise TextEncoder is feature-tested first and this
// symbol evaluates to `undefined` when it is missing, so users of it must
// check for that before calling `encode`.
24+
#if TEXTENCODER == 2
25+
$UTF8Encoder: "new TextEncoder()",
26+
#else
27+
$UTF8Encoder: "typeof TextEncoder != 'undefined' ? new TextEncoder() : undefined",
28+
#endif
29+
3030
$findStringEnd: (heapOrArray, idx, maxBytesToRead, ignoreNul) => {
3131
var maxIdx = idx + maxBytesToRead;
3232
if (ignoreNul) return maxIdx;
@@ -158,9 +158,14 @@ addToLibrary({
158158
* terminator.
159159
* @return {number} The number of bytes written, EXCLUDING the null terminator.
160160
*/
161+
$stringToUTF8Array__deps: [
162+
#if TEXTENCODER == 2
163+
'$UTF8Encoder',
164+
#endif
161165
#if ASSERTIONS
162-
$stringToUTF8Array__deps: ['$warnOnce'],
166+
'$warnOnce',
163167
#endif
168+
],
164169
$stringToUTF8Array: (str, heap, outIdx, maxBytesToWrite) => {
165170
#if CAN_ADDRESS_2GB
166171
outIdx >>>= 0;
@@ -173,6 +178,32 @@ addToLibrary({
173178
if (!(maxBytesToWrite > 0))
174179
return 0;
175180

181+
#if TEXTENCODER == 2
182+
// Always use TextEncoder when TEXTENCODER == 2
183+
var encoded = UTF8Encoder.encode(str);
184+
var bytesToWrite = Math.min(encoded.length, maxBytesToWrite - 1); // -1 for null terminator
185+
186+
for (var i = 0; i < bytesToWrite; ++i) {
187+
heap[outIdx + i] = encoded[i];
188+
}
189+
// Null-terminate the string
190+
heap[outIdx + bytesToWrite] = 0;
191+
return bytesToWrite;
192+
#else
193+
// When using conditional TextEncoder, use it for longer strings if available
194+
if (str.length > 16 && UTF8Encoder) {
195+
var encoded = UTF8Encoder.encode(str);
196+
var bytesToWrite = Math.min(encoded.length, maxBytesToWrite - 1); // -1 for null terminator
197+
198+
for (var i = 0; i < bytesToWrite; ++i) {
199+
heap[outIdx + i] = encoded[i];
200+
}
201+
// Null-terminate the string
202+
heap[outIdx + bytesToWrite] = 0;
203+
return bytesToWrite;
204+
}
205+
206+
// Fallback: manual UTF-8 encoding
176207
var startIdx = outIdx;
177208
var endIdx = outIdx + maxBytesToWrite - 1; // -1 for string null terminator.
178209
for (var i = 0; i < str.length; ++i) {
@@ -209,6 +240,7 @@ addToLibrary({
209240
// Null-terminate the pointer to the buffer.
210241
heap[outIdx] = 0;
211242
return outIdx - startIdx;
243+
#endif // TEXTENCODER == 2
212244
},
213245

214246
/**
@@ -229,14 +261,47 @@ addToLibrary({
229261
},
230262

231263
/**
232-
* Returns the number of bytes the given JavaScript string takes if encoded as a
264+
* Returns the number of bytes the given JavaScript string takes if encoded as a
233265
* UTF8 byte array, EXCLUDING the null terminator byte.
234266
*
235-
* @param {string} str - The JavaScript string to operate on.
236-
* @return {number} The length, in bytes, of the UTF-8 encoded string.
267+
* @param {string} str - The JavaScript string to operate on.
268+
* @return {number} Length, in bytes, of the UTF8 encoded string.
237269
*/
270+
$lengthBytesUTF8__deps: [
271+
#if TEXTENCODER == 2
272+
'$UTF8Encoder',
273+
#endif
274+
],
238275
$lengthBytesUTF8: (str) => {
276+
#if TEXTENCODER == 2
277+
// Always use TextEncoder when TEXTENCODER == 2
239278
return UTF8Encoder.encode(str).length;
279+
#else
280+
// When using conditional TextEncoder, use it for longer strings if available
281+
if (str.length > 16 && UTF8Encoder) {
282+
return UTF8Encoder.encode(str).length;
283+
}
284+
285+
// Fallback: manual calculation
286+
var len = 0;
287+
for (var i = 0; i < str.length; ++i) {
288+
// Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code
289+
// unit, not a Unicode code point of the character! So decode
290+
// UTF16->UTF32->UTF8.
291+
// See http://unicode.org/faq/utf_bom.html#utf16-3
292+
var c = str.charCodeAt(i); // possibly a lead surrogate
293+
if (c <= 0x7F) {
294+
len++;
295+
} else if (c <= 0x7FF) {
296+
len += 2;
297+
} else if (c >= 0xD800 && c <= 0xDFFF) {
298+
len += 4; ++i;
299+
} else {
300+
len += 3;
301+
}
302+
}
303+
return len;
304+
#endif // TEXTENCODER == 2
240305
},
241306

242307
$intArrayFromString__docs: '/** @type {function(string, boolean=, number=)} */',

0 commit comments

Comments
 (0)