Skip to content

Commit be0690b

Browse files
NFC: Use TextEncoder for stringToUTF8Array and lengthBytesUTF8 Function
1 parent fb9f5e3 commit be0690b

35 files changed

+238
-147
lines changed

AUTHORS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,3 +601,4 @@ a license to everyone to use it as detailed in LICENSE.)
601601
* Artur Gatin <agatin@teladochealth.com> (copyright owned by Teladoc Health, Inc.)
602602
* Christian Lloyd <clloyd@teladochealth.com> (copyright owned by Teladoc Health, Inc.)
603603
* Sean Morris <sean@seanmorr.is>
604+
* Pt. Prashant Tripathi <ptprashanttripathi@outlook.com>

site/source/docs/tools_reference/settings_reference.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2700,6 +2700,18 @@ of ENVIRONMENT since TextDecoder is not available in those environments).
27002700

27012701
Default value: 1
27022702

2703+
.. _textencoder:
2704+
2705+
TEXTENCODER
2706+
===========
2707+
2708+
The default value of 1 means the generated code will use TextEncoder if
2709+
available and fall back to custom encoding code when it is not available.
2710+
If set to 2, we assume TextEncoder is always present and usable, and no
2711+
fallback JS code will be emitted.
2712+
2713+
Default value: 1
2714+
27032715
.. _embind_std_string_is_utf8:
27042716

27052717
EMBIND_STD_STRING_IS_UTF8

src/lib/libstrings.js

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
#error "TEXTDECODER must be either 1 or 2"
99
#endif
1010

11+
#if TEXTENCODER != 1 && TEXTENCODER != 2
12+
#error "TEXTENCODER must be either 1 or 2"
13+
#endif
14+
1115
addToLibrary({
1216
// TextDecoder constructor defaults to UTF-8
1317
#if TEXTDECODER == 2
@@ -16,6 +20,13 @@ addToLibrary({
1620
$UTF8Decoder: "typeof TextDecoder != 'undefined' ? new TextDecoder() : undefined",
1721
#endif
1822

23+
// TextEncoder constructor defaults to UTF-8
24+
#if TEXTENCODER == 2
25+
$UTF8Encoder: "new TextEncoder()",
26+
#else
27+
$UTF8Encoder: "typeof TextEncoder != 'undefined' ? new TextEncoder() : undefined",
28+
#endif
29+
1930
$findStringEnd: (heapOrArray, idx, maxBytesToRead, ignoreNul) => {
2031
var maxIdx = idx + maxBytesToRead;
2132
if (ignoreNul) return maxIdx;
@@ -147,9 +158,12 @@ addToLibrary({
147158
* terminator.
148159
* @return {number} The number of bytes written, EXCLUDING the null terminator.
149160
*/
161+
$stringToUTF8Array__deps: [
162+
'$UTF8Encoder',
150163
#if ASSERTIONS
151-
$stringToUTF8Array__deps: ['$warnOnce'],
164+
'$warnOnce',
152165
#endif
166+
],
153167
$stringToUTF8Array: (str, heap, outIdx, maxBytesToWrite) => {
154168
#if CAN_ADDRESS_2GB
155169
outIdx >>>= 0;
@@ -162,6 +176,28 @@ addToLibrary({
162176
if (!(maxBytesToWrite > 0))
163177
return 0;
164178

179+
#if TEXTENCODER == 2
180+
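// TEXTENCODER == 2: always use TextEncoder. NOTE(review): the byte-level truncation below can cut a multi-byte UTF-8 sequence in half when the buffer is too small; confirm this is acceptable.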
181+
var encoded = UTF8Encoder.encode(str);
182+
var bytesToWrite = Math.min(encoded.length, maxBytesToWrite - 1); // -1 for null terminator
183+
for (var i = 0; i < bytesToWrite; ++i) {
184+
heap[outIdx + i] = encoded[i];
185+
}
186+
heap[outIdx + bytesToWrite] = 0;
187+
return bytesToWrite;
188+
#else
189+
// TEXTENCODER == 1: use TextEncoder whenever it is available, regardless of string length
190+
if (UTF8Encoder) {
191+
var encoded = UTF8Encoder.encode(str);
192+
var bytesToWrite = Math.min(encoded.length, maxBytesToWrite - 1); // -1 for null terminator
193+
for (var i = 0; i < bytesToWrite; ++i) {
194+
heap[outIdx + i] = encoded[i];
195+
}
196+
heap[outIdx + bytesToWrite] = 0;
197+
return bytesToWrite;
198+
}
199+
200+
// Fallback: manual UTF-8 encoding
165201
var startIdx = outIdx;
166202
var endIdx = outIdx + maxBytesToWrite - 1; // -1 for string null terminator.
167203
for (var i = 0; i < str.length; ++i) {
@@ -198,6 +234,7 @@ addToLibrary({
198234
// Null-terminate the pointer to the buffer.
199235
heap[outIdx] = 0;
200236
return outIdx - startIdx;
237+
#endif // TEXTENCODER == 2
201238
},
202239

203240
/**
@@ -224,7 +261,18 @@ addToLibrary({
224261
* @param {string} str - JavaScript string to operate on
225262
* @return {number} Length, in bytes, of the UTF8 encoded string.
226263
*/
264+
$lengthBytesUTF8__deps: ['$UTF8Encoder'],
227265
$lengthBytesUTF8: (str) => {
266+
#if TEXTENCODER == 2
267+
// Always use TextEncoder when TEXTENCODER == 2
268+
return UTF8Encoder.encode(str).length;
269+
#else
270+
// TEXTENCODER == 1: use TextEncoder whenever it is available, regardless of string length
271+
if (UTF8Encoder) {
272+
return UTF8Encoder.encode(str).length;
273+
}
274+
275+
// Fallback: manual calculation
228276
var len = 0;
229277
for (var i = 0; i < str.length; ++i) {
230278
// Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code
@@ -243,6 +291,7 @@ addToLibrary({
243291
}
244292
}
245293
return len;
294+
#endif // TEXTENCODER == 2
246295
},
247296

248297
$intArrayFromString__docs: '/** @type {function(string, boolean=, number=)} */',

src/settings.js

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1776,6 +1776,13 @@ var EVAL_CTORS = 0;
17761776
// [link]
17771777
var TEXTDECODER = 1;
17781778

1779+
// The default value of 1 means the generated code will use TextEncoder if
1780+
// available and fall back to custom encoding code when it is not available.
1781+
// If set to 2, we assume TextEncoder is always present and usable, and no
1782+
// fallback JS code will be emitted.
1783+
// [link]
1784+
var TEXTENCODER = 1;
1785+
17791786
// Embind specific: If enabled, assume UTF-8 encoded data in std::string binding.
17801787
// Disable this to support binary data transfer.
17811788
// [link]

test/code_size/test_codesize_cxx_ctors1.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
2-
"a.out.js": 19754,
3-
"a.out.js.gz": 8162,
4-
"a.out.nodebug.wasm": 129509,
5-
"a.out.nodebug.wasm.gz": 49243,
6-
"total": 149263,
7-
"total_gz": 57405,
2+
"a.out.js": 19941,
3+
"a.out.js.gz": 8224,
4+
"a.out.nodebug.wasm": 129504,
5+
"a.out.nodebug.wasm.gz": 49232,
6+
"total": 149445,
7+
"total_gz": 57456,
88
"sent": [
99
"__cxa_throw",
1010
"_abort_js",

test/code_size/test_codesize_cxx_ctors2.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
2-
"a.out.js": 19732,
3-
"a.out.js.gz": 8148,
4-
"a.out.nodebug.wasm": 128936,
5-
"a.out.nodebug.wasm.gz": 48884,
6-
"total": 148668,
7-
"total_gz": 57032,
2+
"a.out.js": 19919,
3+
"a.out.js.gz": 8209,
4+
"a.out.nodebug.wasm": 128931,
5+
"a.out.nodebug.wasm.gz": 48876,
6+
"total": 148850,
7+
"total_gz": 57085,
88
"sent": [
99
"__cxa_throw",
1010
"_abort_js",

test/code_size/test_codesize_cxx_except.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
2-
"a.out.js": 23415,
3-
"a.out.js.gz": 9145,
4-
"a.out.nodebug.wasm": 171271,
5-
"a.out.nodebug.wasm.gz": 57338,
6-
"total": 194686,
7-
"total_gz": 66483,
2+
"a.out.js": 23604,
3+
"a.out.js.gz": 9203,
4+
"a.out.nodebug.wasm": 171266,
5+
"a.out.nodebug.wasm.gz": 57323,
6+
"total": 194870,
7+
"total_gz": 66526,
88
"sent": [
99
"__cxa_begin_catch",
1010
"__cxa_end_catch",

test/code_size/test_codesize_cxx_except_wasm.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
2-
"a.out.js": 19643,
3-
"a.out.js.gz": 8112,
4-
"a.out.nodebug.wasm": 144630,
5-
"a.out.nodebug.wasm.gz": 54894,
6-
"total": 164273,
7-
"total_gz": 63006,
2+
"a.out.js": 19831,
3+
"a.out.js.gz": 8171,
4+
"a.out.nodebug.wasm": 144625,
5+
"a.out.nodebug.wasm.gz": 54883,
6+
"total": 164456,
7+
"total_gz": 63054,
88
"sent": [
99
"_abort_js",
1010
"_tzset_js",

test/code_size/test_codesize_cxx_except_wasm_legacy.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
2-
"a.out.js": 19643,
3-
"a.out.js.gz": 8112,
4-
"a.out.nodebug.wasm": 142219,
5-
"a.out.nodebug.wasm.gz": 54358,
6-
"total": 161862,
7-
"total_gz": 62470,
2+
"a.out.js": 19831,
3+
"a.out.js.gz": 8171,
4+
"a.out.nodebug.wasm": 142214,
5+
"a.out.nodebug.wasm.gz": 54349,
6+
"total": 162045,
7+
"total_gz": 62520,
88
"sent": [
99
"_abort_js",
1010
"_tzset_js",

test/code_size/test_codesize_cxx_lto.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
2-
"a.out.js": 19082,
3-
"a.out.js.gz": 7841,
4-
"a.out.nodebug.wasm": 106463,
5-
"a.out.nodebug.wasm.gz": 42596,
6-
"total": 125545,
7-
"total_gz": 50437,
2+
"a.out.js": 18725,
3+
"a.out.js.gz": 7678,
4+
"a.out.nodebug.wasm": 106458,
5+
"a.out.nodebug.wasm.gz": 42588,
6+
"total": 125183,
7+
"total_gz": 50266,
88
"sent": [
99
"a (emscripten_resize_heap)",
1010
"b (_setitimer_js)",

0 commit comments

Comments
 (0)