13
13
#endif
14
14
15
15
addToLibrary ( {
16
- // TextEncoder constructor defaults to UTF-8
17
- #if TEXTENCODER == 2
18
- $UTF8Encoder : "new TextEncoder()" ,
19
- #else
20
- $UTF8Decoder : "typeof TextEncoder != 'undefined' ? new TextEncoder() : undefined" ,
21
- #endif
22
-
23
16
// TextDecoder constructor defaults to UTF-8
24
17
#if TEXTDECODER == 2
25
18
$UTF8Decoder : "new TextDecoder()" ,
26
19
#else
27
20
$UTF8Decoder : "typeof TextDecoder != 'undefined' ? new TextDecoder() : undefined" ,
28
21
#endif
29
22
23
+ // TextEncoder constructor defaults to UTF-8
24
+ #if TEXTENCODER == 2
25
+ $UTF8Encoder : "new TextEncoder()" ,
26
+ #else
27
+ $UTF8Encoder : "typeof TextEncoder != 'undefined' ? new TextEncoder() : undefined" ,
28
+ #endif
29
+
30
30
$findStringEnd : ( heapOrArray , idx , maxBytesToRead , ignoreNul ) => {
31
31
var maxIdx = idx + maxBytesToRead ;
32
32
if ( ignoreNul ) return maxIdx ;
@@ -158,9 +158,14 @@ addToLibrary({
158
158
* terminator.
159
159
* @return {number } The number of bytes written, EXCLUDING the null terminator.
160
160
*/
161
+ $stringToUTF8Array__deps : [
162
+ #if TEXTENCODER == 2
163
+ '$UTF8Encoder' ,
164
+ #endif
161
165
#if ASSERTIONS
162
- $stringToUTF8Array__deps : [ '$warnOnce' ] ,
166
+ '$warnOnce' ,
163
167
#endif
168
+ ] ,
164
169
$stringToUTF8Array : ( str , heap , outIdx , maxBytesToWrite ) => {
165
170
#if CAN_ADDRESS_2GB
166
171
outIdx >>>= 0 ;
@@ -173,6 +178,32 @@ addToLibrary({
173
178
if ( ! ( maxBytesToWrite > 0 ) )
174
179
return 0 ;
175
180
181
+ #if TEXTENCODER == 2
182
+ // Always use TextEncoder when TEXTENCODER == 2
183
+ var encoded = UTF8Encoder . encode ( str ) ;
184
+ var bytesToWrite = Math . min ( encoded . length , maxBytesToWrite - 1 ) ; // -1 for null terminator
185
+
186
+ for ( var i = 0 ; i < bytesToWrite ; ++ i ) {
187
+ heap [ outIdx + i ] = encoded [ i ] ;
188
+ }
189
+ // Null-terminate the string
190
+ heap [ outIdx + bytesToWrite ] = 0 ;
191
+ return bytesToWrite ;
192
+ #else
193
+ // When using conditional TextEncoder, use it for longer strings if available
194
+ if ( str . length > 16 && UTF8Encoder ) {
195
+ var encoded = UTF8Encoder . encode ( str ) ;
196
+ var bytesToWrite = Math . min ( encoded . length , maxBytesToWrite - 1 ) ; // -1 for null terminator
197
+
198
+ for ( var i = 0 ; i < bytesToWrite ; ++ i ) {
199
+ heap [ outIdx + i ] = encoded [ i ] ;
200
+ }
201
+ // Null-terminate the string
202
+ heap [ outIdx + bytesToWrite ] = 0 ;
203
+ return bytesToWrite ;
204
+ }
205
+
206
+ // Fallback: manual UTF-8 encoding
176
207
var startIdx = outIdx ;
177
208
var endIdx = outIdx + maxBytesToWrite - 1 ; // -1 for string null terminator.
178
209
for ( var i = 0 ; i < str . length ; ++ i ) {
@@ -209,6 +240,7 @@ addToLibrary({
209
240
// Null-terminate the pointer to the buffer.
210
241
heap [ outIdx ] = 0 ;
211
242
return outIdx - startIdx ;
243
+ #endif // TEXTENCODER == 2
212
244
} ,
213
245
214
246
/**
@@ -229,14 +261,47 @@ addToLibrary({
229
261
} ,
230
262
231
263
/**
232
- * Returns the number of bytes the given JavaScript string takes if encoded as a
264
+ * Returns the number of bytes the given JavaScript string takes if encoded as a
233
265
* UTF8 byte array, EXCLUDING the null terminator byte.
234
266
*
235
- * @param {string } str - The JavaScript string to operate on.
236
- * @return {number } The length , in bytes, of the UTF-8 encoded string.
267
+ * @param {string } str - JavaScript string to operate on
268
+ * @return {number } Length , in bytes, of the UTF8 encoded string.
237
269
*/
270
+ $lengthBytesUTF8__deps : [
271
+ #if TEXTENCODER == 2
272
+ '$UTF8Encoder' ,
273
+ #endif
274
+ ] ,
238
275
$lengthBytesUTF8 : ( str ) => {
276
+ #if TEXTENCODER == 2
277
+ // Always use TextEncoder when TEXTENCODER == 2
239
278
return UTF8Encoder . encode ( str ) . length ;
279
+ #else
280
+ // When using conditional TextEncoder, use it for longer strings if available
281
+ if ( str . length > 16 && UTF8Encoder ) {
282
+ return UTF8Encoder . encode ( str ) . length ;
283
+ }
284
+
285
+ // Fallback: manual calculation
286
+ var len = 0 ;
287
+ for ( var i = 0 ; i < str . length ; ++ i ) {
288
+ // Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code
289
+ // unit, not a Unicode code point of the character! So decode
290
+ // UTF16->UTF32->UTF8.
291
+ // See http://unicode.org/faq/utf_bom.html#utf16-3
292
+ var c = str . charCodeAt ( i ) ; // possibly a lead surrogate
293
+ if ( c <= 0x7F ) {
294
+ len ++ ;
295
+ } else if ( c <= 0x7FF ) {
296
+ len += 2 ;
297
+ } else if ( c >= 0xD800 && c <= 0xDFFF ) {
298
+ len += 4 ; ++ i ;
299
+ } else {
300
+ len += 3 ;
301
+ }
302
+ }
303
+ return len ;
304
+ #endif // TEXTENCODER == 2
240
305
} ,
241
306
242
307
$intArrayFromString__docs : '/** @type {function(string, boolean=, number=)} */' ,
0 commit comments