Update libstrings.js

PtPrashantTripathi · PtPrashantTripathi · commit 955d2f3aae4b · 2025-09-03T16:27:48.000+05:30
diff --git a/src/lib/libstrings.js b/src/lib/libstrings.js
@@ -13,20 +13,20 @@
 #endif
 
 addToLibrary({
-  // TextEncoder constructor defaults to UTF-8
-#if TEXTENCODER == 2
-  $UTF8Encoder: "new TextEncoder()",
-#else
-  $UTF8Decoder: "typeof TextEncoder != 'undefined' ? new TextEncoder() : undefined",
-#endif
-
   // TextDecoder constructor defaults to UTF-8
 #if TEXTDECODER == 2
   $UTF8Decoder: "new TextDecoder()",
 #else
   $UTF8Decoder: "typeof TextDecoder != 'undefined' ? new TextDecoder() : undefined",
 #endif
 
+  // TextEncoder constructor defaults to UTF-8
+#if TEXTENCODER == 2
+  $UTF8Encoder: "new TextEncoder()",
+#else
+  $UTF8Encoder: "typeof TextEncoder != 'undefined' ? new TextEncoder() : undefined",
+#endif
+
   $findStringEnd: (heapOrArray, idx, maxBytesToRead, ignoreNul) => {
     var maxIdx = idx + maxBytesToRead;
     if (ignoreNul) return maxIdx;
@@ -158,9 +158,14 @@ addToLibrary({
    *                                   terminator.
    * @return {number} The number of bytes written, EXCLUDING the null terminator.
    */
+  $stringToUTF8Array__deps: [
+#if TEXTENCODER == 2
+    '$UTF8Encoder',
+#endif
 #if ASSERTIONS
-  $stringToUTF8Array__deps: ['$warnOnce'],
+    '$warnOnce',
 #endif
+  ],
   $stringToUTF8Array: (str, heap, outIdx, maxBytesToWrite) => {
 #if CAN_ADDRESS_2GB
     outIdx >>>= 0;
@@ -173,6 +178,32 @@ addToLibrary({
     if (!(maxBytesToWrite > 0))
       return 0;
 
+#if TEXTENCODER == 2
+    // Always use TextEncoder when TEXTENCODER == 2
+    var encoded = UTF8Encoder.encode(str);
+    var bytesToWrite = Math.min(encoded.length, maxBytesToWrite - 1); // -1 for null terminator
+    
+    for (var i = 0; i < bytesToWrite; ++i) {
+      heap[outIdx + i] = encoded[i];
+    }
+    // Null-terminate the string
+    heap[outIdx + bytesToWrite] = 0;
+    return bytesToWrite;
+#else
+    // When using conditional TextEncoder, use it for longer strings if available
+    if (str.length > 16 && UTF8Encoder) {
+      var encoded = UTF8Encoder.encode(str);
+      var bytesToWrite = Math.min(encoded.length, maxBytesToWrite - 1); // -1 for null terminator
+      
+      for (var i = 0; i < bytesToWrite; ++i) {
+        heap[outIdx + i] = encoded[i];
+      }
+      // Null-terminate the string
+      heap[outIdx + bytesToWrite] = 0;
+      return bytesToWrite;
+    }
+    
+    // Fallback: manual UTF-8 encoding
     var startIdx = outIdx;
     var endIdx = outIdx + maxBytesToWrite - 1; // -1 for string null terminator.
     for (var i = 0; i < str.length; ++i) {
@@ -209,6 +240,7 @@ addToLibrary({
     // Null-terminate the pointer to the buffer.
     heap[outIdx] = 0;
     return outIdx - startIdx;
+#endif // TEXTENCODER == 2
   },
 
   /**
@@ -229,14 +261,47 @@ addToLibrary({
   },
 
   /**
-   * Returns the number of bytes the given JavaScript string takes if encoded as a
+   * Returns the number of bytes the given Javascript string takes if encoded as a
    * UTF8 byte array, EXCLUDING the null terminator byte.
    *
-   * @param {string} str - The JavaScript string to operate on.
-   * @return {number} The length, in bytes, of the UTF-8 encoded string.
+   * @param {string} str - JavaScript string to operator on
+   * @return {number} Length, in bytes, of the UTF8 encoded string.
    */
+  $lengthBytesUTF8__deps: [
+#if TEXTENCODER == 2
+    '$UTF8Encoder',
+#endif
+  ],
   $lengthBytesUTF8: (str) => {
+#if TEXTENCODER == 2
+    // Always use TextEncoder when TEXTENCODER == 2
     return UTF8Encoder.encode(str).length;
+#else
+    // When using conditional TextEncoder, use it for longer strings if available
+    if (str.length > 16 && UTF8Encoder) {
+      return UTF8Encoder.encode(str).length;
+    }
+    
+    // Fallback: manual calculation
+    var len = 0;
+    for (var i = 0; i < str.length; ++i) {
+      // Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code
+      // unit, not a Unicode code point of the character! So decode
+      // UTF16->UTF32->UTF8.
+      // See http://unicode.org/faq/utf_bom.html#utf16-3
+      var c = str.charCodeAt(i); // possibly a lead surrogate
+      if (c <= 0x7F) {
+        len++;
+      } else if (c <= 0x7FF) {
+        len += 2;
+      } else if (c >= 0xD800 && c <= 0xDFFF) {
+        len += 4; ++i;
+      } else {
+        len += 3;
+      }
+    }
+    return len;
+#endif // TEXTENCODER == 2
   },
 
   $intArrayFromString__docs: '/** @type {function(string, boolean=, number=)} */',