From e9e92526679b486f31fc4bd73433a331a922b0e9 Mon Sep 17 00:00:00 2001
From: Nikita Skovoroda <chalkerx@gmail.com>
Date: Tue, 27 Jan 2026 22:12:49 +0400
Subject: [PATCH 1/3] lib: use utf8 fast path for streaming TextDecoder

---
 lib/internal/encoding.js                      | 59 +++++++++++++--
 lib/internal/encoding/util.js                 | 72 +++++++++++++++++++
 test/parallel/test-bootstrap-modules.js       |  1 +
 ...test-whatwg-encoding-custom-textdecoder.js | 53 ++++----------
 4 files changed, 138 insertions(+), 47 deletions(-)
 create mode 100644 lib/internal/encoding/util.js

diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js
index 5523c70a157225..7d4747abc23bb9 100644
--- a/lib/internal/encoding.js
+++ b/lib/internal/encoding.js
@@ -28,11 +28,13 @@ const kHandle = Symbol('handle');
 const kFlags = Symbol('flags');
 const kEncoding = Symbol('encoding');
 const kDecoder = Symbol('decoder');
+const kChunk = Symbol('chunk');
 const kFatal = Symbol('kFatal');
 const kUTF8FastPath = Symbol('kUTF8FastPath');
 const kIgnoreBOM = Symbol('kIgnoreBOM');
 
 const { isSinglebyteEncoding, createSinglebyteDecoder } = require('internal/encoding/single-byte');
+const { unfinishedBytesUtf8, mergePrefixUtf8 } = require('internal/encoding/util');
 
 const {
   getConstructorOf,
@@ -447,9 +449,11 @@ class TextDecoder {
     this[kUTF8FastPath] = false;
     this[kHandle] = undefined;
     this[kSingleByte] = undefined; // Does not care about streaming or BOM
+    this[kChunk] = null; // A copy of previous streaming tail or null
 
     if (enc === 'utf-8') {
       this[kUTF8FastPath] = true;
+      this[kBOMSeen] = false;
     } else if (isSinglebyteEncoding(enc)) {
       this[kSingleByte] = createSinglebyteDecoder(enc, this[kFatal]);
     } else {
@@ -458,7 +462,6 @@ class TextDecoder {
   }
 
   #prepareConverter() {
-    if (this[kHandle] !== undefined) return;
     if (hasIntl) {
       let icuEncoding = this[kEncoding];
       if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
@@ -466,7 +469,7 @@ class TextDecoder {
       if (handle === undefined)
         throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
       this[kHandle] = handle;
-    } else if (this[kEncoding] === 'utf-8' || this[kEncoding] === 'utf-16le') {
+    } else if (this[kEncoding] === 'utf-16le') {
       if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
       this[kHandle] = new (lazyStringDecoder())(this[kEncoding]);
       this[kBOMSeen] = false;
@@ -483,11 +486,55 @@ class TextDecoder {
 
     const stream = options?.stream;
     if (this[kUTF8FastPath]) {
-      if (!stream) return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
-      this[kUTF8FastPath] = false;
-    }
+      const chunk = this[kChunk];
+      const ignoreBom = this[kIgnoreBOM] || this[kBOMSeen];
+      if (!stream) {
+        this[kBOMSeen] = false;
+        if (!chunk) return decodeUTF8(input, ignoreBom, this[kFatal]);
+      }
+
+      let u = parseInput(input);
+      if (u.length === 0 && stream) return ''; // no state change
+      let prefix;
+      if (chunk) {
+        const merged = mergePrefixUtf8(u, this[kChunk]);
+        if (u.length < 3) {
+          u = merged; // Might be unfinished, but fully consumed old u
+        } else {
+          prefix = merged; // Stops at complete chunk
+          const add = prefix.length - this[kChunk].length;
+          if (add > 0) u = u.subarray(add);
+        }
+
+        this[kChunk] = null;
+      }
 
-    this.#prepareConverter();
+      if (stream) {
+        const trail = unfinishedBytesUtf8(u, u.length);
+        if (trail > 0) {
+          this[kChunk] = new FastBuffer(u.subarray(-trail)); // copy
+          if (!prefix && trail === u.length) return ''; // No further state change
+          u = u.subarray(0, -trail);
+        }
+      }
+
+      try {
+        const res = (prefix ? decodeUTF8(prefix, ignoreBom, this[kFatal]) : '') +
+                    decodeUTF8(u, ignoreBom || prefix, this[kFatal]);
+
+        // "BOM seen" is set on the current decode call only if it did not error,
+        // in "serialize I/O queue" after decoding
+        // We don't get here if we had no complete data to process,
+        // and we don't want BOM processing after that if streaming
+        if (stream) this[kBOMSeen] = true;
+
+        return res;
+      } catch (e) {
+        this[kChunk] = null; // Reset unfinished chunk on errors
+        // The correct way per spec seems to be not destroying the decoder state (aka BOM here) in stream mode
+        throw e;
+      }
+    }
 
     if (hasIntl) {
       const flags = stream ? 0 : CONVERTER_FLAGS_FLUSH;
diff --git a/lib/internal/encoding/util.js b/lib/internal/encoding/util.js
new file mode 100644
index 00000000000000..107a0f41b5d811
--- /dev/null
+++ b/lib/internal/encoding/util.js
@@ -0,0 +1,72 @@
+// From https://npmjs.com/package/@exodus/bytes
+// Copyright Exodus Movement. Licensed under MIT License.
+
+'use strict';
+
+const {
+  Uint8Array,
+} = primordials;
+
+
+/**
+ * Get a number of last bytes in an Uint8Array `data` ending at `len` that don't
+ * form a codepoint yet, but can be a part of a single codepoint on more data.
+ * @param {Uint8Array} data Uint8Array of potentially UTF-8 bytes
+ * @param {number} len Position to look behind from
+ * @returns {number} Number of unfinished potentially valid UTF-8 bytes ending at position `len`
+ */
+function unfinishedBytesUtf8(data, len) {
+  // 0-3
+  let pos = 0;
+  while (pos < 2 && pos < len && (data[len - pos - 1] & 0xc0) === 0x80) pos++; // Go back 0-2 trailing bytes
+  if (pos === len) return 0; // no space for lead
+  const lead = data[len - pos - 1];
+  if (lead < 0xc2 || lead > 0xf4) return 0; // not a lead
+  if (pos === 0) return 1; // Nothing to recheck, we have only lead, return it. 2-byte must return here
+  if (lead < 0xe0 || (lead < 0xf0 && pos >= 2)) return 0; // 2-byte, or 3-byte or less and we already have 2 trailing
+  const lower = lead === 0xf0 ? 0x90 : lead === 0xe0 ? 0xa0 : 0x80;
+  const upper = lead === 0xf4 ? 0x8f : lead === 0xed ? 0x9f : 0xbf;
+  const next = data[len - pos];
+  return next >= lower && next <= upper ? pos + 1 : 0;
+}
+
+/**
+ * Merge prefix `chunk` with `data` and return new combined prefix.
+ * For data.length < 3, fully consumes data and can return unfinished data,
+ * otherwise returns a prefix with no unfinished bytes
+ * @param {Uint8Array} data Uint8Array of potentially UTF-8 bytes
+ * @param {Uint8Array} chunk Prefix to prepend before `data`
+ * @returns {Uint8Array} If data.length >= 3: an Uint8Array containing `chunk` and a slice of `data`
+ *   so that the result has no unfinished UTF-8 codepoints. If data.length < 3: concat(chunk, data).
+ */
+function mergePrefixUtf8(data, chunk) {
+  if (data.length === 0) return chunk;
+  if (data.length < 3) {
+    // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
+    const res = new Uint8Array(data.length + chunk.length);
+    res.set(chunk);
+    res.set(data, chunk.length);
+    return res;
+  }
+
+  // Slice off a small portion of data into prefix chunk so we can decode them separately without extending array size
+  const temp = new Uint8Array(chunk.length + 3); // We have 1-3 bytes and need 1-3 more bytes
+  temp.set(chunk);
+  temp.set(data.subarray(0, 3), chunk.length);
+
+  // Stop at the first offset where unfinished bytes reaches 0 or fits into data
+  // If that doesn't happen (data too short), just concat chunk and data completely (above)
+  for (let i = 1; i <= 3; i++) {
+    const unfinished = unfinishedBytesUtf8(temp, chunk.length + i); // 0-3
+    if (unfinished <= i) {
+      // Always reachable at 3, but we still need 'unfinished' value for it
+      const add = i - unfinished; // 0-3
+      return add > 0 ? temp.subarray(0, chunk.length + add) : chunk;
+    }
+  }
+
+  // Unreachable
+  return null;
+}
+
+module.exports = { unfinishedBytesUtf8, mergePrefixUtf8 };
diff --git a/test/parallel/test-bootstrap-modules.js b/test/parallel/test-bootstrap-modules.js
index d69a299625d9f2..05d7830a2ab1d2 100644
--- a/test/parallel/test-bootstrap-modules.js
+++ b/test/parallel/test-bootstrap-modules.js
@@ -89,6 +89,7 @@ expected.beforePreExec = new Set([
   'Internal Binding fs',
   'NativeModule internal/encoding',
   'NativeModule internal/encoding/single-byte',
+  'NativeModule internal/encoding/util',
   'NativeModule internal/blob',
   'NativeModule internal/fs/utils',
   'NativeModule fs',
diff --git a/test/parallel/test-whatwg-encoding-custom-textdecoder.js b/test/parallel/test-whatwg-encoding-custom-textdecoder.js
index eabe54b36d7674..9734825b6b27a5 100644
--- a/test/parallel/test-whatwg-encoding-custom-textdecoder.js
+++ b/test/parallel/test-whatwg-encoding-custom-textdecoder.js
@@ -80,20 +80,8 @@ assert(TextDecoder);
 
 ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => {
   const dec = new TextDecoder(i, { fatal: true });
-  if (common.hasIntl) {
-    dec.decode(buf.slice(0, 8), { stream: true });
-    dec.decode(buf.slice(8));
-  } else {
-    assert.throws(
-      () => {
-        dec.decode(buf.slice(0, 8), { stream: true });
-      },
-      {
-        code: 'ERR_NO_ICU',
-        name: 'TypeError',
-        message: '"fatal" option is not supported on Node.js compiled without ICU'
-      });
-  }
+  dec.decode(buf.slice(0, 8), { stream: true });
+  dec.decode(buf.slice(8));
 });
 
 // Test TextDecoder, label undefined, options null
@@ -122,33 +110,16 @@ if (common.hasIntl) {
 // Test TextDecoder inspect with hidden fields
 {
   const dec = new TextDecoder('utf-8', { ignoreBOM: true });
-  if (common.hasIntl) {
-    assert.strictEqual(
-      util.inspect(dec, { showHidden: true }),
-      'TextDecoder {\n' +
-      '  encoding: \'utf-8\',\n' +
-      '  fatal: false,\n' +
-      '  ignoreBOM: true,\n' +
-      '  Symbol(flags): 4,\n' +
-      '  Symbol(handle): undefined\n' +
-      '}'
-    );
-  } else {
-    dec.decode(Uint8Array.of(0), { stream: true });
-    assert.strictEqual(
-      util.inspect(dec, { showHidden: true }),
-      'TextDecoder {\n' +
-      "  encoding: 'utf-8',\n" +
-      '  fatal: false,\n' +
-      '  ignoreBOM: true,\n' +
-      '  Symbol(flags): 4,\n' +
-      '  Symbol(handle): StringDecoder {\n' +
-      "    encoding: 'utf8',\n" +
-      '    Symbol(kNativeDecoder): <Buffer 00 00 00 00 00 00 01>\n' +
-      '  }\n' +
-      '}'
-    );
-  }
+  assert.strictEqual(
+    util.inspect(dec, { showHidden: true }),
+    'TextDecoder {\n' +
+    '  encoding: \'utf-8\',\n' +
+    '  fatal: false,\n' +
+    '  ignoreBOM: true,\n' +
+    '  Symbol(flags): 4,\n' +
+    '  Symbol(handle): undefined\n' +
+    '}'
+  );
 }
 
 

From 155995438c2a1ded915050365d88cc93d06d3f07 Mon Sep 17 00:00:00 2001
From: Nikita Skovoroda <chalkerx@gmail.com>
Date: Wed, 28 Jan 2026 03:08:19 +0400
Subject: [PATCH 2/3] benchmark: add streaming TextDecoder benchmark

---
 benchmark/util/text-decoder-stream.js | 55 +++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 benchmark/util/text-decoder-stream.js

diff --git a/benchmark/util/text-decoder-stream.js b/benchmark/util/text-decoder-stream.js
new file mode 100644
index 00000000000000..16293b5b1375de
--- /dev/null
+++ b/benchmark/util/text-decoder-stream.js
@@ -0,0 +1,55 @@
+'use strict';
+
+const common = require('../common.js');
+
+const bench = common.createBenchmark(main, {
+  encoding: ['utf-8', 'utf-16le'],
+  ignoreBOM: [0, 1],
+  fatal: [0, 1],
+  unicode: [0, 1],
+  len: [256, 1024 * 16, 1024 * 128],
+  chunks: [10],
+  n: [1e3],
+  type: ['SharedArrayBuffer', 'ArrayBuffer', 'Buffer'],
+});
+
+const UNICODE_ALPHA = 'Blåbærsyltetøy';
+const ASCII_ALPHA = 'Blueberry jam';
+
+function main({ encoding, len, unicode, chunks, n, ignoreBOM, type, fatal }) { // Streaming decode, n iterations
+  const decoder = new TextDecoder(encoding, { ignoreBOM, fatal });
+  let buf;
+
+  const fill = Buffer.from(unicode ? UNICODE_ALPHA : ASCII_ALPHA, encoding); // Pattern bytes in `encoding`
+
+  switch (type) {
+    case 'SharedArrayBuffer': {
+      buf = new SharedArrayBuffer(len);
+      Buffer.from(buf).fill(fill); // Buffer view shares memory with buf, so this fills buf itself
+      break;
+    }
+    case 'ArrayBuffer': {
+      buf = new ArrayBuffer(len);
+      Buffer.from(buf).fill(fill); // Buffer view shares memory with buf, so this fills buf itself
+      break;
+    }
+    case 'Buffer': {
+      buf = Buffer.alloc(len, fill);
+      break;
+    }
+  }
+
+  const chunk = Math.ceil(len / chunks); // Bytes handed to each streaming decode() call
+  const max = len - chunk; // Streaming stops here so the tail is left for the final call
+  bench.start();
+  for (let i = 0; i < n; i++) {
+    let pos = 0;
+    while (pos < max) {
+      decoder.decode(buf.slice(pos, pos + chunk), { stream: true });
+      pos += chunk;
+    }
+
+    decoder.decode(buf.slice(pos)); // Final call without `stream` decodes the remaining tail
+  }
+  bench.end(n);
+}

From 398a94edb09fa1cc73b19a7e03c52f4e887735ff Mon Sep 17 00:00:00 2001
From: Nikita Skovoroda <chalkerx@gmail.com>
Date: Wed, 28 Jan 2026 10:12:20 +0400
Subject: [PATCH 3/3] lib: add utf16 fast path for TextDecoder

---
 lib/internal/encoding.js                      | 80 +++++++++----------
 lib/internal/encoding/util.js                 | 57 ++++++++-----
 ...test-whatwg-encoding-custom-textdecoder.js |  2 +-
 3 files changed, 74 insertions(+), 65 deletions(-)

diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js
index 7d4747abc23bb9..5f1655426d5bd5 100644
--- a/lib/internal/encoding.js
+++ b/lib/internal/encoding.js
@@ -20,8 +20,8 @@ const { FastBuffer } = require('internal/buffer');
 const {
   ERR_ENCODING_NOT_SUPPORTED,
   ERR_INVALID_ARG_TYPE,
+  ERR_ENCODING_INVALID_ENCODED_DATA,
   ERR_INVALID_THIS,
-  ERR_NO_ICU,
 } = require('internal/errors').codes;
 const kSingleByte = Symbol('single-byte');
 const kHandle = Symbol('handle');
@@ -30,11 +30,11 @@ const kEncoding = Symbol('encoding');
 const kDecoder = Symbol('decoder');
 const kChunk = Symbol('chunk');
 const kFatal = Symbol('kFatal');
-const kUTF8FastPath = Symbol('kUTF8FastPath');
+const kUnicode = Symbol('kUnicode');
 const kIgnoreBOM = Symbol('kIgnoreBOM');
 
 const { isSinglebyteEncoding, createSinglebyteDecoder } = require('internal/encoding/single-byte');
-const { unfinishedBytesUtf8, mergePrefixUtf8 } = require('internal/encoding/util');
+const { unfinishedBytes, mergePrefix } = require('internal/encoding/util');
 
 const {
   getConstructorOf,
@@ -419,11 +419,24 @@ if (hasIntl) {
 
 const kBOMSeen = Symbol('BOM seen');
 
-let StringDecoder;
-function lazyStringDecoder() {
-  if (StringDecoder === undefined)
-    ({ StringDecoder } = require('string_decoder'));
-  return StringDecoder;
+function fixupDecodedString(res, ignoreBom, fatal, encoding) { // BOM removal, then surrogate handling
+  if (res.length === 0) return ''; // Nothing to strip or validate
+  if (!ignoreBom && res[0] === '\ufeff') res = StringPrototypeSlice(res, 1); // Strip leading BOM unless ignoreBom
+  if (!fatal) return res.toWellFormed(); // Non-fatal: lone surrogates become U+FFFD; fatal mode throws on them below
+  if (!res.isWellFormed()) throw new ERR_ENCODING_INVALID_ENCODED_DATA(encoding, undefined);
+  return res; // Fatal mode and already well-formed
+}
+
+function decodeUTF16le(input, ignoreBom, fatal) { // NOTE(review): verify how ucs2Slice() treats an odd trailing byte
+  return fixupDecodedString(parseInput(input).ucs2Slice(), ignoreBom, fatal, 'utf-16le');
+}
+
+function decodeUTF16be(input, ignoreBom, fatal) { // Decodes UTF-16BE by byte-swapping a copy to little-endian
+  const be = parseInput(input);
+  const le = new FastBuffer(be.length); // Separate buffer so the caller's bytes are never swapped in place
+  le.set(be);
+  le.swap16(); // NOTE(review): Buffer#swap16() throws on odd byte lengths; verify odd-length non-stream input
+  return fixupDecodedString(le.ucs2Slice(), ignoreBom, fatal, 'utf-16be');
 }
 
 class TextDecoder {
@@ -446,33 +459,29 @@ class TextDecoder {
     this[kEncoding] = enc;
     this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
     this[kFatal] = Boolean(options?.fatal);
-    this[kUTF8FastPath] = false;
+    this[kUnicode] = undefined;
     this[kHandle] = undefined;
     this[kSingleByte] = undefined; // Does not care about streaming or BOM
     this[kChunk] = null; // A copy of previous streaming tail or null
 
     if (enc === 'utf-8') {
-      this[kUTF8FastPath] = true;
+      this[kUnicode] = decodeUTF8;
+      this[kBOMSeen] = false;
+    } else if (enc === 'utf-16le') {
+      this[kUnicode] = decodeUTF16le;
+      this[kBOMSeen] = false;
+    } else if (enc === 'utf-16be') {
+      this[kUnicode] = decodeUTF16be;
       this[kBOMSeen] = false;
     } else if (isSinglebyteEncoding(enc)) {
       this[kSingleByte] = createSinglebyteDecoder(enc, this[kFatal]);
-    } else {
-      this.#prepareConverter(); // Need to throw early if we don't support the encoding
-    }
-  }
-
-  #prepareConverter() {
-    if (hasIntl) {
+    } if (hasIntl) {
       let icuEncoding = this[kEncoding];
       if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
       const handle = icuGetConverter(icuEncoding, this[kFlags]);
       if (handle === undefined)
         throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
       this[kHandle] = handle;
-    } else if (this[kEncoding] === 'utf-16le') {
-      if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
-      this[kHandle] = new (lazyStringDecoder())(this[kEncoding]);
-      this[kBOMSeen] = false;
     } else {
       throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
     }
@@ -485,19 +494,19 @@ class TextDecoder {
     if (this[kSingleByte]) return this[kSingleByte](parseInput(input));
 
     const stream = options?.stream;
-    if (this[kUTF8FastPath]) {
+    if (this[kUnicode]) {
       const chunk = this[kChunk];
       const ignoreBom = this[kIgnoreBOM] || this[kBOMSeen];
       if (!stream) {
         this[kBOMSeen] = false;
-        if (!chunk) return decodeUTF8(input, ignoreBom, this[kFatal]);
+        if (!chunk) return this[kUnicode](input, ignoreBom, this[kFatal]);
       }
 
       let u = parseInput(input);
       if (u.length === 0 && stream) return ''; // no state change
       let prefix;
       if (chunk) {
-        const merged = mergePrefixUtf8(u, this[kChunk]);
+        const merged = mergePrefix(u, this[kChunk], this[kEncoding]);
         if (u.length < 3) {
           u = merged; // Might be unfinished, but fully consumed old u
         } else {
@@ -510,7 +519,7 @@ class TextDecoder {
       }
 
       if (stream) {
-        const trail = unfinishedBytesUtf8(u, u.length);
+        const trail = unfinishedBytes(u, u.length, this[kEncoding]);
         if (trail > 0) {
           this[kChunk] = new FastBuffer(u.subarray(-trail)); // copy
           if (!prefix && trail === u.length) return ''; // No further state change
@@ -519,8 +528,8 @@ class TextDecoder {
       }
 
       try {
-        const res = (prefix ? decodeUTF8(prefix, ignoreBom, this[kFatal]) : '') +
-                    decodeUTF8(u, ignoreBom || prefix, this[kFatal]);
+        const res = (prefix ? this[kUnicode](prefix, ignoreBom, this[kFatal]) : '') +
+                    this[kUnicode](u, ignoreBom || prefix, this[kFatal]);
 
         // "BOM seen" is set on the current decode call only if it did not error,
         // in "serialize I/O queue" after decoding
@@ -541,22 +550,7 @@ class TextDecoder {
       return icuDecode(this[kHandle], input, flags, this[kEncoding]);
     }
 
-    input = parseInput(input);
-
-    let result = stream ? this[kHandle].write(input) : this[kHandle].end(input);
-
-    if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
-      // If the very first result in the stream is a BOM, and we are not
-      // explicitly told to ignore it, then we discard it.
-      if (result[0] === '\ufeff') {
-        result = StringPrototypeSlice(result, 1);
-      }
-      this[kBOMSeen] = true;
-    }
-
-    if (!stream) this[kBOMSeen] = false;
-
-    return result;
+    // Unreachable
   }
 }
 
diff --git a/lib/internal/encoding/util.js b/lib/internal/encoding/util.js
index 107a0f41b5d811..80d0cb9fc3028f 100644
--- a/lib/internal/encoding/util.js
+++ b/lib/internal/encoding/util.js
@@ -7,39 +7,54 @@ const {
   Uint8Array,
 } = primordials;
 
-
 /**
  * Get a number of last bytes in an Uint8Array `data` ending at `len` that don't
  * form a codepoint yet, but can be a part of a single codepoint on more data.
- * @param {Uint8Array} data Uint8Array of potentially UTF-8 bytes
+ * @param {Uint8Array} data Uint8Array of potentially UTF-8/UTF-16 bytes
  * @param {number} len Position to look behind from
- * @returns {number} Number of unfinished potentially valid UTF-8 bytes ending at position `len`
+ * @param {string} enc Encoding to use: utf-8, utf-16le, or utf-16be
+ * @returns {number} Number (0-3) of unfinished potentially valid UTF bytes ending at position `len`
  */
-function unfinishedBytesUtf8(data, len) {
-  // 0-3
-  let pos = 0;
-  while (pos < 2 && pos < len && (data[len - pos - 1] & 0xc0) === 0x80) pos++; // Go back 0-2 trailing bytes
-  if (pos === len) return 0; // no space for lead
-  const lead = data[len - pos - 1];
-  if (lead < 0xc2 || lead > 0xf4) return 0; // not a lead
-  if (pos === 0) return 1; // Nothing to recheck, we have only lead, return it. 2-byte must return here
-  if (lead < 0xe0 || (lead < 0xf0 && pos >= 2)) return 0; // 2-byte, or 3-byte or less and we already have 2 trailing
-  const lower = lead === 0xf0 ? 0x90 : lead === 0xe0 ? 0xa0 : 0x80;
-  const upper = lead === 0xf4 ? 0x8f : lead === 0xed ? 0x9f : 0xbf;
-  const next = data[len - pos];
-  return next >= lower && next <= upper ? pos + 1 : 0;
+function unfinishedBytes(data, len, enc) {
+  switch (enc) {
+    case 'utf-8': {
+      // Result is 0-3: a trailing multi-byte sequence that is still incomplete but valid so far
+      let pos = 0;
+      while (pos < 2 && pos < len && (data[len - pos - 1] & 0xc0) === 0x80) pos++; // Go back 0-2 trailing bytes
+      if (pos === len) return 0; // no space for lead
+      const lead = data[len - pos - 1];
+      if (lead < 0xc2 || lead > 0xf4) return 0; // not a lead
+      if (pos === 0) return 1; // Nothing to recheck, we have only lead, return it. 2-byte must return here
+      if (lead < 0xe0 || (lead < 0xf0 && pos >= 2)) return 0; // 2-byte, 3-byte or less and we already have 2 trailing
+      const lower = lead === 0xf0 ? 0x90 : lead === 0xe0 ? 0xa0 : 0x80; // Lowest second byte accepted for this lead
+      const upper = lead === 0xf4 ? 0x8f : lead === 0xed ? 0x9f : 0xbf; // Highest second byte accepted for this lead
+      const next = data[len - pos]; // The byte right after the lead
+      return next >= lower && next <= upper ? pos + 1 : 0;
+    }
+
+    case 'utf-16le':
+    case 'utf-16be': {
+      // Result is 0-3: an odd trailing byte (1) plus a trailing lone lead surrogate (2)
+      const uneven = len % 2; // Uneven byte length adds 1
+      if (len < 2) return uneven; // No complete code unit to inspect
+      const l = len - uneven - 1; // Index of the last byte of the last complete code unit
+      const last = enc === 'utf-16le' ? (data[l] << 8) ^ data[l - 1] : (data[l - 1] << 8) ^ data[l];
+      return last >= 0xd8_00 && last < 0xdc_00 ? uneven + 2 : uneven; // lone lead adds 2
+    }
+  } // No default case: any other `enc` falls through and returns undefined
 }
 
 /**
  * Merge prefix `chunk` with `data` and return new combined prefix.
  * For data.length < 3, fully consumes data and can return unfinished data,
  * otherwise returns a prefix with no unfinished bytes
- * @param {Uint8Array} data Uint8Array of potentially UTF-8 bytes
+ * @param {Uint8Array} data Uint8Array of potentially UTF-8/UTF-16 bytes
  * @param {Uint8Array} chunk Prefix to prepend before `data`
+ * @param {string} enc Encoding to use: utf-8, utf-16le, or utf-16be
  * @returns {Uint8Array} If data.length >= 3: an Uint8Array containing `chunk` and a slice of `data`
- *   so that the result has no unfinished UTF-8 codepoints. If data.length < 3: concat(chunk, data).
+ *   so that the result has no unfinished codepoints. If data.length < 3: concat(chunk, data).
  */
-function mergePrefixUtf8(data, chunk) {
+function mergePrefix(data, chunk, enc) {
   if (data.length === 0) return chunk;
   if (data.length < 3) {
     // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
@@ -57,7 +72,7 @@ function mergePrefixUtf8(data, chunk) {
   // Stop at the first offset where unfinished bytes reaches 0 or fits into data
   // If that doesn't happen (data too short), just concat chunk and data completely (above)
   for (let i = 1; i <= 3; i++) {
-    const unfinished = unfinishedBytesUtf8(temp, chunk.length + i); // 0-3
+    const unfinished = unfinishedBytes(temp, chunk.length + i, enc); // 0-3
     if (unfinished <= i) {
       // Always reachable at 3, but we still need 'unfinished' value for it
       const add = i - unfinished; // 0-3
@@ -69,4 +84,4 @@ function mergePrefixUtf8(data, chunk) {
   return null;
 }
 
-module.exports = { unfinishedBytesUtf8, mergePrefixUtf8 };
+module.exports = { unfinishedBytes, mergePrefix };
diff --git a/test/parallel/test-whatwg-encoding-custom-textdecoder.js b/test/parallel/test-whatwg-encoding-custom-textdecoder.js
index 9734825b6b27a5..10ef410f5bf77b 100644
--- a/test/parallel/test-whatwg-encoding-custom-textdecoder.js
+++ b/test/parallel/test-whatwg-encoding-custom-textdecoder.js
@@ -101,7 +101,7 @@ assert(TextDecoder);
 }
 
 // Test TextDecoder, UTF-16be
-if (common.hasIntl) {
+{
   const dec = new TextDecoder('utf-16be');
   const res = dec.decode(Buffer.from('test€', 'utf-16le').swap16());
   assert.strictEqual(res, 'test€');