Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 49 additions & 42 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,27 @@ const {
StringPrototypeSlice,
Symbol,
SymbolToStringTag,
Uint8Array,
} = primordials;

const { FastBuffer } = require('internal/buffer');

const {
ERR_ENCODING_NOT_SUPPORTED,
ERR_INVALID_ARG_TYPE,
ERR_INVALID_THIS,
ERR_NO_ICU,
} = require('internal/errors').codes;
const kMethod = Symbol('method');
const kHandle = Symbol('handle');
const kFlags = Symbol('flags');
const kEncoding = Symbol('encoding');
const kDecoder = Symbol('decoder');
const kFatal = Symbol('kFatal');
const kUTF8FastPath = Symbol('kUTF8FastPath');
const kWindows1252FastPath = Symbol('kWindows1252FastPath');
const kIgnoreBOM = Symbol('kIgnoreBOM');

const { isSinglebyteEncoding, createSinglebyteDecoder } = require('internal/encoding/single-byte');

const {
getConstructorOf,
customInspectSymbol: inspect,
Expand All @@ -55,11 +58,8 @@ const {
encodeIntoResults,
encodeUtf8String,
decodeUTF8,
decodeWindows1252,
} = binding;

const { Buffer } = require('buffer');

function validateDecoder(obj) {
if (obj == null || obj[kDecoder] !== true)
throw new ERR_INVALID_THIS('TextDecoder');
Expand All @@ -69,7 +69,7 @@ const CONVERTER_FLAGS_FLUSH = 0x1;
const CONVERTER_FLAGS_FATAL = 0x2;
const CONVERTER_FLAGS_IGNORE_BOM = 0x4;

const empty = new Uint8Array(0);
const empty = new FastBuffer();

const encodings = new SafeMap([
['unicode-1-1-utf-8', 'utf-8'],
Expand Down Expand Up @@ -387,6 +387,24 @@ ObjectDefineProperties(
[SymbolToStringTag]: { __proto__: null, configurable: true, value: 'TextEncoder' },
});

/**
 * Normalizes `decode()` input into a FastBuffer view over the same memory.
 * @param {ArrayBuffer|SharedArrayBuffer|ArrayBufferView} input
 * @returns {FastBuffer} A view over `input`, or the shared `empty` buffer when
 *   the view cannot be constructed.
 * @throws {ERR_INVALID_ARG_TYPE} If `input` is neither an (any) ArrayBuffer
 *   nor an ArrayBufferView.
 */
function parseInput(input) {
  const isBuffer = isAnyArrayBuffer(input);
  if (!isBuffer && !isArrayBufferView(input)) {
    throw new ERR_INVALID_ARG_TYPE('input', ['ArrayBuffer', 'ArrayBufferView'], input);
  }

  try {
    if (isBuffer) {
      return new FastBuffer(input);
    }
    return new FastBuffer(input.buffer, input.byteOffset, input.byteLength);
  } catch {
    // Constructing the view failed; decode as if no bytes were given.
    return empty;
  }
}

const TextDecoder =
internalBinding('config').hasIntl ?
makeTextDecoderICU() :
Expand Down Expand Up @@ -420,10 +438,12 @@ function makeTextDecoderICU() {
this[kFatal] = Boolean(options?.fatal);
// Only support fast path for UTF-8.
this[kUTF8FastPath] = enc === 'utf-8';
this[kWindows1252FastPath] = enc === 'windows-1252';
this[kHandle] = undefined;
this[kMethod] = undefined;

if (!this[kUTF8FastPath] && !this[kWindows1252FastPath]) {
if (isSinglebyteEncoding(this.encoding)) {
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
} else if (!this[kUTF8FastPath]) {
this.#prepareConverter();
}
}
Expand All @@ -438,22 +458,18 @@ function makeTextDecoderICU() {

decode(input = empty, options = kEmptyObject) {
validateDecoder(this);
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

if (this[kMethod]) return this[kMethod](parseInput(input));

this[kUTF8FastPath] &&= !(options?.stream);
this[kWindows1252FastPath] &&= !(options?.stream);

if (this[kUTF8FastPath]) {
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
}

if (this[kWindows1252FastPath]) {
return decodeWindows1252(input, this[kIgnoreBOM], this[kFatal]);
}

this.#prepareConverter();

validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

let flags = 0;
if (options !== null)
flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
Expand Down Expand Up @@ -485,47 +501,40 @@ function makeTextDecoderJS() {
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

const enc = getEncodingFromLabel(encoding);
if (enc === undefined || !hasConverter(enc))
if (enc === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);

let flags = 0;
if (options !== null) {
if (options.fatal) {
throw new ERR_NO_ICU('"fatal" option');
}
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
}

this[kDecoder] = true;
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
this[kHandle] = new (lazyStringDecoder())(enc);
this[kFlags] = flags;
this[kEncoding] = enc;
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
this[kFatal] = Boolean(options?.fatal);
this[kBOMSeen] = false;
this[kMethod] = undefined;

if (isSinglebyteEncoding(enc)) {
this[kMethod] = createSinglebyteDecoder(enc, this[kFatal]);
} else {
if (!hasConverter(enc)) throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
this[kHandle] = new (lazyStringDecoder())(enc);
}
}

decode(input = empty, options = kEmptyObject) {
validateDecoder(this);
if (isAnyArrayBuffer(input)) {
try {
input = Buffer.from(input);
} catch {
input = empty;
}
} else if (isArrayBufferView(input)) {
try {
input = Buffer.from(input.buffer, input.byteOffset,
input.byteLength);
} catch {
input = empty;
}
} else {
throw new ERR_INVALID_ARG_TYPE('input',
['ArrayBuffer', 'ArrayBufferView'],
input);
}
input = parseInput(input);
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

if (this[kMethod]) return this[kMethod](input);

if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
this[kBOMSeen] = false;
}
Expand All @@ -540,9 +549,7 @@ function makeTextDecoderJS() {
this[kHandle].end(input) :
this[kHandle].write(input);

if (result.length > 0 &&
!this[kBOMSeen] &&
!(this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM)) {
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
// If the very first result in the stream is a BOM, and we are not
// explicitly told to ignore it, then we discard it.
if (result[0] === '\ufeff') {
Expand Down
155 changes: 155 additions & 0 deletions lib/internal/encoding/single-byte.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
// Simplified version extracted from https://npmjs.com/package/@exodus/bytes codepath for 1-byte encodings
// Copyright Exodus Movement. Licensed under MIT License.

'use strict';

const {
Array,
ArrayPrototypeFill,
ObjectKeys,
ObjectPrototypeHasOwnProperty,
SafeArrayIterator,
SafeMap,
SafeSet,
StringPrototypeIncludes,
TypedArrayFrom,
TypedArrayOf,
TypedArrayPrototypeIncludes,
TypedArrayPrototypeSet,
Uint16Array,
} = primordials;

const { isAscii } = require('buffer');

const { FastBuffer } = require('internal/buffer');

const {
ERR_ENCODING_NOT_SUPPORTED,
ERR_ENCODING_INVALID_ENCODED_DATA,
} = require('internal/errors').codes;

// Endianness probe: the 16-bit value 0x0102 is laid out as bytes [0x01, 0x02]
// on a big-endian host and [0x02, 0x01] on a little-endian one.
const endianProbe = new FastBuffer(TypedArrayOf(Uint16Array, 0x0102).buffer);
const isBigEndian = endianProbe[1] === 0x02;

// Wraps `x` in a SafeArrayIterator so it can be spread or handed to
// TypedArrayFrom / SafeSet below.
function it(x) {
  return new SafeArrayIterator(x);
}

/* fallback/single-byte.encodings.js */

// Marker used in the index tables for a hole (unmapped offset).
const r = 0xfffd;

// Iterable yielding `count` copies of `value`, meant to be spread into a table.
const repeated = (value, count) => it(ArrayPrototypeFill(new Array(count), value));

// `x` consecutive deltas of 1 (a run of sequential code points).
const e = (x) => repeated(1, x);
// `x` consecutive holes.
const h = (x) => repeated(r, x);

/* eslint-disable @stylistic/js/max-len */

// Index tables from https://encoding.spec.whatwg.org/#legacy-single-byte-encodings
// Each table in the spec lists only the mappings from byte 0x80 onwards, as bytes below that are all ASCII and map as identity
// Here, 0xfffd (replacement charcode) designates a hole (unmapped offset), as not all encodings map all offsets
// All other numbers are deltas from the last seen mapped value, starting with 0x7f (127, highest ASCII)
// Thus, [0x80, 0x81, , 0x83] is stored as [1, 1, r, 2]
// Truncation (length < 128) means that all remaining ones are mapped as identity (offset i => codepoint i), not unmapped
const encodings = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without documentation this is really not maintainable. At the very least, this needs some comments about what these numbers are, how they are determined, etc.

Copy link
Member Author

@ChALkeR ChALkeR Dec 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jasnell A test will cover that explanation, which is why the method to get those is exported
I'll add that test

It will also be able to reproduce those from the spec data (albeit without the common chunks extraction, but that is trivially maintainable e.g. by just omitting that)

Copy link
Member Author

@ChALkeR ChALkeR Dec 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just the tables for 0x80+ ranges from the spec, as e.g. https://encoding.spec.whatwg.org/index-iso-8859-6.txt, with two adjustments for compactness and ease-of-use:

  1. 0xfffd designates a hole (unmapped offset), since those tables are non-contiguous
  2. All other values are deltas from the previously seen entry; the first one is a delta from 0x7f
    This is because values 0x00-0x7f are always mapped as identity and are not even present in the spec tables

E.g. if the table in the spec says

0 0x0080
1 0x0081
5 0x0085
6 0x0086
7 0x0087

The vector is [1, 1, f, f, f, 4, 1, 1] (with f being 0xfffd)

I'll document that in the testcase/generator

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I understand what's happening here but really the comments/explanation needs to be in the source here. I don't think placing the explanation in the test is correct.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.
Also I removed all the common ranges logic to make this easier, size doesn't matter that much here

'__proto__': null,
'ibm866': [913, ...e(47), 8530, 1, 1, -145, 34, 61, 1, -12, -1, 14, -18, 6, 6, -1, -1, -75, 4, 32, -8, -16, -28, 60, 34, 1, -5, -6, 21, -3, -6, -16, 28, -5, 1, -4, 1, -12, -1, -6, 1, 24, -1, -82, -12, 124, -4, 8, 4, -16, -8512, ...e(15), -78, 80, -77, 80, -77, 80, -73, 80, -942, 8553, -8546, 8547, -260, -8306, 9468, -9472],
'iso-8859-10': [...e(33), 100, 14, 16, 8, -2, 14, -143, 148, -43, 80, 6, 23, -208, 189, -32, -154, 85, 14, 16, 8, -2, 14, -128, 133, -43, 80, 6, 23, 7831, -7850, -32, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 1, 1, 117, 7, -121, 1, 1, 1, 146, -144, 154, -152, ...e(5), 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 1, 1, 86, 7, -90, 1, 1, 1, 115, -113, 123, -121, 1, 1, 1, 1, 58],
'iso-8859-13': [...e(33), 8061, -8059, 1, 1, 8058, -8056, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, 1, 1, 1, 8041, -8039, 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 7835],
'iso-8859-14': [...e(33), 7522, 1, -7520, 103, 1, 7423, -7523, 7641, -7639, 7641, -119, 231, -7749, 1, 202, 7334, 1, -7423, 1, 7455, 1, -7563, 7584, 43, -42, 44, -35, 147, -111, 1, -36, -7585, ...e(15), 165, -163, ...e(5), 7572, -7570, ...e(5), 153, -151, ...e(16), 134, -132, ...e(5), 7541, -7539, ...e(5), 122],
'iso-8859-15': [...e(33), 1, 1, 1, 8201, -8199, 187, -185, 186, -184, ...e(10), 202, -200, 1, 1, 199, -197, 1, 1, 151, 1, 37],
'iso-8859-16': [...e(33), 100, 1, 60, 8043, -142, -7870, -185, 186, -184, 367, -365, 206, -204, 205, 1, -203, 1, 91, 54, 59, 7840, -8039, 1, 199, -113, 268, -350, 151, 1, 37, 4, -188, 1, 1, 64, -62, 66, -64, ...e(9), 65, 51, -113, 1, 1, 124, -122, 132, 22, -151, 1, 1, 1, 60, 258, -315, 1, 1, 1, 33, -31, 35, -33, ...e(9), 34, 51, -82, 1, 1, 93, -91, 101, 22, -120, 1, 1, 1, 29, 258],
'iso-8859-2': [...e(33), 100, 468, -407, -157, 153, 29, -179, 1, 184, -2, 6, 21, -204, 208, -2, -203, 85, 470, -409, -142, 138, 29, 364, -527, 169, -2, 6, 21, 355, -351, -2, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374],
'iso-8859-3': [...e(33), 134, 434, -565, 1, r, 128, -125, 1, 136, 46, -64, 22, -135, r, 206, -203, 119, -117, 1, 1, 1, 112, -110, 1, 121, 46, -64, 22, -120, r, 191, -188, 1, 1, r, 2, 70, -2, -65, ...e(8), r, 2, 1, 1, 1, 76, -74, 1, 69, -67, 1, 1, 1, 144, -16, -125, 1, 1, 1, r, 2, 39, -2, -34, ...e(8), r, 2, 1, 1, 1, 45, -43, 1, 38, -36, 1, 1, 1, 113, -16, 380],
'iso-8859-4': [...e(33), 100, 52, 30, -178, 132, 19, -148, 1, 184, -78, 16, 68, -185, 208, -206, 1, 85, 470, -388, -163, 117, 19, 395, -527, 169, -78, 16, 68, -29, 52, -51, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 92, -26, 53, 7, -22, -98, 1, 1, 1, 1, 154, -152, 1, 1, 140, 2, -139, 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 61, -26, 53, 7, -22, -67, 1, 1, 1, 1, 123, -121, 1, 1, 109, 2, 366],
'iso-8859-5': [...e(33), 865, ...e(11), -863, 865, ...e(65), 7367, -7365, ...e(11), -949, 951, 1],
'iso-8859-6': [...e(33), r, r, r, 4, ...h(7), 1384, -1375, ...h(13), 1390, r, r, r, 4, r, 2, ...e(25), r, r, r, r, r, 6, ...e(18), ...h(13)],
'iso-8859-7': [...e(33), 8056, 1, -8054, 8201, 3, -8201, 1, 1, 1, 721, -719, 1, 1, r, 8040, -8037, 1, 1, 1, 721, 1, 1, -719, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r],
'iso-8859-8': [...e(33), r, 2, ...e(7), 46, -44, ...e(14), 62, -60, 1, 1, 1, ...h(32), 8025, -6727, ...e(26), r, r, 6692, 1, r],
'koi8-r': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 8450, ...e(14), -8544, 8545, ...e(10), -9411, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3],
'koi8-u': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 3, 8448, -8446, 1, 8448, 1, 1, 1, 1, -8394, -51, 8448, 1, 1, 1, -8544, 3, 8543, -8541, 1, 8543, 1, 1, 1, 1, -8410, -130, -869, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3],
'macintosh': [69, 1, 2, 2, 8, 5, 6, 5, -1, 2, 2, -1, 2, 2, 2, -1, 2, 1, 2, -1, 2, 1, 2, 2, -1, 2, 2, -1, 5, -1, 2, 1, 7972, -8048, -14, 1, 4, 8059, -8044, 41, -49, -5, 8313, -8302, -12, 8632, -8602, 18, 8518, -8557, 8627, 1, -8640, 16, 8525, 15, -2, -7759, 7787, -8577, 16, 751, -707, 18, -57, -30, 11, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 32, 3, 18, 125, 1, 7872, 1, 8, 1, -5, 1, -7970, 9427, -9419, 121, 7884, 104, -115, 1, 56007, 1, -56033, -8042, 8035, 4, 18, -8046, 8, -9, 10, -3, 5, 1, 1, -3, 7, 1, 63531, -63533, 8, 1, -2, 88, 405, 22, -557, 553, 1, 1, -546, 549, -2, -20],
'windows-1250': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -7888, 7897, -7903, 10, 25, -4, -233, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8129, 7897, -7903, 10, 25, -4, -218, 551, 17, -407, -157, 96, -94, 1, 1, 1, 181, -179, 1, 1, 1, 205, -203, 1, 554, -409, -142, 1, 1, 1, 1, 77, 90, -164, 130, 416, -415, 62, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374],
'windows-1251': [899, 1, 7191, -7111, 7115, 8, -6, 1, 139, -124, -7207, 7216, -7215, 2, -1, 4, 67, 7110, 1, 3, 1, 5, -15, 1, -8060, 8330, -7369, 7137, -7136, 2, -1, 4, -959, 878, 80, -86, -868, 1004, -1002, 1, 858, -856, 859, -857, 1, 1, 1, 857, -855, 1, 853, 80, 59, -988, 1, 1, 922, 7365, -7362, -921, 925, -83, 80, 2, -71, ...e(63)],
'windows-1252': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 240, -238, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 225, -6],
'windows-1253': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 1, 1, 1, 1, 741, 1, -739, 1, 1, 1, 1, 1, 1, r, 2, 1, 1, 1, 8039, -8037, 1, 1, 1, 721, -719, 1, 1, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r],
'windows-1254': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 1, 218, -216, ...e(47), 79, -77, ...e(11), 84, 46, -127, ...e(16), 48, -46, ...e(11), 53, 46],
'windows-1255': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -8094, ...e(7), 8199, -8197, 1, 1, 1, 1, 46, -44, ...e(14), 62, -60, 1, 1, 1, 1, 1265, ...e(19), 45, 1, 1, 1, 1, ...h(7), -36, ...e(26), r, r, 6692, 1, r],
'windows-1256': [8237, -6702, 6556, -7816, 7820, 8, -6, 1, -7515, 7530, -6583, 6592, -7911, 1332, 18, -16, 39, 6505, 1, 3, 1, 5, -15, 1, -6507, 6777, -6801, 6569, -7911, 7865, 1, -6483, -1562, 1388, -1386, ...e(7), 1557, -1555, ...e(14), 1378, -1376, 1, 1, 1, 1377, 162, -160, ...e(21), -1375, 1376, 1, 1, 1, 6, 1, 1, 1, -1379, 1380, -1378, 1379, 1, 1, 1, -1377, 1, 1, 1, 1, 1374, 1, -1372, 1, 1372, 1, 1, 1, -1370, 1371, 1, -1369, 1370, -1368, 1369, -1367, 1, 7954, 1, -6461],
'windows-1257': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 28, 543, -527, -40, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 19, 556, -572, 1, r, 2, 1, 1, r, 2, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, ...e(5), 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 347],
'windows-1258': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -7911, -182, 1, 218, -216, ...e(34), 64, -62, ...e(7), 565, -563, 1, 1, 65, -63, 568, -566, 1, 204, -202, 1, 1, 1, 1, 1, 1, 211, 340, -548, 1, 1, 1, 33, -31, ...e(7), 534, -532, 1, 1, 34, -32, 562, -560, 1, 173, -171, 1, 1, 1, 1, 1, 1, 180, 7931],
'windows-874': [8237, -8235, 1, 1, 1, 8098, -8096, ...e(10), 8072, 1, 3, 1, 5, -15, 1, -8060, ...e(8), 3425, ...e(57), r, r, r, r, 5, ...e(28), r, r, r, r],
'x-mac-cyrillic': [913, ...e(31), 7153, -8048, 992, -1005, 4, 8059, -8044, 848, -856, -5, 8313, -7456, 80, 7694, -7773, 80, 7627, -8557, 8627, 1, -7695, -929, 988, -137, -4, 80, -77, 80, -78, 80, -79, 80, -2, -83, -857, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 875, 80, -79, 80, -7, 7102, 1, 8, 1, -5, 1, -7970, 7975, -7184, 80, -79, 80, 7351, -7445, 80, -2, -31, ...e(30), 7262],
};

/* eslint-enable @stylistic/js/max-len */

/* fallback/single-byte.js + single-byte.node.js, simplified */

// Array-like of length 256: lets TypedArrayFrom build a 256-entry table from
// an index-based mapping callback.
const l256 = { __proto__: null, length: 256 };

/**
 * Builds the byte -> UTF-16 code unit lookup table for a single-byte encoding.
 * @param {string} encoding Encoding name: 'x-user-defined' or a key of `encodings`.
 * @returns {Uint16Array} 256 entries indexed by byte value; 0xfffd marks a
 *   byte that the encoding leaves unmapped.
 * @throws {ERR_ENCODING_NOT_SUPPORTED} If `encoding` has no table.
 */
function getEncoding(encoding) {
  if (encoding === 'x-user-defined') {
    // https://encoding.spec.whatwg.org/#x-user-defined-decoder, 14.5.1. x-user-defined decoder
    return TypedArrayFrom(Uint16Array, l256, (_, i) => (i >= 0x80 ? 0xf700 + i : i));
  }

  if (!ObjectPrototypeHasOwnProperty(encodings, encoding)) {
    throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
  }

  // Start from identity (byte i => code point i). Entries not overwritten
  // below, including the tail of a truncated table, stay identity-mapped.
  const map = TypedArrayFrom(Uint16Array, l256, (_, i) => i);

  // Expand the delta-encoded table: holes are kept as 0xfffd, every other
  // entry is added to the last mapped value, which starts at 0x7f.
  let prev = 127;
  const upper = TypedArrayFrom(Uint16Array, it(encodings[encoding]), (x) => (x === r ? x : (prev += x)));

  // Use the primordial rather than `map.set()` so table construction does not
  // depend on a user-mutable prototype method.
  TypedArrayPrototypeSet(map, upper, 128);
  return map;
}

// Every encoding name this module can decode: all table keys plus the two
// names that have no table entry of their own.
const supported = new SafeSet(it(ObjectKeys(encodings)));
supported.add('iso-8859-8-i');
supported.add('x-user-defined');

/**
 * @param {string} enc Encoding name.
 * @returns {boolean} Whether `enc` is handled by the single-byte decoder.
 */
function isSinglebyteEncoding(enc) {
  return supported.has(enc);
}

// Decoder caches, one per error mode, keyed by the requested encoding name.
const decodersLoose = new SafeMap();
const decodersFatal = new SafeMap();

/**
 * Returns a (cached) decoder function for a single-byte encoding.
 *
 * The cache is keyed by the requested `encoding`, not by the table id, because
 * the decoder closure reports `encoding` in its error. Sharing one closure
 * between 'iso-8859-8' and 'iso-8859-8-i' would make fatal errors name
 * whichever of the two was requested first.
 * @param {string} encoding Encoding name accepted by `isSinglebyteEncoding`.
 * @param {boolean} fatal Whether bytes that are unmapped in the encoding throw
 *   instead of decoding to U+FFFD.
 * @returns {(buf: Buffer) => string} Decoder; expects type-checked Buffer input.
 * @throws {ERR_ENCODING_NOT_SUPPORTED} If no table exists for `encoding`.
 */
function createSinglebyteDecoder(encoding, fatal) {
  const decoders = fatal ? decodersFatal : decodersLoose;
  const cached = decoders.get(encoding);
  if (cached) return cached;

  // 'iso-8859-8-i' uses the 'iso-8859-8' table.
  const id = encoding === 'iso-8859-8-i' ? 'iso-8859-8' : encoding;
  const map = getEncoding(id);
  // Only tables containing holes can ever produce U+FFFD.
  const incomplete = TypedArrayPrototypeIncludes(map, r);

  // Expects type-checked Buffer input
  const decoder = (buf) => {
    if (buf.byteLength === 0) return '';
    if (isAscii(buf)) return buf.latin1Slice(); // .latin1Slice is faster than .asciiSlice
    const o = new Uint16Array(buf.length);
    TypedArrayPrototypeSet(o, buf); // Copy to modify in-place, also those are 16-bit now

    // Map eight code units per iteration, then finish the remainder below.
    let i = 0;
    for (const end7 = o.length - 7; i < end7; i += 8) {
      o[i] = map[o[i]];
      o[i + 1] = map[o[i + 1]];
      o[i + 2] = map[o[i + 2]];
      o[i + 3] = map[o[i + 3]];
      o[i + 4] = map[o[i + 4]];
      o[i + 5] = map[o[i + 5]];
      o[i + 6] = map[o[i + 6]];
      o[i + 7] = map[o[i + 7]];
    }

    for (const end = o.length; i < end; i++) o[i] = map[o[i]];

    // Reinterpret the 16-bit code units as bytes for ucs2Slice(); on a
    // big-endian host each pair is byte-swapped first.
    const b = new FastBuffer(o.buffer, o.byteOffset, o.byteLength);
    if (isBigEndian) b.swap16();
    const string = b.ucs2Slice();
    if (fatal && incomplete && StringPrototypeIncludes(string, '\uFFFD')) {
      throw new ERR_ENCODING_INVALID_ENCODED_DATA(encoding, undefined);
    }
    return string;
  };

  decoders.set(encoding, decoder);
  return decoder;
}

// Module surface: the encoding predicate and the decoder factory, plus the
// table builder (exported for tests).
module.exports = {
isSinglebyteEncoding,
createSinglebyteDecoder,
getEncoding, // for tests
};
Loading
Loading