Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 83 additions & 34 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,34 @@
// https://encoding.spec.whatwg.org

const {
ArrayPrototypeMap,
Boolean,
ObjectDefineProperties,
ObjectGetOwnPropertyDescriptors,
ObjectSetPrototypeOf,
ObjectValues,
SafeArrayIterator,
SafeMap,
StringPrototypeSlice,
Symbol,
SymbolToStringTag,
Uint8Array,
} = primordials;

const { FastBuffer } = require('internal/buffer');

const {
ERR_ENCODING_NOT_SUPPORTED,
ERR_INVALID_ARG_TYPE,
ERR_INVALID_THIS,
ERR_NO_ICU,
} = require('internal/errors').codes;
// Symbol keys under which TextDecoder instances keep their internal state.
// Symbols are used so the state is not reachable through ordinary string
// property names.
// Optional per-instance decode function; when set, decode() calls it directly.
const kMethod = Symbol('method');
const kHandle = Symbol('handle');
const kFlags = Symbol('flags');
const kEncoding = Symbol('encoding');
// Brand marker checked by validateDecoder(); must be exactly `true`.
const kDecoder = Symbol('decoder');
const kFatal = Symbol('kFatal');
// True while the decoder is eligible for the direct decodeUTF8() call.
const kUTF8FastPath = Symbol('kUTF8FastPath');
const kWindows1252FastPath = Symbol('kWindows1252FastPath');
const kIgnoreBOM = Symbol('kIgnoreBOM');

const {
Expand All @@ -55,11 +58,9 @@ const {
encodeIntoResults,
encodeUtf8String,
decodeUTF8,
decodeWindows1252,
decodeSingleByte,
} = binding;

const { Buffer } = require('buffer');

function validateDecoder(obj) {
if (obj == null || obj[kDecoder] !== true)
throw new ERR_INVALID_THIS('TextDecoder');
Expand All @@ -69,7 +70,49 @@ const CONVERTER_FLAGS_FLUSH = 0x1;
const CONVERTER_FLAGS_FATAL = 0x2;
const CONVERTER_FLAGS_IGNORE_BOM = 0x4;

// Shared zero-length buffer: the default `input` of decode() and the fallback
// returned by parseInput() when a view cannot be created. It is a FastBuffer
// so that it has the same type as every other value parseInput() returns.
// Declared exactly once; a second `const empty` would be a SyntaxError.
const empty = new FastBuffer();

// Has to be synced with src/
// Names of the single-byte encodings that are decoded via decodeSingleByte().
// The position of each name in this array is significant: it becomes the
// numeric key stored in `encodingsSinglebyte` and handed to
// decodeSingleByte(), so entries must not be reordered or inserted in the
// middle without updating the native side accordingly.
const namesSinglebyte = [
'ibm866',
'koi8-r',
'koi8-u',
'macintosh',
'x-mac-cyrillic',
'iso-8859-2',
'iso-8859-3',
'iso-8859-4',
'iso-8859-5',
'iso-8859-6',
'iso-8859-7',
'iso-8859-8',
'iso-8859-8-i',
'iso-8859-10',
'iso-8859-13',
'iso-8859-14',
'iso-8859-15',
'iso-8859-16',
'windows-874',
'windows-1250',
'windows-1251',
'windows-1252',
'windows-1253',
'windows-1254',
'windows-1255',
'windows-1256',
'windows-1257',
'windows-1258',
'x-user-defined', // Has to be last, special case
];

// Maps each single-byte encoding name to its index in `namesSinglebyte`.
const encodingsSinglebyte = new SafeMap(
  new SafeArrayIterator(
    ArrayPrototypeMap(namesSinglebyte, (name, index) => [name, index]),
  ),
);

// Tells whether `enc` is one of the names listed in `namesSinglebyte`.
const isSinglebyteEncoding = (enc) => {
  return encodingsSinglebyte.has(enc);
};

/**
 * Builds a decode function bound to one single-byte encoding.
 * @param {string} encoding Name to look up in `encodingsSinglebyte`.
 * @param {boolean} fatal Forwarded unchanged to `decodeSingleByte()`.
 * @returns {Function} A one-argument function that passes its argument,
 *   the encoding's numeric key and `fatal` to `decodeSingleByte()` and
 *   returns that call's result.
 * @throws {ERR_ENCODING_NOT_SUPPORTED} If `encoding` has no entry in
 *   `encodingsSinglebyte`.
 */
function createSinglebyteDecoder(encoding, fatal) {
  const index = encodingsSinglebyte.get(encoding);
  // Compare against undefined explicitly: 0 is a valid key.
  if (index === undefined) {
    throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
  }
  return (buf) => {
    return decodeSingleByte(buf, index, fatal);
  };
}

const encodings = new SafeMap([
['unicode-1-1-utf-8', 'utf-8'],
Expand Down Expand Up @@ -387,6 +430,24 @@ ObjectDefineProperties(
[SymbolToStringTag]: { __proto__: null, configurable: true, value: 'TextEncoder' },
});

/**
 * Wraps decoder input in a FastBuffer view over the same memory.
 * @param {ArrayBuffer|SharedArrayBuffer|ArrayBufferView} input
 * @returns {FastBuffer} A view over `input`, or the shared `empty` buffer
 *   when constructing the view throws.
 * @throws {ERR_INVALID_ARG_TYPE} If `input` is neither an (any-)ArrayBuffer
 *   nor an ArrayBufferView.
 */
function parseInput(input) {
  const isBuffer = isAnyArrayBuffer(input);
  if (!isBuffer && !isArrayBufferView(input)) {
    throw new ERR_INVALID_ARG_TYPE('input', ['ArrayBuffer', 'ArrayBufferView'], input);
  }

  try {
    // Views are re-wrapped byte-wise so that element size does not matter.
    return isBuffer ?
      new FastBuffer(input) :
      new FastBuffer(input.buffer, input.byteOffset, input.byteLength);
  } catch {
    // Deliberate best effort: an unusable buffer decodes as empty input.
    return empty;
  }
}

const TextDecoder =
internalBinding('config').hasIntl ?
makeTextDecoderICU() :
Expand Down Expand Up @@ -420,10 +481,12 @@ function makeTextDecoderICU() {
this[kFatal] = Boolean(options?.fatal);
// Only support fast path for UTF-8.
this[kUTF8FastPath] = enc === 'utf-8';
this[kWindows1252FastPath] = enc === 'windows-1252';
this[kHandle] = undefined;
this[kMethod] = undefined;

if (!this[kUTF8FastPath] && !this[kWindows1252FastPath]) {
if (isSinglebyteEncoding(this.encoding)) {
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
} else if (!this[kUTF8FastPath]) {
this.#prepareConverter();
}
}
Expand All @@ -438,22 +501,18 @@ function makeTextDecoderICU() {

decode(input = empty, options = kEmptyObject) {
validateDecoder(this);
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

if (this[kMethod]) return this[kMethod](input);

this[kUTF8FastPath] &&= !(options?.stream);
this[kWindows1252FastPath] &&= !(options?.stream);

if (this[kUTF8FastPath]) {
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
}

if (this[kWindows1252FastPath]) {
return decodeWindows1252(input, this[kIgnoreBOM], this[kFatal]);
}

this.#prepareConverter();

validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

let flags = 0;
if (options !== null)
flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
Expand All @@ -476,7 +535,7 @@ function makeTextDecoderJS() {
const kBOMSeen = Symbol('BOM seen');

/**
 * Reports whether this TextDecoder implementation can decode `encoding`.
 * @param {string} encoding Encoding name to check.
 * @returns {boolean} `true` for 'utf-8', 'utf-16le' and every encoding
 *   accepted by `isSinglebyteEncoding()`; `false` otherwise.
 */
function hasConverter(encoding) {
  // A single return: an earlier utf-8/utf-16le-only return would make the
  // single-byte check unreachable.
  return encoding === 'utf-8' ||
    encoding === 'utf-16le' ||
    isSinglebyteEncoding(encoding);
}

class TextDecoder {
Expand All @@ -502,30 +561,20 @@ function makeTextDecoderJS() {
this[kFlags] = flags;
this[kEncoding] = enc;
this[kBOMSeen] = false;
this[kMethod] = undefined;

if (isSinglebyteEncoding(this.encoding)) {
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
}
}

decode(input = empty, options = kEmptyObject) {
validateDecoder(this);
if (isAnyArrayBuffer(input)) {
try {
input = Buffer.from(input);
} catch {
input = empty;
}
} else if (isArrayBufferView(input)) {
try {
input = Buffer.from(input.buffer, input.byteOffset,
input.byteLength);
} catch {
input = empty;
}
} else {
throw new ERR_INVALID_ARG_TYPE('input',
['ArrayBuffer', 'ArrayBufferView'],
input);
}
input = parseInput(input);
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

if (this[kMethod]) return this[kMethod](input);

if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
this[kBOMSeen] = false;
}
Expand Down
Loading