Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 49 additions & 42 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,27 @@ const {
StringPrototypeSlice,
Symbol,
SymbolToStringTag,
Uint8Array,
} = primordials;

const { FastBuffer } = require('internal/buffer');

const {
ERR_ENCODING_NOT_SUPPORTED,
ERR_INVALID_ARG_TYPE,
ERR_INVALID_THIS,
ERR_NO_ICU,
} = require('internal/errors').codes;
const kMethod = Symbol('method');
const kHandle = Symbol('handle');
const kFlags = Symbol('flags');
const kEncoding = Symbol('encoding');
const kDecoder = Symbol('decoder');
const kFatal = Symbol('kFatal');
const kUTF8FastPath = Symbol('kUTF8FastPath');
const kWindows1252FastPath = Symbol('kWindows1252FastPath');
const kIgnoreBOM = Symbol('kIgnoreBOM');

const { isSinglebyteEncoding, createSinglebyteDecoder } = require('internal/encoding/single-byte');

const {
getConstructorOf,
customInspectSymbol: inspect,
Expand All @@ -55,11 +58,8 @@ const {
encodeIntoResults,
encodeUtf8String,
decodeUTF8,
decodeWindows1252,
} = binding;

const { Buffer } = require('buffer');

function validateDecoder(obj) {
if (obj == null || obj[kDecoder] !== true)
throw new ERR_INVALID_THIS('TextDecoder');
Expand All @@ -69,7 +69,7 @@ const CONVERTER_FLAGS_FLUSH = 0x1;
const CONVERTER_FLAGS_FATAL = 0x2;
const CONVERTER_FLAGS_IGNORE_BOM = 0x4;

const empty = new Uint8Array(0);
const empty = new FastBuffer();

const encodings = new SafeMap([
['unicode-1-1-utf-8', 'utf-8'],
Expand Down Expand Up @@ -387,6 +387,24 @@ ObjectDefineProperties(
[SymbolToStringTag]: { __proto__: null, configurable: true, value: 'TextEncoder' },
});

function parseInput(input) {
if (isAnyArrayBuffer(input)) {
try {
return new FastBuffer(input);
} catch {
return empty;
}
} else if (isArrayBufferView(input)) {
try {
return new FastBuffer(input.buffer, input.byteOffset, input.byteLength);
} catch {
return empty;
}
} else {
throw new ERR_INVALID_ARG_TYPE('input', ['ArrayBuffer', 'ArrayBufferView'], input);
}
}

const TextDecoder =
internalBinding('config').hasIntl ?
makeTextDecoderICU() :
Expand Down Expand Up @@ -420,10 +438,12 @@ function makeTextDecoderICU() {
this[kFatal] = Boolean(options?.fatal);
// Only support fast path for UTF-8.
this[kUTF8FastPath] = enc === 'utf-8';
this[kWindows1252FastPath] = enc === 'windows-1252';
this[kHandle] = undefined;
this[kMethod] = undefined;

if (!this[kUTF8FastPath] && !this[kWindows1252FastPath]) {
if (isSinglebyteEncoding(this.encoding)) {
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
} else if (!this[kUTF8FastPath]) {
this.#prepareConverter();
}
}
Expand All @@ -438,22 +458,18 @@ function makeTextDecoderICU() {

decode(input = empty, options = kEmptyObject) {
validateDecoder(this);
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

if (this[kMethod]) return this[kMethod](parseInput(input));

this[kUTF8FastPath] &&= !(options?.stream);
this[kWindows1252FastPath] &&= !(options?.stream);

if (this[kUTF8FastPath]) {
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
}

if (this[kWindows1252FastPath]) {
return decodeWindows1252(input, this[kIgnoreBOM], this[kFatal]);
}

this.#prepareConverter();

validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

let flags = 0;
if (options !== null)
flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
Expand Down Expand Up @@ -485,47 +501,40 @@ function makeTextDecoderJS() {
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

const enc = getEncodingFromLabel(encoding);
if (enc === undefined || !hasConverter(enc))
if (enc === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);

let flags = 0;
if (options !== null) {
if (options.fatal) {
throw new ERR_NO_ICU('"fatal" option');
}
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
}

this[kDecoder] = true;
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
this[kHandle] = new (lazyStringDecoder())(enc);
this[kFlags] = flags;
this[kEncoding] = enc;
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
this[kFatal] = Boolean(options?.fatal);
this[kBOMSeen] = false;
this[kMethod] = undefined;

if (isSinglebyteEncoding(enc)) {
this[kMethod] = createSinglebyteDecoder(enc, this[kFatal]);
} else {
if (!hasConverter(enc)) throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
this[kHandle] = new (lazyStringDecoder())(enc);
}
}

decode(input = empty, options = kEmptyObject) {
validateDecoder(this);
if (isAnyArrayBuffer(input)) {
try {
input = Buffer.from(input);
} catch {
input = empty;
}
} else if (isArrayBufferView(input)) {
try {
input = Buffer.from(input.buffer, input.byteOffset,
input.byteLength);
} catch {
input = empty;
}
} else {
throw new ERR_INVALID_ARG_TYPE('input',
['ArrayBuffer', 'ArrayBufferView'],
input);
}
input = parseInput(input);
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

if (this[kMethod]) return this[kMethod](input);

if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
this[kBOMSeen] = false;
}
Expand All @@ -540,9 +549,7 @@ function makeTextDecoderJS() {
this[kHandle].end(input) :
this[kHandle].write(input);

if (result.length > 0 &&
!this[kBOMSeen] &&
!(this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM)) {
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
// If the very first result in the stream is a BOM, and we are not
// explicitly told to ignore it, then we discard it.
if (result[0] === '\ufeff') {
Expand Down
155 changes: 155 additions & 0 deletions lib/internal/encoding/single-byte.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
// Simplified version extracted from https://npmjs.com/package/@exodus/bytes codepath for 1-byte encodings
// Copyright Exodus Movement. Licensed under MIT License.

'use strict';

const {
Array,
ArrayPrototypeFill,
ObjectKeys,
ObjectPrototypeHasOwnProperty,
SafeArrayIterator,
SafeMap,
SafeSet,
StringPrototypeIncludes,
TypedArrayFrom,
TypedArrayOf,
TypedArrayPrototypeIncludes,
TypedArrayPrototypeSet,
Uint16Array,
} = primordials;

const { isAscii } = require('buffer');

const { FastBuffer } = require('internal/buffer');

const {
ERR_ENCODING_NOT_SUPPORTED,
ERR_ENCODING_INVALID_ENCODED_DATA,
} = require('internal/errors').codes;

const isBigEndian = new FastBuffer(TypedArrayOf(Uint16Array, 258).buffer)[1] === 2;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: we already check this here, I wonder if it'd be better to make this an internal export?


const it = (x) => new SafeArrayIterator(x);

/* fallback/single-byte.encodings.js */

const r = 0xfffd;
const e = (x) => it(ArrayPrototypeFill(new Array(x), 1));
const h = (x) => it(ArrayPrototypeFill(new Array(x), r));

/* eslint-disable @stylistic/js/max-len */

// Index tables from https://encoding.spec.whatwg.org/#legacy-single-byte-encodings
// Each table in the spec lists only mapping from byte 0x80 onwards, as below that they are all ASCII and mapped as idenity
// Here, 0xfffd (replacement charcode) designates a hole (unmapped offset), as not all encodings map all offsets
// All other numbers are deltas from the last seen mapped value, starting with 0x7f (127, highest ASCII)
// Thus, [0x80, 0x81, , 0x83] is stored as [1, 1, r, 2]
// Truncation (length < 128) means that all remaining ones are mapped as identity (offset i => codepoint i), not unmapped
const encodings = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without documentation this is really not maintainable. At the very least, this needs some comments about what these numbers are, how they are determined, etc.

Copy link
Member Author

@ChALkeR ChALkeR Dec 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jasnell A test will cover that explanation, which is why the method to get those is exported
I'll add that test

It will also be able to reproduce those from the spec data (albeit without the common chunks extraction, but that is trivially maintainable e.g. by just omitting that)

Copy link
Member Author

@ChALkeR ChALkeR Dec 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just the tables for 0x80+ ranges from the spec, as e.g. https://encoding.spec.whatwg.org/index-iso-8859-6.txt, with two adjustments for compactness and ease-of-use:

  1. 0xfffd designates a hole (unmapped offset), those tables are non-continuous
  2. All other values are deltas from the previous seen entry, the first one is delta from 0x7f
    This is because values 0x00-0x7f are always mapped as identity and are not even present in spec tables

E.g. if the table in the spec says

0 0x0080
1 0x0081
5 0x0085
6 0x0086
7 0x0087

The vector is [1, 1, f, f, f, 4, 1, 1] (with f being 0xfffd)

I'll document that in the testcase/generator

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I understand what's happening here but really the comments/explanation needs to be in the source here. I don't think placing the explanation in the test is correct.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.
Also I removed all the common ranges logic to make this easier, size doesn't matter that much here

'__proto__': null,
'ibm866': [913, ...e(47), 8530, 1, 1, -145, 34, 61, 1, -12, -1, 14, -18, 6, 6, -1, -1, -75, 4, 32, -8, -16, -28, 60, 34, 1, -5, -6, 21, -3, -6, -16, 28, -5, 1, -4, 1, -12, -1, -6, 1, 24, -1, -82, -12, 124, -4, 8, 4, -16, -8512, ...e(15), -78, 80, -77, 80, -77, 80, -73, 80, -942, 8553, -8546, 8547, -260, -8306, 9468, -9472],
'iso-8859-10': [...e(33), 100, 14, 16, 8, -2, 14, -143, 148, -43, 80, 6, 23, -208, 189, -32, -154, 85, 14, 16, 8, -2, 14, -128, 133, -43, 80, 6, 23, 7831, -7850, -32, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 1, 1, 117, 7, -121, 1, 1, 1, 146, -144, 154, -152, ...e(5), 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 1, 1, 86, 7, -90, 1, 1, 1, 115, -113, 123, -121, 1, 1, 1, 1, 58],
'iso-8859-13': [...e(33), 8061, -8059, 1, 1, 8058, -8056, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, 1, 1, 1, 8041, -8039, 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 7835],
'iso-8859-14': [...e(33), 7522, 1, -7520, 103, 1, 7423, -7523, 7641, -7639, 7641, -119, 231, -7749, 1, 202, 7334, 1, -7423, 1, 7455, 1, -7563, 7584, 43, -42, 44, -35, 147, -111, 1, -36, -7585, ...e(15), 165, -163, ...e(5), 7572, -7570, ...e(5), 153, -151, ...e(16), 134, -132, ...e(5), 7541, -7539, ...e(5), 122],
'iso-8859-15': [...e(33), 1, 1, 1, 8201, -8199, 187, -185, 186, -184, ...e(10), 202, -200, 1, 1, 199, -197, 1, 1, 151, 1, 37],
'iso-8859-16': [...e(33), 100, 1, 60, 8043, -142, -7870, -185, 186, -184, 367, -365, 206, -204, 205, 1, -203, 1, 91, 54, 59, 7840, -8039, 1, 199, -113, 268, -350, 151, 1, 37, 4, -188, 1, 1, 64, -62, 66, -64, ...e(9), 65, 51, -113, 1, 1, 124, -122, 132, 22, -151, 1, 1, 1, 60, 258, -315, 1, 1, 1, 33, -31, 35, -33, ...e(9), 34, 51, -82, 1, 1, 93, -91, 101, 22, -120, 1, 1, 1, 29, 258],
'iso-8859-2': [...e(33), 100, 468, -407, -157, 153, 29, -179, 1, 184, -2, 6, 21, -204, 208, -2, -203, 85, 470, -409, -142, 138, 29, 364, -527, 169, -2, 6, 21, 355, -351, -2, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374],
'iso-8859-3': [...e(33), 134, 434, -565, 1, r, 128, -125, 1, 136, 46, -64, 22, -135, r, 206, -203, 119, -117, 1, 1, 1, 112, -110, 1, 121, 46, -64, 22, -120, r, 191, -188, 1, 1, r, 2, 70, -2, -65, ...e(8), r, 2, 1, 1, 1, 76, -74, 1, 69, -67, 1, 1, 1, 144, -16, -125, 1, 1, 1, r, 2, 39, -2, -34, ...e(8), r, 2, 1, 1, 1, 45, -43, 1, 38, -36, 1, 1, 1, 113, -16, 380],
'iso-8859-4': [...e(33), 100, 52, 30, -178, 132, 19, -148, 1, 184, -78, 16, 68, -185, 208, -206, 1, 85, 470, -388, -163, 117, 19, 395, -527, 169, -78, 16, 68, -29, 52, -51, -75, -63, ...e(5), 104, -34, -67, 79, -77, 75, -73, 1, 92, -26, 53, 7, -22, -98, 1, 1, 1, 1, 154, -152, 1, 1, 140, 2, -139, 34, -32, ...e(5), 73, -34, -36, 48, -46, 44, -42, 1, 61, -26, 53, 7, -22, -67, 1, 1, 1, 1, 123, -121, 1, 1, 109, 2, 366],
'iso-8859-5': [...e(33), 865, ...e(11), -863, 865, ...e(65), 7367, -7365, ...e(11), -949, 951, 1],
'iso-8859-6': [...e(33), r, r, r, 4, ...h(7), 1384, -1375, ...h(13), 1390, r, r, r, 4, r, 2, ...e(25), r, r, r, r, r, 6, ...e(18), ...h(13)],
'iso-8859-7': [...e(33), 8056, 1, -8054, 8201, 3, -8201, 1, 1, 1, 721, -719, 1, 1, r, 8040, -8037, 1, 1, 1, 721, 1, 1, -719, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r],
'iso-8859-8': [...e(33), r, 2, ...e(7), 46, -44, ...e(14), 62, -60, 1, 1, 1, ...h(32), 8025, -6727, ...e(26), r, r, 6692, 1, r],
'koi8-r': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 8450, ...e(14), -8544, 8545, ...e(10), -9411, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3],
'koi8-u': [9345, 2, 10, 4, 4, 4, 4, 8, 8, 8, 8, 68, 4, 4, 4, 4, 1, 1, 1, -627, 640, -903, 1, 46, 28, 1, -8645, 8833, -8817, 2, 5, 64, 9305, 1, 1, -8449, 3, 8448, -8446, 1, 8448, 1, 1, 1, 1, -8394, -51, 8448, 1, 1, 1, -8544, 3, 8543, -8541, 1, 8543, 1, 1, 1, 1, -8410, -130, -869, 933, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3, -28, -30, 1, 21, -18, 1, 15, -17, 18, -13, ...e(7), 16, -15, 1, 1, 1, -13, -4, 26, -1, -20, 17, 5, -4, -2, 3],
'macintosh': [69, 1, 2, 2, 8, 5, 6, 5, -1, 2, 2, -1, 2, 2, 2, -1, 2, 1, 2, -1, 2, 1, 2, 2, -1, 2, 2, -1, 5, -1, 2, 1, 7972, -8048, -14, 1, 4, 8059, -8044, 41, -49, -5, 8313, -8302, -12, 8632, -8602, 18, 8518, -8557, 8627, 1, -8640, 16, 8525, 15, -2, -7759, 7787, -8577, 16, 751, -707, 18, -57, -30, 11, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 32, 3, 18, 125, 1, 7872, 1, 8, 1, -5, 1, -7970, 9427, -9419, 121, 7884, 104, -115, 1, 56007, 1, -56033, -8042, 8035, 4, 18, -8046, 8, -9, 10, -3, 5, 1, 1, -3, 7, 1, 63531, -63533, 8, 1, -2, 88, 405, 22, -557, 553, 1, 1, -546, 549, -2, -20],
'windows-1250': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -7888, 7897, -7903, 10, 25, -4, -233, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8129, 7897, -7903, 10, 25, -4, -218, 551, 17, -407, -157, 96, -94, 1, 1, 1, 181, -179, 1, 1, 1, 205, -203, 1, 554, -409, -142, 1, 1, 1, 1, 77, 90, -164, 130, 416, -415, 62, -40, -147, 1, 64, -62, 117, -51, -63, 69, -67, 79, -77, 79, -77, 1, 64, 2, 51, 4, -116, 1, 124, -122, 1, 129, 22, -148, 150, -148, 1, 133, -131, 118, -116, 1, 33, -31, 86, -51, -32, 38, -36, 48, -46, 48, -46, 1, 33, 2, 51, 4, -85, 1, 93, -91, 1, 98, 22, -117, 119, -117, 1, 102, 374],
'windows-1251': [899, 1, 7191, -7111, 7115, 8, -6, 1, 139, -124, -7207, 7216, -7215, 2, -1, 4, 67, 7110, 1, 3, 1, 5, -15, 1, -8060, 8330, -7369, 7137, -7136, 2, -1, 4, -959, 878, 80, -86, -868, 1004, -1002, 1, 858, -856, 859, -857, 1, 1, 1, 857, -855, 1, 853, 80, 59, -988, 1, 1, 922, 7365, -7362, -921, 925, -83, 80, 2, -71, ...e(63)],
'windows-1252': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 240, -238, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 225, -6],
'windows-1253': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 1, 1, 1, 1, 741, 1, -739, 1, 1, 1, 1, 1, 1, r, 2, 1, 1, 1, 8039, -8037, 1, 1, 1, 721, -719, 1, 1, 721, 1, 1, -719, 721, -719, 721, ...e(19), r, 2, ...e(43), r],
'windows-1254': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -7888, 7897, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8129, 7897, -7911, -182, 1, 218, -216, ...e(47), 79, -77, ...e(11), 84, 46, -127, ...e(16), 48, -46, ...e(11), 53, 46],
'windows-1255': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -8109, 1, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -8094, ...e(7), 8199, -8197, 1, 1, 1, 1, 46, -44, ...e(14), 62, -60, 1, 1, 1, 1, 1265, ...e(19), 45, 1, 1, 1, 1, ...h(7), -36, ...e(26), r, r, 6692, 1, r],
'windows-1256': [8237, -6702, 6556, -7816, 7820, 8, -6, 1, -7515, 7530, -6583, 6592, -7911, 1332, 18, -16, 39, 6505, 1, 3, 1, 5, -15, 1, -6507, 6777, -6801, 6569, -7911, 7865, 1, -6483, -1562, 1388, -1386, ...e(7), 1557, -1555, ...e(14), 1378, -1376, 1, 1, 1, 1377, 162, -160, ...e(21), -1375, 1376, 1, 1, 1, 6, 1, 1, 1, -1379, 1380, -1378, 1379, 1, 1, 1, -1377, 1, 1, 1, 1, 1374, 1, -1372, 1, 1372, 1, 1, 1, -1370, 1371, 1, -1369, 1370, -1368, 1369, -1367, 1, 7954, 1, -6461],
'windows-1257': [8237, -8235, 8089, -8087, 8091, 8, -6, 1, -8089, 8104, -8102, 8111, -8109, 28, 543, -527, -40, 8072, 1, 3, 1, 5, -15, 1, -8060, 8330, -8328, 8096, -8094, 19, 556, -572, 1, r, 2, 1, 1, r, 2, 1, 49, -47, 173, -171, 1, 1, 1, 24, -22, ...e(5), 1, 1, 65, -63, 158, -156, 1, 1, 1, 40, 30, 42, -46, 6, -66, 1, 83, -6, -6, -67, 176, -99, 12, 20, -12, 17, 37, -29, 2, -114, 121, -119, 1, 1, 155, -49, 25, 16, -142, 159, 2, -158, 38, 42, -46, 6, -35, 1, 52, -6, -6, -36, 145, -99, 12, 20, -12, 17, 37, -29, 2, -83, 90, -88, 1, 1, 124, -49, 25, 16, -111, 128, 2, 347],
'windows-1258': [8237, -8235, 8089, -7816, 7820, 8, -6, 1, -7515, 7530, -8102, 8111, -7911, -197, 1, 1, 1, 8072, 1, 3, 1, 5, -15, 1, -7480, 7750, -8328, 8096, -7911, -182, 1, 218, -216, ...e(34), 64, -62, ...e(7), 565, -563, 1, 1, 65, -63, 568, -566, 1, 204, -202, 1, 1, 1, 1, 1, 1, 211, 340, -548, 1, 1, 1, 33, -31, ...e(7), 534, -532, 1, 1, 34, -32, 562, -560, 1, 173, -171, 1, 1, 1, 1, 1, 1, 180, 7931],
'windows-874': [8237, -8235, 1, 1, 1, 8098, -8096, ...e(10), 8072, 1, 3, 1, 5, -15, 1, -8060, ...e(8), 3425, ...e(57), r, r, r, r, 5, ...e(28), r, r, r, r],
'x-mac-cyrillic': [913, ...e(31), 7153, -8048, 992, -1005, 4, 8059, -8044, 848, -856, -5, 8313, -7456, 80, 7694, -7773, 80, 7627, -8557, 8627, 1, -7695, -929, 988, -137, -4, 80, -77, 80, -78, 80, -79, 80, -2, -83, -857, 8558, -8328, 8374, -66, -8539, 16, 8043, -8070, 875, 80, -79, 80, -7, 7102, 1, 8, 1, -5, 1, -7970, 7975, -7184, 80, -79, 80, 7351, -7445, 80, -2, -31, ...e(30), 7262],
};

/* eslint-enable @stylistic/js/max-len */

/* fallback/single-byte.js + single-byte.node.js, simplified */

const l256 = { __proto__: null, length: 256 };

function getEncoding(encoding) {
if (encoding === 'x-user-defined') {
// https://encoding.spec.whatwg.org/#x-user-defined-decoder, 14.5.1. x-user-defined decoder
return TypedArrayFrom(Uint16Array, l256, (_, i) => (i >= 0x80 ? 0xf700 + i : i));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Again, much faster to do the following:

Suggested change
return TypedArrayFrom(Uint16Array, l256, (_, i) => (i >= 0x80 ? 0xf700 + i : i));
const map = new Uint16Array(256);
for (let i = 0; i < 128; i++) {
map[i] = i;
}
for (let i = 128; i < 256; i++) {
map[i] = i + 0xf700;
}
return map;

Copy link
Member Author

@ChALkeR ChALkeR Dec 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are all single-time inits for arrs of length 256 and are cached, any perf difference there is insignificant and isn't work the extra LoCs

Actual improvement on this is #61118 which will remove this init code

}

if (!ObjectPrototypeHasOwnProperty(encodings, encoding)) {
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
}

const map = TypedArrayFrom(Uint16Array, l256, (_, i) => i); // Unicode subset
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: The callback variant of the arrayish .from methods is really quite slow. It's significantly quicker to create an empty typed array and fill the desired portion using a simple loop.

Suggested change
const map = TypedArrayFrom(Uint16Array, l256, (_, i) => i); // Unicode subset
const map = new Uint16Array(256);
// Populate the ASCII region
for (let i = 0; i < 128; i++) {
map[i] = i;
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let prev = 127;
map.set(TypedArrayFrom(Uint16Array, it(encodings[encoding]), (x) => (x === r ? x : (prev += x))), 128);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Again, a loop here is much faster than passing a callback to .from().

Suggested change
map.set(TypedArrayFrom(Uint16Array, it(encodings[encoding]), (x) => (x === r ? x : (prev += x))), 128);
const offsets = encodings[encoding];
for (let i = 128; i < 256; i++) {
const x = offsets[i];
map[i] = x === r ? r : (prev += x);
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return map;
}

const supported = new SafeSet(it(ObjectKeys(encodings))).add('iso-8859-8-i').add('x-user-defined');
const isSinglebyteEncoding = (enc) => supported.has(enc);

const decodersLoose = new SafeMap();
const decodersFatal = new SafeMap();

function createSinglebyteDecoder(encoding, fatal) {
const id = encoding === 'iso-8859-8-i' ? 'iso-8859-8' : encoding;
const decoders = fatal ? decodersFatal : decodersLoose;
const cached = decoders.get(id);
if (cached) return cached;

const map = getEncoding(id);
const incomplete = TypedArrayPrototypeIncludes(map, r);

// Expects type-checked Buffer input
const decoder = (buf) => {
if (buf.byteLength === 0) return '';
if (isAscii(buf)) return buf.latin1Slice(); // .latin1Slice is faster than .asciiSlice
const o = new Uint16Array(buf.length);
TypedArrayPrototypeSet(o, buf); // Copy to modify in-place, also those are 16-bit now

let i = 0;
for (const end7 = o.length - 7; i < end7; i += 8) {
o[i] = map[o[i]];
o[i + 1] = map[o[i + 1]];
o[i + 2] = map[o[i + 2]];
o[i + 3] = map[o[i + 3]];
o[i + 4] = map[o[i + 4]];
o[i + 5] = map[o[i + 5]];
o[i + 6] = map[o[i + 6]];
o[i + 7] = map[o[i + 7]];
}

for (const end = o.length; i < end; i++) o[i] = map[o[i]];

const b = new FastBuffer(o.buffer, o.byteOffset, o.byteLength);
if (isBigEndian) b.swap16();
const string = b.ucs2Slice();
if (fatal && incomplete && StringPrototypeIncludes(string, '\uFFFD')) {
throw new ERR_ENCODING_INVALID_ENCODED_DATA(encoding, undefined);
}
return string;
};

decoders.set(id, decoder);
return decoder;
}

module.exports = {
isSinglebyteEncoding,
createSinglebyteDecoder,
getEncoding, // for tests
};
Loading
Loading