From 866781fff6751c8d2964b6f395ecd998f51de03f Mon Sep 17 00:00:00 2001 From: Nikita Skovoroda Date: Fri, 19 Dec 2025 01:32:42 +0400 Subject: [PATCH 1/2] src: improve StringBytes::Encode perf on ASCII --- src/encoding_binding.cc | 24 +++++++++++++++++------- src/string_bytes.cc | 8 ++++++++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index f68dd9522a0f69..01f10de535bcca 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -356,7 +356,24 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo& args) { const char* data = buffer.data(); size_t length = buffer.length(); + if (!ignore_bom && length >= 3) { + if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) { + data += 3; + length -= 3; + } + } + if (has_fatal) { + // Are we perhaps ASCII? Then we won't have to check for UTF-8 + if (simdutf::validate_ascii(data, length)) { + Local ret; + if (StringBytes::Encode(env->isolate(), data, length, LATIN1) + .ToLocal(&ret)) { + args.GetReturnValue().Set(ret); + } + return; + } + auto result = simdutf::validate_utf8_with_errors(data, length); if (result.error) { @@ -365,13 +382,6 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo& args) { } } - if (!ignore_bom && length >= 3) { - if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) { - data += 3; - length -= 3; - } - } - if (length == 0) return args.GetReturnValue().SetEmptyString(); Local ret; diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 03b5fd7ebe3816..550b1fa5aeb898 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -531,6 +531,14 @@ MaybeLocal StringBytes::Encode(Isolate* isolate, case UTF8: { buflen = keep_buflen_in_range(buflen); + + // ASCII fast path + // TODO(chalker): remove when String::NewFromUtf8 is fast enough itself + // This is cheap compared to the benefits though + if (simdutf::validate_ascii(buf, buflen)) { + return ExternOneByteString::NewFromCopy(isolate, buf, buflen); + } + val = String::NewFromUtf8(isolate, buf, v8::NewStringType::kNormal, buflen); Local str; From 42b4ff4a114e6a1d8695aef26e439fac92c16284 Mon Sep 17 00:00:00 2001 From: Nikita Skovoroda Date: Fri, 19 Dec 2025 13:30:57 +0400 Subject: [PATCH 2/2] src: use validate_ascii_with_errors --- src/encoding_binding.cc | 2 +- src/string_bytes.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index 01f10de535bcca..03b2848cb7b948 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -365,7 +365,7 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo& args) { if (has_fatal) { // Are we perhaps ASCII? Then we won't have to check for UTF-8 - if (simdutf::validate_ascii(data, length)) { + if (!simdutf::validate_ascii_with_errors(data, length).error) { Local ret; if (StringBytes::Encode(env->isolate(), data, length, LATIN1) .ToLocal(&ret)) { diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 550b1fa5aeb898..8f6bedd63e25b3 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -535,7 +535,7 @@ MaybeLocal StringBytes::Encode(Isolate* isolate, // ASCII fast path // TODO(chalker): remove when String::NewFromUtf8 is fast enough itself // This is cheap compared to the benefits though - if (simdutf::validate_ascii(buf, buflen)) { + if (!simdutf::validate_ascii_with_errors(buf, buflen).error) { return ExternOneByteString::NewFromCopy(isolate, buf, buflen); }