diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc
index f68dd9522a0f69..03b2848cb7b948 100644
--- a/src/encoding_binding.cc
+++ b/src/encoding_binding.cc
@@ -356,7 +356,24 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
   const char* data = buffer.data();
   size_t length = buffer.length();
 
+  if (!ignore_bom && length >= 3) {
+    if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
+      data += 3;
+      length -= 3;
+    }
+  }
+
   if (has_fatal) {
+    // Are we perhaps ASCII? Then we won't have to check for UTF-8
+    if (!simdutf::validate_ascii_with_errors(data, length).error) {
+      Local<Value> ret;
+      if (StringBytes::Encode(env->isolate(), data, length, LATIN1)
+              .ToLocal(&ret)) {
+        args.GetReturnValue().Set(ret);
+      }
+      return;
+    }
+
     auto result = simdutf::validate_utf8_with_errors(data, length);
 
     if (result.error) {
@@ -365,13 +382,6 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
     }
   }
 
-  if (!ignore_bom && length >= 3) {
-    if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
-      data += 3;
-      length -= 3;
-    }
-  }
-
   if (length == 0) return args.GetReturnValue().SetEmptyString();
 
   Local<Value> ret;
diff --git a/src/string_bytes.cc b/src/string_bytes.cc
index 03b5fd7ebe3816..8f6bedd63e25b3 100644
--- a/src/string_bytes.cc
+++ b/src/string_bytes.cc
@@ -531,6 +531,14 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
 
     case UTF8: {
       buflen = keep_buflen_in_range(buflen);
+
+      // ASCII fast path
+      // TODO(chalker): remove when String::NewFromUtf8 is fast enough itself
+      // This is cheap compared to the benefits though
+      if (!simdutf::validate_ascii_with_errors(buf, buflen).error) {
+        return ExternOneByteString::NewFromCopy(isolate, buf, buflen);
+      }
+
       val =
           String::NewFromUtf8(isolate, buf, v8::NewStringType::kNormal, buflen);
       Local<String> str;