diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc index f68dd9522a0f69..1361c41a5491e1 100644 --- a/src/encoding_binding.cc +++ b/src/encoding_binding.cc @@ -356,20 +356,32 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo& args) { const char* data = buffer.data(); size_t length = buffer.length(); + if (!ignore_bom && length >= 3) { + if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) { + data += 3; + length -= 3; + } + } + if (has_fatal) { + // Are we perhaps ASCII? Then we won't have to check for UTF-8 + if (!simdutf::validate_ascii_with_errors(data, length).error) { + Local ret; + if (StringBytes::Encode(env->isolate(), data, length, LATIN1) + .ToLocal(&ret)) { + args.GetReturnValue().Set(ret); + } + return; + } + auto result = simdutf::validate_utf8_with_errors(data, length); if (result.error) { return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA( env->isolate(), "The encoded data was not valid for encoding utf-8"); } - } - if (!ignore_bom && length >= 3) { - if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) { - data += 3; - length -= 3; - } + // TODO(chalker): save on utf8 validity recheck in StringBytes::Encode() } if (length == 0) return args.GetReturnValue().SetEmptyString(); diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 03b5fd7ebe3816..c1a446b79fda3a 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -531,6 +531,24 @@ MaybeLocal StringBytes::Encode(Isolate* isolate, case UTF8: { buflen = keep_buflen_in_range(buflen); + + // ASCII fast path + // TODO(chalker): remove when String::NewFromUtf8 is fast enough itself + // This is cheap compared to the benefits though + if (!simdutf::validate_ascii_with_errors(buf, buflen).error) { + return ExternOneByteString::NewFromCopy(isolate, buf, buflen); + } + + if (simdutf::validate_utf8(buf, buflen)) { + // We know that we are non-ASCII (and are unlikely Latin1), use 2-byte + // In the most likely case of valid UTF-8, we can use this fast impl + size_t u16size = simdutf::utf16_length_from_utf8(buf, buflen); + uint16_t* dst = node::UncheckedMalloc(u16size); + size_t utf16len = simdutf::convert_valid_utf8_to_utf16( + buf, buflen, reinterpret_cast(dst)); + return ExternTwoByteString::New(isolate, dst, utf16len); + } + val = String::NewFromUtf8(isolate, buf, v8::NewStringType::kNormal, buflen); Local str;