@@ -461,11 +461,11 @@ inline std::wstring Utf8ToWString(const std::string& str) {
461461 // Optimized UTF-8 to UTF-32 conversion (wstring on Unix)
462462 if (str.empty ())
463463 return {};
464-
464+
465465 // Lambda to decode UTF-8 multi-byte sequences
466466 constexpr auto decodeUtf8 = [](const unsigned char * data, size_t & i, size_t len) -> wchar_t {
467467 unsigned char byte = data[i];
468-
468+
469469 // 1-byte sequence (ASCII): 0xxxxxxx
470470 if (byte <= 0x7F ) {
471471 ++i;
@@ -480,46 +480,44 @@ inline std::wstring Utf8ToWString(const std::string& str) {
480480 // 3-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
481481 if ((byte & 0xF0 ) == 0xE0 && i + 2 < len) {
482482 uint32_t cp = ((static_cast <uint32_t >(byte & 0x0F ) << 12 ) |
483- ((data[i + 1 ] & 0x3F ) << 6 ) |
484- (data[i + 2 ] & 0x3F ));
483+ ((data[i + 1 ] & 0x3F ) << 6 ) | (data[i + 2 ] & 0x3F ));
485484 i += 3 ;
486485 return static_cast <wchar_t >(cp);
487486 }
488487 // 4-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
489488 if ((byte & 0xF8 ) == 0xF0 && i + 3 < len) {
490- uint32_t cp = ((static_cast <uint32_t >(byte & 0x07 ) << 18 ) |
491- ((data[i + 1 ] & 0x3F ) << 12 ) |
492- ((data[i + 2 ] & 0x3F ) << 6 ) |
493- (data[i + 3 ] & 0x3F ));
489+ uint32_t cp =
490+ ((static_cast <uint32_t >(byte & 0x07 ) << 18 ) | ((data[i + 1 ] & 0x3F ) << 12 ) |
491+ ((data[i + 2 ] & 0x3F ) << 6 ) | (data[i + 3 ] & 0x3F ));
494492 i += 4 ;
495493 return static_cast <wchar_t >(cp);
496494 }
497495 // Invalid sequence - skip byte
498496 ++i;
499497 return 0xFFFD ; // Unicode replacement character
500498 };
501-
499+
502500 std::wstring result;
503501 result.reserve (str.size ()); // Reserve assuming mostly ASCII
504-
502+
505503 const unsigned char * data = reinterpret_cast <const unsigned char *>(str.data ());
506504 const size_t len = str.size ();
507505 size_t i = 0 ;
508-
506+
509507 // Fast path for ASCII-only prefix (most common case)
510508 while (i < len && data[i] <= 0x7F ) {
511509 result.push_back (static_cast <wchar_t >(data[i]));
512510 ++i;
513511 }
514-
512+
515513 // Handle remaining multi-byte sequences
516514 while (i < len) {
517515 wchar_t wc = decodeUtf8 (data, i, len);
518516 if (wc != 0xFFFD || data[i - 1 ] >= 0x80 ) { // Skip invalid sequences
519517 result.push_back (wc);
520518 }
521519 }
522-
520+
523521 return result;
524522#endif
525523}
0 commit comments