From f23eac2a7209cbab040e0ba06fc7e842bc00a1db Mon Sep 17 00:00:00 2001 From: Ilya Tribusean Date: Sun, 15 Feb 2026 17:27:40 +0200 Subject: [PATCH 1/5] do not copy whole input and misc --- utfbom.go | 14 +++++++++----- utfbom_test.go | 40 +++++++++++----------------------------- 2 files changed, 20 insertions(+), 34 deletions(-) diff --git a/utfbom.go b/utfbom.go index 1562960..28cac2b 100644 --- a/utfbom.go +++ b/utfbom.go @@ -19,6 +19,9 @@ var _ io.Reader = (*Reader)(nil) // ErrRead helps to trace error origin. var ErrRead = errors.New("utfbom: I/O error during BOM processing") +const maxBOMLen = 4 + + // Encoding is a character encoding standard. type Encoding int @@ -59,6 +62,10 @@ const ( // - UTF-32 Big Endian (BOM: 0x00 0x00 0xfe 0xff) // - UTF-32 Little Endian (BOM: 0xff 0xfe 0x00 0x00) func DetectEncoding[T ~string | ~[]byte](input T) Encoding { + if len(input) > maxBOMLen { + input = input[:maxBOMLen] + } + b := []byte(input) if len(b) < 2 { @@ -149,8 +156,7 @@ func (e Encoding) Bytes() []byte { // Trim removes the BOM prefix from the input. // Supports string or []byte inputs and returns the same type without the BOM. func Trim[T ~string | ~[]byte](input T) (T, Encoding) { - b := []byte(input) - enc := DetectEncoding(b) + enc := DetectEncoding(input) if enc == Unknown { return input, enc @@ -161,7 +167,7 @@ func Trim[T ~string | ~[]byte](input T) (T, Encoding) { // Prepend adds the corresponding Byte Order Mark (BOM) for a given encoding // to the beginning of a string or byte slice. -// If the provided encoding is Unknown, the input is returned unmodified. +// The input is returned unmodified if enc is Unknown or if the input already has any BOM. func Prepend[T ~string | ~[]byte](input T, enc Encoding) T { if enc == Unknown { return input @@ -201,8 +207,6 @@ func NewReader(rd io.Reader) *Reader { // On the first call, it detects and removes any Byte Order Mark (BOM). // Subsequent calls delegate directly to the underlying Reader. func (r *Reader) Read(buf []byte) (int, error) { - const maxBOMLen = 4 - if len(buf) == 0 { return 0, nil } diff --git a/utfbom_test.go b/utfbom_test.go index a89e3e6..6d29146 100644 --- a/utfbom_test.go +++ b/utfbom_test.go @@ -218,41 +218,23 @@ func TestEncoding_Len(t *testing.T) { t.Parallel() testCases := []struct { + name string enc utfbom.Encoding expected int }{ - { - enc: utfbom.Unknown, - expected: 0, - }, - { - enc: utfbom.UTF8, - expected: 3, - }, - { - enc: utfbom.UTF16BigEndian, - expected: 2, - }, - { - enc: utfbom.UTF16LittleEndian, - expected: 2, - }, - { - enc: utfbom.UTF32BigEndian, - expected: 4, - }, - { - enc: utfbom.UTF32LittleEndian, - expected: 4, - }, - { - enc: 999, - expected: 0, - }, + {"Unknown", utfbom.Unknown, 0}, + {"UTF8", utfbom.UTF8, 3}, + {"UTF16BigEndian", utfbom.UTF16BigEndian, 2}, + {"UTF16LittleEndian", utfbom.UTF16LittleEndian, 2}, + {"UTF32BigEndian", utfbom.UTF32BigEndian, 4}, + {"UTF32LittleEndian", utfbom.UTF32LittleEndian, 4}, + {"InvalidEncoding", 999, 0}, } for _, tc := range testCases { - be.Equal(t, tc.enc.Len(), tc.expected) + t.Run(tc.name, func(t *testing.T) { + be.Equal(t, tc.enc.Len(), tc.expected) + }) } } From 5b6673eef82558de52849de50e6f808beb7d6423 Mon Sep 17 00:00:00 2001 From: Ilya Tribusean Date: Sun, 15 Feb 2026 17:35:56 +0200 Subject: [PATCH 2/5] test fix --- utfbom.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/utfbom.go b/utfbom.go index 28cac2b..84f8034 100644 --- a/utfbom.go +++ b/utfbom.go @@ -21,7 +21,6 @@ var ErrRead = errors.New("utfbom: I/O error during BOM processing") const maxBOMLen = 4 - // Encoding is a character encoding standard. type Encoding int @@ -62,9 +61,9 @@ const ( // - UTF-32 Big Endian (BOM: 0x00 0x00 0xfe 0xff) // - UTF-32 Little Endian (BOM: 0xff 0xfe 0x00 0x00) func DetectEncoding[T ~string | ~[]byte](input T) Encoding { - if len(input) > maxBOMLen { - input = input[:maxBOMLen] - } + if len(input) > maxBOMLen { + input = input[:maxBOMLen] + } b := []byte(input) @@ -162,7 +161,7 @@ func Trim[T ~string | ~[]byte](input T) (T, Encoding) { return input, enc } - return T(b[enc.Len():]), enc + return input[enc.Len():], enc } // Prepend adds the corresponding Byte Order Mark (BOM) for a given encoding From 3bc099bfcc957430f91839c2e1c648061cae1693 Mon Sep 17 00:00:00 2001 From: Ilya Tribusean Date: Sun, 15 Feb 2026 17:38:56 +0200 Subject: [PATCH 3/5] duplicated test removed --- utfbom_test.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/utfbom_test.go b/utfbom_test.go index 6d29146..32626c7 100644 --- a/utfbom_test.go +++ b/utfbom_test.go @@ -295,16 +295,6 @@ func TestReader_StringWithoutBOM(t *testing.T) { be.Err(t, iotest.TestReader(rd, []byte(nobomstring)), nil) } -func TestReader_UsualReader(t *testing.T) { - t.Parallel() - - bomPrefixedStringReader := strings.NewReader(teststring) - - rd := utfbom.NewReader(bomPrefixedStringReader) - - be.Err(t, iotest.TestReader(rd, []byte(teststring[3:])), nil) -} - func TestReader_OneByteReader(t *testing.T) { t.Parallel() From 3254d9b9359cf4bb8c5668d21ebb49564eea0963 Mon Sep 17 00:00:00 2001 From: Ilya Tribusean Date: Sun, 15 Feb 2026 17:50:47 +0200 Subject: [PATCH 4/5] misc --- utfbom.go | 4 ++++ utfbom_test.go | 14 ++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/utfbom.go b/utfbom.go index 84f8034..e651210 100644 --- a/utfbom.go +++ b/utfbom.go @@ -172,6 +172,10 @@ func Prepend[T ~string | ~[]byte](input T, enc Encoding) T { return input } + if len(input) > maxBOMLen { + input = input[:maxBOMLen] + } + b := []byte(input) if DetectEncoding(b) != Unknown { diff --git a/utfbom_test.go b/utfbom_test.go index 32626c7..15b135a 100644 --- a/utfbom_test.go +++ b/utfbom_test.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/csv" "encoding/hex" + "errors" "fmt" "io" "strings" @@ -542,6 +543,19 @@ func TestPrepend_TypeAliases(t *testing.T) { }) } +// TestReader_UnderlyingReaderError verifies that when the underlying reader +// returns a non-EOF error during BOM detection, it is wrapped with ErrRead. +func TestReader_UnderlyingReaderError(t *testing.T) { + t.Parallel() + + rd := utfbom.NewReader(iotest.ErrReader(errors.New("disk failure"))) + + buf := make([]byte, 10) + n, err := rd.Read(buf) + be.Equal(t, 0, n) + be.True(t, errors.Is(err, utfbom.ErrRead)) +} + func TestNewReader_NilPanics(t *testing.T) { t.Parallel() From 91935ce574638f109490c3f9159705dbeed6d9e8 Mon Sep 17 00:00:00 2001 From: Ilya Tribusean Date: Sun, 15 Feb 2026 17:59:02 +0200 Subject: [PATCH 5/5] me breaki, me fixi --- utfbom.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/utfbom.go b/utfbom.go index e651210..fadb4eb 100644 --- a/utfbom.go +++ b/utfbom.go @@ -172,17 +172,11 @@ func Prepend[T ~string | ~[]byte](input T, enc Encoding) T { return input } - if len(input) > maxBOMLen { - input = input[:maxBOMLen] - } - - b := []byte(input) - - if DetectEncoding(b) != Unknown { + if DetectEncoding(input) != Unknown { return input } - return T(append(enc.Bytes(), b...)) + return T(append(enc.Bytes(), []byte(input)...)) } // Reader implements automatic BOM (Unicode Byte Order Mark) checking and