From be13c81a0c629936002cf9dbed7a76bb3bd394a6 Mon Sep 17 00:00:00 2001
From: Ilya Tribusean <slash3b@gmail.com>
Date: Wed, 3 Dec 2025 23:25:27 +0200
Subject: [PATCH 1/4] fix mutable bytes

---
 utfbom.go | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/utfbom.go b/utfbom.go
index c5ab8d6..edde726 100644
--- a/utfbom.go
+++ b/utfbom.go
@@ -13,14 +13,7 @@ import (
 	"sync"
 )
 
-var (
-	_          io.Reader = (*Reader)(nil)
-	utf8BOM              = []byte{0xef, 0xbb, 0xbf}
-	utf16BEBOM           = []byte{0xfe, 0xff}
-	utf16LEBOM           = []byte{0xff, 0xfe}
-	utf32BEBOM           = []byte{0x00, 0x00, 0xfe, 0xff}
-	utf32LEBOM           = []byte{0xff, 0xfe, 0x00, 0x00}
-)
+var _ io.Reader = (*Reader)(nil)
 
 // ErrRead helps to trace error origin.
 var ErrRead = errors.New("utfbom: I/O error during BOM processing")
@@ -71,25 +64,25 @@ func DetectEncoding[T string | []byte](input T) Encoding {
 		return Unknown
 	}
 
-	if len(ibs) >= 3 && bytes.HasPrefix(ibs, utf8BOM) {
+	if len(ibs) >= 3 && bytes.HasPrefix(ibs, []byte{0xef, 0xbb, 0xbf}) {
 		return UTF8
 	}
 
 	if len(ibs) >= 4 {
-		if bytes.HasPrefix(ibs, utf32BEBOM) {
+		if bytes.HasPrefix(ibs, []byte{0x00, 0x00, 0xfe, 0xff}) {
 			return UTF32BigEndian
 		}
 
-		if bytes.HasPrefix(ibs, utf32LEBOM) {
+		if bytes.HasPrefix(ibs, []byte{0xff, 0xfe, 0x00, 0x00}) {
 			return UTF32LittleEndian
 		}
 	}
 
-	if bytes.HasPrefix(ibs, utf16BEBOM) {
+	if bytes.HasPrefix(ibs, []byte{0xfe, 0xff}) {
 		return UTF16BigEndian
 	}
 
-	if bytes.HasPrefix(ibs, utf16LEBOM) {
+	if bytes.HasPrefix(ibs, []byte{0xff, 0xfe}) {
 		return UTF16LittleEndian
 	}
 
@@ -146,15 +139,15 @@ func (e Encoding) Bytes() []byte {
 	default:
 		return nil
 	case UTF8:
-		return utf8BOM
+		return []byte{0xef, 0xbb, 0xbf}
 	case UTF16BigEndian:
-		return utf16BEBOM
+		return []byte{0xfe, 0xff}
 	case UTF16LittleEndian:
-		return utf16LEBOM
+		return []byte{0xff, 0xfe}
 	case UTF32BigEndian:
-		return utf32BEBOM
+		return []byte{0x00, 0x00, 0xfe, 0xff}
 	case UTF32LittleEndian:
-		return utf32LEBOM
+		return []byte{0xff, 0xfe, 0x00, 0x00}
 	}
 }
 

From 53b832268ab7342b871e52ae1d1fe9e3ad104f5b Mon Sep 17 00:00:00 2001
From: Ilya Tribusean <slash3b@gmail.com>
Date: Wed, 3 Dec 2025 23:33:52 +0200
Subject: [PATCH 2/4] add test that Bytes() returns a fresh slice

---
 utfbom_test.go | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/utfbom_test.go b/utfbom_test.go
index 012a6e8..59e0f35 100644
--- a/utfbom_test.go
+++ b/utfbom_test.go
@@ -443,6 +443,34 @@ func TestEncoding_Bytes(t *testing.T) {
 	}
 }
 
+// TestBytes_NoAliasing checks that mutating the slice returned by Bytes() does not affect later calls.
+func TestBytes_NoAliasing(t *testing.T) {
+	t.Parallel()
+
+	encodings := []utfbom.Encoding{
+		utfbom.UTF8,
+		utfbom.UTF16BigEndian,
+		utfbom.UTF16LittleEndian,
+		utfbom.UTF32BigEndian,
+		utfbom.UTF32LittleEndian,
+	}
+
+	for _, enc := range encodings {
+		t.Run(enc.String(), func(t *testing.T) {
+			t.Parallel()
+
+			original := enc.Bytes()
+			originalCopy := make([]byte, len(original))
+			copy(originalCopy, original)
+
+			original[0] = 0x00
+
+			fresh := enc.Bytes()
+			be.Equal(t, fresh, originalCopy)
+		})
+	}
+}
+
 func TestPrepend(t *testing.T) {
 	t.Parallel()
 

From 92eb9d436eee04dc69f2ce1430cc2d394100ccf5 Mon Sep 17 00:00:00 2001
From: Ilya Tribusean <slash3b@gmail.com>
Date: Wed, 3 Dec 2025 23:51:53 +0200
Subject: [PATCH 3/4] tests and polishing

---
 utfbom.go      | 64 +++++++++++++++++++++++++----------------------
 utfbom_test.go | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+), 30 deletions(-)

diff --git a/utfbom.go b/utfbom.go
index edde726..e8ff577 100644
--- a/utfbom.go
+++ b/utfbom.go
@@ -47,48 +47,51 @@ const (
 	UTF32LittleEndian
 )
 
-// DetectEncoding inspects the initial bytes of a string or byte slice (T)
-// and returns the detected text encoding based on the presence of known BOMs (Byte Order Marks).
-// If no known BOM is found, it returns Unknown.
-//
-// Supported encodings:
-//   - UTF-8 (BOM: 0xef 0xbb 0xbf)
-//   - UTF-16 Big Endian (BOM: 0xfe 0xff)
-//   - UTF-16 Little Endian (BOM: 0xff 0xfe)
-//   - UTF-32 Big Endian (BOM: 0x00 0x00 0xfe 0xff)
-//   - UTF-32 Little Endian (BOM: 0xff 0xfe 0x00 0x00)
-func DetectEncoding[T string | []byte](input T) Encoding {
-	ibs := []byte(input)
-
-	if len(ibs) < 2 {
+// detectEncodingBytes is the internal implementation that works directly on []byte.
+func detectEncodingBytes(b []byte) Encoding {
+	if len(b) < 2 {
 		return Unknown
 	}
 
-	if len(ibs) >= 3 && bytes.HasPrefix(ibs, []byte{0xef, 0xbb, 0xbf}) {
+	if len(b) >= 3 && bytes.HasPrefix(b, []byte{0xef, 0xbb, 0xbf}) {
 		return UTF8
 	}
 
-	if len(ibs) >= 4 {
-		if bytes.HasPrefix(ibs, []byte{0x00, 0x00, 0xfe, 0xff}) {
+	if len(b) >= 4 {
+		if bytes.HasPrefix(b, []byte{0x00, 0x00, 0xfe, 0xff}) {
 			return UTF32BigEndian
 		}
 
-		if bytes.HasPrefix(ibs, []byte{0xff, 0xfe, 0x00, 0x00}) {
+		if bytes.HasPrefix(b, []byte{0xff, 0xfe, 0x00, 0x00}) {
 			return UTF32LittleEndian
 		}
 	}
 
-	if bytes.HasPrefix(ibs, []byte{0xfe, 0xff}) {
+	if bytes.HasPrefix(b, []byte{0xfe, 0xff}) {
 		return UTF16BigEndian
 	}
 
-	if bytes.HasPrefix(ibs, []byte{0xff, 0xfe}) {
+	if bytes.HasPrefix(b, []byte{0xff, 0xfe}) {
 		return UTF16LittleEndian
 	}
 
 	return Unknown
 }
 
+// DetectEncoding inspects the initial bytes of a string or byte slice (T)
+// and returns the detected text encoding based on the presence of known BOMs (Byte Order Marks).
+// If no known BOM is found, it returns Unknown.
+//
+// Supported encodings:
+//   - UTF-8 (BOM: 0xef 0xbb 0xbf)
+//   - UTF-16 Big Endian (BOM: 0xfe 0xff)
+//   - UTF-16 Little Endian (BOM: 0xff 0xfe)
+//   - UTF-32 Big Endian (BOM: 0x00 0x00 0xfe 0xff)
+//   - UTF-32 Little Endian (BOM: 0xff 0xfe 0x00 0x00)
+func DetectEncoding[T ~string | ~[]byte](input T) Encoding {
+	return detectEncodingBytes([]byte(input))
+}
+
 // AnyOf reports whether the Encoding value equals any of the given Encoding values.
 // It returns true if a match is found, otherwise false.
 func (e Encoding) AnyOf(es ...Encoding) bool {
@@ -101,7 +104,7 @@ func (e Encoding) AnyOf(es ...Encoding) bool {
 	return false
 }
 
-// Strings returns human-readable name of encoding.
+// String returns the human-readable name of the encoding.
 func (e Encoding) String() string {
 	switch e {
 	case UTF8:
@@ -151,11 +154,11 @@ func (e Encoding) Bytes() []byte {
 	}
 }
 
-// Trim removes the BOM prefix from the input `s` based on the encoding `enc`.
+// Trim removes the BOM prefix from the input.
 // Supports string or []byte inputs and returns the same type without the BOM.
-func Trim[T string | []byte](input T) (T, Encoding) {
+func Trim[T ~string | ~[]byte](input T) (T, Encoding) {
 	b := []byte(input)
-	enc := DetectEncoding(b)
+	enc := detectEncodingBytes(b)
 
 	if enc == Unknown {
 		return input, enc
@@ -167,14 +170,14 @@ func Trim[T string | []byte](input T) (T, Encoding) {
 // Prepend adds the corresponding Byte Order Mark (BOM) for a given encoding
 // to the beginning of a string or byte slice.
 // If the provided encoding is Unknown, the input is returned unmodified.
-func Prepend[T string | []byte](input T, enc Encoding) T {
+func Prepend[T ~string | ~[]byte](input T, enc Encoding) T {
 	if enc == Unknown {
 		return input
 	}
 
 	b := []byte(input)
 
-	if DetectEncoding(b) != Unknown {
+	if detectEncodingBytes(b) != Unknown {
 		return input
 	}
 
@@ -183,6 +186,8 @@ func Prepend[T string | []byte](input T, enc Encoding) T {
 
 // Reader implements automatic BOM (Unicode Byte Order Mark) checking and
 // removing as necessary for an io.Reader object.
+//
+// Reader is not safe for concurrent use.
 type Reader struct {
 	rd   *bufio.Reader
 	once sync.Once
@@ -191,6 +196,7 @@ type Reader struct {
 }
 
 // NewReader wraps an incoming reader.
+// Passing a nil reader will cause a panic on the first Read call.
 func NewReader(rd io.Reader) *Reader {
 	return &Reader{
 		rd:   bufio.NewReader(rd),
@@ -200,10 +206,8 @@ func NewReader(rd io.Reader) *Reader {
 }
 
 // Read implements the io.Reader interface.
-// On the first read call, it reads from the underlying Reader, detects and removes any Byte Order Mark (BOM).
-// Subsequent calls delegate directly to the underlying Reader without BOM handling.
-// Read is only safe for concurrent use during the first call due to sync.Once; after that, thread-safety
-// depends on the underlying Reader. It is best to assume unsafe concurrent use.
+// On the first call, it detects and removes any Byte Order Mark (BOM).
+// Subsequent calls delegate directly to the underlying Reader.
 func (r *Reader) Read(buf []byte) (int, error) {
 	const maxBOMLen = 4
 
diff --git a/utfbom_test.go b/utfbom_test.go
index 59e0f35..1d0e669 100644
--- a/utfbom_test.go
+++ b/utfbom_test.go
@@ -515,3 +515,71 @@ func TestPrepend(t *testing.T) {
 		}
 	})
 }
+
+type CustomString string
+
+type CustomBytes []byte
+
+func TestDetectEncoding_TypeAliases(t *testing.T) {
+	t.Parallel()
+
+	t.Run("custom_string", func(t *testing.T) {
+		input := CustomString("\ufeffhello")
+		enc := utfbom.DetectEncoding(input)
+		be.Equal(t, enc, utfbom.UTF8)
+	})
+
+	t.Run("custom_bytes", func(t *testing.T) {
+		input := CustomBytes([]byte{0xfe, 0xff, 'h', 'i'})
+		enc := utfbom.DetectEncoding(input)
+		be.Equal(t, enc, utfbom.UTF16BigEndian)
+	})
+}
+
+func TestTrim_TypeAliases(t *testing.T) {
+	t.Parallel()
+
+	t.Run("custom_string", func(t *testing.T) {
+		input := CustomString("\ufeffhello")
+		out, enc := utfbom.Trim(input)
+		be.Equal(t, enc, utfbom.UTF8)
+		be.Equal(t, out, CustomString("hello"))
+	})
+
+	t.Run("custom_bytes", func(t *testing.T) {
+		input := CustomBytes([]byte{0xfe, 0xff, 'h', 'i'})
+		out, enc := utfbom.Trim(input)
+		be.Equal(t, enc, utfbom.UTF16BigEndian)
+		be.Equal(t, out, CustomBytes([]byte{'h', 'i'}))
+	})
+}
+
+func TestPrepend_TypeAliases(t *testing.T) {
+	t.Parallel()
+
+	t.Run("custom_string", func(t *testing.T) {
+		input := CustomString("hello")
+		out := utfbom.Prepend(input, utfbom.UTF8)
+		be.Equal(t, out, CustomString("\ufeffhello"))
+	})
+
+	t.Run("custom_bytes", func(t *testing.T) {
+		input := CustomBytes([]byte{'h', 'i'})
+		out := utfbom.Prepend(input, utfbom.UTF16BigEndian)
+		be.Equal(t, out, CustomBytes([]byte{0xfe, 0xff, 'h', 'i'}))
+	})
+}
+
+func TestNewReader_NilPanics(t *testing.T) {
+	t.Parallel()
+
+	rd := utfbom.NewReader(nil)
+
+	defer func() {
+		r := recover()
+		be.True(t, r != nil)
+	}()
+
+	buf := make([]byte, 10)
+	_, _ = rd.Read(buf)
+}

From a0170f75d0b40918a6d6d2866406196831e1117b Mon Sep 17 00:00:00 2001
From: Ilya Tribusean <slash3b@gmail.com>
Date: Wed, 3 Dec 2025 23:58:38 +0200
Subject: [PATCH 4/4] do not like private method for bytes detection, revert to
 public one

---
 utfbom.go | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/utfbom.go b/utfbom.go
index e8ff577..9ad9b42 100644
--- a/utfbom.go
+++ b/utfbom.go
@@ -47,8 +47,19 @@ const (
 	UTF32LittleEndian
 )
 
-// detectEncodingBytes is the internal implementation that works directly on []byte.
-func detectEncodingBytes(b []byte) Encoding {
+// DetectEncoding inspects the initial bytes of a string or byte slice (T)
+// and returns the detected text encoding based on the presence of known BOMs (Byte Order Marks).
+// If no known BOM is found, it returns Unknown.
+//
+// Supported encodings:
+//   - UTF-8 (BOM: 0xef 0xbb 0xbf)
+//   - UTF-16 Big Endian (BOM: 0xfe 0xff)
+//   - UTF-16 Little Endian (BOM: 0xff 0xfe)
+//   - UTF-32 Big Endian (BOM: 0x00 0x00 0xfe 0xff)
+//   - UTF-32 Little Endian (BOM: 0xff 0xfe 0x00 0x00)
+func DetectEncoding[T ~string | ~[]byte](input T) Encoding {
+	b := []byte(input)
+
 	if len(b) < 2 {
 		return Unknown
 	}
@@ -78,20 +89,6 @@ func detectEncodingBytes(b []byte) Encoding {
 	return Unknown
 }
 
-// DetectEncoding inspects the initial bytes of a string or byte slice (T)
-// and returns the detected text encoding based on the presence of known BOMs (Byte Order Marks).
-// If no known BOM is found, it returns Unknown.
-//
-// Supported encodings:
-//   - UTF-8 (BOM: 0xef 0xbb 0xbf)
-//   - UTF-16 Big Endian (BOM: 0xfe 0xff)
-//   - UTF-16 Little Endian (BOM: 0xff 0xfe)
-//   - UTF-32 Big Endian (BOM: 0x00 0x00 0xfe 0xff)
-//   - UTF-32 Little Endian (BOM: 0xff 0xfe 0x00 0x00)
-func DetectEncoding[T ~string | ~[]byte](input T) Encoding {
-	return detectEncodingBytes([]byte(input))
-}
-
 // AnyOf reports whether the Encoding value equals any of the given Encoding values.
 // It returns true if a match is found, otherwise false.
 func (e Encoding) AnyOf(es ...Encoding) bool {
@@ -158,7 +155,7 @@ func (e Encoding) Bytes() []byte {
 // Supports string or []byte inputs and returns the same type without the BOM.
 func Trim[T ~string | ~[]byte](input T) (T, Encoding) {
 	b := []byte(input)
-	enc := detectEncodingBytes(b)
+	enc := DetectEncoding(b)
 
 	if enc == Unknown {
 		return input, enc
@@ -177,7 +174,7 @@ func Prepend[T ~string | ~[]byte](input T, enc Encoding) T {
 
 	b := []byte(input)
 
-	if detectEncodingBytes(b) != Unknown {
+	if DetectEncoding(b) != Unknown {
 		return input
 	}