diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index fe38175..1edd815 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,11 +13,11 @@ jobs: strategy: matrix: go: - - "1.13" - - "1.14" - - "1.15" - - "1.16" - - "1.17" + - "1.19" + - "1.20" + - "1.21" + - "1.22" + - "1.23" steps: - uses: actions/checkout@v2 diff --git a/api.go b/api.go index 2c8eba6..6272df7 100644 --- a/api.go +++ b/api.go @@ -85,6 +85,32 @@ type SafeFloat = i.SafeFloat // SafeRune aliases rune. See the explanation for SafeString. type SafeRune = i.SafeRune +// HashValue is a marker interface to be implemented by types whose +// values should be hashed when redacted, instead of being replaced +// with the redaction marker. +type HashValue = i.HashValue + +// HashString represents a string that should be hashed when redacted. +type HashString = i.HashString + +// HashInt represents an integer that should be hashed when redacted. +type HashInt = i.HashInt + +// HashUint represents an unsigned integer that should be hashed when redacted. +type HashUint = i.HashUint + +// HashFloat represents a floating-point value that should be hashed when redacted. +type HashFloat = i.HashFloat + +// HashRune represents a rune that should be hashed when redacted. +type HashRune = i.HashRune + +// HashByte represents a byte that should be hashed when redacted. +type HashByte = i.HashByte + +// HashBytes represents a byte slice that should be hashed when redacted. +type HashBytes = i.HashBytes + // RedactableString is a string that contains a mix of safe and unsafe // bits of data, but where it is known that unsafe bits are enclosed // by redaction markers ‹ and ›, and occurrences of the markers @@ -173,3 +199,17 @@ type StringBuilder = builder.StringBuilder func RegisterSafeType(t reflect.Type) { ifmt.RegisterSafeType(t) } + +// EnableHashing enables hash-based redaction with an optional salt. +// Hash markers (‹†value›) will be replaced with hashes instead of being fully redacted. +// When salt is nil, hash markers use plain SHA-256. +// When salt is provided, hash markers use HMAC-SHA256 for better security. +func EnableHashing(salt []byte) { + m.EnableHashing(salt) +} + +// DisableHashing disables hash-based redaction. +// Hash markers will be fully redacted instead of being replaced with hashes. +func DisableHashing() { + m.DisableHashing() +} diff --git a/bench_test.go b/bench_test.go index d9af5ec..ebf8a0d 100644 --- a/bench_test.go +++ b/bench_test.go @@ -24,3 +24,37 @@ func BenchmarkRedact(b *testing.B) { _ = Sprintf("hello %v %v %v", 123, x, Safe("world"), Unsafe("universe")) } } + +// BenchmarkRedactCall_PlainMarkers calls .Redact() on a string with only +// regular ‹...› markers (no hash markers). This is the baseline. +func BenchmarkRedactCall_RegularRedaction(b *testing.B) { + s := Sprintf("user=%s action=%s", "alice", Safe("login")) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = s.Redact() + } +} + +// BenchmarkRedactCall_HashMarkers calls .Redact() on a string with ‹†...› +// hash markers and hashing enabled. This exercises the SHA-256 path. +func BenchmarkRedactCall_HashEnabled(b *testing.B) { + EnableHashing(nil) + defer DisableHashing() + s := Sprintf("user=%s action=%s", HashString("alice"), SafeString("login")) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = s.Redact() + } +} + +// BenchmarkRedactCall_SingleHashWithSalt — 1 ‹†alice› marker, HMAC-SHA256. +func BenchmarkRedactCall_HashWithSalt(b *testing.B) { + EnableHashing([]byte("my-secret-salt")) + defer DisableHashing() + s := Sprintf("user=%s action=%s", HashString("alice"), Safe("login")) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = s.Redact() + } +} + diff --git a/go.mod b/go.mod index cadc968..b51f1b0 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ module github.com/cockroachdb/redact -go 1.14 +go 1.19 diff --git a/hash_redact_test.go b/hash_redact_test.go new file mode 100644 index 0000000..c361b50 --- /dev/null +++ b/hash_redact_test.go @@ -0,0 +1,405 @@ +// Copyright 2026 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +package redact + +import ( + "reflect" + "strings" + "sync" + "testing" +) + +func TestHashRedact_EndToEnd(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("user=%s", HashString("alice")) + if expected := RedactableString("user=‹†alice›"); s != expected { + t.Errorf("expected %q, got %q", expected, s) + } + + redacted := s.Redact() + if expected := RedactableString("user=‹2bd806c9›"); redacted != expected { + t.Errorf("expected %q, got %q", expected, redacted) + } +} + +func TestHashRedact_DisabledHashing(t *testing.T) { + s := Sprintf("user=%s", HashString("alice")) + + redacted := s.Redact() + expected := RedactableString("user=‹×›") + if redacted != expected { + t.Errorf("expected %q, got %q", expected, redacted) + } +} + +func TestHashRedact_MixedMarkers(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("user=%s action=%s", HashString("alice"), "login") + redacted := s.Redact() + + expected := RedactableString("user=‹2bd806c9› action=‹×›") + if redacted != expected { + t.Errorf("expected %q, got %q", expected, redacted) + } +} + +func TestHashRedact_FormatVerbs(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + testData := []struct { + name string + format RedactableString + expectedPre RedactableString + expectedPost RedactableString + }{ + {"padded int", Sprintf("%05d", HashInt(42)), "‹†00042›", "‹be9c43d5›"}, + {"hex int", Sprintf("%x", HashInt(255)), "‹†ff›", "‹05a9bf22›"}, + {"quoted string", Sprintf("%q", HashString("hello")), "‹†\"hello\"›", "‹5aa762ae›"}, + {"default int", Sprintf("%d", HashInt(123)), "‹†123›", "‹a665a459›"}, + {"float", Sprintf("%f", HashFloat(3.14)), "‹†3.140000›", "‹f0f4a726›"}, + {"float precision", Sprintf("%.2f", HashFloat(3.14159)), "‹†3.14›", "‹2efff126›"}, + } + + for _, tc := range testData { + t.Run(tc.name, func(t *testing.T) { + if tc.format != tc.expectedPre { + t.Errorf("pre-redaction: expected %q, got %q", tc.expectedPre, tc.format) + } + if redacted := tc.format.Redact(); redacted != tc.expectedPost { + t.Errorf("post-redaction: expected %q, got %q", tc.expectedPost, redacted) + } + }) + } +} + +func TestHashRedact_SafeWrapping(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("user=%s", Safe(HashString("alice"))) + expected := RedactableString("user=alice") + if s != expected { + t.Errorf("expected %q, got %q", expected, s) + } +} + +func TestHashRedact_UnsafeWrapping(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("user=%s", Unsafe(HashString("alice"))) + if expected := RedactableString("user=‹alice›"); s != expected { + t.Errorf("pre-redaction: expected %q, got %q", expected, s) + } + + if redacted := s.Redact(); redacted != RedactableString("user=‹×›") { + t.Errorf("post-redaction: expected %q, got %q", RedactableString("user=‹×›"), redacted) + } +} + +func TestHashRedact_AllHashTypes(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + testData := []struct { + name string + format RedactableString + expectedPre RedactableString + expectedPost RedactableString + }{ + {"HashString", Sprintf("%s", HashString("test")), "‹†test›", "‹9f86d081›"}, + {"HashInt", Sprintf("%d", HashInt(42)), "‹†42›", "‹73475cb4›"}, + {"HashUint", Sprintf("%d", HashUint(42)), "‹†42›", "‹73475cb4›"}, + {"HashFloat", Sprintf("%f", HashFloat(3.14)), "‹†3.140000›", "‹f0f4a726›"}, + {"HashRune", Sprintf("%c", HashRune('A')), "‹†A›", "‹559aead0›"}, + {"HashByte", Sprintf("%d", HashByte(0xFF)), "‹†255›", "‹9556b824›"}, + } + + for _, tc := range testData { + t.Run(tc.name, func(t *testing.T) { + if tc.format != tc.expectedPre { + t.Errorf("pre-redaction: expected %q, got %q", tc.expectedPre, tc.format) + } + if redacted := tc.format.Redact(); redacted != tc.expectedPost { + t.Errorf("post-redaction: expected %q, got %q", tc.expectedPost, redacted) + } + }) + } +} + +func TestHashRedact_RedactableBytes(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("user=%s", HashString("alice")) + redacted := s.ToBytes().Redact() + + expected := RedactableBytes("user=‹2bd806c9›") + if string(redacted) != string(expected) { + t.Errorf("expected %q, got %q", expected, redacted) + } + + if string(redacted) != string(s.Redact()) { + t.Errorf("string and bytes redaction differ: %q vs %q", s.Redact(), redacted) + } +} + +func TestHashRedact_StripMarkers(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("user=%s", HashString("alice")) + if stripped := s.StripMarkers(); stripped != "user=alice" { + t.Errorf("expected %q, got %q", "user=alice", stripped) + } +} + +// TestHashRedact_EnableAfterFormat verifies that hash markers written before +// EnableHashing are correctly hashed when Redact() is called after enabling. +func TestHashRedact_EnableAfterFormat(t *testing.T) { + defer DisableHashing() + + s := Sprintf("user=%s", HashString("alice")) + if expected := RedactableString("user=‹†alice›"); s != expected { + t.Errorf("pre-redaction: expected %q, got %q", expected, s) + } + + EnableHashing(nil) + if redacted := s.Redact(); redacted != RedactableString("user=‹2bd806c9›") { + t.Errorf("post-redaction: expected %q, got %q", RedactableString("user=‹2bd806c9›"), redacted) + } +} + +func TestHashRedact_MultipleHashMarkers(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("a=%s b=%s c=%s", HashString("x"), HashString("y"), HashString("z")) + if expected := RedactableString("a=‹†x› b=‹†y› c=‹†z›"); s != expected { + t.Errorf("pre-redaction: expected %q, got %q", expected, s) + } + + if redacted := s.Redact(); redacted != RedactableString("a=‹2d711642› b=‹a1fce436› c=‹594e519a›") { + t.Errorf("post-redaction: expected %q, got %q", RedactableString("a=‹2d711642› b=‹a1fce436› c=‹594e519a›"), redacted) + } +} + +func TestHashRedact_WithSalt(t *testing.T) { + defer DisableHashing() + + EnableHashing(nil) + unsalted := Sprintf("%s", HashString("alice")).Redact() + if expected := RedactableString("‹2bd806c9›"); unsalted != expected { + t.Errorf("unsalted: expected %q, got %q", expected, unsalted) + } + + EnableHashing([]byte("my-secret-salt")) + salted := Sprintf("%s", HashString("alice")).Redact() + if expected := RedactableString("‹cffebd45›"); salted != expected { + t.Errorf("salted: expected %q, got %q", expected, salted) + } +} + +// TestHashRedact_StructWithHashField exercises printValue +// where a struct contains a HashValue field alongside a regular unsafe field. +func TestHashRedact_StructWithHashField(t *testing.T) { + type User struct { + Name HashString + Age int + } + + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("%v", User{Name: "alice", Age: 30}) + if expected := RedactableString("{‹†alice› ‹30›}"); s != expected { + t.Errorf("pre-redaction: expected %q, got %q", expected, s) + } + + if redacted := s.Redact(); redacted != RedactableString("{‹2bd806c9› ‹×›}") { + t.Errorf("post-redaction: expected %q, got %q", RedactableString("{‹2bd806c9› ‹×›}"), redacted) + } +} + +// TestHashRedact_ReflectValue exercises the reflect.Value case in printArg, +// where a reflect.Value wrapping a HashValue is passed as an argument. +func TestHashRedact_ReflectValue(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + v := reflect.ValueOf(HashString("alice")) + s := Sprintf("%v", v) + if expected := RedactableString("‹†alice›"); s != expected { + t.Errorf("pre-redaction: expected %q, got %q", expected, s) + } + + if redacted := s.Redact(); redacted != RedactableString("‹2bd806c9›") { + t.Errorf("post-redaction: expected %q, got %q", RedactableString("‹2bd806c9›"), redacted) + } +} + +func TestHashRedact_Sprint(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprint(HashString("alice")) + if expected := RedactableString("‹†alice›"); s != expected { + t.Errorf("pre-redaction: expected %q, got %q", expected, s) + } + + if redacted := s.Redact(); redacted != RedactableString("‹2bd806c9›") { + t.Errorf("post-redaction: expected %q, got %q", RedactableString("‹2bd806c9›"), redacted) + } +} + +func TestHashRedact_HashBytes(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("%s", HashBytes([]byte("secret"))) + if expected := RedactableString("‹†secret›"); s != expected { + t.Errorf("pre-redaction: expected %q, got %q", expected, s) + } + + if redacted := s.Redact(); redacted != RedactableString("‹2bb80d53›") { + t.Errorf("post-redaction: expected %q, got %q", RedactableString("‹2bb80d53›"), redacted) + } +} + +func TestHashRedact_ToggleTransitions(t *testing.T) { + defer DisableHashing() + + s := Sprintf("user=%s", HashString("alice")) + + EnableHashing(nil) + if redacted := s.Redact(); redacted != RedactableString("user=‹2bd806c9›") { + t.Errorf("unsalted enabled: expected %q, got %q", RedactableString("user=‹2bd806c9›"), redacted) + } + + DisableHashing() + if redacted := s.Redact(); redacted != RedactableString("user=‹×›") { + t.Errorf("disabled: expected %q, got %q", RedactableString("user=‹×›"), redacted) + } + + EnableHashing([]byte("my-secret-salt")) + if redacted := s.Redact(); redacted != RedactableString("user=‹cffebd45›") { + t.Errorf("salted re-enabled: expected %q, got %q", RedactableString("user=‹cffebd45›"), redacted) + } +} + +func TestHashRedact_RedactableBytesDisabled(t *testing.T) { + DisableHashing() + + s := Sprintf("user=%s", HashString("alice")) + if redacted := s.ToBytes().Redact(); string(redacted) != string(RedactableBytes("user=‹×›")) { + t.Errorf("expected %q, got %q", RedactableBytes("user=‹×›"), redacted) + } +} + +// TestHashRedact_ConcurrentToggle exercises concurrent Enable/Disable/Redact +// calls to verify there are no races or panics under contention. +func TestHashRedact_ConcurrentToggle(t *testing.T) { + defer DisableHashing() + + s := Sprintf("user=%s action=%s", HashString("alice"), "login") + + var wg sync.WaitGroup + const goroutines = 8 + const iterations = 500 + + for g := 0; g < goroutines; g++ { + wg.Add(1) + go func(id int) { + defer wg.Done() + for i := 0; i < iterations; i++ { + switch id % 3 { + case 0: + EnableHashing(nil) + case 1: + EnableHashing([]byte("salt")) + case 2: + DisableHashing() + } + redacted := s.Redact() + // The raw value must never leak through redaction. + if strings.Contains(string(redacted), "alice") { + t.Errorf("raw value leaked: %q", redacted) + } + } + }(g) + } + wg.Wait() +} + +// TestHashRedact_AdjacentZones verifies that the buffer's zone-merging +// optimization does not combine a hash zone with an adjacent unsafe zone. +func TestHashRedact_AdjacentZones(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + tests := []struct { + name string + input RedactableString + expectedPre string + expectedPost string + }{ + { + "hash then unsafe (no separator)", + Sprintf("%s%s", HashString("alice"), "bob"), + "‹†alice›‹bob›", + "‹2bd806c9›‹×›", + }, + { + "unsafe then hash (no separator)", + Sprintf("%s%s", "bob", HashString("alice")), + "‹bob›‹†alice›", + "‹×›‹2bd806c9›", + }, + { + "hash then hash (no separator)", + Sprintf("%s%s", HashString("alice"), HashString("bob")), + "‹†alice›‹†bob›", + "‹2bd806c9›‹81b637d8›", + }, + { + "unsafe then unsafe (merge is ok)", + Sprintf("%s%s", "alice", "bob"), + "‹alicebob›", + "‹×›", + }, + { + "hash with space separator", + Sprintf("%s%s", "alice", HashString("bob")), + "‹alice›‹†bob›", + "‹×›‹81b637d8›", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if string(tc.input) != tc.expectedPre { + t.Errorf("pre-redaction:\n expected %q\n got %q", tc.expectedPre, tc.input) + } + if redacted := tc.input.Redact(); string(redacted) != tc.expectedPost { + t.Errorf("post-redaction:\n expected %q\n got %q", tc.expectedPost, redacted) + } + }) + } +} diff --git a/interfaces/interfaces.go b/interfaces/interfaces.go index bd13a94..a7ac8a9 100644 --- a/interfaces/interfaces.go +++ b/interfaces/interfaces.go @@ -158,6 +158,61 @@ type SafeValue interface { SafeValue() } +// HashValue is a marker interface to be implemented by types whose +// values should be hashed when redacted, instead of being replaced +// with the redaction marker. +// +// This allows correlation between log entries while maintaining privacy. +// The hash is computed during the Redact() call, not at log creation time. +// +// Types implementing this interface should alias base Go types. +// The hash is applied to the string representation of the value. +type HashValue interface { + HashValue() +} + +// HashString represents a string that should be hashed when redacted. +type HashString string + +// HashValue makes HashString a HashValue. +func (HashString) HashValue() {} + +// HashInt represents an integer that should be hashed when redacted. +type HashInt int64 + +// HashValue makes HashInt a HashValue. +func (HashInt) HashValue() {} + +// HashUint represents an unsigned integer that should be hashed when redacted. +type HashUint uint64 + +// HashValue makes HashUint a HashValue. +func (HashUint) HashValue() {} + +// HashFloat represents a floating-point value that should be hashed when redacted. +type HashFloat float64 + +// HashValue makes HashFloat a HashValue. +func (HashFloat) HashValue() {} + +// HashRune represents a rune that should be hashed when redacted. +type HashRune rune + +// HashValue makes HashRune a HashValue. +func (HashRune) HashValue() {} + +// HashByte represents a byte that should be hashed when redacted. +type HashByte byte + +// HashValue makes HashByte a HashValue. +func (HashByte) HashValue() {} + +// HashBytes represents a byte slice that should be hashed when redacted. +type HashBytes []byte + +// HashValue makes HashBytes a HashValue. +func (HashBytes) HashValue() {} + // SafeMessager is an alternative to SafeFormatter used in previous // versions of CockroachDB. // NB: this interface is obsolete. Use SafeFormatter instead. diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index 387e254..e205063 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -218,18 +218,28 @@ func (b *Buffer) startWrite() { } } -// endRedactable adds the closing redaction marker. +// startRedactable adds the opening redaction marker. func (b *Buffer) startRedactable() { if bytes.HasSuffix(b.buf, m.EndBytes) { - // Special case: remove a trailing closing marker and call it a day. - b.buf = b.buf[:len(b.buf)-m.EndLen] - } else { - p, ok := b.tryGrowByReslice(len(m.StartS)) - if !ok { - p = b.grow(len(m.StartS)) + // Optimization: merge adjacent unsafe zones by removing the + // trailing › instead of writing a new ‹. However, we must NOT + // merge into a hash zone (‹†...›), because that would cause the + // next value to be hashed together with the hash-marked value. + preceding := b.buf[:len(b.buf)-m.EndLen] + startIdx := bytes.LastIndex(preceding, m.StartBytes) + isHashZone := startIdx >= 0 && + bytes.HasPrefix(preceding[startIdx+m.StartLen:], m.HashPrefixBytes) + if !isHashZone { + b.buf = preceding + b.markerOpen = true + return } - copy(b.buf[p:], m.StartS) } + p, ok := b.tryGrowByReslice(len(m.StartS)) + if !ok { + p = b.grow(len(m.StartS)) + } + copy(b.buf[p:], m.StartS) b.markerOpen = true } diff --git a/internal/escape/escape.go b/internal/escape/escape.go index 1fc6706..9249073 100644 --- a/internal/escape/escape.go +++ b/internal/escape/escape.go @@ -37,6 +37,7 @@ func InternalEscapeBytes(b []byte, startLoc int, breakNewLines, strip bool) (res // accelerates the loops below. start, ls := m.StartBytes, len(m.StartS) end, le := m.EndBytes, len(m.EndS) + hashPrefix, lh := m.HashPrefixBytes, len(m.HashPrefixS) escape := m.EscapeMarkBytes // Trim final newlines/spaces, for convenience. @@ -116,6 +117,15 @@ func InternalEscapeBytes(b []byte, startLoc int, breakNewLines, strip bool) (res // Advance the counters by the length (in bytes) of the delimiter. k = i + le i += le - 1 /* -1 because we have i++ at the end of every iteration */ + } else if i+lh <= len(b) && bytes.Equal(b[i:i+lh], hashPrefix) { + if !copied { + res = make([]byte, 0, len(b)+len(escape)) + copied = true + } + res = append(res, b[k:i]...) + res = append(res, escape...) + k = i + lh + i += lh - 1 } } // If the string terminates with an invalid utf-8 sequence, we diff --git a/internal/escape/escape_test.go b/internal/escape/escape_test.go index 7314cf6..219debe 100644 --- a/internal/escape/escape_test.go +++ b/internal/escape/escape_test.go @@ -37,6 +37,9 @@ func TestInternalEscape(t *testing.T) { {[]byte("abc\n‹d\nef›\n \n\n "), len([]byte("abc")), true, true, "abc›\n‹?d›\n‹ef?"}, {[]byte("‹abc› ‹def›"), len([]byte("‹abc› ")), true, true, "‹abc› ?def?"}, {[]byte("abc‹\ndef"), len([]byte("abc‹")), true, true, "abc\n‹def"}, + {[]byte("†abc"), 0, false, false, "?abc"}, + {[]byte("‹†abc›"), 3, false, false, "‹?abc?"}, + {[]byte("hello†world"), 0, false, false, "hello?world"}, } for _, tc := range testCases { diff --git a/internal/markers/constants.go b/internal/markers/constants.go index f062f7e..1b8b854 100644 --- a/internal/markers/constants.go +++ b/internal/markers/constants.go @@ -27,6 +27,8 @@ const ( EscapeMark = '?' EscapeMarkS = string(EscapeMark) RedactedS = StartS + "×" + EndS + HashPrefix = '†' + HashPrefixS = string(HashPrefix) ) // Internal variables. @@ -35,6 +37,7 @@ var ( EndBytes = []byte(EndS) EscapeMarkBytes = []byte(EscapeMarkS) RedactedBytes = []byte(RedactedS) - ReStripSensitive = regexp.MustCompile(StartS + "[^" + StartS + EndS + "]*" + EndS) - ReStripMarkers = regexp.MustCompile("[" + StartS + EndS + "]") + HashPrefixBytes = []byte(HashPrefixS) + ReStripSensitive = regexp.MustCompile(StartS + HashPrefixS + "?" + "[^" + StartS + EndS + "]*" + EndS) + ReStripMarkers = regexp.MustCompile("[" + StartS + EndS + HashPrefixS + "]") ) diff --git a/internal/markers/hash.go b/internal/markers/hash.go new file mode 100644 index 0000000..fbbea96 --- /dev/null +++ b/internal/markers/hash.go @@ -0,0 +1,146 @@ +// Copyright 2026 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +package markers + +import ( + "crypto/hmac" + "crypto/sha256" + "encoding/hex" + "hash" + "sync" + "sync/atomic" +) + +/* + Hash function implementation notes: + + We use SHA-256 because it provides strong cryptographic properties with + negligible performance overhead on modern hardwares. + + When a salt is provided via EnableHashing(), we use HMAC-SHA256 which + provides additional security properties and domain separation. + + The hash output is truncated to 8 hex characters (32 bits) to keep log + output concise while still providing sufficient collision resistance for + typical logging workloads. + + Hasher instances are pooled via sync.Pool to avoid allocating a new + SHA-256 or HMAC struct on every call. The pool is created once in + EnableHashing() with the correct hasher type (SHA-256 or HMAC-SHA256) + baked in, so hash functions just Get/Reset/Put without checking salt. +*/ + +// defaultHashLength is the number of hex characters to use from the hash. +// 8 hex chars = 32 bits = ~4.3 billion unique values. +// This provides a good balance between collision resistance and output brevity. +const defaultHashLength = 8 + +// compile-time assertion to guarantee defaultHashLength doesn't exceed hex-encoded hash length. +var _ [sha256.Size*2 - defaultHashLength]byte + +// hasherState holds a reusable hash.Hash and a pre-allocated Sum buffer. +// sumBuf must live in the pool because Sum() is an interface method — +// the compiler can't prove it won't store a reference, because of hash being +// in pool, so a local [32]byte would escape to heap on every call. +// hexBuf is a stack-allocated local in the hash functions because +// hex.Encode is a concrete function call that doesn't cause escape. +type hasherState struct { + h hash.Hash + sumBuf [sha256.Size]byte // reusable buffer for h.Sum() output +} + +var hashConfig struct { + enabled atomic.Bool + pool atomic.Value +} + +// EnableHashing enables hash-based redaction with an optional salt. +// When salt is nil, hash markers use plain SHA-256. +// When salt is provided, hash markers use HMAC-SHA256 for better security. +// +// A sync.Pool of hasherState instances is created here with the correct +// hasher type, so hash functions just Get/Reset/Put without checking salt. +func EnableHashing(salt []byte) { + var pool *sync.Pool + if len(salt) > 0 { + // Copy so the pool closure doesn't capture a mutable slice. + saltCopy := make([]byte, len(salt)) + copy(saltCopy, salt) + pool = &sync.Pool{ + New: func() interface{} { + return &hasherState{h: hmac.New(sha256.New, saltCopy)} + }, + } + } else { + pool = &sync.Pool{ + New: func() interface{} { + return &hasherState{h: sha256.New()} + }, + } + } + + hashConfig.pool.Store(pool) + hashConfig.enabled.Store(true) +} + +// DisableHashing disables hash-based redaction. +// The pool and salt are intentionally left intact so that in-flight +// hashers (which captured the pool pointer) can finish safely. +func DisableHashing() { + hashConfig.enabled.Store(false) +} + +// IsHashingEnabled returns true if hash-based redaction is enabled. +func IsHashingEnabled() bool { + return hashConfig.enabled.Load() +} + +// hashString computes a truncated SHA-256 hash of the input string. +// Uses a pooled hasher instance. +// Must only be called when hashing is enabled (IsHashingEnabled() == true). +func hashString(value string) string { + p := hashConfig.pool.Load().(*sync.Pool) + state := p.Get().(*hasherState) + + var hexBuf [sha256.Size * 2]byte + state.h.Reset() + state.h.Write([]byte(value)) + sum := state.h.Sum(state.sumBuf[:0]) + hex.Encode(hexBuf[:], sum) + + result := string(hexBuf[:defaultHashLength]) + p.Put(state) + return result +} + +// hashBytes computes a truncated SHA-256 hash of the input byte slice. +// Uses a pooled hasher instance — no per-call hasher allocation. +// Must only be called when hashing is enabled (IsHashingEnabled() == true). +func hashBytes(value []byte) []byte { + p := hashConfig.pool.Load().(*sync.Pool) + state := p.Get().(*hasherState) + + var hexBuf [sha256.Size * 2]byte + + state.h.Reset() + state.h.Write(value) + sum := state.h.Sum(state.sumBuf[:0]) + hex.Encode(hexBuf[:], sum) + + result := make([]byte, defaultHashLength) + copy(result, hexBuf[:defaultHashLength]) + p.Put(state) + return result +} diff --git a/internal/markers/hash_test.go b/internal/markers/hash_test.go new file mode 100644 index 0000000..386836a --- /dev/null +++ b/internal/markers/hash_test.go @@ -0,0 +1,142 @@ +// Copyright 2026 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing +// permissions and limitations under the License. + +package markers + +import ( + "strings" + "testing" +) + +func TestHash(t *testing.T) { + defer DisableHashing() + + testCases := []struct { + name string + input string + salt []byte + expected string + }{ + { + name: "simple string", + input: "test", + salt: nil, + expected: "9f86d081", + }, + { + name: "empty string", + input: "", + salt: nil, + expected: "e3b0c442", + }, + { + name: "input exceeding hash length", + input: strings.Repeat("long-input-", 100), + salt: nil, + expected: "130ca5ec", + }, + { + name: "numeric string", + input: "12345", + salt: nil, + expected: "5994471a", + }, + { + name: "simple string with salt", + input: "test", + salt: []byte("my-salt"), + expected: "ac2fdbab", + }, + { + name: "empty string with salt", + input: "", + salt: []byte("my-salt"), + expected: "e5d13fde", + }, + } + + t.Run("string", func(t *testing.T) { + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + EnableHashing(tc.salt) + + resultString := hashString(tc.input) + if resultString != tc.expected { + t.Errorf("hashString(%q) = %q, expected %q", tc.input, resultString, tc.expected) + } + if len(resultString) != 8 { + t.Errorf("hashString(%q) returned %d characters, expected 8", tc.input, len(resultString)) + } + }) + } + }) + + t.Run("bytes", func(t *testing.T) { + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + EnableHashing(tc.salt) + resultBytes := hashBytes([]byte(tc.input)) + if string(resultBytes) != tc.expected { + t.Errorf("hashBytes(%q) = %q, expected %q", tc.input, resultBytes, tc.expected) + } + if len(resultBytes) != 8 { + t.Errorf("hashBytes(%q) returned %d bytes, expected 8", tc.input, len(resultBytes)) + } + }) + } + }) +} + +func TestHashDeterminism(t *testing.T) { + defer DisableHashing() + + EnableHashing(nil) + + input := "test-value" + input2 := "different-value" + + t.Run("hashString", func(t *testing.T) { + // Same input should always produce same output + hash1 := hashString(input) + hash2 := hashString(input) + + if hash1 != hash2 { + t.Errorf("hashString is not deterministic: hashString(%q) returned %q and %q", input, hash1, hash2) + } + + // Different inputs should produce different outputs + hash3 := hashString(input2) + + if hash1 == hash3 { + t.Errorf("Different inputs produced same hash: hashString(%q) = hashString(%q) = %q", input, input2, hash1) + } + }) + + t.Run("hashBytes", func(t *testing.T) { + // Same input should always produce same output + hash1 := hashBytes([]byte(input)) + hash2 := hashBytes([]byte(input)) + + if string(hash1) != string(hash2) { + t.Errorf("hashBytes is not deterministic: hashBytes(%q) returned %q and %q", input, hash1, hash2) + } + + // Different inputs should produce different outputs + hash3 := hashBytes([]byte(input2)) + + if string(hash1) == string(hash3) { + t.Errorf("Different inputs produced same hash: hashBytes(%q) = hashBytes(%q) = %q", input, input2, hash1) + } + }) +} diff --git a/internal/markers/markers.go b/internal/markers/markers.go index f6bfa10..a81efb2 100644 --- a/internal/markers/markers.go +++ b/internal/markers/markers.go @@ -14,7 +14,11 @@ package markers -import i "github.com/cockroachdb/redact/interfaces" +import ( + "bytes" + + i "github.com/cockroachdb/redact/interfaces" +) // RedactableString is a string that contains a mix of safe and unsafe // bits of data, but where it is known that unsafe bits are enclosed @@ -34,9 +38,22 @@ func (s RedactableString) StripMarkers() string { } // Redact replaces all occurrences of unsafe substrings by the -// “Redacted” marker, ‹×›. The result string is still safe. +// "Redacted" marker, ‹×›. Hash markers (‹†value›) are replaced +// with hashed values (‹hash›) if hashing is enabled, otherwise +// they are redacted like regular markers. The result string is still safe. func (s RedactableString) Redact() RedactableString { - return RedactableString(ReStripSensitive.ReplaceAllString(string(s), RedactedS)) + if !IsHashingEnabled() { + return RedactableString(ReStripSensitive.ReplaceAllString(string(s), RedactedS)) + } + result := ReStripSensitive.ReplaceAllStringFunc(string(s), func(match string) string { + if len(match) > len(StartS)+len(EndS) && + match[len(StartS):len(StartS)+len(HashPrefixS)] == HashPrefixS { + value := match[len(StartS)+len(HashPrefixS) : len(match)-len(EndS)] + return StartS + hashString(value) + EndS + } + return RedactedS + }) + return RedactableString(result) } // ToBytes converts the string to a byte slice. @@ -66,9 +83,27 @@ func (s RedactableBytes) StripMarkers() []byte { } // Redact replaces all occurrences of unsafe substrings by the -// “Redacted” marker, ‹×›. +// "Redacted" marker, ‹×›. Hash markers (‹†value›) are replaced +// with hashed values (‹hash›) if hashing is enabled, otherwise +// they are redacted like regular markers. func (s RedactableBytes) Redact() RedactableBytes { - return RedactableBytes(ReStripSensitive.ReplaceAll(s, RedactedBytes)) + if !IsHashingEnabled() { + return RedactableBytes(ReStripSensitive.ReplaceAll([]byte(s), RedactedBytes)) + } + result := ReStripSensitive.ReplaceAllFunc([]byte(s), func(match []byte) []byte { + if len(match) > len(StartBytes)+len(EndBytes) && + bytes.Equal(match[len(StartBytes):len(StartBytes)+len(HashPrefixS)], HashPrefixBytes) { + value := match[len(StartBytes)+len(HashPrefixS) : len(match)-len(EndBytes)] + hashed := hashBytes(value) + res := make([]byte, len(StartBytes)+len(hashed)+len(EndBytes)) + n := copy(res, StartBytes) + n += copy(res[n:], hashed) + copy(res[n:], EndBytes) + return res + } + return RedactedBytes + }) + return RedactableBytes(result) } // ToString converts the byte slice to a string. diff --git a/internal/rfmt/helpers.go b/internal/rfmt/helpers.go index 7868d1b..8aec053 100644 --- a/internal/rfmt/helpers.go +++ b/internal/rfmt/helpers.go @@ -81,6 +81,37 @@ func (r restorer) restore() { r.p.override = r.prevOverride } +type hashRestorer struct { + p *pp + prevMode b.OutputMode + prevOverride overrideMode + active bool +} + +func (r hashRestorer) restore() { + if r.active { + r.p.buf.SetMode(b.PreRedactable) + r.p.buf.WriteString(m.EndS) + } + r.p.buf.SetMode(r.prevMode) + r.p.override = r.prevOverride +} + +func (p *pp) startHashRedactable() hashRestorer { + prevMode := p.buf.GetMode() + prevOverride := p.override + active := false + if p.override == noOverride { + p.buf.SetMode(b.PreRedactable) + p.buf.WriteString(m.StartS) + p.buf.WriteString(m.HashPrefixS) + p.buf.SetMode(b.SafeEscaped) + p.override = overrideSafe + active = true + } + return hashRestorer{p, prevMode, prevOverride, active} +} + func (p *pp) handleSpecialValues( value reflect.Value, t reflect.Type, verb rune, depth int, ) (handled bool) { diff --git a/internal/rfmt/print.go b/internal/rfmt/print.go index 16ba3ce..8c747b5 100644 --- a/internal/rfmt/print.go +++ b/internal/rfmt/print.go @@ -67,7 +67,7 @@ type Formatter interface { } // Stringer is implemented by any value that has a String method, -// which defines the ``native'' format for that value. +// which defines the `native` format for that value. // The String method is used to print values passed as an operand // to any format that accepts a string or to an unformatted printer // such as Print. @@ -695,6 +695,7 @@ func (p *pp) handleMethods(verb rune) (handled bool) { return false } + func (p *pp) printArg(arg interface{}, verb rune) { t := reflect.TypeOf(arg) if safeTypeRegistry[t] { @@ -707,8 +708,10 @@ func (p *pp) printArg(arg interface{}, verb rune) { arg = arg.(w.UnsafeWrap).GetValue() } - if _, ok := arg.(i.SafeValue); ok { + if _, isSafe := arg.(i.SafeValue); isSafe { defer p.startSafeOverride().restore() + } else if _, ok := arg.(i.HashValue); ok { + defer p.startHashRedactable().restore() } p.arg = arg @@ -788,8 +791,10 @@ func (p *pp) printArg(arg interface{}, verb rune) { if f.CanInterface() { p.arg = f.Interface() - if _, ok := p.arg.(i.SafeValue); ok { + if _, isSafe := p.arg.(i.SafeValue); isSafe { defer p.startSafeOverride().restore() + } else if _, ok := p.arg.(i.HashValue); ok { + defer p.startHashRedactable().restore() } if p.handleMethods(verb) { return @@ -831,8 +836,10 @@ func (p *pp) printValue(value reflect.Value, verb rune, depth int) { if value.CanInterface() { p.arg = value.Interface() - if _, ok := p.arg.(i.SafeValue); ok { + if _, isSafe := p.arg.(i.SafeValue); isSafe { defer p.startSafeOverride().restore() + } else if _, ok := p.arg.(i.HashValue); ok { + defer p.startHashRedactable().restore() } if p.handleMethods(verb) { return