From 74df9091b1e7d9fda7800a05da9ebde27ed03665 Mon Sep 17 00:00:00 2001 From: Arjun Mahishi Date: Mon, 10 Nov 2025 13:07:56 +0530 Subject: [PATCH 1/4] introduce hash-based redaction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit introduces the ability to hash sensitive information instead of fully redacting it. This let's the user correlate entries while still preserving privacy. This is an opt-in feature and requires the user to explicitly mark values as "hashable". Changes: * EnableHashing(salt) is used to turn on hash-based redaction. If this is not called, redaction behaves as before (full redaction). * If both SafeValue and HashValue interfaces are implemented on a type, SafeValue takes precedence. * Uses SHA-1 / HMAC-SHA1 (if salt is provided) to hash the sensitive parts of the string. * A dagger (†) prefix is added within the redaction markers to denote that it should be hashed instead of fully redacted. * There are 2 ways to mark a value "hashable" (similar to redaction): 1) Implementing the HashValue interface directly on a type. 2) Wrapping a value with the HashString/HashBytes/Hash* functions * Hashes are calculated at formatting time and not during string creation. Just like redaction. **Example usage** * Implementing the HashValue interface: ```go type UserID string func (u UserID) HashValue() string {} // makes the type "hashable" fmt.Println(redact.Sprintf("%s", UserID("alice")).Redact()) // Output: ‹522b276a› ``` * Inline with helper functions: ```go fmt.Println(redact.Sprintf("%s", redact.HashString("alice")).Redact()) // Output: ‹522b276a› ``` --- api.go | 34 +++++++ interfaces/interfaces.go | 55 ++++++++++++ internal/markers/constants.go | 6 +- internal/markers/hash.go | 124 ++++++++++++++++++++++++++ internal/markers/hash_test.go | 162 ++++++++++++++++++++++++++++++++++ internal/markers/markers.go | 54 ++++++++++-- internal/rfmt/print.go | 32 ++++++- 7 files changed, 456 insertions(+), 11 deletions(-) create mode 100644 internal/markers/hash.go create mode 100644 internal/markers/hash_test.go diff --git a/api.go b/api.go index 2c8eba6..0021c0e 100644 --- a/api.go +++ b/api.go @@ -85,6 +85,32 @@ type SafeFloat = i.SafeFloat // SafeRune aliases rune. See the explanation for SafeString. type SafeRune = i.SafeRune +// HashValue is a marker interface to be implemented by types whose +// values should be hashed when redacted, instead of being replaced +// with the redaction marker. +type HashValue = i.HashValue + +// HashString represents a string that should be hashed when redacted. +type HashString = i.HashString + +// HashInt represents an integer that should be hashed when redacted. +type HashInt = i.HashInt + +// HashUint represents an unsigned integer that should be hashed when redacted. +type HashUint = i.HashUint + +// HashFloat represents a floating-point value that should be hashed when redacted. +type HashFloat = i.HashFloat + +// HashRune represents a rune that should be hashed when redacted. +type HashRune = i.HashRune + +// HashByte represents a byte that should be hashed when redacted. +type HashByte = i.HashByte + +// HashBytes represents a byte slice that should be hashed when redacted. +type HashBytes = i.HashBytes + // RedactableString is a string that contains a mix of safe and unsafe // bits of data, but where it is known that unsafe bits are enclosed // by redaction markers ‹ and ›, and occurrences of the markers @@ -173,3 +199,11 @@ type StringBuilder = builder.StringBuilder func RegisterSafeType(t reflect.Type) { ifmt.RegisterSafeType(t) } + +// EnableHashing enables hash-based redaction with an optional salt. +// Hash markers (‹†value›) will be replaced with hashes instead of being fully redacted. +// When salt is nil, hash markers use plain SHA1. +// When salt is provided, hash markers use HMAC-SHA1 for better security. +func EnableHashing(salt []byte) { + m.EnableHashing(salt) +} diff --git a/interfaces/interfaces.go b/interfaces/interfaces.go index bd13a94..a7ac8a9 100644 --- a/interfaces/interfaces.go +++ b/interfaces/interfaces.go @@ -158,6 +158,61 @@ type SafeValue interface { SafeValue() } +// HashValue is a marker interface to be implemented by types whose +// values should be hashed when redacted, instead of being replaced +// with the redaction marker. +// +// This allows correlation between log entries while maintaining privacy. +// The hash is computed during the Redact() call, not at log creation time. +// +// Types implementing this interface should alias base Go types. +// The hash is applied to the string representation of the value. +type HashValue interface { + HashValue() +} + +// HashString represents a string that should be hashed when redacted. +type HashString string + +// HashValue makes HashString a HashValue. +func (HashString) HashValue() {} + +// HashInt represents an integer that should be hashed when redacted. +type HashInt int64 + +// HashValue makes HashInt a HashValue. +func (HashInt) HashValue() {} + +// HashUint represents an unsigned integer that should be hashed when redacted. +type HashUint uint64 + +// HashValue makes HashUint a HashValue. +func (HashUint) HashValue() {} + +// HashFloat represents a floating-point value that should be hashed when redacted. +type HashFloat float64 + +// HashValue makes HashFloat a HashValue. +func (HashFloat) HashValue() {} + +// HashRune represents a rune that should be hashed when redacted. +type HashRune rune + +// HashValue makes HashRune a HashValue. +func (HashRune) HashValue() {} + +// HashByte represents a byte that should be hashed when redacted. +type HashByte byte + +// HashValue makes HashByte a HashValue. +func (HashByte) HashValue() {} + +// HashBytes represents a byte slice that should be hashed when redacted. +type HashBytes []byte + +// HashValue makes HashBytes a HashValue. +func (HashBytes) HashValue() {} + // SafeMessager is an alternative to SafeFormatter used in previous // versions of CockroachDB. // NB: this interface is obsolete. Use SafeFormatter instead. diff --git a/internal/markers/constants.go b/internal/markers/constants.go index f062f7e..e734fea 100644 --- a/internal/markers/constants.go +++ b/internal/markers/constants.go @@ -27,6 +27,8 @@ const ( EscapeMark = '?' EscapeMarkS = string(EscapeMark) RedactedS = StartS + "×" + EndS + HashPrefix = '†' + HashPrefixS = string(HashPrefix) ) // Internal variables. @@ -35,6 +37,6 @@ var ( EndBytes = []byte(EndS) EscapeMarkBytes = []byte(EscapeMarkS) RedactedBytes = []byte(RedactedS) - ReStripSensitive = regexp.MustCompile(StartS + "[^" + StartS + EndS + "]*" + EndS) - ReStripMarkers = regexp.MustCompile("[" + StartS + EndS + "]") + ReStripSensitive = regexp.MustCompile(StartS + HashPrefixS + "?" + "[^" + StartS + EndS + "]*" + EndS) + ReStripMarkers = regexp.MustCompile("[" + StartS + EndS + HashPrefixS + "]") ) diff --git a/internal/markers/hash.go b/internal/markers/hash.go new file mode 100644 index 0000000..272f9f6 --- /dev/null +++ b/internal/markers/hash.go @@ -0,0 +1,124 @@ +// Copyright 2025 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +package markers + +import ( + "crypto/hmac" + "crypto/sha1" + "encoding/hex" + "sync" +) + +/* + Hash function implementation notes: + + We use SHA-1 because it provides a good balance of performance and output size + for the log correlation use case. SHA-1 is fast and produces compact hashes, + making it well-suited for high-throughput logging scenarios. + + When a salt is provided via EnableHashing(), we use HMAC-SHA1 which provides + additional security properties and domain separation. + + The hash output is truncated to 8 hex characters (32 bits) to keep log output + concise while still providing sufficient collision resistance for typical logging + workloads. +*/ + +// defaultHashLength is the number of hex characters to use from the SHA-1 hash. +// 8 hex chars = 32 bits = ~4.3 billion unique values. +// This provides a good balance between collision resistance and output brevity. +// For typical logging scenarios with fewer unique sensitive values per analysis window, +// this collision risk should be acceptable. If not, we can make this configurable in the future. +const defaultHashLength = 8 + +var hashConfig = struct { + sync.RWMutex + enabled bool + salt []byte +}{ + enabled: false, + salt: nil, +} + +// EnableHashing enables hash-based redaction with an optional salt. +// When salt is nil, hash markers use plain SHA1. +// When salt is provided, hash markers use HMAC-SHA1 for better security. +func EnableHashing(salt []byte) { + hashConfig.Lock() + defer hashConfig.Unlock() + hashConfig.enabled = true + hashConfig.salt = salt +} + +// IsHashingEnabled returns true if hash-based redaction is enabled. +func IsHashingEnabled() bool { + hashConfig.RLock() + defer hashConfig.RUnlock() + return hashConfig.enabled +} + +// hashString computes a truncated hash of the input string. +// Uses HMAC-SHA1 if salt is set, otherwise plain SHA1. +// Must only be called when hashing is enabled (IsHashingEnabled() == true). +func hashString(value string) string { + hashConfig.RLock() + salt := hashConfig.salt + hashConfig.RUnlock() + + var h []byte + if len(salt) > 0 { + mac := hmac.New(sha1.New, salt) + mac.Write([]byte(value)) + h = mac.Sum(nil) + } else { + hasher := sha1.New() + hasher.Write([]byte(value)) + h = hasher.Sum(nil) + } + + fullHash := hex.EncodeToString(h) + if len(fullHash) > defaultHashLength { + return fullHash[:defaultHashLength] + } + return fullHash +} + +// hashBytes computes a truncated hash of the input byte slice. +// Uses HMAC-SHA1 if salt is set, otherwise plain SHA1. +// Must only be called when hashing is enabled (IsHashingEnabled() == true). +func hashBytes(value []byte) []byte { + hashConfig.RLock() + salt := hashConfig.salt + hashConfig.RUnlock() + + var h []byte + if len(salt) > 0 { + mac := hmac.New(sha1.New, salt) + mac.Write(value) + h = mac.Sum(nil) + } else { + hasher := sha1.New() + hasher.Write(value) + h = hasher.Sum(nil) + } + + fullHash := make([]byte, sha1.Size*2) + _ = hex.Encode(fullHash, h) + + if len(fullHash) > defaultHashLength { + return fullHash[:defaultHashLength] + } + return fullHash +} diff --git a/internal/markers/hash_test.go b/internal/markers/hash_test.go new file mode 100644 index 0000000..f192081 --- /dev/null +++ b/internal/markers/hash_test.go @@ -0,0 +1,162 @@ +// Copyright 2025 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing +// permissions and limitations under the License. + +package markers + +import ( + "strings" + "testing" +) + +func TestHash(t *testing.T) { + // Save original state and restore after test + hashConfig.RLock() + originalEnabled := hashConfig.enabled + originalSalt := hashConfig.salt + hashConfig.RUnlock() + defer func() { + hashConfig.Lock() + hashConfig.enabled = originalEnabled + hashConfig.salt = originalSalt + hashConfig.Unlock() + }() + + testCases := []struct { + name string + input string + salt []byte + expected string + }{ + { + name: "simple string", + input: "test", + salt: nil, + expected: "a94a8fe5", + }, + { + name: "empty string", + input: "", + salt: nil, + expected: "da39a3ee", + }, + { + name: "input exceeding hash length", + input: strings.Repeat("long-input-", 100), + salt: nil, + expected: "c375461f", + }, + { + name: "numeric string", + input: "12345", + salt: nil, + expected: "8cb2237d", + }, + { + name: "simple string with salt", + input: "test", + salt: []byte("my-salt"), + expected: "c48ce5fd", + }, + { + name: "empty string with salt", + input: "", + salt: []byte("my-salt"), + expected: "7b1829af", + }, + } + + t.Run("string", func(t *testing.T) { + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + EnableHashing(tc.salt) + + resultString := hashString(tc.input) + if resultString != tc.expected { + t.Errorf("hashString(%q) = %q, expected %q", tc.input, resultString, tc.expected) + } + if len(resultString) != 8 { + t.Errorf("hashString(%q) returned %d characters, expected 8", tc.input, len(resultString)) + } + }) + } + }) + + t.Run("bytes", func(t *testing.T) { + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + EnableHashing(tc.salt) + resultBytes := hashBytes([]byte(tc.input)) + if string(resultBytes) != tc.expected { + t.Errorf("hashBytes(%q) = %q, expected %q", tc.input, resultBytes, tc.expected) + } + if len(resultBytes) != 8 { + t.Errorf("hashBytes(%q) returned %d bytes, expected 8", tc.input, len(resultBytes)) + } + }) + } + }) +} + +func TestHashDeterminism(t *testing.T) { + // Save original state and restore after test + hashConfig.RLock() + originalEnabled := hashConfig.enabled + originalSalt := hashConfig.salt + hashConfig.RUnlock() + defer func() { + hashConfig.Lock() + hashConfig.enabled = originalEnabled + hashConfig.salt = originalSalt + hashConfig.Unlock() + }() + + EnableHashing(nil) + + input := "test-value" + input2 := "different-value" + + t.Run("hashString", func(t *testing.T) { + // Same input should always produce same output + hash1 := hashString(input) + hash2 := hashString(input) + + if hash1 != hash2 { + t.Errorf("hashString is not deterministic: hashString(%q) returned %q and %q", input, hash1, hash2) + } + + // Different inputs should produce different outputs + hash3 := hashString(input2) + + if hash1 == hash3 { + t.Errorf("Different inputs produced same hash: hashString(%q) = hashString(%q) = %q", input, input2, hash1) + } + }) + + t.Run("hashBytes", func(t *testing.T) { + // Same input should always produce same output + hash1 := hashBytes([]byte(input)) + hash2 := hashBytes([]byte(input)) + + if string(hash1) != string(hash2) { + t.Errorf("hashBytes is not deterministic: hashBytes(%q) returned %q and %q", input, hash1, hash2) + } + + // Different inputs should produce different outputs + hash3 := hashBytes([]byte(input2)) + + if string(hash1) == string(hash3) { + t.Errorf("Different inputs produced same hash: hashBytes(%q) = hashBytes(%q) = %q", input, input2, hash1) + } + }) +} diff --git a/internal/markers/markers.go b/internal/markers/markers.go index f6bfa10..6053b9c 100644 --- a/internal/markers/markers.go +++ b/internal/markers/markers.go @@ -14,7 +14,11 @@ package markers -import i "github.com/cockroachdb/redact/interfaces" +import ( + "bytes" + + i "github.com/cockroachdb/redact/interfaces" +) // RedactableString is a string that contains a mix of safe and unsafe // bits of data, but where it is known that unsafe bits are enclosed @@ -34,9 +38,26 @@ func (s RedactableString) StripMarkers() string { } // Redact replaces all occurrences of unsafe substrings by the -// “Redacted” marker, ‹×›. The result string is still safe. +// "Redacted" marker, ‹×›. Hash markers (‹†value›) are replaced +// with hashed values (‹hash›) if hashing is enabled, otherwise +// they are redacted like regular markers. The result string is still safe. func (s RedactableString) Redact() RedactableString { - return RedactableString(ReStripSensitive.ReplaceAllString(string(s), RedactedS)) + result := ReStripSensitive.ReplaceAllStringFunc(string(s), func(match string) string { + // Check if this is a hash marker by looking for † after ‹ + if len(match) > len(StartS)+len(EndS) && + match[len(StartS):len(StartS)+len(HashPrefixS)] == HashPrefixS { + // Only hash if hashing is enabled, otherwise redact + if IsHashingEnabled() { + // Extract value without ‹† and › and return its hash + value := match[len(StartS)+len(HashPrefixS) : len(match)-len(EndS)] + return StartS + hashString(value) + EndS + } + } + // Regular marker or disabled hashing - replace with × + return RedactedS + }) + + return RedactableString(result) } // ToBytes converts the string to a byte slice. @@ -66,9 +87,32 @@ func (s RedactableBytes) StripMarkers() []byte { } // Redact replaces all occurrences of unsafe substrings by the -// “Redacted” marker, ‹×›. +// "Redacted" marker, ‹×›. Hash markers (‹†value›) are replaced +// with hashed values (‹hash›) if hashing is enabled, otherwise +// they are redacted like regular markers. func (s RedactableBytes) Redact() RedactableBytes { - return RedactableBytes(ReStripSensitive.ReplaceAll(s, RedactedBytes)) + result := ReStripSensitive.ReplaceAllFunc([]byte(s), func(match []byte) []byte { + // Check if this is a hash marker by looking for † after ‹ + if len(match) > len(StartBytes)+len(EndBytes) && + bytes.Equal(match[len(StartBytes):len(StartBytes)+len(HashPrefixS)], []byte(HashPrefixS)) { + // Only hash if hashing is enabled, otherwise redact + if IsHashingEnabled() { + // Extract value without ‹† and › and return its hash + value := match[len(StartBytes)+len(HashPrefixS) : len(match)-len(EndBytes)] + hashed := hashBytes(value) + res := make([]byte, len(StartBytes)+len(hashed)+len(EndBytes)) + n := copy(res, StartBytes) + n += copy(res[n:], hashed) + copy(res[n:], EndBytes) + return res + } + } + + // Regular marker or disabled hashing - replace with × + return RedactedBytes + }) + + return RedactableBytes(result) } // ToString converts the byte slice to a string. diff --git a/internal/rfmt/print.go b/internal/rfmt/print.go index 16ba3ce..4c50eaf 100644 --- a/internal/rfmt/print.go +++ b/internal/rfmt/print.go @@ -67,7 +67,7 @@ type Formatter interface { } // Stringer is implemented by any value that has a String method, -// which defines the ``native'' format for that value. +// which defines the “native” format for that value. // The String method is used to print values passed as an operand // to any format that accepts a string or to an unformatted printer // such as Print. @@ -695,6 +695,24 @@ func (p *pp) handleMethods(verb rune) (handled bool) { return false } +// tryWriteHashMarker checks if arg implements HashValue and hashing is enabled. +// If so, writes hash markers around the value and returns true. +// Otherwise, returns false. +func (p *pp) tryWriteHashMarker(arg interface{}) bool { + if _, ok := arg.(i.HashValue); ok && m.IsHashingEnabled() { + // Mark the value for hashing during Redact(), but preserve the original value + // Use PreRedactable mode to prevent marker escaping + defer p.startPreRedactable().restore() + valueStr := origFmt.Sprint(arg) + p.buf.writeString(m.StartS) + p.buf.writeString(m.HashPrefixS) + p.buf.writeString(valueStr) + p.buf.writeString(m.EndS) + return true + } + return false +} + func (p *pp) printArg(arg interface{}, verb rune) { t := reflect.TypeOf(arg) if safeTypeRegistry[t] { @@ -707,8 +725,10 @@ func (p *pp) printArg(arg interface{}, verb rune) { arg = arg.(w.UnsafeWrap).GetValue() } - if _, ok := arg.(i.SafeValue); ok { + if _, isSafe := arg.(i.SafeValue); isSafe { defer p.startSafeOverride().restore() + } else if p.tryWriteHashMarker(arg) { + return } p.arg = arg @@ -788,8 +808,10 @@ func (p *pp) printArg(arg interface{}, verb rune) { if f.CanInterface() { p.arg = f.Interface() - if _, ok := p.arg.(i.SafeValue); ok { + if _, isSafe := p.arg.(i.SafeValue); isSafe { defer p.startSafeOverride().restore() + } else if p.tryWriteHashMarker(p.arg) { + return } if p.handleMethods(verb) { return @@ -831,8 +853,10 @@ func (p *pp) printValue(value reflect.Value, verb rune, depth int) { if value.CanInterface() { p.arg = value.Interface() - if _, ok := p.arg.(i.SafeValue); ok { + if _, isSafe := p.arg.(i.SafeValue); isSafe { defer p.startSafeOverride().restore() + } else if p.tryWriteHashMarker(p.arg) { + return } if p.handleMethods(verb) { return From be6d00c62e48c1cd4e5d0ce30b49726417c17954 Mon Sep 17 00:00:00 2001 From: Vishesh Bardia Date: Wed, 18 Feb 2026 22:41:12 +0530 Subject: [PATCH 2/4] replaced sha1 with sha256 and optimised heap allocs. --- api.go | 10 ++- bench_test.go | 34 +++++++ go.mod | 2 +- internal/markers/hash.go | 165 ++++++++++++++++++++-------------- internal/markers/hash_test.go | 36 ++------ 5 files changed, 147 insertions(+), 100 deletions(-) diff --git a/api.go b/api.go index 0021c0e..6272df7 100644 --- a/api.go +++ b/api.go @@ -202,8 +202,14 @@ func RegisterSafeType(t reflect.Type) { // EnableHashing enables hash-based redaction with an optional salt. // Hash markers (‹†value›) will be replaced with hashes instead of being fully redacted. -// When salt is nil, hash markers use plain SHA1. -// When salt is provided, hash markers use HMAC-SHA1 for better security. +// When salt is nil, hash markers use plain SHA-256. +// When salt is provided, hash markers use HMAC-SHA256 for better security. func EnableHashing(salt []byte) { m.EnableHashing(salt) } + +// DisableHashing disables hash-based redaction. +// Hash markers will be fully redacted instead of being replaced with hashes. +func DisableHashing() { + m.DisableHashing() +} diff --git a/bench_test.go b/bench_test.go index d9af5ec..e71fe55 100644 --- a/bench_test.go +++ b/bench_test.go @@ -24,3 +24,37 @@ func BenchmarkRedact(b *testing.B) { _ = Sprintf("hello %v %v %v", 123, x, Safe("world"), Unsafe("universe")) } } + +// BenchmarkRedactCall_PlainMarkers calls .Redact() on a string with only +// regular ‹...› markers (no hash markers). This is the baseline. +func BenchmarkRedactCall_RegularRedaction(b *testing.B) { + s := Sprintf("user=%s action=%s", "alice", Safe("login")) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = s.Redact() + } +} + +// BenchmarkRedactCall_HashMarkers calls .Redact() on a string with ‹†...› +// hash markers and hashing enabled. This exercises the SHA-256 path. +func BenchmarkRedactCall_HashEnabled(b *testing.B) { + EnableHashing(nil) + defer DisableHashing() + s := Sprintf("user=%s action=%s %s", HashString("alice"), SafeString("login")) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = s.Redact() + } +} + +// BenchmarkRedactCall_SingleHashWithSalt — 1 ‹†alice› marker, HMAC-SHA256. +func BenchmarkRedactCall_HashWithSalt(b *testing.B) { + EnableHashing([]byte("my-secret-salt")) + defer DisableHashing() + s := Sprintf("user=%s action=%s", HashString("alice"), Safe("login")) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = s.Redact() + } +} + diff --git a/go.mod b/go.mod index cadc968..b51f1b0 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ module github.com/cockroachdb/redact -go 1.14 +go 1.19 diff --git a/internal/markers/hash.go b/internal/markers/hash.go index 272f9f6..ea285c2 100644 --- a/internal/markers/hash.go +++ b/internal/markers/hash.go @@ -16,109 +16,136 @@ package markers import ( "crypto/hmac" - "crypto/sha1" + "crypto/sha256" "encoding/hex" + "hash" "sync" + "sync/atomic" ) /* Hash function implementation notes: - We use SHA-1 because it provides a good balance of performance and output size - for the log correlation use case. SHA-1 is fast and produces compact hashes, - making it well-suited for high-throughput logging scenarios. + We use SHA-256 because it provides strong cryptographic properties with + negligible performance overhead on modern hardwares. - When a salt is provided via EnableHashing(), we use HMAC-SHA1 which provides - additional security properties and domain separation. + When a salt is provided via EnableHashing(), we use HMAC-SHA256 which + provides additional security properties and domain separation. - The hash output is truncated to 8 hex characters (32 bits) to keep log output - concise while still providing sufficient collision resistance for typical logging - workloads. + The hash output is truncated to 8 hex characters (32 bits) to keep log + output concise while still providing sufficient collision resistance for + typical logging workloads. + + Hasher instances are pooled via sync.Pool to avoid allocating a new + SHA-256 or HMAC struct on every call. The pool is created once in + EnableHashing() with the correct hasher type (SHA-256 or HMAC-SHA256) + baked in, so hash functions just Get/Reset/Put without checking salt. */ -// defaultHashLength is the number of hex characters to use from the SHA-1 hash. +// defaultHashLength is the number of hex characters to use from the hash. // 8 hex chars = 32 bits = ~4.3 billion unique values. // This provides a good balance between collision resistance and output brevity. -// For typical logging scenarios with fewer unique sensitive values per analysis window, -// this collision risk should be acceptable. If not, we can make this configurable in the future. const defaultHashLength = 8 -var hashConfig = struct { - sync.RWMutex - enabled bool +// compile-time assertion to guarantee defaultHashLength doesn't exceed hex-encoded hash length. +var _ [sha256.Size*2 - defaultHashLength]byte + +// hasherState holds a reusable hash.Hash and a pre-allocated Sum buffer. +// sumBuf must live in the pool because Sum() is an interface method — +// the compiler can't prove it won't store a reference, because of hash being +// in pool, so a local [32]byte would escape to heap on every call. +// hexBuf is a stack-allocated local in the hash functions because +// hex.Encode is a concrete function call that doesn't cause escape. +type hasherState struct { + h hash.Hash + sumBuf [sha256.Size]byte // reusable buffer for h.Sum() output +} + +var hashConfig struct { + enabled atomic.Bool salt []byte -}{ - enabled: false, - salt: nil, + pool *sync.Pool } // EnableHashing enables hash-based redaction with an optional salt. -// When salt is nil, hash markers use plain SHA1. -// When salt is provided, hash markers use HMAC-SHA1 for better security. +// When salt is nil, hash markers use plain SHA-256. +// When salt is provided, hash markers use HMAC-SHA256 for better security. +// +// A sync.Pool of hasherState instances is created here with the correct +// hasher type, so hash functions just Get/Reset/Put without checking salt. func EnableHashing(salt []byte) { - hashConfig.Lock() - defer hashConfig.Unlock() - hashConfig.enabled = true + var pool *sync.Pool + if len(salt) > 0 { + // Copy so the pool closure doesn't capture a mutable slice. + saltCopy := make([]byte, len(salt)) + copy(saltCopy, salt) + pool = &sync.Pool{ + New: func() interface{} { + return &hasherState{h: hmac.New(sha256.New, saltCopy)} + }, + } + } else { + pool = &sync.Pool{ + New: func() interface{} { + return &hasherState{h: sha256.New()} + }, + } + } + + // Write data before flipping the gate. + // The atomic store on enabled acts as a release barrier — any goroutine + // that reads enabled==true via atomic load is guaranteed to see pool and salt. hashConfig.salt = salt + hashConfig.pool = pool + hashConfig.enabled.Store(true) +} + +// DisableHashing disables hash-based redaction. +// The pool and salt are intentionally left intact so that in-flight +// hashers (which captured the pool pointer) can finish safely. +func DisableHashing() { + hashConfig.enabled.Store(false) } // IsHashingEnabled returns true if hash-based redaction is enabled. func IsHashingEnabled() bool { - hashConfig.RLock() - defer hashConfig.RUnlock() - return hashConfig.enabled + return hashConfig.enabled.Load() } -// hashString computes a truncated hash of the input string. -// Uses HMAC-SHA1 if salt is set, otherwise plain SHA1. +// hashString computes a truncated SHA-256 hash of the input string. +// Uses a pooled hasher instance. // Must only be called when hashing is enabled (IsHashingEnabled() == true). func hashString(value string) string { - hashConfig.RLock() - salt := hashConfig.salt - hashConfig.RUnlock() - - var h []byte - if len(salt) > 0 { - mac := hmac.New(sha1.New, salt) - mac.Write([]byte(value)) - h = mac.Sum(nil) - } else { - hasher := sha1.New() - hasher.Write([]byte(value)) - h = hasher.Sum(nil) - } - - fullHash := hex.EncodeToString(h) - if len(fullHash) > defaultHashLength { - return fullHash[:defaultHashLength] - } - return fullHash + p := hashConfig.pool + state := p.Get().(*hasherState) + + var hexBuf [sha256.Size * 2]byte + state.h.Reset() + state.h.Write([]byte(value)) + sum := state.h.Sum(state.sumBuf[:0]) + hex.Encode(hexBuf[:], sum) + + result := string(hexBuf[:defaultHashLength]) + p.Put(state) + return result } -// hashBytes computes a truncated hash of the input byte slice. -// Uses HMAC-SHA1 if salt is set, otherwise plain SHA1. +// hashBytes computes a truncated SHA-256 hash of the input byte slice. +// Uses a pooled hasher instance — no per-call hasher allocation. // Must only be called when hashing is enabled (IsHashingEnabled() == true). func hashBytes(value []byte) []byte { - hashConfig.RLock() - salt := hashConfig.salt - hashConfig.RUnlock() + p := hashConfig.pool + state := p.Get().(*hasherState) - var h []byte - if len(salt) > 0 { - mac := hmac.New(sha1.New, salt) - mac.Write(value) - h = mac.Sum(nil) - } else { - hasher := sha1.New() - hasher.Write(value) - h = hasher.Sum(nil) - } + var hexBuf [sha256.Size * 2]byte - fullHash := make([]byte, sha1.Size*2) - _ = hex.Encode(fullHash, h) + state.h.Reset() + state.h.Write(value) + sum := state.h.Sum(state.sumBuf[:0]) + hex.Encode(hexBuf[:], sum) - if len(fullHash) > defaultHashLength { - return fullHash[:defaultHashLength] - } - return fullHash + result := make([]byte, defaultHashLength) + copy(result, hexBuf[:defaultHashLength]) + p.Put(state) + return result } diff --git a/internal/markers/hash_test.go b/internal/markers/hash_test.go index f192081..b5ac121 100644 --- a/internal/markers/hash_test.go +++ b/internal/markers/hash_test.go @@ -20,17 +20,7 @@ import ( ) func TestHash(t *testing.T) { - // Save original state and restore after test - hashConfig.RLock() - originalEnabled := hashConfig.enabled - originalSalt := hashConfig.salt - hashConfig.RUnlock() - defer func() { - hashConfig.Lock() - hashConfig.enabled = originalEnabled - hashConfig.salt = originalSalt - hashConfig.Unlock() - }() + defer DisableHashing() testCases := []struct { name string @@ -42,37 +32,37 @@ func TestHash(t *testing.T) { name: "simple string", input: "test", salt: nil, - expected: "a94a8fe5", + expected: "9f86d081", }, { name: "empty string", input: "", salt: nil, - expected: "da39a3ee", + expected: "e3b0c442", }, { name: "input exceeding hash length", input: strings.Repeat("long-input-", 100), salt: nil, - expected: "c375461f", + expected: "130ca5ec", }, { name: "numeric string", input: "12345", salt: nil, - expected: "8cb2237d", + expected: "5994471a", }, { name: "simple string with salt", input: "test", salt: []byte("my-salt"), - expected: "c48ce5fd", + expected: "ac2fdbab", }, { name: "empty string with salt", input: "", salt: []byte("my-salt"), - expected: "7b1829af", + expected: "e5d13fde", }, } @@ -109,17 +99,7 @@ func TestHash(t *testing.T) { } func TestHashDeterminism(t *testing.T) { - // Save original state and restore after test - hashConfig.RLock() - originalEnabled := hashConfig.enabled - originalSalt := hashConfig.salt - hashConfig.RUnlock() - defer func() { - hashConfig.Lock() - hashConfig.enabled = originalEnabled - hashConfig.salt = originalSalt - hashConfig.Unlock() - }() + defer DisableHashing() EnableHashing(nil) From 576761f24d9769fb833fe12e3f7b52f7511fa451 Mon Sep 17 00:00:00 2001 From: Vishesh Bardia Date: Fri, 20 Feb 2026 21:09:39 +0530 Subject: [PATCH 3/4] updated pool type and optimised print and added e2e tests --- .github/workflows/ci.yaml | 10 +- bench_test.go | 2 +- hash_redact_test.go | 330 ++++++++++++++++++++++++++++++++++++ internal/markers/hash.go | 15 +- internal/markers/markers.go | 39 ++--- internal/rfmt/helpers.go | 31 ++++ internal/rfmt/print.go | 31 +--- 7 files changed, 394 insertions(+), 64 deletions(-) create mode 100644 hash_redact_test.go diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index fe38175..1edd815 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,11 +13,11 @@ jobs: strategy: matrix: go: - - "1.13" - - "1.14" - - "1.15" - - "1.16" - - "1.17" + - "1.19" + - "1.20" + - "1.21" + - "1.22" + - "1.23" steps: - uses: actions/checkout@v2 diff --git a/bench_test.go b/bench_test.go index e71fe55..ebf8a0d 100644 --- a/bench_test.go +++ b/bench_test.go @@ -40,7 +40,7 @@ func BenchmarkRedactCall_RegularRedaction(b *testing.B) { func BenchmarkRedactCall_HashEnabled(b *testing.B) { EnableHashing(nil) defer DisableHashing() - s := Sprintf("user=%s action=%s %s", HashString("alice"), SafeString("login")) + s := Sprintf("user=%s action=%s", HashString("alice"), SafeString("login")) b.ResetTimer() for i := 0; i < b.N; i++ { _ = s.Redact() diff --git a/hash_redact_test.go b/hash_redact_test.go new file mode 100644 index 0000000..f7e3293 --- /dev/null +++ b/hash_redact_test.go @@ -0,0 +1,330 @@ +// Copyright 2026 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +package redact + +import ( + "reflect" + "testing" +) + +// TestHashRedact_EndToEnd tests the full Sprintf → Redact pipeline for hash types. +func TestHashRedact_EndToEnd(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("user=%s", HashString("alice")) + if expected := RedactableString("user=‹†alice›"); s != expected { + t.Errorf("expected %q, got %q", expected, s) + } + + redacted := s.Redact() + if expected := RedactableString("user=‹2bd806c9›"); redacted != expected { + t.Errorf("expected %q, got %q", expected, redacted) + } +} + +// TestHashRedact_DisabledHashing verifies hash markers are fully redacted when hashing is disabled. +func TestHashRedact_DisabledHashing(t *testing.T) { + s := Sprintf("user=%s", HashString("alice")) + + redacted := s.Redact() + expected := RedactableString("user=‹×›") + if redacted != expected { + t.Errorf("expected %q, got %q", expected, redacted) + } +} + +// TestHashRedact_MixedMarkers verifies that regular and hash markers in the same string +// are handled correctly: regular markers become ‹×›, hash markers become ‹hash›. +func TestHashRedact_MixedMarkers(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("user=%s action=%s", HashString("alice"), "login") + redacted := s.Redact() + + expected := RedactableString("user=‹2bd806c9› action=‹×›") + if redacted != expected { + t.Errorf("expected %q, got %q", expected, redacted) + } +} + +// TestHashRedact_FormatVerbs verifies that format verbs are respected for hash types. +func TestHashRedact_FormatVerbs(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + testData := []struct { + name string + format RedactableString + expectedPre RedactableString + expectedPost RedactableString + }{ + {"padded int", Sprintf("%05d", HashInt(42)), "‹†00042›", "‹be9c43d5›"}, + {"hex int", Sprintf("%x", HashInt(255)), "‹†ff›", "‹05a9bf22›"}, + {"quoted string", Sprintf("%q", HashString("hello")), "‹†\"hello\"›", "‹5aa762ae›"}, + {"default int", Sprintf("%d", HashInt(123)), "‹†123›", "‹a665a459›"}, + {"float", Sprintf("%f", HashFloat(3.14)), "‹†3.140000›", "‹f0f4a726›"}, + {"float precision", Sprintf("%.2f", HashFloat(3.14159)), "‹†3.14›", "‹2efff126›"}, + } + + for _, tc := range testData { + t.Run(tc.name, func(t *testing.T) { + if tc.format != tc.expectedPre { + t.Errorf("pre-redaction: expected %q, got %q", tc.expectedPre, tc.format) + } + if redacted := tc.format.Redact(); redacted != tc.expectedPost { + t.Errorf("post-redaction: expected %q, got %q", tc.expectedPost, redacted) + } + }) + } +} + +// TestHashRedact_SafeWrapping verifies that Safe() takes priority over HashValue. +func TestHashRedact_SafeWrapping(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("user=%s", Safe(HashString("alice"))) + expected := RedactableString("user=alice") + if s != expected { + t.Errorf("expected %q, got %q", expected, s) + } +} + +// TestHashRedact_UnsafeWrapping verifies that Unsafe() takes priority over HashValue. +func TestHashRedact_UnsafeWrapping(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("user=%s", Unsafe(HashString("alice"))) + if expected := RedactableString("user=‹alice›"); s != expected { + t.Errorf("pre-redaction: expected %q, got %q", expected, s) + } + + if redacted := s.Redact(); redacted != RedactableString("user=‹×›") { + t.Errorf("post-redaction: expected %q, got %q", RedactableString("user=‹×›"), redacted) + } +} + +// TestHashRedact_AllHashTypes verifies all concrete Hash* types work end-to-end. +func TestHashRedact_AllHashTypes(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + testData := []struct { + name string + format RedactableString + expectedPre RedactableString + expectedPost RedactableString + }{ + {"HashString", Sprintf("%s", HashString("test")), "‹†test›", "‹9f86d081›"}, + {"HashInt", Sprintf("%d", HashInt(42)), "‹†42›", "‹73475cb4›"}, + {"HashUint", Sprintf("%d", HashUint(42)), "‹†42›", "‹73475cb4›"}, + {"HashFloat", Sprintf("%f", HashFloat(3.14)), "‹†3.140000›", "‹f0f4a726›"}, + {"HashRune", Sprintf("%c", HashRune('A')), "‹†A›", "‹559aead0›"}, + {"HashByte", Sprintf("%d", HashByte(0xFF)), "‹†255›", "‹9556b824›"}, + } + + for _, tc := range testData { + t.Run(tc.name, func(t *testing.T) { + if tc.format != tc.expectedPre { + t.Errorf("pre-redaction: expected %q, got %q", tc.expectedPre, tc.format) + } + if redacted := tc.format.Redact(); redacted != tc.expectedPost { + t.Errorf("post-redaction: expected %q, got %q", tc.expectedPost, redacted) + } + }) + } +} + +// TestHashRedact_RedactableBytes verifies the RedactableBytes.Redact() hash path. +func TestHashRedact_RedactableBytes(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("user=%s", HashString("alice")) + redacted := s.ToBytes().Redact() + + expected := RedactableBytes("user=‹2bd806c9›") + if string(redacted) != string(expected) { + t.Errorf("expected %q, got %q", expected, redacted) + } + + // String and bytes redaction should produce the same result. + if string(redacted) != string(s.Redact()) { + t.Errorf("string and bytes redaction differ: %q vs %q", s.Redact(), redacted) + } +} + +// TestHashRedact_StripMarkers verifies StripMarkers on hash-marked strings. +func TestHashRedact_StripMarkers(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("user=%s", HashString("alice")) + if stripped := s.StripMarkers(); stripped != "user=alice" { + t.Errorf("expected %q, got %q", "user=alice", stripped) + } +} + +// TestHashRedact_EnableAfterFormat verifies that hash markers written before +// EnableHashing are correctly hashed when Redact() is called after enabling. +func TestHashRedact_EnableAfterFormat(t *testing.T) { + defer DisableHashing() + + s := Sprintf("user=%s", HashString("alice")) + if expected := RedactableString("user=‹†alice›"); s != expected { + t.Errorf("pre-redaction: expected %q, got %q", expected, s) + } + + EnableHashing(nil) + if redacted := s.Redact(); redacted != RedactableString("user=‹2bd806c9›") { + t.Errorf("post-redaction: expected %q, got %q", RedactableString("user=‹2bd806c9›"), redacted) + } +} + +// TestHashRedact_MultipleHashMarkers verifies multiple hash markers in one string. +func TestHashRedact_MultipleHashMarkers(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("a=%s b=%s c=%s", HashString("x"), HashString("y"), HashString("z")) + if expected := RedactableString("a=‹†x› b=‹†y› c=‹†z›"); s != expected { + t.Errorf("pre-redaction: expected %q, got %q", expected, s) + } + + if redacted := s.Redact(); redacted != RedactableString("a=‹2d711642› b=‹a1fce436› c=‹594e519a›") { + t.Errorf("post-redaction: expected %q, got %q", RedactableString("a=‹2d711642› b=‹a1fce436› c=‹594e519a›"), redacted) + } +} + +// TestHashRedact_WithSalt verifies that salted hashing produces different output than unsalted. +func TestHashRedact_WithSalt(t *testing.T) { + defer DisableHashing() + + EnableHashing(nil) + unsalted := Sprintf("%s", HashString("alice")).Redact() + if expected := RedactableString("‹2bd806c9›"); unsalted != expected { + t.Errorf("unsalted: expected %q, got %q", expected, unsalted) + } + + EnableHashing([]byte("my-secret-salt")) + salted := Sprintf("%s", HashString("alice")).Redact() + if expected := RedactableString("‹cffebd45›"); salted != expected { + t.Errorf("salted: expected %q, got %q", expected, salted) + } +} + +// TestHashRedact_StructWithHashField exercises printValue +// where a struct contains a HashValue field alongside a regular unsafe field. +func TestHashRedact_StructWithHashField(t *testing.T) { + type User struct { + Name HashString + Age int + } + + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("%v", User{Name: "alice", Age: 30}) + if expected := RedactableString("{‹†alice› ‹30›}"); s != expected { + t.Errorf("pre-redaction: expected %q, got %q", expected, s) + } + + if redacted := s.Redact(); redacted != RedactableString("{‹2bd806c9› ‹×›}") { + t.Errorf("post-redaction: expected %q, got %q", RedactableString("{‹2bd806c9› ‹×›}"), redacted) + } +} + +// TestHashRedact_ReflectValue exercises the reflect.Value case in printArg, +// where a reflect.Value wrapping a HashValue is passed as an argument. +func TestHashRedact_ReflectValue(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + v := reflect.ValueOf(HashString("alice")) + s := Sprintf("%v", v) + if expected := RedactableString("‹†alice›"); s != expected { + t.Errorf("pre-redaction: expected %q, got %q", expected, s) + } + + if redacted := s.Redact(); redacted != RedactableString("‹2bd806c9›") { + t.Errorf("post-redaction: expected %q, got %q", RedactableString("‹2bd806c9›"), redacted) + } +} + +// TestHashRedact_Sprint verifies Sprint with HashValue. +func TestHashRedact_Sprint(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprint(HashString("alice")) + if expected := RedactableString("‹†alice›"); s != expected { + t.Errorf("pre-redaction: expected %q, got %q", expected, s) + } + + if redacted := s.Redact(); redacted != RedactableString("‹2bd806c9›") { + t.Errorf("post-redaction: expected %q, got %q", RedactableString("‹2bd806c9›"), redacted) + } +} + +// TestHashRedact_HashBytes verifies HashBytes ([]byte type) end-to-end.`` +func TestHashRedact_HashBytes(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + s := Sprintf("%s", HashBytes([]byte("secret"))) + if expected := RedactableString("‹†secret›"); s != expected { + t.Errorf("pre-redaction: expected %q, got %q", expected, s) + } + + if redacted := s.Redact(); redacted != RedactableString("‹2bb80d53›") { + t.Errorf("post-redaction: expected %q, got %q", RedactableString("‹2bb80d53›"), redacted) + } +} + +// TestHashRedact_ToggleTransitions verifies Enable -> Disable -> Enable transitions. +func TestHashRedact_ToggleTransitions(t *testing.T) { + defer DisableHashing() + + s := Sprintf("user=%s", HashString("alice")) + + EnableHashing(nil) + if redacted := s.Redact(); redacted != RedactableString("user=‹2bd806c9›") { + t.Errorf("unsalted enabled: expected %q, got %q", RedactableString("user=‹2bd806c9›"), redacted) + } + + DisableHashing() + if redacted := s.Redact(); redacted != RedactableString("user=‹×›") { + t.Errorf("disabled: expected %q, got %q", RedactableString("user=‹×›"), redacted) + } + + EnableHashing([]byte("my-secret-salt")) + if redacted := s.Redact(); redacted != RedactableString("user=‹cffebd45›") { + t.Errorf("salted re-enabled: expected %q, got %q", RedactableString("user=‹cffebd45›"), redacted) + } +} + +// TestHashRedact_RedactableBytesDisabled verifies bytes-path behavior when hashing is disabled. +func TestHashRedact_RedactableBytesDisabled(t *testing.T) { + defer DisableHashing() + DisableHashing() + + s := Sprintf("user=%s", HashString("alice")) + if redacted := s.ToBytes().Redact(); string(redacted) != string(RedactableBytes("user=‹×›")) { + t.Errorf("expected %q, got %q", RedactableBytes("user=‹×›"), redacted) + } +} diff --git a/internal/markers/hash.go b/internal/markers/hash.go index ea285c2..fbbea96 100644 --- a/internal/markers/hash.go +++ b/internal/markers/hash.go @@ -1,4 +1,4 @@ -// Copyright 2025 The Cockroach Authors. +// Copyright 2026 The Cockroach Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -63,8 +63,7 @@ type hasherState struct { var hashConfig struct { enabled atomic.Bool - salt []byte - pool *sync.Pool + pool atomic.Value } // EnableHashing enables hash-based redaction with an optional salt. @@ -92,11 +91,7 @@ func EnableHashing(salt []byte) { } } - // Write data before flipping the gate. - // The atomic store on enabled acts as a release barrier — any goroutine - // that reads enabled==true via atomic load is guaranteed to see pool and salt. - hashConfig.salt = salt - hashConfig.pool = pool + hashConfig.pool.Store(pool) hashConfig.enabled.Store(true) } @@ -116,7 +111,7 @@ func IsHashingEnabled() bool { // Uses a pooled hasher instance. // Must only be called when hashing is enabled (IsHashingEnabled() == true). func hashString(value string) string { - p := hashConfig.pool + p := hashConfig.pool.Load().(*sync.Pool) state := p.Get().(*hasherState) var hexBuf [sha256.Size * 2]byte @@ -134,7 +129,7 @@ func hashString(value string) string { // Uses a pooled hasher instance — no per-call hasher allocation. // Must only be called when hashing is enabled (IsHashingEnabled() == true). func hashBytes(value []byte) []byte { - p := hashConfig.pool + p := hashConfig.pool.Load().(*sync.Pool) state := p.Get().(*hasherState) var hexBuf [sha256.Size * 2]byte diff --git a/internal/markers/markers.go b/internal/markers/markers.go index 6053b9c..3d52dce 100644 --- a/internal/markers/markers.go +++ b/internal/markers/markers.go @@ -42,21 +42,17 @@ func (s RedactableString) StripMarkers() string { // with hashed values (‹hash›) if hashing is enabled, otherwise // they are redacted like regular markers. The result string is still safe. func (s RedactableString) Redact() RedactableString { + if !IsHashingEnabled() { + return RedactableString(ReStripSensitive.ReplaceAllString(string(s), RedactedS)) + } result := ReStripSensitive.ReplaceAllStringFunc(string(s), func(match string) string { - // Check if this is a hash marker by looking for † after ‹ if len(match) > len(StartS)+len(EndS) && match[len(StartS):len(StartS)+len(HashPrefixS)] == HashPrefixS { - // Only hash if hashing is enabled, otherwise redact - if IsHashingEnabled() { - // Extract value without ‹† and › and return its hash - value := match[len(StartS)+len(HashPrefixS) : len(match)-len(EndS)] - return StartS + hashString(value) + EndS - } + value := match[len(StartS)+len(HashPrefixS) : len(match)-len(EndS)] + return StartS + hashString(value) + EndS } - // Regular marker or disabled hashing - replace with × return RedactedS }) - return RedactableString(result) } @@ -91,27 +87,22 @@ func (s RedactableBytes) StripMarkers() []byte { // with hashed values (‹hash›) if hashing is enabled, otherwise // they are redacted like regular markers. func (s RedactableBytes) Redact() RedactableBytes { + if !IsHashingEnabled() { + return RedactableBytes(ReStripSensitive.ReplaceAll([]byte(s), RedactedBytes)) + } result := ReStripSensitive.ReplaceAllFunc([]byte(s), func(match []byte) []byte { - // Check if this is a hash marker by looking for † after ‹ if len(match) > len(StartBytes)+len(EndBytes) && bytes.Equal(match[len(StartBytes):len(StartBytes)+len(HashPrefixS)], []byte(HashPrefixS)) { - // Only hash if hashing is enabled, otherwise redact - if IsHashingEnabled() { - // Extract value without ‹† and › and return its hash - value := match[len(StartBytes)+len(HashPrefixS) : len(match)-len(EndBytes)] - hashed := hashBytes(value) - res := make([]byte, len(StartBytes)+len(hashed)+len(EndBytes)) - n := copy(res, StartBytes) - n += copy(res[n:], hashed) - copy(res[n:], EndBytes) - return res - } + value := match[len(StartBytes)+len(HashPrefixS) : len(match)-len(EndBytes)] + hashed := hashBytes(value) + res := make([]byte, len(StartBytes)+len(hashed)+len(EndBytes)) + n := copy(res, StartBytes) + n += copy(res[n:], hashed) + copy(res[n:], EndBytes) + return res } - - // Regular marker or disabled hashing - replace with × return RedactedBytes }) - return RedactableBytes(result) } diff --git a/internal/rfmt/helpers.go b/internal/rfmt/helpers.go index 7868d1b..8aec053 100644 --- a/internal/rfmt/helpers.go +++ b/internal/rfmt/helpers.go @@ -81,6 +81,37 @@ func (r restorer) restore() { r.p.override = r.prevOverride } +type hashRestorer struct { + p *pp + prevMode b.OutputMode + prevOverride overrideMode + active bool +} + +func (r hashRestorer) restore() { + if r.active { + r.p.buf.SetMode(b.PreRedactable) + r.p.buf.WriteString(m.EndS) + } + r.p.buf.SetMode(r.prevMode) + r.p.override = r.prevOverride +} + +func (p *pp) startHashRedactable() hashRestorer { + prevMode := p.buf.GetMode() + prevOverride := p.override + active := false + if p.override == noOverride { + p.buf.SetMode(b.PreRedactable) + p.buf.WriteString(m.StartS) + p.buf.WriteString(m.HashPrefixS) + p.buf.SetMode(b.SafeEscaped) + p.override = overrideSafe + active = true + } + return hashRestorer{p, prevMode, prevOverride, active} +} + func (p *pp) handleSpecialValues( value reflect.Value, t reflect.Type, verb rune, depth int, ) (handled bool) { diff --git a/internal/rfmt/print.go b/internal/rfmt/print.go index 4c50eaf..8c747b5 100644 --- a/internal/rfmt/print.go +++ b/internal/rfmt/print.go @@ -67,7 +67,7 @@ type Formatter interface { } // Stringer is implemented by any value that has a String method, -// which defines the “native” format for that value. +// which defines the `native` format for that value. // The String method is used to print values passed as an operand // to any format that accepts a string or to an unformatted printer // such as Print. @@ -695,23 +695,6 @@ func (p *pp) handleMethods(verb rune) (handled bool) { return false } -// tryWriteHashMarker checks if arg implements HashValue and hashing is enabled. -// If so, writes hash markers around the value and returns true. -// Otherwise, returns false. -func (p *pp) tryWriteHashMarker(arg interface{}) bool { - if _, ok := arg.(i.HashValue); ok && m.IsHashingEnabled() { - // Mark the value for hashing during Redact(), but preserve the original value - // Use PreRedactable mode to prevent marker escaping - defer p.startPreRedactable().restore() - valueStr := origFmt.Sprint(arg) - p.buf.writeString(m.StartS) - p.buf.writeString(m.HashPrefixS) - p.buf.writeString(valueStr) - p.buf.writeString(m.EndS) - return true - } - return false -} func (p *pp) printArg(arg interface{}, verb rune) { t := reflect.TypeOf(arg) @@ -727,8 +710,8 @@ func (p *pp) printArg(arg interface{}, verb rune) { if _, isSafe := arg.(i.SafeValue); isSafe { defer p.startSafeOverride().restore() - } else if p.tryWriteHashMarker(arg) { - return + } else if _, ok := arg.(i.HashValue); ok { + defer p.startHashRedactable().restore() } p.arg = arg @@ -810,8 +793,8 @@ func (p *pp) printArg(arg interface{}, verb rune) { p.arg = f.Interface() if _, isSafe := p.arg.(i.SafeValue); isSafe { defer p.startSafeOverride().restore() - } else if p.tryWriteHashMarker(p.arg) { - return + } else if _, ok := p.arg.(i.HashValue); ok { + defer p.startHashRedactable().restore() } if p.handleMethods(verb) { return @@ -855,8 +838,8 @@ func (p *pp) printValue(value reflect.Value, verb rune, depth int) { p.arg = value.Interface() if _, isSafe := p.arg.(i.SafeValue); isSafe { defer p.startSafeOverride().restore() - } else if p.tryWriteHashMarker(p.arg) { - return + } else if _, ok := p.arg.(i.HashValue); ok { + defer p.startHashRedactable().restore() } if p.handleMethods(verb) { return From 95c49a1e26667a54f8ba9263c18e7adc1265ea66 Mon Sep 17 00:00:00 2001 From: Vishesh Bardia Date: Fri, 27 Feb 2026 20:55:24 +0530 Subject: [PATCH 4/4] Handled adjacent zones, InternalEscapeBytes for hash marker and added concurrency test --- hash_redact_test.go | 111 +++++++++++++++++++++++++++------ internal/buffer/buffer.go | 26 +++++--- internal/escape/escape.go | 10 +++ internal/escape/escape_test.go | 3 + internal/markers/constants.go | 1 + internal/markers/hash_test.go | 2 +- internal/markers/markers.go | 2 +- 7 files changed, 127 insertions(+), 28 deletions(-) diff --git a/hash_redact_test.go b/hash_redact_test.go index f7e3293..c361b50 100644 --- a/hash_redact_test.go +++ b/hash_redact_test.go @@ -16,10 +16,11 @@ package redact import ( "reflect" + "strings" + "sync" "testing" ) -// TestHashRedact_EndToEnd tests the full Sprintf → Redact pipeline for hash types. func TestHashRedact_EndToEnd(t *testing.T) { EnableHashing(nil) defer DisableHashing() @@ -35,7 +36,6 @@ func TestHashRedact_EndToEnd(t *testing.T) { } } -// TestHashRedact_DisabledHashing verifies hash markers are fully redacted when hashing is disabled. func TestHashRedact_DisabledHashing(t *testing.T) { s := Sprintf("user=%s", HashString("alice")) @@ -46,8 +46,6 @@ func TestHashRedact_DisabledHashing(t *testing.T) { } } -// TestHashRedact_MixedMarkers verifies that regular and hash markers in the same string -// are handled correctly: regular markers become ‹×›, hash markers become ‹hash›. func TestHashRedact_MixedMarkers(t *testing.T) { EnableHashing(nil) defer DisableHashing() @@ -61,7 +59,6 @@ func TestHashRedact_MixedMarkers(t *testing.T) { } } -// TestHashRedact_FormatVerbs verifies that format verbs are respected for hash types. func TestHashRedact_FormatVerbs(t *testing.T) { EnableHashing(nil) defer DisableHashing() @@ -92,7 +89,6 @@ func TestHashRedact_FormatVerbs(t *testing.T) { } } -// TestHashRedact_SafeWrapping verifies that Safe() takes priority over HashValue. func TestHashRedact_SafeWrapping(t *testing.T) { EnableHashing(nil) defer DisableHashing() @@ -104,7 +100,6 @@ func TestHashRedact_SafeWrapping(t *testing.T) { } } -// TestHashRedact_UnsafeWrapping verifies that Unsafe() takes priority over HashValue. func TestHashRedact_UnsafeWrapping(t *testing.T) { EnableHashing(nil) defer DisableHashing() @@ -119,7 +114,6 @@ func TestHashRedact_UnsafeWrapping(t *testing.T) { } } -// TestHashRedact_AllHashTypes verifies all concrete Hash* types work end-to-end. func TestHashRedact_AllHashTypes(t *testing.T) { EnableHashing(nil) defer DisableHashing() @@ -150,7 +144,6 @@ func TestHashRedact_AllHashTypes(t *testing.T) { } } -// TestHashRedact_RedactableBytes verifies the RedactableBytes.Redact() hash path. func TestHashRedact_RedactableBytes(t *testing.T) { EnableHashing(nil) defer DisableHashing() @@ -163,13 +156,11 @@ func TestHashRedact_RedactableBytes(t *testing.T) { t.Errorf("expected %q, got %q", expected, redacted) } - // String and bytes redaction should produce the same result. if string(redacted) != string(s.Redact()) { t.Errorf("string and bytes redaction differ: %q vs %q", s.Redact(), redacted) } } -// TestHashRedact_StripMarkers verifies StripMarkers on hash-marked strings. func TestHashRedact_StripMarkers(t *testing.T) { EnableHashing(nil) defer DisableHashing() @@ -196,7 +187,6 @@ func TestHashRedact_EnableAfterFormat(t *testing.T) { } } -// TestHashRedact_MultipleHashMarkers verifies multiple hash markers in one string. func TestHashRedact_MultipleHashMarkers(t *testing.T) { EnableHashing(nil) defer DisableHashing() @@ -211,7 +201,6 @@ func TestHashRedact_MultipleHashMarkers(t *testing.T) { } } -// TestHashRedact_WithSalt verifies that salted hashing produces different output than unsalted. func TestHashRedact_WithSalt(t *testing.T) { defer DisableHashing() @@ -266,7 +255,6 @@ func TestHashRedact_ReflectValue(t *testing.T) { } } -// TestHashRedact_Sprint verifies Sprint with HashValue. func TestHashRedact_Sprint(t *testing.T) { EnableHashing(nil) defer DisableHashing() @@ -281,7 +269,6 @@ func TestHashRedact_Sprint(t *testing.T) { } } -// TestHashRedact_HashBytes verifies HashBytes ([]byte type) end-to-end.`` func TestHashRedact_HashBytes(t *testing.T) { EnableHashing(nil) defer DisableHashing() @@ -296,7 +283,6 @@ func TestHashRedact_HashBytes(t *testing.T) { } } -// TestHashRedact_ToggleTransitions verifies Enable -> Disable -> Enable transitions. func TestHashRedact_ToggleTransitions(t *testing.T) { defer DisableHashing() @@ -318,9 +304,7 @@ func TestHashRedact_ToggleTransitions(t *testing.T) { } } -// TestHashRedact_RedactableBytesDisabled verifies bytes-path behavior when hashing is disabled. func TestHashRedact_RedactableBytesDisabled(t *testing.T) { - defer DisableHashing() DisableHashing() s := Sprintf("user=%s", HashString("alice")) @@ -328,3 +312,94 @@ func TestHashRedact_RedactableBytesDisabled(t *testing.T) { t.Errorf("expected %q, got %q", RedactableBytes("user=‹×›"), redacted) } } + +// TestHashRedact_ConcurrentToggle exercises concurrent Enable/Disable/Redact +// calls to verify there are no races or panics under contention. +func TestHashRedact_ConcurrentToggle(t *testing.T) { + defer DisableHashing() + + s := Sprintf("user=%s action=%s", HashString("alice"), "login") + + var wg sync.WaitGroup + const goroutines = 8 + const iterations = 500 + + for g := 0; g < goroutines; g++ { + wg.Add(1) + go func(id int) { + defer wg.Done() + for i := 0; i < iterations; i++ { + switch id % 3 { + case 0: + EnableHashing(nil) + case 1: + EnableHashing([]byte("salt")) + case 2: + DisableHashing() + } + redacted := s.Redact() + // The raw value must never leak through redaction. + if strings.Contains(string(redacted), "alice") { + t.Errorf("raw value leaked: %q", redacted) + } + } + }(g) + } + wg.Wait() +} + +// TestHashRedact_AdjacentZones verifies that the buffer's zone-merging +// optimization does not combine a hash zone with an adjacent unsafe zone. +func TestHashRedact_AdjacentZones(t *testing.T) { + EnableHashing(nil) + defer DisableHashing() + + tests := []struct { + name string + input RedactableString + expectedPre string + expectedPost string + }{ + { + "hash then unsafe (no separator)", + Sprintf("%s%s", HashString("alice"), "bob"), + "‹†alice›‹bob›", + "‹2bd806c9›‹×›", + }, + { + "unsafe then hash (no separator)", + Sprintf("%s%s", "bob", HashString("alice")), + "‹bob›‹†alice›", + "‹×›‹2bd806c9›", + }, + { + "hash then hash (no separator)", + Sprintf("%s%s", HashString("alice"), HashString("bob")), + "‹†alice›‹†bob›", + "‹2bd806c9›‹81b637d8›", + }, + { + "unsafe then unsafe (merge is ok)", + Sprintf("%s%s", "alice", "bob"), + "‹alicebob›", + "‹×›", + }, + { + "hash with space separator", + Sprintf("%s%s", "alice", HashString("bob")), + "‹alice›‹†bob›", + "‹×›‹81b637d8›", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if string(tc.input) != tc.expectedPre { + t.Errorf("pre-redaction:\n expected %q\n got %q", tc.expectedPre, tc.input) + } + if redacted := tc.input.Redact(); string(redacted) != tc.expectedPost { + t.Errorf("post-redaction:\n expected %q\n got %q", tc.expectedPost, redacted) + } + }) + } +} diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index 387e254..e205063 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -218,18 +218,28 @@ func (b *Buffer) startWrite() { } } -// endRedactable adds the closing redaction marker. +// startRedactable adds the opening redaction marker. func (b *Buffer) startRedactable() { if bytes.HasSuffix(b.buf, m.EndBytes) { - // Special case: remove a trailing closing marker and call it a day. - b.buf = b.buf[:len(b.buf)-m.EndLen] - } else { - p, ok := b.tryGrowByReslice(len(m.StartS)) - if !ok { - p = b.grow(len(m.StartS)) + // Optimization: merge adjacent unsafe zones by removing the + // trailing › instead of writing a new ‹. However, we must NOT + // merge into a hash zone (‹†...›), because that would cause the + // next value to be hashed together with the hash-marked value. + preceding := b.buf[:len(b.buf)-m.EndLen] + startIdx := bytes.LastIndex(preceding, m.StartBytes) + isHashZone := startIdx >= 0 && + bytes.HasPrefix(preceding[startIdx+m.StartLen:], m.HashPrefixBytes) + if !isHashZone { + b.buf = preceding + b.markerOpen = true + return } - copy(b.buf[p:], m.StartS) } + p, ok := b.tryGrowByReslice(len(m.StartS)) + if !ok { + p = b.grow(len(m.StartS)) + } + copy(b.buf[p:], m.StartS) b.markerOpen = true } diff --git a/internal/escape/escape.go b/internal/escape/escape.go index 1fc6706..9249073 100644 --- a/internal/escape/escape.go +++ b/internal/escape/escape.go @@ -37,6 +37,7 @@ func InternalEscapeBytes(b []byte, startLoc int, breakNewLines, strip bool) (res // accelerates the loops below. start, ls := m.StartBytes, len(m.StartS) end, le := m.EndBytes, len(m.EndS) + hashPrefix, lh := m.HashPrefixBytes, len(m.HashPrefixS) escape := m.EscapeMarkBytes // Trim final newlines/spaces, for convenience. @@ -116,6 +117,15 @@ func InternalEscapeBytes(b []byte, startLoc int, breakNewLines, strip bool) (res // Advance the counters by the length (in bytes) of the delimiter. k = i + le i += le - 1 /* -1 because we have i++ at the end of every iteration */ + } else if i+lh <= len(b) && bytes.Equal(b[i:i+lh], hashPrefix) { + if !copied { + res = make([]byte, 0, len(b)+len(escape)) + copied = true + } + res = append(res, b[k:i]...) + res = append(res, escape...) + k = i + lh + i += lh - 1 } } // If the string terminates with an invalid utf-8 sequence, we diff --git a/internal/escape/escape_test.go b/internal/escape/escape_test.go index 7314cf6..219debe 100644 --- a/internal/escape/escape_test.go +++ b/internal/escape/escape_test.go @@ -37,6 +37,9 @@ func TestInternalEscape(t *testing.T) { {[]byte("abc\n‹d\nef›\n \n\n "), len([]byte("abc")), true, true, "abc›\n‹?d›\n‹ef?"}, {[]byte("‹abc› ‹def›"), len([]byte("‹abc› ")), true, true, "‹abc› ?def?"}, {[]byte("abc‹\ndef"), len([]byte("abc‹")), true, true, "abc\n‹def"}, + {[]byte("†abc"), 0, false, false, "?abc"}, + {[]byte("‹†abc›"), 3, false, false, "‹?abc?"}, + {[]byte("hello†world"), 0, false, false, "hello?world"}, } for _, tc := range testCases { diff --git a/internal/markers/constants.go b/internal/markers/constants.go index e734fea..1b8b854 100644 --- a/internal/markers/constants.go +++ b/internal/markers/constants.go @@ -37,6 +37,7 @@ var ( EndBytes = []byte(EndS) EscapeMarkBytes = []byte(EscapeMarkS) RedactedBytes = []byte(RedactedS) + HashPrefixBytes = []byte(HashPrefixS) ReStripSensitive = regexp.MustCompile(StartS + HashPrefixS + "?" + "[^" + StartS + EndS + "]*" + EndS) ReStripMarkers = regexp.MustCompile("[" + StartS + EndS + HashPrefixS + "]") ) diff --git a/internal/markers/hash_test.go b/internal/markers/hash_test.go index b5ac121..386836a 100644 --- a/internal/markers/hash_test.go +++ b/internal/markers/hash_test.go @@ -1,4 +1,4 @@ -// Copyright 2025 The Cockroach Authors. +// Copyright 2026 The Cockroach Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/internal/markers/markers.go b/internal/markers/markers.go index 3d52dce..a81efb2 100644 --- a/internal/markers/markers.go +++ b/internal/markers/markers.go @@ -92,7 +92,7 @@ func (s RedactableBytes) Redact() RedactableBytes { } result := ReStripSensitive.ReplaceAllFunc([]byte(s), func(match []byte) []byte { if len(match) > len(StartBytes)+len(EndBytes) && - bytes.Equal(match[len(StartBytes):len(StartBytes)+len(HashPrefixS)], []byte(HashPrefixS)) { + bytes.Equal(match[len(StartBytes):len(StartBytes)+len(HashPrefixS)], HashPrefixBytes) { value := match[len(StartBytes)+len(HashPrefixS) : len(match)-len(EndBytes)] hashed := hashBytes(value) res := make([]byte, len(StartBytes)+len(hashed)+len(EndBytes))