Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions datadog/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,16 @@ func TestClientSanitizesMetricNames(t *testing.T) {
{Name: "colon:atsign@pipe|end", Value: stats.ValueOf(5)},
},
Tags: []stats.Tag{
stats.T("colon:atsign@pipe|end", "42"),
stats.T("colon:atsign@pipe|end", "http://example.com/path|with:special"),
stats.T("env", "prod"),
},
}
client.HandleMeasures(time.Time{}, testMeasure)
client.Flush()

expectedPacket1 := "request_colon_atsign_laughing_end.colon_atsign_pipe_end:5|c|#colon_atsign_pipe_end:42\n"
// Note: Tag names are strictly sanitized, but tag values are lenient
// The tag value can contain colons, pipes, slashes (but not commas)
expectedPacket1 := "request_colon_atsign_laughing_end.colon_atsign_pipe_end:5|c|#colon_atsign_pipe_end:http://example.com/path|with:special,env:prod\n"
select {
case packet := <-packets:
assert.EqualValues(t, expectedPacket1, string(packet))
Expand Down
62 changes: 60 additions & 2 deletions datadog/serializer.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,18 @@ func (s *serializer) AppendMeasures(b []byte, _ time.Time, measures ...stats.Mea
// 2-byte UTF-8 sequences that decode to these codepoints.
var latin1SupplementMap [256]byte

// valid[b] is true when the ASCII byte b is allowed in metric/tag names and
// false otherwise. Only '.', '-' and '_' are enabled in this literal; init()
// additionally enables the ASCII digits and the upper- and lower-case letters.
var valid = [256]bool{
'.': true, '-': true, '_': true,
}

// validTagValue[b] is true when the ASCII byte b may be copied unchanged into
// a tag value and false otherwise. It is an allow-list, not a deny-list: the
// punctuation enabled in this literal ('.', '-', '_', '/', ':', '|') plus the
// ASCII digits and letters that init() enables. Every other byte, including
// ',' (the tag separator), stays false.
var validTagValue = [256]bool{
'.': true, '-': true, '_': true, '/': true, ':': true, '|': true,
}

func init() {
// Initialize all to identity mapping
for i := range latin1SupplementMap {
Expand Down Expand Up @@ -203,12 +210,15 @@ func init() {

for c := '0'; c <= '9'; c++ {
valid[c] = true
validTagValue[c] = true
}
for c := 'A'; c <= 'Z'; c++ {
valid[c] = true
validTagValue[c] = true
}
for c := 'a'; c <= 'z'; c++ {
valid[c] = true
validTagValue[c] = true
}
}

Expand Down Expand Up @@ -269,6 +279,54 @@ func appendSanitizedMetricName(dst []byte, raw string) []byte {
return dst
}

// appendSanitizedTagValue appends a sanitized copy of the tag value raw to dst
// and returns the extended slice. Bytes already in dst are left untouched.
//
// The rules, applied rune by rune:
//   - An ASCII byte enabled in validTagValue is copied as-is.
//   - A rune in U+00C0..U+00FF is translated through latin1SupplementMap and
//     kept when the translated byte is enabled in validTagValue.
//   - Anything else is turned into a single replacement byte; a run of such
//     runes yields only one replacement.
//
// Scanning stops at the first rune whose byte offset in raw is >= maxLen.
// Afterwards leading and trailing '.', '_' and '-' are trimmed from the
// appended portion. If nothing is left, "_truncated_" is appended instead.
// An empty raw returns dst unchanged.
//
// NOTE(review): '|' is allowed here, but it also delimits the sections of a
// DogStatsD datagram — confirm the receiving agent accepts it inside a tag
// value.
func appendSanitizedTagValue(dst []byte, raw string) []byte {
	if raw == "" {
		return dst
	}
	start := len(dst)

	prevReplaced := false
	for off, r := range raw {
		if off >= maxLen {
			break
		}

		// Work out which byte (if any) this rune contributes verbatim.
		var out byte
		keep := false
		switch {
		case r < utf8.RuneSelf:
			out = byte(r)
			keep = validTagValue[out]
		case r >= 0xC0 && r <= 0xFF:
			// Latin-1 Supplement letters (À, É, ñ, ...) fold to ASCII.
			out = latin1SupplementMap[r]
			keep = validTagValue[out]
		}

		if keep {
			dst = append(dst, out)
			prevReplaced = false
			continue
		}

		// Collapse consecutive rejected runes into one replacement byte.
		if !prevReplaced {
			dst = append(dst, replacement)
			prevReplaced = true
		}
	}

	// Strip leading/trailing '.', '_' or '-' from what was just appended.
	body := bytes.Trim(dst[start:], "._-")
	if len(body) == 0 {
		return append(dst[:start], "_truncated_"...)
	}
	return append(dst[:start], body...)
}

// AppendMeasure is a formatting routine to append the dogstatsd protocol
// representation of a measure to a memory buffer.
// Tags listed in the s.filters are removed. (some tags may not be suitable for submission to DataDog)
Expand Down Expand Up @@ -325,7 +383,7 @@ func (s *serializer) AppendMeasure(b []byte, m stats.Measure) []byte {
}
b = appendSanitizedMetricName(b, t.Name)
b = append(b, ':')
b = appendSanitizedMetricName(b, t.Value)
b = appendSanitizedTagValue(b, t.Value)
}
}
b = append(b, '\n')
Expand Down
123 changes: 122 additions & 1 deletion datadog/serializer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ request.rtt:0.1|h|#answer:42,hello:world
stats.T("hello", "world"),
},
},
s: `request.count:5|c|#ans_wer_blah:also_pipe_colon_comma,hello:world
s: `request.count:5|c|#ans_wer_blah:also|pipe:colon_comma,hello:world
`,
dp: []string{},
},
Expand All @@ -87,6 +87,24 @@ request.rtt:0.1|h|#answer:42,hello:world
`,
dp: []string{"dist_"},
},

// Test lenient tag values - URLs, paths, colons, pipes
{
m: stats.Measure{
Name: "api.request",
Fields: []stats.Field{
stats.MakeField("count", 1, stats.Counter),
},
Tags: []stats.Tag{
stats.T("url", "http://api.example.com/v1/users"),
stats.T("path", "/api/v1/users"),
stats.T("env", "prod:us-east-1"),
},
},
s: `api.request.count:1|c|#url:http://api.example.com/v1/users,path:/api/v1/users,env:prod:us-east-1
`,
dp: []string{},
},
}

func TestAppendMeasure(t *testing.T) {
Expand Down Expand Up @@ -511,6 +529,109 @@ func TestSanitizationPreservesUTF8Validity(t *testing.T) {
}
}

// TestAppendSanitizedTagValue drives appendSanitizedTagValue through a table
// of inputs. Each row supplies the bytes already in the buffer, the raw tag
// value, and the full buffer content expected afterwards. A mismatch on the
// expected content aborts the test immediately; the prefix checks that follow
// only record an error.
func TestAppendSanitizedTagValue(t *testing.T) {
	oversized := strings.Repeat("x", 300) // longer than maxLen

	table := []struct {
		seed          string // bytes already present in the buffer
		input, expect string
	}{
		// Plain values pass through unchanged.
		{"", "simple", "simple"},
		{"", "with-dashes_underscores.dots", "with-dashes_underscores.dots"},

		// Colons, slashes and pipes are kept in tag values.
		{"", "http://example.com", "http://example.com"},
		{"", "path/to/resource", "path/to/resource"},
		{"", "key:value:pair", "key:value:pair"},
		{"", "pipe|separated|values", "pipe|separated|values"},
		{"", "mixed:pipe|slash/colon", "mixed:pipe|slash/colon"},

		// Commas separate tags on the wire, so they are replaced.
		{"", "value,with,commas", "value_with_commas"},
		{"", "item1,item2,item3", "item1_item2_item3"},

		// Other punctuation is replaced as well.
		{"", "value@sign#hash", "value_sign_hash"},
		{"", "brackets[test]", "brackets_test"},
		{"", "parens(test)", "parens_test"},

		// Accented Latin-1 letters fold to their ASCII base letter.
		{"", "café", "cafe"},
		{"", "naïve", "naive"},
		{"", "señor", "senor"},

		// Leading/trailing '.', '_' and '-' are trimmed.
		{"", "-leading-dash", "leading-dash"},
		{"", "trailing-dash-", "trailing-dash"},
		{"", "...dots...", "dots"},
		{"", "__underscores__", "underscores"},

		// An empty value appends nothing.
		{"", "", ""},
		{"prefix:", "", "prefix:"},

		// Runs of rejected characters collapse into one replacement.
		{"", "foo!!!bar", "foo_bar"},
		{"", "test@@@value", "test_value"},

		// Accepted and rejected characters mixed together.
		{"", "env:prod|region:us-east-1", "env:prod|region:us-east-1"},
		{"", "url:http://api.example.com/v1", "url:http://api.example.com/v1"},
		{"", "list:item1,item2,item3", "list:item1_item2_item3"},

		// Emoji and other multi-byte sequences.
		{"", "test🤡emoji", "test_emoji"},
		{"", "hello🌍world", "hello_world"},
		{"", "测试值", "_truncated_"}, // nothing survives -> _truncated_

		// Over-long values are cut at maxLen.
		{"", oversized, strings.Repeat("x", maxLen)},

		// Existing buffer content must be kept in front of the value.
		{"tagname:", "http://example.com", "tagname:http://example.com"},
		{"key:", "value,with,comma", "key:value_with_comma"},
		{"env:", "production", "env:production"},
	}

	for _, tc := range table {
		// Seed the buffer, then append the sanitized value after it.
		out := appendSanitizedTagValue([]byte(tc.seed), tc.input)
		got := string(out)

		if got != tc.expect {
			t.Fatalf("prefix=%q in=%q want=%q (len %v) got=%q (len %v)", tc.seed, tc.input, tc.expect, len(tc.expect), got, len(got))
		}

		// The seed must still lead the result.
		if tc.seed != "" && !strings.HasPrefix(got, tc.seed) {
			t.Errorf("prefix %q not preserved in result %q", tc.seed, got)
		}

		// Only bytes at or beyond the seed length may have been written.
		if n := len(tc.seed); n > 0 && n <= len(out) {
			if head := string(out[:n]); head != tc.seed {
				t.Errorf("original buffer data corrupted: want %q, got %q", tc.seed, head)
			}
		}
	}

	// Same check against a buffer that already has spare capacity.
	t.Run("BufferReuse", func(t *testing.T) {
		const expected = "tag:http://cafe.com/path"

		buf := append(make([]byte, 0, 100), "tag:"...)
		buf = appendSanitizedTagValue(buf, "http://café.com/path")
		if string(buf) != expected {
			t.Errorf("buffer reuse failed: want %q, got %q", expected, string(buf))
		}
	})
}

// BenchmarkAppendSanitizedMetricName measures performance of metric name sanitization
// across different input types to ensure the implementation is efficient.
func BenchmarkAppendSanitizedMetricName(b *testing.B) {
Expand Down
Loading