segmentio · kevinburkesegment · Nov 24, 2025 · Nov 4, 2025
diff --git a/datadog/client_test.go b/datadog/client_test.go
@@ -55,13 +55,16 @@ func TestClientSanitizesMetricNames(t *testing.T) {
 			{Name: "colon:atsign@pipe|end", Value: stats.ValueOf(5)},
 		},
 		Tags: []stats.Tag{
-			stats.T("colon:atsign@pipe|end", "42"),
+			stats.T("colon:atsign@pipe|end", "http://example.com/path|with:special"),
+			stats.T("env", "prod"),
 		},
 	}
 	client.HandleMeasures(time.Time{}, testMeasure)
 	client.Flush()
 
-	expectedPacket1 := "request_colon_atsign_laughing_end.colon_atsign_pipe_end:5|c|#colon_atsign_pipe_end:42\n"
+	// Note: tag names are strictly sanitized, but tag values are lenient:
+	// the value may keep colons, pipes and slashes (but not commas).
+	expectedPacket1 := "request_colon_atsign_laughing_end.colon_atsign_pipe_end:5|c|#colon_atsign_pipe_end:http://example.com/path|with:special,env:prod\n"
 	select {
 	case packet := <-packets:
 		assert.EqualValues(t, expectedPacket1, string(packet))

diff --git a/datadog/serializer.go b/datadog/serializer.go
@@ -93,11 +93,18 @@ func (s *serializer) AppendMeasures(b []byte, _ time.Time, measures ...stats.Mea
 // 2-byte UTF-8 sequences that decode to these codepoints.
 var latin1SupplementMap [256]byte
 
-// valid[byte] = 1 if the ASCII char is allowed, 0 otherwise.
+// valid[byte] = true if the ASCII char is allowed in metric/tag names, false otherwise.
 var valid = [256]bool{
 	'.': true, '-': true, '_': true,
 }
 
+// validTagValue[byte] = true if the ASCII char is allowed in tag values, false otherwise.
+// Beyond the metric-name set (letters, digits, '.', '-', '_') it admits only '/', ':' and '|';
+// all else is replaced, including commas (which separate tags). NOTE(review): '|' also separates DogStatsD fields - confirm it is safe in a value.
+var validTagValue = [256]bool{
+	'.': true, '-': true, '_': true, '/': true, ':': true, '|': true,
+}
+
 func init() {
 	// Initialize all to identity mapping
 	for i := range latin1SupplementMap {
@@ -203,12 +210,15 @@ func init() {
 
 	for c := '0'; c <= '9'; c++ {
 		valid[c] = true
+		validTagValue[c] = true
 	}
 	for c := 'A'; c <= 'Z'; c++ {
 		valid[c] = true
+		validTagValue[c] = true
 	}
 	for c := 'a'; c <= 'z'; c++ {
 		valid[c] = true
+		validTagValue[c] = true
 	}
 }
 
@@ -269,6 +279,54 @@ func appendSanitizedMetricName(dst []byte, raw string) []byte {
 	return dst
 }
 
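+// appendSanitizedTagValue appends a sanitized copy of raw to dst for use as a DogStatsD tag value.
+// Bytes allowed by validTagValue are kept; runes U+00C0-U+00FF are mapped through latin1SupplementMap
+// and kept if the result is allowed; each run of other characters (commas included, since they
+// separate tags) becomes one replacement byte. Input from byte offset maxLen on is dropped, outer '.', '_', '-' are trimmed, and a non-empty raw left with nothing yields "_truncated_".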
+func appendSanitizedTagValue(dst []byte, raw string) []byte {
+	origLen := len(dst)
+	if raw == "" {
+		return dst
+	}
+
+	// Walk raw rune by rune; i is a byte offset, so maxLen bounds the input bytes consumed.
+	lastWasRepl := false
+	for i, r := range raw {
+		if i >= maxLen {
+			break
+		}
+
+		if r < utf8.RuneSelf && validTagValue[byte(r)] {
+			// Valid ASCII character
+			dst = append(dst, byte(r))
+			lastWasRepl = false
+		} else if r >= 0xC0 && r <= 0xFF {
+			// Latin-1 Supplement block (common accented characters like À, É, ñ)
+			mapped := latin1SupplementMap[r]
+			if validTagValue[mapped] {
+				dst = append(dst, mapped)
+				lastWasRepl = false
+			} else if !lastWasRepl {
+				dst = append(dst, replacement)
+				lastWasRepl = true
+			}
+		} else if !lastWasRepl {
+			// Invalid or unsupported character - only append if we didn't just add a replacement
+			dst = append(dst, replacement)
+			lastWasRepl = true
+		}
+	}
+
+	// Trim leading/trailing '.', '_' or '-'
+	trimmed := bytes.Trim(dst[origLen:], "._-")
+	dst = append(dst[:origLen], trimmed...)
+
+	if len(dst) == origLen {
+		return append(dst, "_truncated_"...)
+	}
+	return dst
+}
+
 // AppendMeasure is a formatting routine to append the dogstatsd protocol
 // representation of a measure to a memory buffer.
 // Tags listed in the s.filters are removed. (some tags may not be suitable for submission to DataDog)
@@ -325,7 +383,7 @@ func (s *serializer) AppendMeasure(b []byte, m stats.Measure) []byte {
 				}
 				b = appendSanitizedMetricName(b, t.Name)
 				b = append(b, ':')
-				b = appendSanitizedMetricName(b, t.Value)
+				b = appendSanitizedTagValue(b, t.Value)
 			}
 		}
 		b = append(b, '\n')

diff --git a/datadog/serializer_test.go b/datadog/serializer_test.go
@@ -67,7 +67,7 @@ request.rtt:0.1|h|#answer:42,hello:world
 				stats.T("hello", "world"),
 			},
 		},
-		s: `request.count:5|c|#ans_wer_blah:also_pipe_colon_comma,hello:world
+		s: `request.count:5|c|#ans_wer_blah:also|pipe:colon_comma,hello:world
 `,
 		dp: []string{},
 	},
@@ -87,6 +87,24 @@ request.rtt:0.1|h|#answer:42,hello:world
 `,
 		dp: []string{"dist_"},
 	},
+
+	// Test lenient tag values - URLs, paths, colons
+	{
+		m: stats.Measure{
+			Name: "api.request",
+			Fields: []stats.Field{
+				stats.MakeField("count", 1, stats.Counter),
+			},
+			Tags: []stats.Tag{
+				stats.T("url", "http://api.example.com/v1/users"),
+				stats.T("path", "/api/v1/users"),
+				stats.T("env", "prod:us-east-1"),
+			},
+		},
+		s: `api.request.count:1|c|#url:http://api.example.com/v1/users,path:/api/v1/users,env:prod:us-east-1
+`,
+		dp: []string{},
+	},
 }
 
 func TestAppendMeasure(t *testing.T) {
@@ -511,6 +529,109 @@ func TestSanitizationPreservesUTF8Validity(t *testing.T) {
 	}
 }
 
+func TestAppendSanitizedTagValue(t *testing.T) {
+	long := strings.Repeat("x", 300) // longer than maxLen
+	cases := []struct {
+		prefix   string // existing data in buffer
+		in, want string
+	}{
+		// Basic cases - tag values should be more lenient
+		{"", "simple", "simple"},
+		{"", "with-dashes_underscores.dots", "with-dashes_underscores.dots"},
+
+		// Tag values may contain colons, slashes and pipes; they pass through unchanged
+		{"", "http://example.com", "http://example.com"},
+		{"", "path/to/resource", "path/to/resource"},
+		{"", "key:value:pair", "key:value:pair"},
+		{"", "pipe|separated|values", "pipe|separated|values"},
+		{"", "mixed:pipe|slash/colon", "mixed:pipe|slash/colon"},
+
+		// Commas must be sanitized (they separate tags in the protocol)
+		{"", "value,with,commas", "value_with_commas"},
+		{"", "item1,item2,item3", "item1_item2_item3"},
+
+		// Special characters that should be sanitized
+		{"", "value@sign#hash", "value_sign_hash"},
+		{"", "brackets[test]", "brackets_test"},
+		{"", "parens(test)", "parens_test"},
+
+		// Accented characters should be normalized
+		{"", "café", "cafe"},
+		{"", "naïve", "naive"},
+		{"", "señor", "senor"},
+
+		// Leading/trailing special chars should be trimmed
+		{"", "-leading-dash", "leading-dash"},
+		{"", "trailing-dash-", "trailing-dash"},
+		{"", "...dots...", "dots"},
+		{"", "__underscores__", "underscores"},
+
+		// Empty string handling
+		{"", "", ""},
+		{"prefix:", "", "prefix:"},
+
+		// Multiple consecutive special chars collapse
+		{"", "foo!!!bar", "foo_bar"},
+		{"", "test@@@value", "test_value"},
+
+		// Mixed valid and invalid characters
+		{"", "env:prod|region:us-east-1", "env:prod|region:us-east-1"},
+		{"", "url:http://api.example.com/v1", "url:http://api.example.com/v1"},
+		{"", "list:item1,item2,item3", "list:item1_item2_item3"},
+
+		// Emojis and other multi-byte sequences
+		{"", "test🤡emoji", "test_emoji"},
+		{"", "hello🌍world", "hello_world"},
+		{"", "测试值", "_truncated_"}, // Chinese -> _truncated_ (all invalid)
+
+		// Over-long values should be truncated
+		{"", long, strings.Repeat("x", maxLen)},
+
+		// With prefix
+		{"tagname:", "http://example.com", "tagname:http://example.com"},
+		{"key:", "value,with,comma", "key:value_with_comma"},
+		{"env:", "production", "env:production"},
+	}
+
+	for _, c := range cases {
+		// Start with prefix data in buffer
+		buf := []byte(c.prefix)
+		originalLen := len(buf)
+
+		// Append sanitized tag value
+		buf = appendSanitizedTagValue(buf, c.in)
+		got := string(buf)
+
+		if got != c.want {
+			t.Fatalf("prefix=%q in=%q want=%q (len %v) got=%q (len %v)", c.prefix, c.in, c.want, len(c.want), got, len(got))
+		}
+
+		// Verify prefix is preserved
+		if len(c.prefix) > 0 && !strings.HasPrefix(got, c.prefix) {
+			t.Errorf("prefix %q not preserved in result %q", c.prefix, got)
+		}
+
+		// Verify we only modified the buffer from the original length onward
+		if originalLen > 0 && originalLen <= len(buf) {
+			originalPart := string(buf[:originalLen])
+			if originalPart != c.prefix {
+				t.Errorf("original buffer data corrupted: want %q, got %q", c.prefix, originalPart)
+			}
+		}
+	}
+
+	// Additional test: verify appending into a buffer with spare pre-allocated capacity
+	t.Run("BufferReuse", func(t *testing.T) {
+		buf := make([]byte, 0, 100) // pre-allocated capacity
+		buf = append(buf, "tag:"...)
+		buf = appendSanitizedTagValue(buf, "http://café.com/path")
+		expected := "tag:http://cafe.com/path"
+		if string(buf) != expected {
+			t.Errorf("buffer reuse failed: want %q, got %q", expected, string(buf))
+		}
+	})
+}
+
 // BenchmarkAppendSanitizedMetricName measures performance of metric name sanitization
 // across different input types to ensure the implementation is efficient.
 func BenchmarkAppendSanitizedMetricName(b *testing.B) {