Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions internal/engine/finding_ids.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package engine

import (
"github.com/pmclSF/terrain/internal/identity"
"github.com/pmclSF/terrain/internal/models"
)

// assignFindingIDs walks every signal in the snapshot (both top-level
// `snapshot.Signals` and per-test-file `TestFile.Signals`) and populates
// the stable `FindingID` field for any signal that doesn't already have
// one. Detectors that need a non-default ID (e.g. signals attached to
// virtual locations like a manifest entry) can pre-set FindingID and
// this pass leaves them alone.
//
// Idempotent — calling twice produces the same result.
//
// Called from RunPipelineContext after SortSnapshot so the IDs land in
// canonical order. Order matters: the assignment uses Type +
// Location.{File,Symbol,Line} as the inputs, so signals that are
// indistinguishable on those four fields get the same ID by design
// (deduplication during snapshot construction is upstream's job).
func assignFindingIDs(snapshot *models.TestSuiteSnapshot) {
if snapshot == nil {
return
}
for i := range snapshot.Signals {
assignSignalID(&snapshot.Signals[i])
}
for fi := range snapshot.TestFiles {
tf := &snapshot.TestFiles[fi]
for si := range tf.Signals {
assignSignalID(&tf.Signals[si])
}
}
}

func assignSignalID(s *models.Signal) {
if s.FindingID != "" {
// Detector pre-set the ID — preserve it.
return
}
s.FindingID = identity.BuildFindingID(
string(s.Type),
s.Location.File,
s.Location.Symbol,
s.Location.Line,
)
}
97 changes: 97 additions & 0 deletions internal/engine/finding_ids_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package engine

import (
"strings"
"testing"

"github.com/pmclSF/terrain/internal/models"
)

func TestAssignFindingIDs_TopLevelAndPerFile(t *testing.T) {
t.Parallel()
snap := &models.TestSuiteSnapshot{
Signals: []models.Signal{
{
Type: "weakAssertion",
Location: models.SignalLocation{
File: "internal/auth/login_test.go", Symbol: "TestLogin", Line: 42,
},
},
},
TestFiles: []models.TestFile{
{
Path: "internal/auth/login_test.go",
Signals: []models.Signal{
{
Type: "mockHeavyTest",
Location: models.SignalLocation{
File: "internal/auth/login_test.go", Symbol: "TestLogin", Line: 100,
},
},
},
},
},
}
assignFindingIDs(snap)

if snap.Signals[0].FindingID == "" {
t.Error("top-level signal FindingID was not populated")
}
if snap.TestFiles[0].Signals[0].FindingID == "" {
t.Error("per-file signal FindingID was not populated")
}
if !strings.HasPrefix(snap.Signals[0].FindingID, "weakAssertion@") {
t.Errorf("top-level FindingID has wrong shape: %q", snap.Signals[0].FindingID)
}
if !strings.HasPrefix(snap.TestFiles[0].Signals[0].FindingID, "mockHeavyTest@") {
t.Errorf("per-file FindingID has wrong shape: %q", snap.TestFiles[0].Signals[0].FindingID)
}
}

func TestAssignFindingIDs_PreservesPreSetID(t *testing.T) {
t.Parallel()
snap := &models.TestSuiteSnapshot{
Signals: []models.Signal{
{
Type: "weakAssertion",
FindingID: "custom@id",
Location: models.SignalLocation{
File: "internal/auth/login_test.go", Symbol: "TestLogin", Line: 42,
},
},
},
}
assignFindingIDs(snap)
if snap.Signals[0].FindingID != "custom@id" {
t.Errorf("pre-set FindingID was overwritten: %q", snap.Signals[0].FindingID)
}
}

func TestAssignFindingIDs_Idempotent(t *testing.T) {
t.Parallel()
snap := &models.TestSuiteSnapshot{
Signals: []models.Signal{
{
Type: "weakAssertion",
Location: models.SignalLocation{
File: "internal/auth/login_test.go", Symbol: "TestLogin", Line: 42,
},
},
},
}
assignFindingIDs(snap)
first := snap.Signals[0].FindingID
assignFindingIDs(snap)
second := snap.Signals[0].FindingID
if first != second {
t.Errorf("non-idempotent: first=%q, second=%q", first, second)
}
}

func TestAssignFindingIDs_NilSafe(t *testing.T) {
t.Parallel()
// Should not panic on nil snapshot.
assignFindingIDs(nil)
// Should not panic on snapshot with no signals.
assignFindingIDs(&models.TestSuiteSnapshot{})
}
7 changes: 7 additions & 0 deletions internal/engine/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,13 @@ func RunPipelineContext(ctx context.Context, root string, opts ...PipelineOption
progress(5, "Writing report")
models.SortSnapshot(snapshot)

// Step 10b: assign stable FindingIDs to every signal. Runs after
// the sort so IDs land in canonical order; uses Type + Location
// (file/symbol/line) so the IDs survive everything except a
// rename/move of the signal's underlying location. See
// `internal/identity.BuildFindingID` for the format.
assignFindingIDs(snapshot)

if err := models.ValidateSnapshot(snapshot); err != nil {
return nil, fmt.Errorf("invalid snapshot produced by pipeline: %w", err)
}
Expand Down
153 changes: 153 additions & 0 deletions internal/identity/finding_id.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package identity

import (
"fmt"
"strconv"
"strings"
)

// FindingID is the stable identifier for a single signal/finding emitted by
// a Terrain detector. It enables suppressions (`.terrain/suppressions.yaml`),
// the `terrain explain finding <id>` round-trip, baseline-aware gating
// (`--new-findings-only`), and cross-run deduplication.
//
// Shape:
//
// {detector}@{normalized_path}:{anchor}#{hash}
//
// Where:
// - detector = the signal type (e.g. "weakAssertion")
// - normalized_path = forward-slash, repo-relative path
// - anchor = symbol name when present, "L<line>" otherwise,
// "_" when neither is available
// - hash = 8 hex chars derived from the canonical form for
// collision resistance
//
// Example: `weakAssertion@internal/auth/login_test.go:TestLogin#a1b2c3d4`
//
// Stability guarantees:
// - Same (detector, path, symbol, line) → same ID across runs.
// - Whitespace changes inside the file do NOT change the ID, *as long as*
// the symbol name and line number are preserved by the detector. (Line
// drift is a known limitation; AST-anchored 0.3 work removes it.)
// - File rename or symbol rename produces a new ID. That's the right
// thing — the underlying finding has moved.
//
// Trade-offs:
// - The ID is human-readable enough to mention in a PR comment, but
// also unique enough that two findings of the same type on the same
// line (different symbols / different sub-locations) get distinct IDs.
// - The hash is short (8 chars = 32 bits) — collision risk in any single
// repo is effectively zero, but the ID is not a global identifier.

// BuildFindingID constructs a stable finding ID from its components.
//
// Empty signalType is treated as "_" rather than producing an invalid
// id; this keeps callers that don't yet emit a type from breaking the
// suppression file format. Empty file is also tolerated (yields an
// "_" path component) but in practice every detector emits a file.
func BuildFindingID(signalType, file, symbol string, line int) string {
detector := normalizeIDComponent(signalType)
path := normalizePathOrPlaceholder(file)
anchor := buildAnchor(symbol, line)

canonical := detector + "::" + path + "::" + anchor
hash := GenerateID(canonical) // returns 16 hex chars
short := hash[:8]

return fmt.Sprintf("%s@%s:%s#%s", detector, path, anchor, short)
}

// ParseFindingID extracts the components from a finding ID. Returns
// (detector, path, anchor, hash) and ok=false if the ID doesn't match
// the expected shape. Useful for `terrain explain finding <id>` where
// we want to validate the input before searching the snapshot.
func ParseFindingID(id string) (detector, path, anchor, hash string, ok bool) {
// Split on '#' to peel off the hash.
hashAt := strings.LastIndexByte(id, '#')
if hashAt < 0 {
return "", "", "", "", false
}
hash = id[hashAt+1:]
rest := id[:hashAt]

// Split on '@' to peel off the detector.
atAt := strings.IndexByte(rest, '@')
if atAt < 0 {
return "", "", "", "", false
}
detector = rest[:atAt]
rest = rest[atAt+1:]

// Split path from anchor on the FIRST ':' after the detector. File
// paths in Terrain are repo-relative POSIX paths (no ':'); anchors
// may legitimately contain ':' (e.g. "TestSuite::TestCase"), so the
// first ':' is the unambiguous separator.
colonAt := strings.IndexByte(rest, ':')
if colonAt < 0 {
return "", "", "", "", false
}
path = rest[:colonAt]
anchor = rest[colonAt+1:]

if detector == "" || path == "" || anchor == "" || hash == "" {
return "", "", "", "", false
}
return detector, path, anchor, hash, true
}

// MatchFindingID returns true when `id` could correspond to the given
// signal coordinates. The hash is recomputed from the components and
// compared; the human-readable prefix is also checked. This is what
// `terrain explain finding <id>` uses to round-trip an ID back to a
// signal in the snapshot.
func MatchFindingID(id, signalType, file, symbol string, line int) bool {
expected := BuildFindingID(signalType, file, symbol, line)
return id == expected
}

// ── helpers ─────────────────────────────────────────────────────────

// normalizeIDComponent tames a string for use in an ID component:
// trims whitespace, replaces internal whitespace with "_". Does not
// alter case (signal types are camelCase by convention; preserved).
func normalizeIDComponent(s string) string {
s = strings.TrimSpace(s)
if s == "" {
return "_"
}
// Replace any whitespace runs with single underscore.
var out strings.Builder
prevSpace := false
for _, r := range s {
if r == ' ' || r == '\t' || r == '\n' || r == '\r' {
if !prevSpace {
out.WriteByte('_')
prevSpace = true
}
continue
}
out.WriteRune(r)
prevSpace = false
}
return out.String()
}

func normalizePathOrPlaceholder(file string) string {
p := NormalizePath(file)
if p == "" {
return "_"
}
return p
}

func buildAnchor(symbol string, line int) string {
sym := normalizeIDComponent(symbol)
if sym != "" && sym != "_" {
return sym
}
if line > 0 {
return "L" + strconv.Itoa(line)
}
return "_"
}
Loading
Loading