diff --git a/pkg/anonymize/annotations.go b/pkg/anonymize/annotations.go new file mode 100644 index 000000000..44582f084 --- /dev/null +++ b/pkg/anonymize/annotations.go @@ -0,0 +1,208 @@ +package anonymize + +import ( + v2 "github.com/conductorone/baton-sdk/pb/c1/connector/v2" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" +) + +// anonymizeResourceTypeAnnotations processes all annotations on a ResourceType, +// anonymizing known types and dropping unknown types. +func (a *Anonymizer) anonymizeResourceTypeAnnotations(rt *v2.ResourceType) error { + if rt == nil { + return nil + } + + var result []*anypb.Any + for _, ann := range rt.GetAnnotations() { + if processed, err := a.processResourceTypeAnnotation(ann); err != nil { + return err + } else if processed != nil { + result = append(result, processed) + } + // Unknown types are dropped + } + + rt.SetAnnotations(result) + return nil +} + +// processResourceTypeAnnotation processes a single annotation from a ResourceType. +// Returns nil if the annotation should be dropped. 
+func (a *Anonymizer) processResourceTypeAnnotation(ann *anypb.Any) (*anypb.Any, error) { + // ChildResourceType - anonymize resource type ID + childRT := &v2.ChildResourceType{} + if ann.MessageIs(childRT) { + if err := ann.UnmarshalTo(childRT); err != nil { + return nil, err + } + if childRT.GetResourceTypeId() != "" { + childRT.SetResourceTypeId(a.hasher.AnonymizeResourceType(childRT.GetResourceTypeId())) + } + return anypb.New(childRT) + } + + // ExternalLink - anonymize URL + externalLink := &v2.ExternalLink{} + if ann.MessageIs(externalLink) { + if err := ann.UnmarshalTo(externalLink); err != nil { + return nil, err + } + if externalLink.GetUrl() != "" { + externalLink.SetUrl(a.hasher.AnonymizeURL(externalLink.GetUrl())) + } + return anypb.New(externalLink) + } + + // Empty marker types - preserve as-is (no PII) + for _, marker := range []proto.Message{ + &v2.SkipEntitlementsAndGrants{}, + &v2.SkipGrants{}, + &v2.SkipEntitlements{}, + } { + if ann.MessageIs(marker) { + return ann, nil + } + } + + // Unknown type - drop it + return nil, nil +} + +// anonymizeGrantAnnotations processes all annotations on a Grant, +// anonymizing known types and dropping unknown types. +func (a *Anonymizer) anonymizeGrantAnnotations(g *v2.Grant) error { + if g == nil { + return nil + } + + var result []*anypb.Any + for _, ann := range g.GetAnnotations() { + if processed, err := a.processGrantAnnotation(ann); err != nil { + return err + } else if processed != nil { + result = append(result, processed) + } + } + + g.SetAnnotations(result) + return nil +} + +// processGrantAnnotation processes a single annotation from a Grant. +// Returns nil if the annotation should be dropped. 
+func (a *Anonymizer) processGrantAnnotation(ann *anypb.Any) (*anypb.Any, error) { + // GrantExpandable - anonymize entitlement and resource type IDs + expandable := &v2.GrantExpandable{} + if ann.MessageIs(expandable) { + if err := ann.UnmarshalTo(expandable); err != nil { + return nil, err + } + entitlementIDs := expandable.GetEntitlementIds() + for i, eid := range entitlementIDs { + if eid != "" { + entitlementIDs[i] = a.hasher.AnonymizeExternalID(eid) + } + } + expandable.SetEntitlementIds(entitlementIDs) + + resourceTypeIDs := expandable.GetResourceTypeIds() + for i, rtid := range resourceTypeIDs { + if rtid != "" { + resourceTypeIDs[i] = a.hasher.AnonymizeResourceType(rtid) + } + } + expandable.SetResourceTypeIds(resourceTypeIDs) + return anypb.New(expandable) + } + + // GrantMetadata - clear metadata + grantMetadata := &v2.GrantMetadata{} + if ann.MessageIs(grantMetadata) { + if err := ann.UnmarshalTo(grantMetadata); err != nil { + return nil, err + } + grantMetadata.ClearMetadata() + return anypb.New(grantMetadata) + } + + // GrantImmutable - anonymize source_id, clear metadata + grantImmutable := &v2.GrantImmutable{} + if ann.MessageIs(grantImmutable) { + if err := ann.UnmarshalTo(grantImmutable); err != nil { + return nil, err + } + if grantImmutable.GetSourceId() != "" { + grantImmutable.SetSourceId(a.hasher.Hash(grantImmutable.GetSourceId())) + } + grantImmutable.ClearMetadata() + return anypb.New(grantImmutable) + } + + // ExternalLink - anonymize URL + externalLink := &v2.ExternalLink{} + if ann.MessageIs(externalLink) { + if err := ann.UnmarshalTo(externalLink); err != nil { + return nil, err + } + if externalLink.GetUrl() != "" { + externalLink.SetUrl(a.hasher.AnonymizeURL(externalLink.GetUrl())) + } + return anypb.New(externalLink) + } + + // Unknown type - drop it + return nil, nil +} + +// anonymizeEntitlementAnnotations processes all annotations on an Entitlement, +// anonymizing known types and dropping unknown types. 
+func (a *Anonymizer) anonymizeEntitlementAnnotations(e *v2.Entitlement) error { + if e == nil { + return nil + } + + var result []*anypb.Any + for _, ann := range e.GetAnnotations() { + if processed, err := a.processEntitlementAnnotation(ann); err != nil { + return err + } else if processed != nil { + result = append(result, processed) + } + } + + e.SetAnnotations(result) + return nil +} + +// processEntitlementAnnotation processes a single annotation from an Entitlement. +// Returns nil if the annotation should be dropped. +func (a *Anonymizer) processEntitlementAnnotation(ann *anypb.Any) (*anypb.Any, error) { + // EntitlementImmutable - anonymize source_id, clear metadata + entitlementImmutable := &v2.EntitlementImmutable{} + if ann.MessageIs(entitlementImmutable) { + if err := ann.UnmarshalTo(entitlementImmutable); err != nil { + return nil, err + } + if entitlementImmutable.GetSourceId() != "" { + entitlementImmutable.SetSourceId(a.hasher.Hash(entitlementImmutable.GetSourceId())) + } + entitlementImmutable.ClearMetadata() + return anypb.New(entitlementImmutable) + } + + // ExternalLink - anonymize URL + externalLink := &v2.ExternalLink{} + if ann.MessageIs(externalLink) { + if err := ann.UnmarshalTo(externalLink); err != nil { + return nil, err + } + if externalLink.GetUrl() != "" { + externalLink.SetUrl(a.hasher.AnonymizeURL(externalLink.GetUrl())) + } + return anypb.New(externalLink) + } + + // Unknown type - drop it + return nil, nil +} diff --git a/pkg/anonymize/anonymize.go b/pkg/anonymize/anonymize.go new file mode 100644 index 000000000..37917d95a --- /dev/null +++ b/pkg/anonymize/anonymize.go @@ -0,0 +1,144 @@ +// Package anonymize provides functionality to anonymize c1z files by replacing +// personally identifiable information (PII) and sensitive data with deterministic, +// anonymized equivalents while preserving structural relationships. 
package anonymize

import (
	"crypto/hmac"
	"crypto/sha256"
	"encoding/hex"
	"time"

	"google.golang.org/protobuf/types/known/timestamppb"
)

// Config holds configuration options for the anonymization process.
type Config struct {
	// Salt is used as a key for HMAC-based hashing to generate deterministic
	// but unpredictable anonymized values. Different salts produce different outputs.
	// Required: New panics when Salt is empty.
	Salt string
}

// defaultConfig returns a Config populated with a fixed, hard-coded salt.
// Because that salt is a constant in the source, hashes produced with it are
// reproducible by anyone who has this code.
func defaultConfig() Config {
	return Config{
		Salt: "baton-anonymize-default-salt",
	}
}

// Anonymizer handles the anonymization of c1z file data.
type Anonymizer struct {
	// config is the configuration New was called with.
	config Config
	// hasher performs the salted hashing; built from config.Salt in New.
	hasher *Hasher
	timestamp time.Time // Single timestamp used for all anonymized timestamps
}

// New creates a new Anonymizer with the given configuration.
//
// It panics with "salt is required" when config.Salt is empty. The
// Anonymizer's timestamp is captured once, via time.Now(), at construction.
func New(config Config) *Anonymizer {
	if config.Salt == "" {
		panic("salt is required")
	}
	return &Anonymizer{
		config: config,
		hasher: NewHasher(config.Salt),
		timestamp: time.Now(),
	}
}

// AnonymizedTimestamp returns the single timestamp used for all anonymized records.
// Every call converts the same stored time.Time into a new protobuf Timestamp.
func (a *Anonymizer) AnonymizedTimestamp() *timestamppb.Timestamp {
	return timestamppb.New(a.timestamp)
}

// newWithDefaults creates a new Anonymizer from defaultConfig().
func newWithDefaults() *Anonymizer {
	return New(defaultConfig())
}

// Hasher provides deterministic hashing for anonymization.
// It uses HMAC-SHA256 with a salt to generate consistent but unpredictable values.
type Hasher struct {
	// salt is the HMAC key, stored as the raw bytes of the salt string.
	salt []byte
}

// NewHasher creates a new Hasher with the given salt.
// No validation is performed here; an empty salt is accepted.
func NewHasher(salt string) *Hasher {
	return &Hasher{
		salt: []byte(salt),
	}
}

// Hash generates a deterministic hash of the input string.
// The same input with the same salt always produces the same output.
+func (h *Hasher) Hash(input string) string { + mac := hmac.New(sha256.New, h.salt) + mac.Write([]byte(input)) + return hex.EncodeToString(mac.Sum(nil)) +} + +// HashN generates a deterministic hash truncated to n characters. +func (h *Hasher) HashN(input string, n int) string { + hash := h.Hash(input) + if len(hash) < n { + return hash + } + return hash[:n] +} + +// AnonymizeEmail generates an anonymized email address using the full hash digest, split around an @ symbol. +func (h *Hasher) AnonymizeEmail(email string) string { + e := h.HashN(email, 32) + return e[:len(e)/2] + "@" + e[len(e)/2:] +} + +// AnonymizeDisplayName generates an anonymized display name using the full hash digest. +func (h *Hasher) AnonymizeDisplayName(name string) string { + return h.HashN(name, 16) +} + +// AnonymizeLogin generates an anonymized login/username using the full hash digest. +func (h *Hasher) AnonymizeLogin(login string) string { + return h.HashN(login, max(32, len(login))) +} + +// AnonymizeEmployeeID generates an anonymized employee ID using the full hash digest. +func (h *Hasher) AnonymizeEmployeeID(empID string) string { + return h.HashN(empID, max(32, len(empID))) +} + +// AnonymizeResourceID generates an anonymized resource ID, preserving the original length. +func (h *Hasher) AnonymizeResourceID(resourceID string) string { + return h.HashN(resourceID, max(32, len(resourceID))) +} + +// AnonymizeExternalID generates an anonymized external ID, preserving the original length. +func (h *Hasher) AnonymizeExternalID(externalID string) string { + return h.HashN(externalID, max(32, len(externalID))) +} + +// AnonymizeURL generates an anonymized URL. +func (h *Hasher) AnonymizeURL(url string) string { + return "https://example.com/" + h.HashN(url, 16) +} + +// AnonymizeGivenName generates an anonymized given name using the full hash digest. 
+func (h *Hasher) AnonymizeGivenName(name string) string { + return h.HashN(name, 16) +} + +// AnonymizeFamilyName generates an anonymized family name using the full hash digest. +func (h *Hasher) AnonymizeFamilyName(name string) string { + return h.HashN(name, 16) +} + +// AnonymizeMiddleName generates an anonymized middle name using the full hash digest. +func (h *Hasher) AnonymizeMiddleName(name string) string { + return h.HashN(name, 16) +} + +// AnonymizeResourceType generates an anonymized resource type name using the full hash digest. +// Uses deterministic hashing so the same type always maps to the same value. +func (h *Hasher) AnonymizeResourceType(resourceType string) string { + return h.HashN(resourceType, 8) +} diff --git a/pkg/anonymize/anonymize_test.go b/pkg/anonymize/anonymize_test.go new file mode 100644 index 000000000..fb34d15c0 --- /dev/null +++ b/pkg/anonymize/anonymize_test.go @@ -0,0 +1,168 @@ +package anonymize + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +const testEmailAddress = "john.doe@example.com" + +func TestHasher_Deterministic(t *testing.T) { + h := NewHasher("test-salt") + + // Same input should produce same output + hash1 := h.Hash("test-input") + hash2 := h.Hash("test-input") + require.Equal(t, hash1, hash2, "Hash should be deterministic") + + // Different inputs should produce different outputs + hash3 := h.Hash("different-input") + require.NotEqual(t, hash1, hash3, "Different inputs should produce different hashes") +} + +func TestHasher_DifferentSalts(t *testing.T) { + h1 := NewHasher("salt1") + h2 := NewHasher("salt2") + + // Same input with different salts should produce different outputs + hash1 := h1.Hash("test-input") + hash2 := h2.Hash("test-input") + require.NotEqual(t, hash1, hash2, "Different salts should produce different hashes") +} + +func TestHasher_HashN(t *testing.T) { + h := NewHasher("test-salt") + + hash8 := h.HashN("test-input", 8) + require.Len(t, hash8, 8, "HashN should truncate 
to specified length") + + hash16 := h.HashN("test-input", 16) + require.Len(t, hash16, 16, "HashN should truncate to specified length") + + // hash8 should be a prefix of hash16 + require.True(t, hash16[:8] == hash8, "Shorter hash should be prefix of longer hash") +} + +func TestHasher_AnonymizeEmail(t *testing.T) { + h := NewHasher("test-salt") + + original := testEmailAddress + email := h.AnonymizeEmail(original) + require.Contains(t, email, "@", "Anonymized email should contain @") + require.Len(t, email, 33, "Anonymized email should be 16+1+16 chars (hash@hash)") + require.NotEqual(t, original, email, "Anonymized email should be different from original") + + // Same input should produce same output + email2 := h.AnonymizeEmail(original) + require.Equal(t, email, email2, "Email anonymization should be deterministic") +} + +func TestHasher_AnonymizeDisplayName(t *testing.T) { + h := NewHasher("test-salt") + + original := "John Doe" + name := h.AnonymizeDisplayName(original) + require.Len(t, name, 16, "Anonymized name should be 16 chars") + require.NotEqual(t, original, name, "Anonymized name should be different from original") + + // Second call should be deterministic + name2 := h.AnonymizeDisplayName(original) + require.Equal(t, name, name2, "Display name anonymization should be deterministic") +} + +func TestHasher_AnonymizeLogin(t *testing.T) { + h := NewHasher("test-salt") + + original := "johndoe" + login := h.AnonymizeLogin(original) + require.Len(t, login, max(32, len(original)), "Anonymized login should be max(32, original length)") + require.NotEqual(t, original, login, "Anonymized login should be different from original") + + // Same input should produce same output + login2 := h.AnonymizeLogin(original) + require.Equal(t, login, login2, "Login anonymization should be deterministic") +} + +func TestHasher_AnonymizeEmployeeID(t *testing.T) { + h := NewHasher("test-salt") + + original := "EMP001" + empID := h.AnonymizeEmployeeID(original) + 
require.Len(t, empID, max(32, len(original)), "Anonymized employee ID should be max(32, original length)") + require.NotEqual(t, original, empID, "Anonymized employee ID should be different from original") +} + +func TestHasher_AnonymizeResourceID(t *testing.T) { + h := NewHasher("test-salt") + + original := "resource-123" + resID := h.AnonymizeResourceID(original) + require.Len(t, resID, max(32, len(original)), "Anonymized resource ID should be max(32, original length)") + require.NotEqual(t, original, resID, "Anonymized resource ID should be different from original") +} + +func TestHasher_AnonymizeExternalID(t *testing.T) { + h := NewHasher("test-salt") + + original := "ext-123" + extID := h.AnonymizeExternalID(original) + require.Len(t, extID, max(32, len(original)), "Anonymized external ID should be max(32, original length)") + require.NotEqual(t, original, extID, "Anonymized external ID should be different from original") +} + +func TestHasher_AnonymizeURL(t *testing.T) { + h := NewHasher("test-salt") + + url := h.AnonymizeURL("https://example.com/help") + require.Contains(t, url, "https://example.com/", "Anonymized URL should use example.com domain") +} + +func TestHasher_AnonymizeStructuredName(t *testing.T) { + h := NewHasher("test-salt") + + originalGiven := "John" + givenName := h.AnonymizeGivenName(originalGiven) + require.Len(t, givenName, 16, "Anonymized given name should be 16 chars") + require.NotEqual(t, originalGiven, givenName, "Anonymized given name should be different from original") + + originalFamily := "Doe" + familyName := h.AnonymizeFamilyName(originalFamily) + require.Len(t, familyName, 16, "Anonymized family name should be 16 chars") + require.NotEqual(t, originalFamily, familyName, "Anonymized family name should be different from original") + + originalMiddle := "Robert" + middleName := h.AnonymizeMiddleName(originalMiddle) + require.Len(t, middleName, 16, "Anonymized middle name should be 16 chars") + require.NotEqual(t, originalMiddle, 
middleName, "Anonymized middle name should be different from original") +} + +func TestDefaultConfig(t *testing.T) { + config := defaultConfig() + + require.NotEmpty(t, config.Salt, "Default config should have a salt") +} + +func TestNew(t *testing.T) { + a := newWithDefaults() + require.NotNil(t, a, "NewWithDefaults should return an Anonymizer") + require.NotNil(t, a.hasher, "Anonymizer should have a hasher") +} + +func TestNew_WithEmptySalt(t *testing.T) { + require.Panics(t, func() { + New(Config{ + Salt: "", + }) + }, "New should panic with empty salt") +} + +func TestShouldDeleteAssets(t *testing.T) { + a := newWithDefaults() + require.True(t, a.ShouldDeleteAssets(), "Assets should always be deleted during anonymization") +} + +func TestShouldClearSessionStore(t *testing.T) { + a := newWithDefaults() + require.True(t, a.ShouldClearSessionStore(), "Session store should always be cleared") +} diff --git a/pkg/anonymize/entitlement.go b/pkg/anonymize/entitlement.go new file mode 100644 index 000000000..e411a1a18 --- /dev/null +++ b/pkg/anonymize/entitlement.go @@ -0,0 +1,55 @@ +package anonymize + +import ( + v2 "github.com/conductorone/baton-sdk/pb/c1/connector/v2" +) + +// AnonymizeEntitlement anonymizes an Entitlement in place. 
+func (a *Anonymizer) AnonymizeEntitlement(e *v2.Entitlement) error { + if e == nil { + return nil + } + + // Anonymize display name + if e.GetDisplayName() != "" { + e.SetDisplayName(a.hasher.AnonymizeDisplayName(e.GetDisplayName())) + } + + // Anonymize description + if e.GetDescription() != "" { + e.SetDescription("[ANONYMIZED]") + } + + // Anonymize slug + if e.GetSlug() != "" { + e.SetSlug(a.hasher.HashN(e.GetSlug(), 12)) + } + + // Anonymize ID + if e.GetId() != "" { + e.SetId(a.hasher.AnonymizeExternalID(e.GetId())) + } + + // Anonymize embedded resource + if e.HasResource() { + if err := a.AnonymizeResource(e.GetResource()); err != nil { + return err + } + } + + // Anonymize grantable_to ResourceTypes + for _, rt := range e.GetGrantableTo() { + if err := a.AnonymizeResourceType(rt); err != nil { + return err + } + } + + // Anonymize entitlement annotations (EntitlementImmutable, etc.) + if err := a.anonymizeEntitlementAnnotations(e); err != nil { + return err + } + + // Note: Purpose is an enum value, not PII + + return nil +} diff --git a/pkg/anonymize/entitlement_test.go b/pkg/anonymize/entitlement_test.go new file mode 100644 index 000000000..e16ea0357 --- /dev/null +++ b/pkg/anonymize/entitlement_test.go @@ -0,0 +1,102 @@ +package anonymize + +import ( + "testing" + + v2 "github.com/conductorone/baton-sdk/pb/c1/connector/v2" + "github.com/stretchr/testify/require" +) + +func TestAnonymizeEntitlement_Basic(t *testing.T) { + a := newWithDefaults() + + originalID := "entitlement-123" + originalDisplayName := "Admin Access" + originalSlug := "admin-access" + + e := &v2.Entitlement{ + Id: originalID, + DisplayName: originalDisplayName, + Description: "Full administrative access", + Slug: originalSlug, + Resource: &v2.Resource{ + Id: &v2.ResourceId{ + ResourceType: "app", + Resource: "my-app", + }, + DisplayName: "My Application", + }, + } + + err := a.AnonymizeEntitlement(e) + require.NoError(t, err) + + // ID should be anonymized with max(32, len) chars + 
require.NotEqual(t, originalID, e.GetId()) + require.Len(t, e.GetId(), max(32, len(originalID))) + + // Display name should be anonymized with 16 chars + require.NotEqual(t, originalDisplayName, e.GetDisplayName()) + require.Len(t, e.GetDisplayName(), 16) + + // Description should be anonymized + require.Equal(t, "[ANONYMIZED]", e.GetDescription()) + + // Slug should be anonymized with 12 chars + require.NotEqual(t, originalSlug, e.GetSlug()) + require.Len(t, e.GetSlug(), 12) + + // Embedded resource should be anonymized + require.NotEqual(t, "My Application", e.GetResource().GetDisplayName()) + require.NotEqual(t, "my-app", e.GetResource().GetId().GetResource()) +} + +func TestAnonymizeEntitlement_NilEntitlement(t *testing.T) { + a := newWithDefaults() + + err := a.AnonymizeEntitlement(nil) + require.NoError(t, err) +} + +func TestAnonymizeEntitlement_NilResource(t *testing.T) { + a := newWithDefaults() + + originalID := "entitlement-123" + + e := &v2.Entitlement{ + Id: originalID, + DisplayName: "Test Entitlement", + Resource: nil, // No embedded resource + } + + err := a.AnonymizeEntitlement(e) + require.NoError(t, err) + + // Should still anonymize other fields with max(32, len) chars + require.NotEqual(t, originalID, e.GetId()) + require.Len(t, e.GetId(), max(32, len(originalID))) +} + +func TestAnonymizeEntitlement_Deterministic(t *testing.T) { + a := newWithDefaults() + + e1 := &v2.Entitlement{ + Id: "entitlement-123", + DisplayName: "Admin Access", + } + + e2 := &v2.Entitlement{ + Id: "entitlement-123", + DisplayName: "Admin Access", + } + + err := a.AnonymizeEntitlement(e1) + require.NoError(t, err) + + err = a.AnonymizeEntitlement(e2) + require.NoError(t, err) + + // Same input should produce same output + require.Equal(t, e1.GetId(), e2.GetId()) + require.Equal(t, e1.GetDisplayName(), e2.GetDisplayName()) +} diff --git a/pkg/anonymize/field_coverage_test.go b/pkg/anonymize/field_coverage_test.go new file mode 100644 index 000000000..1653920f8 --- 
/dev/null +++ b/pkg/anonymize/field_coverage_test.go @@ -0,0 +1,685 @@ +package anonymize + +import ( + "fmt" + "strings" + "testing" + + v2 "github.com/conductorone/baton-sdk/pb/c1/connector/v2" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/proto" +) + +// fieldPolicy defines how a field should be handled during anonymization. +type fieldPolicy int + +const ( + // fieldPolicyAnonymize indicates the field contains PII and is anonymized. + fieldPolicyAnonymize fieldPolicy = iota + // fieldPolicySafe indicates the field does not contain PII (enums, timestamps, bools, etc). + fieldPolicySafe + // fieldPolicyRecurse indicates the field is a nested message with its own anonymization. + fieldPolicyRecurse + // fieldPolicyClear indicates the field is cleared entirely (e.g., Profile structs). + fieldPolicyClear +) + +// resourceFieldPolicies defines the anonymization policy for Resource fields. +var resourceFieldPolicies = map[string]fieldPolicy{ + "id": fieldPolicyAnonymize, // ResourceId - resource part anonymized + "parent_resource_id": fieldPolicyAnonymize, // ResourceId - resource part anonymized + "display_name": fieldPolicyAnonymize, // User/group/app names + "annotations": fieldPolicyRecurse, // Contains traits that are anonymized + "description": fieldPolicyAnonymize, // May contain identifying info + "baton_resource": fieldPolicySafe, // Boolean flag + "external_id": fieldPolicyAnonymize, // ExternalId - all parts anonymized + "creation_source": fieldPolicySafe, // Enum value +} + +// resourceIdFieldPolicies defines the anonymization policy for ResourceId fields. +var resourceIdFieldPolicies = map[string]fieldPolicy{ + "resource_type": fieldPolicyAnonymize, // Anonymized consistently with ResourceType.id + "resource": fieldPolicyAnonymize, // May be email, username, etc. 
+ "baton_resource": fieldPolicySafe, // Boolean flag indicating if this is a baton-managed resource +} + +// externalIdFieldPolicies defines the anonymization policy for ExternalId fields. +var externalIdFieldPolicies = map[string]fieldPolicy{ + "id": fieldPolicyAnonymize, // External identifier + "link": fieldPolicyAnonymize, // URL that may identify org + "description": fieldPolicyAnonymize, // May contain identifying info +} + +// userTraitFieldPolicies defines the anonymization policy for UserTrait fields. +var userTraitFieldPolicies = map[string]fieldPolicy{ + "emails": fieldPolicyAnonymize, // Email addresses - primary PII + "status": fieldPolicyRecurse, // Nested struct with anonymized details field + "profile": fieldPolicyClear, // Arbitrary data - cleared + "icon": fieldPolicyClear, // AssetRef - cleared (profile pictures are identifying) + "account_type": fieldPolicySafe, // Enum value + "login": fieldPolicyAnonymize, // Username/login + "login_aliases": fieldPolicyAnonymize, // Additional logins + "employee_ids": fieldPolicyAnonymize, // Employee identifiers + "created_at": fieldPolicyAnonymize, // Timestamp - set to single anonymized timestamp + "last_login": fieldPolicyAnonymize, // Timestamp - set to single anonymized timestamp + "mfa_status": fieldPolicySafe, // MFA status struct (no PII) + "sso_status": fieldPolicySafe, // SSO status struct (no PII) + "structured_name": fieldPolicyAnonymize, // Given/family/middle names +} + +// userTraitEmailFieldPolicies defines the anonymization policy for UserTrait.Email fields. +var userTraitEmailFieldPolicies = map[string]fieldPolicy{ + "address": fieldPolicyAnonymize, // Email address + "is_primary": fieldPolicySafe, // Boolean flag +} + +// userTraitStructuredNameFieldPolicies defines the anonymization policy for UserTrait.StructuredName fields. 
var userTraitStructuredNameFieldPolicies = map[string]fieldPolicy{
	"given_name":   fieldPolicyAnonymize, // First name
	"family_name":  fieldPolicyAnonymize, // Last name
	"middle_names": fieldPolicyAnonymize, // Middle names
	"prefix":       fieldPolicyClear,     // Title like "Mr.", "Dr." - can be identifying
	"suffix":       fieldPolicyClear,     // Suffix like "Jr.", "III" - can be identifying
}

// userTraitStatusFieldPolicies defines the anonymization policy for UserTrait.Status fields.
// The "details" field is treated as potentially identifying and is therefore
// marked fieldPolicyAnonymize rather than fieldPolicySafe.
var userTraitStatusFieldPolicies = map[string]fieldPolicy{
	"status":  fieldPolicySafe,      // Enum value (ENABLED, DISABLED, etc.)
	"details": fieldPolicyAnonymize, // Status details - may contain identifying info
}

// userTraitMFAStatusFieldPolicies defines the anonymization policy for UserTrait.MFAStatus fields.
var userTraitMFAStatusFieldPolicies = map[string]fieldPolicy{
	"mfa_enabled": fieldPolicySafe, // Boolean flag
}

// userTraitSSOStatusFieldPolicies defines the anonymization policy for UserTrait.SSOStatus fields.
var userTraitSSOStatusFieldPolicies = map[string]fieldPolicy{
	"sso_enabled": fieldPolicySafe, // Boolean flag
}

// groupTraitFieldPolicies defines the anonymization policy for GroupTrait fields.
var groupTraitFieldPolicies = map[string]fieldPolicy{
	"icon":    fieldPolicyClear, // AssetRef - cleared (icons are identifying)
	"profile": fieldPolicyClear, // Arbitrary data - cleared
}

// roleTraitFieldPolicies defines the anonymization policy for RoleTrait fields.
var roleTraitFieldPolicies = map[string]fieldPolicy{
	"profile": fieldPolicyClear, // Arbitrary data - cleared
}

// appTraitFieldPolicies defines the anonymization policy for AppTrait fields.
var appTraitFieldPolicies = map[string]fieldPolicy{
	"help_url": fieldPolicyAnonymize, // URL that may identify org
	"icon":     fieldPolicyClear,     // AssetRef - cleared (icons are identifying)
	"logo":     fieldPolicyClear,     // AssetRef - cleared (logos are identifying)
	"profile":  fieldPolicyClear,     // Arbitrary data - cleared
	"flags":    fieldPolicySafe,      // Enum flags
}

// secretTraitFieldPolicies defines the anonymization policy for SecretTrait fields.
var secretTraitFieldPolicies = map[string]fieldPolicy{
	"profile":       fieldPolicyClear,     // Arbitrary data - cleared
	"created_at":    fieldPolicyClear,     // Timestamp - cleared (not used by SDK, avoids fingerprinting)
	"expires_at":    fieldPolicyClear,     // Timestamp - cleared (not used by SDK, avoids fingerprinting)
	"last_used_at":  fieldPolicyClear,     // Timestamp - cleared (not used by SDK, avoids fingerprinting)
	"created_by_id": fieldPolicyAnonymize, // ResourceId - may identify user
	"identity_id":   fieldPolicyAnonymize, // ResourceId - may identify user
}

// entitlementFieldPolicies defines the anonymization policy for Entitlement fields.
// It mirrors what AnonymizeEntitlement does to each field.
var entitlementFieldPolicies = map[string]fieldPolicy{
	"resource":     fieldPolicyRecurse,   // Embedded Resource - anonymized recursively via AnonymizeResource
	"id":           fieldPolicyAnonymize, // Entitlement ID - rewritten with AnonymizeExternalID
	"display_name": fieldPolicyAnonymize, // May contain identifying info - rewritten with AnonymizeDisplayName
	"description":  fieldPolicyAnonymize, // May contain identifying info - replaced by "[ANONYMIZED]"
	"grantable_to": fieldPolicyRecurse,   // Contains ResourceTypes that must be anonymized
	"annotations":  fieldPolicyRecurse,   // Contains EntitlementImmutable, ExternalLink - processed
	"purpose":      fieldPolicySafe,      // Enum value
	"slug":         fieldPolicyAnonymize, // May contain identifying info - replaced by a 12-character hash
}

// grantFieldPolicies defines the anonymization policy for Grant fields.
var grantFieldPolicies = map[string]fieldPolicy{
	"entitlement": fieldPolicyRecurse,   // Embedded Entitlement - anonymized recursively
	"principal":   fieldPolicyRecurse,   // Embedded Resource - anonymized recursively
	"id":          fieldPolicyAnonymize, // Grant ID
	"sources":     fieldPolicyAnonymize, // GrantSources - keys may be identifiers
	"annotations": fieldPolicyRecurse,   // Contains GrantExpandable, GrantMetadata, etc. - processed
}

// resourceTypeFieldPolicies defines the anonymization policy for ResourceType fields.
// NOTE(review): the "optionally preserved" remarks below refer to an option that
// is not visible in Config (which only has Salt) - confirm whether it still exists.
var resourceTypeFieldPolicies = map[string]fieldPolicy{
	"id":                 fieldPolicyAnonymize, // Type ID (optionally preserved)
	"display_name":       fieldPolicyAnonymize, // Type name (optionally preserved)
	"traits":             fieldPolicySafe,      // Enum values
	"annotations":        fieldPolicyRecurse,   // Contains ChildResourceType, ExternalLink - processed
	"description":        fieldPolicyAnonymize, // May contain identifying info
	"sourced_externally": fieldPolicySafe,      // Boolean flag
}

// assetRefFieldPolicies defines the anonymization policy for AssetRef fields.
// AssetRef is used for icon/logo references in traits - these are cleared as icons/logos are identifying.
var assetRefFieldPolicies = map[string]fieldPolicy{
	"id": fieldPolicyClear, // Cleared - icon/logo references are identifying
}

// grantSourcesFieldPolicies defines the anonymization policy for GrantSources fields.
var grantSourcesFieldPolicies = map[string]fieldPolicy{
	"sources": fieldPolicyAnonymize, // Map keys may be identifiers
}

// grantSourcesGrantSourceFieldPolicies defines the anonymization policy for GrantSources.GrantSource fields.
// Note: This message currently has no fields, but coverage ensures future fields are handled.
var grantSourcesGrantSourceFieldPolicies = map[string]fieldPolicy{
	// Currently empty - no fields in GrantSources_GrantSource
}

// grantExpandableFieldPolicies defines the anonymization policy for GrantExpandable annotation fields.
var grantExpandableFieldPolicies = map[string]fieldPolicy{
	"entitlement_ids":   fieldPolicyAnonymize, // Must match anonymized entitlement IDs (rewritten with AnonymizeExternalID)
	"shallow":           fieldPolicySafe,      // Boolean flag
	"resource_type_ids": fieldPolicyAnonymize, // Must match anonymized resource type IDs (rewritten with AnonymizeResourceType)
}

// grantMetadataFieldPolicies defines the anonymization policy for GrantMetadata annotation fields.
var grantMetadataFieldPolicies = map[string]fieldPolicy{
	"metadata": fieldPolicyClear, // Arbitrary data - cleared
}

// grantImmutableFieldPolicies defines the anonymization policy for GrantImmutable annotation fields.
var grantImmutableFieldPolicies = map[string]fieldPolicy{
	"source_id": fieldPolicyAnonymize, // Source identifier - replaced by its full hash
	"metadata":  fieldPolicyClear,     // Arbitrary data - cleared
}

// entitlementImmutableFieldPolicies defines the anonymization policy for EntitlementImmutable annotation fields.
var entitlementImmutableFieldPolicies = map[string]fieldPolicy{
	"source_id": fieldPolicyAnonymize, // Source identifier - replaced by its full hash
	"metadata":  fieldPolicyClear,     // Arbitrary data - cleared
}

// externalLinkFieldPolicies defines the anonymization policy for ExternalLink annotation fields.
var externalLinkFieldPolicies = map[string]fieldPolicy{
	"url": fieldPolicyAnonymize, // URLs can identify orgs - rewritten with AnonymizeURL
}

// childResourceTypeFieldPolicies defines the anonymization policy for ChildResourceType annotation fields.
var childResourceTypeFieldPolicies = map[string]fieldPolicy{
	"resource_type_id": fieldPolicyAnonymize, // Must match anonymized resource type IDs
}

// validateFieldCoverage checks that all fields in a message type are accounted for
// in the policy map. Returns an error if any fields are not handled.
+func validateFieldCoverage(msg proto.Message, policies map[string]fieldPolicy) error { + md := msg.ProtoReflect().Descriptor() + fields := md.Fields() + + var unhandledFields []string + + for i := 0; i < fields.Len(); i++ { + field := fields.Get(i) + fieldName := string(field.Name()) + + if _, ok := policies[fieldName]; !ok { + unhandledFields = append(unhandledFields, fieldName) + } + } + + if len(unhandledFields) > 0 { + return fmt.Errorf( + "unhandled fields in %s: [%s] - these must be explicitly added to the field policy map with appropriate anonymization handling", + md.FullName(), + strings.Join(unhandledFields, ", "), + ) + } + + return nil +} + +// validateNoPolicyForMissingFields checks that the policy map doesn't have entries +// for fields that don't exist in the message (catches typos and removed fields). +func validateNoPolicyForMissingFields(msg proto.Message, policies map[string]fieldPolicy) error { + md := msg.ProtoReflect().Descriptor() + fields := md.Fields() + + // Build a set of actual field names + actualFields := make(map[string]bool) + for i := 0; i < fields.Len(); i++ { + field := fields.Get(i) + actualFields[string(field.Name())] = true + } + + var extraPolicies []string + for policyField := range policies { + if !actualFields[policyField] { + extraPolicies = append(extraPolicies, policyField) + } + } + + if len(extraPolicies) > 0 { + return fmt.Errorf( + "policy entries for non-existent fields in %s: [%s] - these fields may have been removed or renamed", + md.FullName(), + strings.Join(extraPolicies, ", "), + ) + } + + return nil +} + +// TestFieldCoverage_Resource ensures all Resource fields are handled. 
+func TestFieldCoverage_Resource(t *testing.T) { + err := validateFieldCoverage(&v2.Resource{}, resourceFieldPolicies) + require.NoError(t, err, "All Resource fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.Resource{}, resourceFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_ResourceId ensures all ResourceId fields are handled. +func TestFieldCoverage_ResourceId(t *testing.T) { + err := validateFieldCoverage(&v2.ResourceId{}, resourceIdFieldPolicies) + require.NoError(t, err, "All ResourceId fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.ResourceId{}, resourceIdFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_ExternalId ensures all ExternalId fields are handled. +func TestFieldCoverage_ExternalId(t *testing.T) { + err := validateFieldCoverage(&v2.ExternalId{}, externalIdFieldPolicies) + require.NoError(t, err, "All ExternalId fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.ExternalId{}, externalIdFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_UserTrait ensures all UserTrait fields are handled. +func TestFieldCoverage_UserTrait(t *testing.T) { + err := validateFieldCoverage(&v2.UserTrait{}, userTraitFieldPolicies) + require.NoError(t, err, "All UserTrait fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.UserTrait{}, userTraitFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_UserTraitEmail ensures all UserTrait.Email fields are handled. 
+func TestFieldCoverage_UserTraitEmail(t *testing.T) { + err := validateFieldCoverage(&v2.UserTrait_Email{}, userTraitEmailFieldPolicies) + require.NoError(t, err, "All UserTrait.Email fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.UserTrait_Email{}, userTraitEmailFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_UserTraitStructuredName ensures all UserTrait.StructuredName fields are handled. +func TestFieldCoverage_UserTraitStructuredName(t *testing.T) { + err := validateFieldCoverage(&v2.UserTrait_StructuredName{}, userTraitStructuredNameFieldPolicies) + require.NoError(t, err, "All UserTrait.StructuredName fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.UserTrait_StructuredName{}, userTraitStructuredNameFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_UserTraitStatus ensures all UserTrait.Status fields are handled. +func TestFieldCoverage_UserTraitStatus(t *testing.T) { + err := validateFieldCoverage(&v2.UserTrait_Status{}, userTraitStatusFieldPolicies) + require.NoError(t, err, "All UserTrait.Status fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.UserTrait_Status{}, userTraitStatusFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_UserTraitMFAStatus ensures all UserTrait.MFAStatus fields are handled. 
+func TestFieldCoverage_UserTraitMFAStatus(t *testing.T) { + err := validateFieldCoverage(&v2.UserTrait_MFAStatus{}, userTraitMFAStatusFieldPolicies) + require.NoError(t, err, "All UserTrait.MFAStatus fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.UserTrait_MFAStatus{}, userTraitMFAStatusFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_UserTraitSSOStatus ensures all UserTrait.SSOStatus fields are handled. +func TestFieldCoverage_UserTraitSSOStatus(t *testing.T) { + err := validateFieldCoverage(&v2.UserTrait_SSOStatus{}, userTraitSSOStatusFieldPolicies) + require.NoError(t, err, "All UserTrait.SSOStatus fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.UserTrait_SSOStatus{}, userTraitSSOStatusFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_GroupTrait ensures all GroupTrait fields are handled. +func TestFieldCoverage_GroupTrait(t *testing.T) { + err := validateFieldCoverage(&v2.GroupTrait{}, groupTraitFieldPolicies) + require.NoError(t, err, "All GroupTrait fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.GroupTrait{}, groupTraitFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_RoleTrait ensures all RoleTrait fields are handled. +func TestFieldCoverage_RoleTrait(t *testing.T) { + err := validateFieldCoverage(&v2.RoleTrait{}, roleTraitFieldPolicies) + require.NoError(t, err, "All RoleTrait fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.RoleTrait{}, roleTraitFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_AppTrait ensures all AppTrait fields are handled. 
+func TestFieldCoverage_AppTrait(t *testing.T) { + err := validateFieldCoverage(&v2.AppTrait{}, appTraitFieldPolicies) + require.NoError(t, err, "All AppTrait fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.AppTrait{}, appTraitFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_SecretTrait ensures all SecretTrait fields are handled. +func TestFieldCoverage_SecretTrait(t *testing.T) { + err := validateFieldCoverage(&v2.SecretTrait{}, secretTraitFieldPolicies) + require.NoError(t, err, "All SecretTrait fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.SecretTrait{}, secretTraitFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_Entitlement ensures all Entitlement fields are handled. +func TestFieldCoverage_Entitlement(t *testing.T) { + err := validateFieldCoverage(&v2.Entitlement{}, entitlementFieldPolicies) + require.NoError(t, err, "All Entitlement fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.Entitlement{}, entitlementFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_Grant ensures all Grant fields are handled. +func TestFieldCoverage_Grant(t *testing.T) { + err := validateFieldCoverage(&v2.Grant{}, grantFieldPolicies) + require.NoError(t, err, "All Grant fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.Grant{}, grantFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_ResourceType ensures all ResourceType fields are handled. 
+func TestFieldCoverage_ResourceType(t *testing.T) { + err := validateFieldCoverage(&v2.ResourceType{}, resourceTypeFieldPolicies) + require.NoError(t, err, "All ResourceType fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.ResourceType{}, resourceTypeFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_AssetRef ensures all AssetRef fields are handled. +func TestFieldCoverage_AssetRef(t *testing.T) { + err := validateFieldCoverage(&v2.AssetRef{}, assetRefFieldPolicies) + require.NoError(t, err, "All AssetRef fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.AssetRef{}, assetRefFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_GrantSources ensures all GrantSources fields are handled. +func TestFieldCoverage_GrantSources(t *testing.T) { + err := validateFieldCoverage(&v2.GrantSources{}, grantSourcesFieldPolicies) + require.NoError(t, err, "All GrantSources fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.GrantSources{}, grantSourcesFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_GrantSourcesGrantSource ensures all GrantSources.GrantSource fields are handled. +func TestFieldCoverage_GrantSourcesGrantSource(t *testing.T) { + err := validateFieldCoverage(&v2.GrantSources_GrantSource{}, grantSourcesGrantSourceFieldPolicies) + require.NoError(t, err, "All GrantSources.GrantSource fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.GrantSources_GrantSource{}, grantSourcesGrantSourceFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_GrantExpandable ensures all GrantExpandable annotation fields are handled. 
+func TestFieldCoverage_GrantExpandable(t *testing.T) { + err := validateFieldCoverage(&v2.GrantExpandable{}, grantExpandableFieldPolicies) + require.NoError(t, err, "All GrantExpandable fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.GrantExpandable{}, grantExpandableFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_GrantMetadata ensures all GrantMetadata annotation fields are handled. +func TestFieldCoverage_GrantMetadata(t *testing.T) { + err := validateFieldCoverage(&v2.GrantMetadata{}, grantMetadataFieldPolicies) + require.NoError(t, err, "All GrantMetadata fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.GrantMetadata{}, grantMetadataFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_GrantImmutable ensures all GrantImmutable annotation fields are handled. +func TestFieldCoverage_GrantImmutable(t *testing.T) { + err := validateFieldCoverage(&v2.GrantImmutable{}, grantImmutableFieldPolicies) + require.NoError(t, err, "All GrantImmutable fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.GrantImmutable{}, grantImmutableFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_EntitlementImmutable ensures all EntitlementImmutable annotation fields are handled. 
+func TestFieldCoverage_EntitlementImmutable(t *testing.T) { + err := validateFieldCoverage(&v2.EntitlementImmutable{}, entitlementImmutableFieldPolicies) + require.NoError(t, err, "All EntitlementImmutable fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.EntitlementImmutable{}, entitlementImmutableFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_ExternalLink ensures all ExternalLink annotation fields are handled. +func TestFieldCoverage_ExternalLink(t *testing.T) { + err := validateFieldCoverage(&v2.ExternalLink{}, externalLinkFieldPolicies) + require.NoError(t, err, "All ExternalLink fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.ExternalLink{}, externalLinkFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_ChildResourceType ensures all ChildResourceType annotation fields are handled. +func TestFieldCoverage_ChildResourceType(t *testing.T) { + err := validateFieldCoverage(&v2.ChildResourceType{}, childResourceTypeFieldPolicies) + require.NoError(t, err, "All ChildResourceType fields must have an anonymization policy") + + err = validateNoPolicyForMissingFields(&v2.ChildResourceType{}, childResourceTypeFieldPolicies) + require.NoError(t, err, "Policy should not reference non-existent fields") +} + +// TestFieldCoverage_AllTypes runs all field coverage tests as a single test for CI visibility. 
+func TestFieldCoverage_AllTypes(t *testing.T) { + tests := []struct { + name string + msg proto.Message + policies map[string]fieldPolicy + }{ + {"Resource", &v2.Resource{}, resourceFieldPolicies}, + {"ResourceId", &v2.ResourceId{}, resourceIdFieldPolicies}, + {"ExternalId", &v2.ExternalId{}, externalIdFieldPolicies}, + {"UserTrait", &v2.UserTrait{}, userTraitFieldPolicies}, + {"UserTrait_Email", &v2.UserTrait_Email{}, userTraitEmailFieldPolicies}, + {"UserTrait_StructuredName", &v2.UserTrait_StructuredName{}, userTraitStructuredNameFieldPolicies}, + {"UserTrait_Status", &v2.UserTrait_Status{}, userTraitStatusFieldPolicies}, + {"UserTrait_MFAStatus", &v2.UserTrait_MFAStatus{}, userTraitMFAStatusFieldPolicies}, + {"UserTrait_SSOStatus", &v2.UserTrait_SSOStatus{}, userTraitSSOStatusFieldPolicies}, + {"GroupTrait", &v2.GroupTrait{}, groupTraitFieldPolicies}, + {"RoleTrait", &v2.RoleTrait{}, roleTraitFieldPolicies}, + {"AppTrait", &v2.AppTrait{}, appTraitFieldPolicies}, + {"SecretTrait", &v2.SecretTrait{}, secretTraitFieldPolicies}, + {"Entitlement", &v2.Entitlement{}, entitlementFieldPolicies}, + {"Grant", &v2.Grant{}, grantFieldPolicies}, + {"ResourceType", &v2.ResourceType{}, resourceTypeFieldPolicies}, + {"AssetRef", &v2.AssetRef{}, assetRefFieldPolicies}, + {"GrantSources", &v2.GrantSources{}, grantSourcesFieldPolicies}, + {"GrantSources_GrantSource", &v2.GrantSources_GrantSource{}, grantSourcesGrantSourceFieldPolicies}, + // Annotation types + {"GrantExpandable", &v2.GrantExpandable{}, grantExpandableFieldPolicies}, + {"GrantMetadata", &v2.GrantMetadata{}, grantMetadataFieldPolicies}, + {"GrantImmutable", &v2.GrantImmutable{}, grantImmutableFieldPolicies}, + {"EntitlementImmutable", &v2.EntitlementImmutable{}, entitlementImmutableFieldPolicies}, + {"ExternalLink", &v2.ExternalLink{}, externalLinkFieldPolicies}, + {"ChildResourceType", &v2.ChildResourceType{}, childResourceTypeFieldPolicies}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) 
{ + err := validateFieldCoverage(tt.msg, tt.policies) + require.NoError(t, err, "All %s fields must have an anonymization policy", tt.name) + + err = validateNoPolicyForMissingFields(tt.msg, tt.policies) + require.NoError(t, err, "Policy for %s should not reference non-existent fields", tt.name) + }) + } +} + +// fieldPolicyToString returns a human-readable string for a field policy. +func fieldPolicyToString(fp fieldPolicy) string { + switch fp { + case fieldPolicyAnonymize: + return "ANONYMIZE" + case fieldPolicySafe: + return "SAFE" + case fieldPolicyRecurse: + return "RECURSE" + case fieldPolicyClear: + return "CLEAR" + default: + return "UNKNOWN" + } +} + +// getAnonymizedFields returns a list of fields that are marked for anonymization. +func getAnonymizedFields(policies map[string]fieldPolicy) []string { + var fields []string + for field, policy := range policies { + if policy == fieldPolicyAnonymize { + fields = append(fields, field) + } + } + return fields +} + +// TestFieldPolicy_UserTraitHasPIIFields verifies that UserTrait has expected PII fields marked. +func TestFieldPolicy_UserTraitHasPIIFields(t *testing.T) { + anonymizedFields := getAnonymizedFields(userTraitFieldPolicies) + + // These fields MUST be anonymized as they contain PII + requiredAnonymizedFields := []string{ + "emails", + "login", + "login_aliases", + "employee_ids", + "structured_name", + } + + for _, required := range requiredAnonymizedFields { + found := false + for _, actual := range anonymizedFields { + if actual == required { + found = true + break + } + } + require.True(t, found, "UserTrait field %q must be marked for anonymization", required) + } +} + +// TestFieldPolicy_ProfileFieldsAreCleared verifies that profile fields are cleared. 
+func TestFieldPolicy_ProfileFieldsAreCleared(t *testing.T) { + tests := []struct { + name string + policies map[string]fieldPolicy + }{ + {"UserTrait", userTraitFieldPolicies}, + {"GroupTrait", groupTraitFieldPolicies}, + {"RoleTrait", roleTraitFieldPolicies}, + {"AppTrait", appTraitFieldPolicies}, + {"SecretTrait", secretTraitFieldPolicies}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + policy, ok := tt.policies["profile"] + require.True(t, ok, "%s should have a policy for 'profile' field", tt.name) + require.Equal(t, fieldPolicyClear, policy, "%s profile field should be CLEARED, not %s", tt.name, fieldPolicyToString(policy)) + }) + } +} + +// knownNestedMessageTypes lists all nested message types that must have field coverage. +// If a new nested message type is added to the protobufs, add it here AND create a policy map. +// This ensures new nested types can't be added without explicit anonymization handling. +var knownNestedMessageTypes = map[string]bool{ + "c1.connector.v2.UserTrait.Email": true, + "c1.connector.v2.UserTrait.StructuredName": true, + "c1.connector.v2.UserTrait.Status": true, + "c1.connector.v2.UserTrait.MFAStatus": true, + "c1.connector.v2.UserTrait.SSOStatus": true, + "c1.connector.v2.AssetRef": true, + "c1.connector.v2.ResourceId": true, + "c1.connector.v2.ExternalId": true, + "c1.connector.v2.ResourceType": true, + "c1.connector.v2.Resource": true, + "c1.connector.v2.Entitlement": true, + "c1.connector.v2.GrantSources": true, + "c1.connector.v2.GrantSources.GrantSource": true, + // Well-known Google types that don't need anonymization policies + "google.protobuf.Any": true, // Handled via annotation processing + "google.protobuf.Struct": true, // Cleared as "profile" fields + "google.protobuf.Timestamp": true, // Cleared in implementation +} + +// parentTypesWithNestedMessages lists parent message types that may contain nested messages. 
+// These are the types we anonymize that could have new nested message fields added. +var parentTypesWithNestedMessages = []proto.Message{ + &v2.Resource{}, + &v2.ResourceType{}, + &v2.Entitlement{}, + &v2.Grant{}, + &v2.UserTrait{}, + &v2.GroupTrait{}, + &v2.RoleTrait{}, + &v2.AppTrait{}, + &v2.SecretTrait{}, +} + +// TestNestedTypeCoverage ensures all nested message types have field coverage. +// This test will fail if a new nested message type is added to the protobufs +// without being added to knownNestedMessageTypes and having a policy map created. +func TestNestedTypeCoverage(t *testing.T) { + var unknownTypes []string + + for _, parent := range parentTypesWithNestedMessages { + md := parent.ProtoReflect().Descriptor() + fields := md.Fields() + + for i := 0; i < fields.Len(); i++ { + field := fields.Get(i) + + // Check if this field is a message type + if field.Kind().String() == "message" { + msgName := string(field.Message().FullName()) + + if !knownNestedMessageTypes[msgName] { + unknownTypes = append(unknownTypes, + fmt.Sprintf("%s.%s -> %s", md.FullName(), field.Name(), msgName)) + } + } + } + } + + if len(unknownTypes) > 0 { + t.Errorf("Unknown nested message types found - add them to knownNestedMessageTypes AND create field policy maps:\n %s", + strings.Join(unknownTypes, "\n ")) + } +} diff --git a/pkg/anonymize/grant.go b/pkg/anonymize/grant.go new file mode 100644 index 000000000..beeda018c --- /dev/null +++ b/pkg/anonymize/grant.go @@ -0,0 +1,66 @@ +package anonymize + +import ( + v2 "github.com/conductorone/baton-sdk/pb/c1/connector/v2" +) + +// AnonymizeGrant anonymizes a Grant in place. 
+func (a *Anonymizer) AnonymizeGrant(g *v2.Grant) error { + if g == nil { + return nil + } + + // Anonymize ID + if g.GetId() != "" { + g.SetId(a.hasher.AnonymizeExternalID(g.GetId())) + } + + // Anonymize embedded entitlement + if g.HasEntitlement() { + if err := a.AnonymizeEntitlement(g.GetEntitlement()); err != nil { + return err + } + } + + // Anonymize principal (which is a Resource) + if g.HasPrincipal() { + if err := a.AnonymizeResource(g.GetPrincipal()); err != nil { + return err + } + } + + // Anonymize grant sources + if g.HasSources() { + a.anonymizeGrantSources(g.GetSources()) + } + + // Anonymize grant annotations (GrantExpandable, GrantMetadata, etc.) + if err := a.anonymizeGrantAnnotations(g); err != nil { + return err + } + + return nil +} + +// anonymizeGrantSources anonymizes GrantSources in place. +func (a *Anonymizer) anonymizeGrantSources(gs *v2.GrantSources) { + if gs == nil { + return + } + + // Grant sources contain a map of source identifiers to GrantSource objects + // The keys might be resource IDs or other identifiers that should be anonymized + sources := gs.GetSources() + if sources == nil { + return + } + + // Create a new map with anonymized keys + newSources := make(map[string]*v2.GrantSources_GrantSource) + for key, source := range sources { + // Anonymize the key (which is typically a resource identifier) + newKey := a.hasher.AnonymizeResourceID(key) + newSources[newKey] = source + } + gs.SetSources(newSources) +} diff --git a/pkg/anonymize/grant_test.go b/pkg/anonymize/grant_test.go new file mode 100644 index 000000000..6ccc856a6 --- /dev/null +++ b/pkg/anonymize/grant_test.go @@ -0,0 +1,179 @@ +package anonymize + +import ( + "testing" + + v2 "github.com/conductorone/baton-sdk/pb/c1/connector/v2" + "github.com/stretchr/testify/require" +) + +const testGrantID = "grant-123" + +func TestAnonymizeGrant_Basic(t *testing.T) { + a := newWithDefaults() + + originalGrantID := testGrantID + originalEntitlementID := "entitlement-456" + + 
g := &v2.Grant{ + Id: originalGrantID, + Entitlement: &v2.Entitlement{ + Id: originalEntitlementID, + DisplayName: "Admin Access", + Resource: &v2.Resource{ + Id: &v2.ResourceId{ + ResourceType: "app", + Resource: "my-app", + }, + DisplayName: "My Application", + }, + }, + Principal: &v2.Resource{ + Id: &v2.ResourceId{ + ResourceType: "user", + Resource: "john.doe@example.com", + }, + DisplayName: "John Doe", + }, + } + + err := a.AnonymizeGrant(g) + require.NoError(t, err) + + // Grant ID should be anonymized with max(32, len) chars + require.NotEqual(t, originalGrantID, g.GetId()) + require.Len(t, g.GetId(), max(32, len(originalGrantID))) + + // Entitlement should be anonymized with max(32, len) chars + require.NotEqual(t, originalEntitlementID, g.GetEntitlement().GetId()) + require.Len(t, g.GetEntitlement().GetId(), max(32, len(originalEntitlementID))) + require.NotEqual(t, "Admin Access", g.GetEntitlement().GetDisplayName()) + + // Entitlement's resource should be anonymized + require.NotEqual(t, "my-app", g.GetEntitlement().GetResource().GetId().GetResource()) + + // Principal should be anonymized + require.NotEqual(t, "john.doe@example.com", g.GetPrincipal().GetId().GetResource()) + require.NotEqual(t, "John Doe", g.GetPrincipal().GetDisplayName()) +} + +func TestAnonymizeGrant_WithSources(t *testing.T) { + a := newWithDefaults() + + g := &v2.Grant{ + Id: testGrantID, + Sources: &v2.GrantSources{ + Sources: map[string]*v2.GrantSources_GrantSource{ + "source-1": {}, + "source-2": {}, + }, + }, + } + + err := a.AnonymizeGrant(g) + require.NoError(t, err) + + // Sources should be anonymized (keys) + require.Len(t, g.GetSources().GetSources(), 2) + for key := range g.GetSources().GetSources() { + require.NotEqual(t, "source-1", key) + require.NotEqual(t, "source-2", key) + } +} + +func TestAnonymizeGrant_NilGrant(t *testing.T) { + a := newWithDefaults() + + err := a.AnonymizeGrant(nil) + require.NoError(t, err) +} + +func TestAnonymizeGrant_NilEntitlement(t 
*testing.T) { + a := newWithDefaults() + + originalGrantID := testGrantID + originalResource := "user-123" + + g := &v2.Grant{ + Id: originalGrantID, + Entitlement: nil, + Principal: &v2.Resource{ + Id: &v2.ResourceId{ + ResourceType: "user", + Resource: originalResource, + }, + DisplayName: "Test User", + }, + } + + err := a.AnonymizeGrant(g) + require.NoError(t, err) + + // Should still anonymize other fields with max(32, len) chars + require.NotEqual(t, originalGrantID, g.GetId()) + require.Len(t, g.GetId(), max(32, len(originalGrantID))) + require.NotEqual(t, originalResource, g.GetPrincipal().GetId().GetResource()) + require.Len(t, g.GetPrincipal().GetId().GetResource(), max(32, len(originalResource))) +} + +func TestAnonymizeGrant_NilPrincipal(t *testing.T) { + a := newWithDefaults() + + originalGrantID := testGrantID + originalEntitlementID := "entitlement-456" + + g := &v2.Grant{ + Id: originalGrantID, + Entitlement: &v2.Entitlement{ + Id: originalEntitlementID, + DisplayName: "Test Entitlement", + }, + Principal: nil, + } + + err := a.AnonymizeGrant(g) + require.NoError(t, err) + + // Should still anonymize other fields with max(32, len) chars + require.NotEqual(t, originalGrantID, g.GetId()) + require.Len(t, g.GetId(), max(32, len(originalGrantID))) + require.NotEqual(t, originalEntitlementID, g.GetEntitlement().GetId()) + require.Len(t, g.GetEntitlement().GetId(), max(32, len(originalEntitlementID))) +} + +func TestAnonymizeGrant_Deterministic(t *testing.T) { + a := newWithDefaults() + + g1 := &v2.Grant{ + Id: testGrantID, + Principal: &v2.Resource{ + Id: &v2.ResourceId{ + ResourceType: "user", + Resource: "user-123", + }, + DisplayName: "Test User", + }, + } + + g2 := &v2.Grant{ + Id: testGrantID, + Principal: &v2.Resource{ + Id: &v2.ResourceId{ + ResourceType: "user", + Resource: "user-123", + }, + DisplayName: "Test User", + }, + } + + err := a.AnonymizeGrant(g1) + require.NoError(t, err) + + err = a.AnonymizeGrant(g2) + require.NoError(t, err) + + // 
Same input should produce same output + require.Equal(t, g1.GetId(), g2.GetId()) + require.Equal(t, g1.GetPrincipal().GetId().GetResource(), g2.GetPrincipal().GetId().GetResource()) + require.Equal(t, g1.GetPrincipal().GetDisplayName(), g2.GetPrincipal().GetDisplayName()) +} diff --git a/pkg/anonymize/misc.go b/pkg/anonymize/misc.go new file mode 100644 index 000000000..a3f8b88a0 --- /dev/null +++ b/pkg/anonymize/misc.go @@ -0,0 +1,50 @@ +package anonymize + +import ( + v2 "github.com/conductorone/baton-sdk/pb/c1/connector/v2" +) + +// AnonymizeResourceType anonymizes a ResourceType in place. +func (a *Anonymizer) AnonymizeResourceType(rt *v2.ResourceType) error { + if rt == nil { + return nil + } + + // Anonymize ID - uses same hashing as ResourceId.resource_type for consistency + if rt.GetId() != "" { + rt.SetId(a.hasher.AnonymizeResourceType(rt.GetId())) + } + + // Anonymize display name + if rt.GetDisplayName() != "" { + rt.SetDisplayName(a.hasher.AnonymizeDisplayName(rt.GetDisplayName())) + } + + // Anonymize description + if rt.GetDescription() != "" { + rt.SetDescription("[ANONYMIZED]") + } + + // Anonymize annotations (ChildResourceType, ExternalLink, etc.) + if err := a.anonymizeResourceTypeAnnotations(rt); err != nil { + return err + } + + // Note: Traits are enum values, not PII + // Note: SourcedExternally is a boolean flag, not PII + + return nil +} + +// ShouldDeleteAssets returns true because all assets are deleted during anonymization. +// Assets (icons, logos, etc.) can contain identifying information. +func (a *Anonymizer) ShouldDeleteAssets() bool { + return true +} + +// ShouldClearSessionStore returns whether the session store should be cleared. +func (a *Anonymizer) ShouldClearSessionStore() bool { + // Session store always contains potentially sensitive cached data + // and should be cleared during anonymization. 
+ return true +} diff --git a/pkg/anonymize/misc_test.go b/pkg/anonymize/misc_test.go new file mode 100644 index 000000000..4e246ac01 --- /dev/null +++ b/pkg/anonymize/misc_test.go @@ -0,0 +1,73 @@ +package anonymize + +import ( + "testing" + + v2 "github.com/conductorone/baton-sdk/pb/c1/connector/v2" + "github.com/stretchr/testify/require" +) + +func TestAnonymizeResourceType_Basic(t *testing.T) { + a := newWithDefaults() + + originalID := "user" + originalDisplayName := "User" + + rt := &v2.ResourceType{ + Id: originalID, + DisplayName: originalDisplayName, + Description: "A user in the system", + Traits: []v2.ResourceType_Trait{ + v2.ResourceType_TRAIT_USER, + }, + } + + err := a.AnonymizeResourceType(rt) + require.NoError(t, err) + + // ID should be anonymized with 8 char hash + require.NotEqual(t, originalID, rt.GetId()) + require.Len(t, rt.GetId(), 8) + + // Display name should be anonymized with 16 char hash + require.NotEqual(t, originalDisplayName, rt.GetDisplayName()) + require.Len(t, rt.GetDisplayName(), 16) + + // Description should be anonymized + require.Equal(t, "[ANONYMIZED]", rt.GetDescription()) + + // Traits should be preserved + require.Len(t, rt.GetTraits(), 1) + require.Equal(t, v2.ResourceType_TRAIT_USER, rt.GetTraits()[0]) +} + +func TestAnonymizeResourceType_NilResourceType(t *testing.T) { + a := newWithDefaults() + + err := a.AnonymizeResourceType(nil) + require.NoError(t, err) +} + +func TestAnonymizeResourceType_Deterministic(t *testing.T) { + a := newWithDefaults() + + rt1 := &v2.ResourceType{ + Id: "user", + DisplayName: "User", + } + + rt2 := &v2.ResourceType{ + Id: "user", + DisplayName: "User", + } + + err := a.AnonymizeResourceType(rt1) + require.NoError(t, err) + + err = a.AnonymizeResourceType(rt2) + require.NoError(t, err) + + // Same input should produce same output + require.Equal(t, rt1.GetId(), rt2.GetId()) + require.Equal(t, rt1.GetDisplayName(), rt2.GetDisplayName()) +} diff --git a/pkg/anonymize/processor.go 
b/pkg/anonymize/processor.go new file mode 100644 index 000000000..823468b84 --- /dev/null +++ b/pkg/anonymize/processor.go @@ -0,0 +1,284 @@ +package anonymize + +import ( + "context" + "fmt" + "os" + + v2 "github.com/conductorone/baton-sdk/pb/c1/connector/v2" + reader_v2 "github.com/conductorone/baton-sdk/pb/c1/reader/v2" + "github.com/conductorone/baton-sdk/pkg/connectorstore" + "github.com/conductorone/baton-sdk/pkg/dotc1z" +) + +// ProcessorStats contains statistics about the anonymization process. +type ProcessorStats struct { + ResourceTypesProcessed int + ResourcesProcessed int + EntitlementsProcessed int + GrantsProcessed int + AssetsDeleted bool + SessionsCleared bool + SyncRunsCleared bool +} + +// AnonymizeC1ZFile anonymizes a c1z file and writes the result to the output path. +// If outputPath is empty, it creates a file with ".anonymized" suffix. +func (a *Anonymizer) AnonymizeC1ZFile(ctx context.Context, inputPath string, outputPath string) (*ProcessorStats, error) { + if outputPath == "" { + outputPath = inputPath + ".anonymized" + } + + // Open the input file read-only + inputFile, err := dotc1z.NewC1ZFile(ctx, inputPath, dotc1z.WithReadOnly(true)) + if err != nil { + return nil, fmt.Errorf("failed to open input c1z file: %w", err) + } + defer inputFile.Close() + + // Create the output file + outputFile, err := dotc1z.NewC1ZFile(ctx, outputPath) + if err != nil { + return nil, fmt.Errorf("failed to create output c1z file: %w", err) + } + + stats := &ProcessorStats{} + + // Process each sync run from the input file + if err := a.processC1File(ctx, inputFile, outputFile, stats); err != nil { + _ = outputFile.Close() + _ = os.Remove(outputPath) + return nil, err + } + + // Anonymize the timestamps on all sync runs in the output file + if err := outputFile.ClearSyncRunTimestamps(ctx); err != nil { + _ = outputFile.Close() + _ = os.Remove(outputPath) + return nil, fmt.Errorf("failed to anonymize sync run timestamps: %w", err) + } + stats.SyncRunsCleared 
= true + + // Close the output file (this saves the changes) + if err := outputFile.Close(); err != nil { + _ = os.Remove(outputPath) + return nil, fmt.Errorf("failed to save anonymized file: %w", err) + } + + return stats, nil +} + +// processC1File processes all data from the input file and writes anonymized data to the output file. +// It iterates through each sync run in the input and creates a corresponding sync in the output. +func (a *Anonymizer) processC1File(ctx context.Context, input *dotc1z.C1File, output *dotc1z.C1File, stats *ProcessorStats) error { + // List all sync runs from the input file + // pageToken := "" + // for { + latestSync, err := input.GetLatestFinishedSync(ctx, reader_v2.SyncsReaderServiceGetLatestFinishedSyncRequest_builder{ + SyncType: string(connectorstore.SyncTypeAny), + }.Build()) + if err != nil { + return fmt.Errorf("failed to get latest finished sync: %w", err) + } + + // Set the input file to view this specific sync + if err := input.ViewSync(ctx, latestSync.GetSync().GetId()); err != nil { + return fmt.Errorf("failed to set view sync: %w", err) + } + + // Start a new sync in the output file with the same type + syncType := connectorstore.SyncType(latestSync.GetSync().GetSyncType()) + if _, err := output.StartNewSync(ctx, syncType, ""); err != nil { + return fmt.Errorf("failed to start sync in output file: %w", err) + } + + err = a.processSyncData(ctx, input, output, stats) + if err != nil { + return fmt.Errorf("failed to process sync data: %w", err) + } + + // End the sync in the output file + if err := output.EndSync(ctx); err != nil { + return fmt.Errorf("failed to end sync in output file: %w", err) + } + + // Note: Assets and sessions are NOT copied to the output file. + // The output file starts fresh without these potentially identifying data. + stats.AssetsDeleted = true + stats.SessionsCleared = true + + return nil +} + +// processSyncData processes all data types for the current sync. 
+func (a *Anonymizer) processSyncData(ctx context.Context, input *dotc1z.C1File, output *dotc1z.C1File, stats *ProcessorStats) error { + // Process resource types + if err := a.processResourceTypes(ctx, input, output, stats); err != nil { + return fmt.Errorf("failed to process resource types: %w", err) + } + + // Process resources + if err := a.processResources(ctx, input, output, stats); err != nil { + return fmt.Errorf("failed to process resources: %w", err) + } + + // Process entitlements + if err := a.processEntitlements(ctx, input, output, stats); err != nil { + return fmt.Errorf("failed to process entitlements: %w", err) + } + + // Process grants + if err := a.processGrants(ctx, input, output, stats); err != nil { + return fmt.Errorf("failed to process grants: %w", err) + } + + return nil +} + +// processResourceTypes reads resource types from input, anonymizes them, and writes to output. +func (a *Anonymizer) processResourceTypes(ctx context.Context, input *dotc1z.C1File, output *dotc1z.C1File, stats *ProcessorStats) error { + pageToken := "" + for { + req := v2.ResourceTypesServiceListResourceTypesRequest_builder{ + PageSize: 1000, + PageToken: pageToken, + }.Build() + + resp, err := input.ListResourceTypes(ctx, req) + if err != nil { + return err + } + + slice := resp.GetList() + for _, rt := range slice { + if err := a.AnonymizeResourceType(rt); err != nil { + return err + } + stats.ResourceTypesProcessed++ + } + + // Write the anonymized resource types to output + if len(slice) > 0 { + if err := output.PutResourceTypes(ctx, slice...); err != nil { + return err + } + } + + pageToken = resp.GetNextPageToken() + if pageToken == "" { + break + } + } + return nil +} + +// processResources reads resources from input, anonymizes them, and writes to output. 
+func (a *Anonymizer) processResources(ctx context.Context, input *dotc1z.C1File, output *dotc1z.C1File, stats *ProcessorStats) error { + pageToken := "" + for { + req := v2.ResourcesServiceListResourcesRequest_builder{ + PageSize: 1000, + PageToken: pageToken, + }.Build() + + resp, err := input.ListResources(ctx, req) + if err != nil { + return err + } + + slice := resp.GetList() + for _, r := range slice { + if err := a.AnonymizeResource(r); err != nil { + return err + } + stats.ResourcesProcessed++ + } + + // Write the anonymized resources to output + if len(slice) > 0 { + if err := output.PutResources(ctx, slice...); err != nil { + return err + } + } + + pageToken = resp.GetNextPageToken() + if pageToken == "" { + break + } + } + return nil +} + +// processEntitlements reads entitlements from input, anonymizes them, and writes to output. +func (a *Anonymizer) processEntitlements(ctx context.Context, input *dotc1z.C1File, output *dotc1z.C1File, stats *ProcessorStats) error { + pageToken := "" + for { + req := v2.EntitlementsServiceListEntitlementsRequest_builder{ + PageSize: 1000, + PageToken: pageToken, + }.Build() + + resp, err := input.ListEntitlements(ctx, req) + if err != nil { + return err + } + + slice := resp.GetList() + for _, e := range slice { + if err := a.AnonymizeEntitlement(e); err != nil { + return err + } + stats.EntitlementsProcessed++ + } + + // Write the anonymized entitlements to output + if len(slice) > 0 { + if err := output.PutEntitlements(ctx, slice...); err != nil { + return err + } + } + + pageToken = resp.GetNextPageToken() + if pageToken == "" { + break + } + } + return nil +} + +// processGrants reads grants from input, anonymizes them, and writes to output. 
+func (a *Anonymizer) processGrants(ctx context.Context, input *dotc1z.C1File, output *dotc1z.C1File, stats *ProcessorStats) error { + pageToken := "" + for { + req := v2.GrantsServiceListGrantsRequest_builder{ + PageSize: 1000, + PageToken: pageToken, + }.Build() + + resp, err := input.ListGrants(ctx, req) + if err != nil { + return err + } + + slice := resp.GetList() + for _, g := range slice { + if err := a.AnonymizeGrant(g); err != nil { + return err + } + stats.GrantsProcessed++ + } + + // Write the anonymized grants to output + if len(slice) > 0 { + if err := output.PutGrants(ctx, slice...); err != nil { + return err + } + } + + pageToken = resp.GetNextPageToken() + if pageToken == "" { + break + } + } + return nil +} diff --git a/pkg/anonymize/processor_test.go b/pkg/anonymize/processor_test.go new file mode 100644 index 000000000..7c5320d51 --- /dev/null +++ b/pkg/anonymize/processor_test.go @@ -0,0 +1,360 @@ +package anonymize + +import ( + "context" + "os" + "path/filepath" + "testing" + + v2 "github.com/conductorone/baton-sdk/pb/c1/connector/v2" + "github.com/conductorone/baton-sdk/pkg/connectorstore" + "github.com/conductorone/baton-sdk/pkg/dotc1z" + "github.com/stretchr/testify/require" +) + +// TestAnonymizeC1ZFile_EndToEnd verifies that AnonymizeC1ZFile produces +// an anonymized c1z file on disk with all PII properly anonymized. 
+func TestAnonymizeC1ZFile_EndToEnd(t *testing.T) { + ctx := context.Background() + tempDir := t.TempDir() + + // Original test data + const ( + originalResourceType = "user" + originalResourceID = "john.doe@example.com" + originalDisplayName = "John Doe" + originalDescription = "A test user for engineering team" + + originalEntitlementID = "admin-access-entitlement" + originalEntitlementName = "Admin Access" + originalEntitlementDesc = "Full administrative permissions" + originalEntitlementSlug = "admin-access" + + originalGrantID = "grant-john-admin" + ) + + inputPath := filepath.Join(tempDir, "input.c1z") + + // Step 1: Create a c1z file with known PII data + createTestC1ZFile(t, ctx, inputPath, + originalResourceType, originalResourceID, originalDisplayName, originalDescription, + originalEntitlementID, originalEntitlementName, originalEntitlementDesc, originalEntitlementSlug, + originalGrantID, + ) + + // Step 2: Anonymize the file + outputPath := filepath.Join(tempDir, "output.c1z.anonymized") + anonymizer := New(Config{Salt: "test-salt-for-e2e"}) + stats, err := anonymizer.AnonymizeC1ZFile(ctx, inputPath, outputPath) + require.NoError(t, err, "AnonymizeC1ZFile should succeed") + // // Verify stats + require.Equal(t, 1, stats.ResourceTypesProcessed, "Should process 1 resource type") + require.Equal(t, 1, stats.ResourcesProcessed, "Should process 1 resource") + require.Equal(t, 1, stats.EntitlementsProcessed, "Should process 1 entitlement") + require.Equal(t, 1, stats.GrantsProcessed, "Should process 1 grant") + require.True(t, stats.AssetsDeleted, "Assets should be deleted") + require.True(t, stats.SessionsCleared, "Sessions should be cleared") + require.True(t, stats.SyncRunsCleared, "Sync runs should be cleared") + + // Step 4: Open the anonymized file and verify data is anonymized + verifyAnonymizedC1ZFile(t, ctx, outputPath, + originalResourceType, originalResourceID, originalDisplayName, originalDescription, + originalEntitlementID, 
originalEntitlementName, originalEntitlementDesc, originalEntitlementSlug, + originalGrantID, + ) +} + +// TestAnonymizeC1ZFile_DefaultOutputPath verifies that when outputPath is empty, +// the file is created with ".anonymized" suffix. +func TestAnonymizeC1ZFile_DefaultOutputPath(t *testing.T) { + ctx := context.Background() + tempDir := t.TempDir() + + inputPath := filepath.Join(tempDir, "input.c1z") + expectedOutputPath := inputPath + ".anonymized" + + // Create a minimal c1z file + createMinimalC1ZFile(t, ctx, inputPath) + + // Anonymize with empty output path + anonymizer := New(Config{Salt: "test-salt"}) + _, err := anonymizer.AnonymizeC1ZFile(ctx, inputPath, "") + require.NoError(t, err) + + // Verify the default output path was used + _, err = os.Stat(expectedOutputPath) + require.NoError(t, err, "Output file should be created at input.c1z.anonymized") +} + +// TestAnonymizeC1ZFile_Deterministic verifies that anonymizing the same file +// twice with the same salt produces identical results. 
+func TestAnonymizeC1ZFile_Deterministic(t *testing.T) { + ctx := context.Background() + tempDir := t.TempDir() + + inputPath := filepath.Join(tempDir, "input.c1z") + outputPath1 := filepath.Join(tempDir, "output1.c1z") + outputPath2 := filepath.Join(tempDir, "output2.c1z") + + // Create a c1z file + createMinimalC1ZFile(t, ctx, inputPath) + + // Anonymize twice with the same salt + salt := "deterministic-salt" + anonymizer := New(Config{Salt: salt}) + + _, err := anonymizer.AnonymizeC1ZFile(ctx, inputPath, outputPath1) + require.NoError(t, err) + + _, err = anonymizer.AnonymizeC1ZFile(ctx, inputPath, outputPath2) + require.NoError(t, err) + + // Open both files and compare the anonymized display names + c1f1, err := dotc1z.NewC1ZFile(ctx, outputPath1, dotc1z.WithReadOnly(true)) + require.NoError(t, err) + defer c1f1.Close() + + c1f2, err := dotc1z.NewC1ZFile(ctx, outputPath2, dotc1z.WithReadOnly(true)) + require.NoError(t, err) + defer c1f2.Close() + + resp1, err := c1f1.ListResourceTypes(ctx, v2.ResourceTypesServiceListResourceTypesRequest_builder{PageSize: 100}.Build()) + require.NoError(t, err) + + resp2, err := c1f2.ListResourceTypes(ctx, v2.ResourceTypesServiceListResourceTypesRequest_builder{PageSize: 100}.Build()) + require.NoError(t, err) + + require.Equal(t, len(resp1.GetList()), len(resp2.GetList())) + for i := range resp1.GetList() { + require.Equal(t, resp1.GetList()[i].GetId(), resp2.GetList()[i].GetId(), + "Same salt should produce same anonymized IDs") + require.Equal(t, resp1.GetList()[i].GetDisplayName(), resp2.GetList()[i].GetDisplayName(), + "Same salt should produce same anonymized display names") + } +} + +// TestAnonymizeC1ZFile_DifferentSalts verifies that different salts produce +// different anonymized output. 
+func TestAnonymizeC1ZFile_DifferentSalts(t *testing.T) { + ctx := context.Background() + tempDir := t.TempDir() + + inputPath := filepath.Join(tempDir, "input.c1z") + outputPath1 := filepath.Join(tempDir, "output1.c1z") + outputPath2 := filepath.Join(tempDir, "output2.c1z") + + // Create a c1z file + createMinimalC1ZFile(t, ctx, inputPath) + + // Anonymize with different salts + anonymizer1 := New(Config{Salt: "salt-one"}) + anonymizer2 := New(Config{Salt: "salt-two"}) + + _, err := anonymizer1.AnonymizeC1ZFile(ctx, inputPath, outputPath1) + require.NoError(t, err) + + _, err = anonymizer2.AnonymizeC1ZFile(ctx, inputPath, outputPath2) + require.NoError(t, err) + + // Open both files and verify the anonymized data is different + c1f1, err := dotc1z.NewC1ZFile(ctx, outputPath1, dotc1z.WithReadOnly(true)) + require.NoError(t, err) + defer c1f1.Close() + + c1f2, err := dotc1z.NewC1ZFile(ctx, outputPath2, dotc1z.WithReadOnly(true)) + require.NoError(t, err) + defer c1f2.Close() + + resp1, err := c1f1.ListResourceTypes(ctx, v2.ResourceTypesServiceListResourceTypesRequest_builder{PageSize: 100}.Build()) + require.NoError(t, err) + + resp2, err := c1f2.ListResourceTypes(ctx, v2.ResourceTypesServiceListResourceTypesRequest_builder{PageSize: 100}.Build()) + require.NoError(t, err) + + require.Equal(t, len(resp1.GetList()), len(resp2.GetList())) + require.NotEqual(t, resp1.GetList()[0].GetId(), resp2.GetList()[0].GetId(), + "Different salts should produce different anonymized IDs") +} + +// TestAnonymizeC1ZFile_InvalidInputPath verifies error handling for non-existent input file. 
+func TestAnonymizeC1ZFile_InvalidInputPath(t *testing.T) { + ctx := context.Background() + tempDir := t.TempDir() + + inputPath := filepath.Join(tempDir, "does-not-exist.c1z") + outputPath := filepath.Join(tempDir, "output.c1z") + + anonymizer := New(Config{Salt: "test-salt"}) + _, err := anonymizer.AnonymizeC1ZFile(ctx, inputPath, outputPath) + require.Error(t, err, "Should fail for non-existent input file") +} + +// createTestC1ZFile creates a c1z file with comprehensive test data. +func createTestC1ZFile(t *testing.T, ctx context.Context, path string, + resourceType, resourceID, displayName, description string, + entitlementID, entitlementName, entitlementDesc, entitlementSlug string, + grantID string, +) { + t.Helper() + + c1f, err := dotc1z.NewC1ZFile(ctx, path) + require.NoError(t, err) + + // Start a sync + _, err = c1f.StartNewSync(ctx, connectorstore.SyncTypeFull, "") + require.NoError(t, err) + + // Add resource type + err = c1f.PutResourceTypes(ctx, v2.ResourceType_builder{ + Id: resourceType, + DisplayName: resourceType + " type", + }.Build()) + require.NoError(t, err) + + // Add resource + resource := v2.Resource_builder{ + Id: v2.ResourceId_builder{ + ResourceType: resourceType, + Resource: resourceID, + }.Build(), + DisplayName: displayName, + Description: description, + }.Build() + err = c1f.PutResources(ctx, resource) + require.NoError(t, err) + + // Add entitlement + entitlement := v2.Entitlement_builder{ + Id: entitlementID, + DisplayName: entitlementName, + Description: entitlementDesc, + Slug: entitlementSlug, + Resource: v2.Resource_builder{ + Id: v2.ResourceId_builder{ + ResourceType: resourceType, + Resource: resourceID, + }.Build(), + }.Build(), + }.Build() + err = c1f.PutEntitlements(ctx, entitlement) + require.NoError(t, err) + + // Add grant + grant := v2.Grant_builder{ + Id: grantID, + Principal: v2.Resource_builder{ + Id: v2.ResourceId_builder{ + ResourceType: resourceType, + Resource: resourceID, + }.Build(), + }.Build(), + 
Entitlement: v2.Entitlement_builder{ + Id: entitlementID, + Resource: v2.Resource_builder{ + Id: v2.ResourceId_builder{ + ResourceType: resourceType, + Resource: resourceID, + }.Build(), + }.Build(), + }.Build(), + }.Build() + err = c1f.PutGrants(ctx, grant) + require.NoError(t, err) + + // End sync and close + err = c1f.EndSync(ctx) + require.NoError(t, err) + + err = c1f.Close() + require.NoError(t, err) +} + +// createMinimalC1ZFile creates a minimal c1z file for simple tests. +func createMinimalC1ZFile(t *testing.T, ctx context.Context, path string) { + t.Helper() + + c1f, err := dotc1z.NewC1ZFile(ctx, path) + require.NoError(t, err) + + _, err = c1f.StartNewSync(ctx, connectorstore.SyncTypeFull, "") + require.NoError(t, err) + + err = c1f.PutResourceTypes(ctx, v2.ResourceType_builder{ + Id: "test-type", + DisplayName: "Test Type", + }.Build()) + require.NoError(t, err) + + err = c1f.EndSync(ctx) + require.NoError(t, err) + + err = c1f.Close() + require.NoError(t, err) +} + +// verifyAnonymizedC1ZFile opens the anonymized file and verifies all data is properly anonymized. 
+func verifyAnonymizedC1ZFile(t *testing.T, ctx context.Context, path string, + originalResourceType, originalResourceID, originalDisplayName, originalDescription string, + originalEntitlementID, originalEntitlementName, originalEntitlementDesc, originalEntitlementSlug string, + originalGrantID string, +) { + t.Helper() + + c1f, err := dotc1z.NewC1ZFile(ctx, path, dotc1z.WithReadOnly(true)) + require.NoError(t, err) + defer c1f.Close() + + // Verify resource types are anonymized + rtResp, err := c1f.ListResourceTypes(ctx, v2.ResourceTypesServiceListResourceTypesRequest_builder{PageSize: 100}.Build()) + require.NoError(t, err) + require.Len(t, rtResp.GetList(), 1) + + rt := rtResp.GetList()[0] + require.NotEqual(t, originalResourceType, rt.GetId(), "Resource type ID should be anonymized") + require.Len(t, rt.GetId(), 8, "Resource type ID should be 8 chars (HashN with 8)") + + // Verify resources are anonymized + rResp, err := c1f.ListResources(ctx, v2.ResourcesServiceListResourcesRequest_builder{PageSize: 100}.Build()) + require.NoError(t, err) + require.Len(t, rResp.GetList(), 1) + + r := rResp.GetList()[0] + require.NotEqual(t, originalDisplayName, r.GetDisplayName(), "Resource display name should be anonymized") + require.Len(t, r.GetDisplayName(), 16, "Resource display name should be 16 chars (HashN with 16)") + require.NotEqual(t, originalDescription, r.GetDescription(), "Resource description should be anonymized") + require.NotEqual(t, originalResourceID, r.GetId().GetResource(), "Resource ID should be anonymized") + require.Len(t, r.GetId().GetResource(), max(32, len(originalResourceID)), "Resource ID length should be max(32, original)") + + // Verify entitlements are anonymized + eResp, err := c1f.ListEntitlements(ctx, v2.EntitlementsServiceListEntitlementsRequest_builder{PageSize: 100}.Build()) + require.NoError(t, err) + require.Len(t, eResp.GetList(), 1) + + e := eResp.GetList()[0] + require.NotEqual(t, originalEntitlementID, e.GetId(), "Entitlement ID 
should be anonymized") + require.Len(t, e.GetId(), max(32, len(originalEntitlementID)), "Entitlement ID length should be max(32, original)") + require.NotEqual(t, originalEntitlementName, e.GetDisplayName(), "Entitlement display name should be anonymized") + require.Len(t, e.GetDisplayName(), 16, "Entitlement display name should be 16 chars") + require.NotEqual(t, originalEntitlementDesc, e.GetDescription(), "Entitlement description should be anonymized") + require.NotEqual(t, originalEntitlementSlug, e.GetSlug(), "Entitlement slug should be anonymized") + require.Len(t, e.GetSlug(), 12, "Entitlement slug should be 12 chars") + + // Verify grants are anonymized + gResp, err := c1f.ListGrants(ctx, v2.GrantsServiceListGrantsRequest_builder{PageSize: 100}.Build()) + require.NoError(t, err) + require.Len(t, gResp.GetList(), 1) + + g := gResp.GetList()[0] + require.NotEqual(t, originalGrantID, g.GetId(), "Grant ID should be anonymized") + require.Len(t, g.GetId(), max(32, len(originalGrantID)), "Grant ID length should be max(32, original)") + + // Verify the principal resource within the grant is anonymized + require.NotNil(t, g.GetPrincipal()) + require.NotEqual(t, originalResourceID, g.GetPrincipal().GetId().GetResource(), + "Grant principal resource ID should be anonymized") + + // Verify the entitlement within the grant is anonymized + require.NotNil(t, g.GetEntitlement()) + require.NotEqual(t, originalEntitlementID, g.GetEntitlement().GetId(), + "Grant entitlement ID should be anonymized") +} diff --git a/pkg/anonymize/resource.go b/pkg/anonymize/resource.go new file mode 100644 index 000000000..35a637417 --- /dev/null +++ b/pkg/anonymize/resource.go @@ -0,0 +1,305 @@ +package anonymize + +import ( + v2 "github.com/conductorone/baton-sdk/pb/c1/connector/v2" + "google.golang.org/protobuf/types/known/anypb" +) + +// AnonymizeResource anonymizes a Resource in place. 
+func (a *Anonymizer) AnonymizeResource(r *v2.Resource) error { + if r == nil { + return nil + } + + // Anonymize display name + if r.GetDisplayName() != "" { + r.SetDisplayName(a.hasher.AnonymizeDisplayName(r.GetDisplayName())) + } + + // Anonymize description + if r.GetDescription() != "" { + r.SetDescription("[ANONYMIZED]") + } + + // Anonymize resource ID + if r.HasId() { + a.anonymizeResourceID(r.GetId()) + } + + // Anonymize parent resource ID + if r.HasParentResourceId() { + a.anonymizeResourceID(r.GetParentResourceId()) + } + + // Anonymize external ID + if r.HasExternalId() { + a.anonymizeExternalID(r.GetExternalId()) + } + + // Anonymize trait annotations + if err := a.anonymizeResourceAnnotations(r); err != nil { + return err + } + + return nil +} + +// anonymizeResourceID anonymizes a ResourceId in place. +func (a *Anonymizer) anonymizeResourceID(rid *v2.ResourceId) { + if rid == nil { + return + } + if rid.GetResource() != "" { + rid.SetResource(a.hasher.AnonymizeResourceID(rid.GetResource())) + } + // Anonymize resource type - uses same hashing as ResourceType.Id for consistency + if rid.GetResourceType() != "" { + rid.SetResourceType(a.hasher.AnonymizeResourceType(rid.GetResourceType())) + } +} + +// anonymizeExternalID anonymizes an ExternalId in place. +func (a *Anonymizer) anonymizeExternalID(eid *v2.ExternalId) { + if eid == nil { + return + } + if eid.GetId() != "" { + eid.SetId(a.hasher.AnonymizeExternalID(eid.GetId())) + } + if eid.GetLink() != "" { + eid.SetLink(a.hasher.AnonymizeURL(eid.GetLink())) + } + if eid.GetDescription() != "" { + eid.SetDescription("[ANONYMIZED]") + } +} + +// anonymizeResourceAnnotations processes all annotations on a Resource, +// anonymizing known types and dropping unknown types. 
+func (a *Anonymizer) anonymizeResourceAnnotations(r *v2.Resource) error { + var result []*anypb.Any + for _, ann := range r.GetAnnotations() { + if processed, err := a.processResourceAnnotation(ann); err != nil { + return err + } else if processed != nil { + result = append(result, processed) + } + } + + r.SetAnnotations(result) + return nil +} + +// processResourceAnnotation processes a single annotation from a Resource. +// Returns nil if the annotation should be dropped. +func (a *Anonymizer) processResourceAnnotation(ann *anypb.Any) (*anypb.Any, error) { + // UserTrait + ut := &v2.UserTrait{} + if ann.MessageIs(ut) { + if err := ann.UnmarshalTo(ut); err != nil { + return nil, err + } + a.anonymizeUserTrait(ut) + return anypb.New(ut) + } + + // GroupTrait + gt := &v2.GroupTrait{} + if ann.MessageIs(gt) { + if err := ann.UnmarshalTo(gt); err != nil { + return nil, err + } + a.anonymizeGroupTrait(gt) + return anypb.New(gt) + } + + // RoleTrait + rt := &v2.RoleTrait{} + if ann.MessageIs(rt) { + if err := ann.UnmarshalTo(rt); err != nil { + return nil, err + } + a.anonymizeRoleTrait(rt) + return anypb.New(rt) + } + + // AppTrait + at := &v2.AppTrait{} + if ann.MessageIs(at) { + if err := ann.UnmarshalTo(at); err != nil { + return nil, err + } + a.anonymizeAppTrait(at) + return anypb.New(at) + } + + // SecretTrait + st := &v2.SecretTrait{} + if ann.MessageIs(st) { + if err := ann.UnmarshalTo(st); err != nil { + return nil, err + } + a.anonymizeSecretTrait(st) + return anypb.New(st) + } + + // ExternalLink - anonymize URL + externalLink := &v2.ExternalLink{} + if ann.MessageIs(externalLink) { + if err := ann.UnmarshalTo(externalLink); err != nil { + return nil, err + } + if externalLink.GetUrl() != "" { + externalLink.SetUrl(a.hasher.AnonymizeURL(externalLink.GetUrl())) + } + return anypb.New(externalLink) + } + + // Unknown type - drop it + return nil, nil +} + +// anonymizeUserTrait anonymizes a UserTrait in place. 
+func (a *Anonymizer) anonymizeUserTrait(ut *v2.UserTrait) { + if ut == nil { + return + } + + // Anonymize emails + for _, email := range ut.GetEmails() { + if email.GetAddress() != "" { + email.SetAddress(a.hasher.AnonymizeEmail(email.GetAddress())) + } + } + + // Anonymize login + if ut.GetLogin() != "" { + ut.SetLogin(a.hasher.AnonymizeLogin(ut.GetLogin())) + } + + // Anonymize login aliases + aliases := ut.GetLoginAliases() + for i, alias := range aliases { + if alias != "" { + aliases[i] = a.hasher.AnonymizeLogin(alias) + } + } + ut.SetLoginAliases(aliases) + + // Anonymize employee IDs + empIDs := ut.GetEmployeeIds() + for i, empID := range empIDs { + if empID != "" { + empIDs[i] = a.hasher.AnonymizeEmployeeID(empID) + } + } + ut.SetEmployeeIds(empIDs) + + // Anonymize structured name + if ut.HasStructuredName() { + sn := ut.GetStructuredName() + if sn.GetGivenName() != "" { + sn.SetGivenName(a.hasher.AnonymizeGivenName(sn.GetGivenName())) + } + if sn.GetFamilyName() != "" { + sn.SetFamilyName(a.hasher.AnonymizeFamilyName(sn.GetFamilyName())) + } + middleNames := sn.GetMiddleNames() + for i, mn := range middleNames { + if mn != "" { + middleNames[i] = a.hasher.AnonymizeMiddleName(mn) + } + } + sn.SetMiddleNames(middleNames) + // Clear prefix/suffix - can be identifying when combined with other data + sn.SetPrefix("") + sn.SetSuffix("") + } + + // Clear profile (may contain arbitrary PII) + ut.ClearProfile() + + // Clear icon (profile pictures are identifying) + ut.ClearIcon() + + // Anonymize status details (may contain identifying information) + if ut.HasStatus() && ut.GetStatus().GetDetails() != "" { + ut.GetStatus().SetDetails("[ANONYMIZED]") + } + + // Set timestamps to single anonymized timestamp to avoid fingerprinting + ut.SetCreatedAt(a.AnonymizedTimestamp()) + ut.SetLastLogin(a.AnonymizedTimestamp()) + + // Note: Status enum, AccountType, MfaStatus, SsoStatus are kept + // as they don't typically contain PII and are useful for analysis +} + +// 
anonymizeGroupTrait anonymizes a GroupTrait in place. +func (a *Anonymizer) anonymizeGroupTrait(gt *v2.GroupTrait) { + if gt == nil { + return + } + + // Clear profile (may contain arbitrary PII) + gt.ClearProfile() + + // Clear icon (profile pictures are identifying) + gt.ClearIcon() +} + +// anonymizeRoleTrait anonymizes a RoleTrait in place. +func (a *Anonymizer) anonymizeRoleTrait(rt *v2.RoleTrait) { + if rt == nil { + return + } + + // Clear profile (may contain arbitrary PII) + rt.ClearProfile() +} + +// anonymizeAppTrait anonymizes an AppTrait in place. +func (a *Anonymizer) anonymizeAppTrait(at *v2.AppTrait) { + if at == nil { + return + } + + // Anonymize help URL + if at.GetHelpUrl() != "" { + at.SetHelpUrl(a.hasher.AnonymizeURL(at.GetHelpUrl())) + } + + // Clear profile (may contain arbitrary PII) + at.ClearProfile() + + // Clear icon and logo (these are identifying) + at.ClearIcon() + at.ClearLogo() + + // Note: Flags are kept as they don't contain PII +} + +// anonymizeSecretTrait anonymizes a SecretTrait in place. 
+func (a *Anonymizer) anonymizeSecretTrait(st *v2.SecretTrait) { + if st == nil { + return + } + + // Clear profile (may contain arbitrary PII) + st.ClearProfile() + + // Anonymize CreatedById + if st.HasCreatedById() { + a.anonymizeResourceID(st.GetCreatedById()) + } + + // Anonymize IdentityId + if st.HasIdentityId() { + a.anonymizeResourceID(st.GetIdentityId()) + } + + // Set timestamps to single anonymized timestamp to avoid fingerprinting + st.SetCreatedAt(a.AnonymizedTimestamp()) + st.SetExpiresAt(a.AnonymizedTimestamp()) + st.SetLastUsedAt(a.AnonymizedTimestamp()) +} diff --git a/pkg/anonymize/resource_test.go b/pkg/anonymize/resource_test.go new file mode 100644 index 000000000..7e0293c55 --- /dev/null +++ b/pkg/anonymize/resource_test.go @@ -0,0 +1,310 @@ +package anonymize + +import ( + "testing" + + v2 "github.com/conductorone/baton-sdk/pb/c1/connector/v2" + "github.com/conductorone/baton-sdk/pkg/annotations" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/types/known/structpb" +) + +func TestAnonymizeResource_Basic(t *testing.T) { + a := newWithDefaults() + + originalType := "user" + originalResource := "john.doe@example.com" + originalDisplayName := "John Doe" + + r := &v2.Resource{ + Id: &v2.ResourceId{ + ResourceType: originalType, + Resource: originalResource, + }, + DisplayName: originalDisplayName, + Description: "Test user description", + } + + err := a.AnonymizeResource(r) + require.NoError(t, err) + + // Display name should be anonymized with 16 char hash + require.NotEqual(t, originalDisplayName, r.GetDisplayName()) + require.Len(t, r.GetDisplayName(), 16) + + // Description should be anonymized + require.Equal(t, "[ANONYMIZED]", r.GetDescription()) + + // Resource ID should be fully anonymized + require.NotEqual(t, originalType, r.GetId().GetResourceType()) + require.Len(t, r.GetId().GetResourceType(), 8) + require.NotEqual(t, originalResource, r.GetId().GetResource()) + require.Len(t, r.GetId().GetResource(), max(32, 
len(originalResource)))
+}
+
+func TestAnonymizeResource_WithUserTrait(t *testing.T) {
+	a := newWithDefaults()
+
+	originalEmail1 := "john.doe@example.com"
+	originalEmail2 := "jdoe@work.com"
+	originalLogin := "johndoe"
+	originalAlias1 := "jd"
+	originalAlias2 := "john.d"
+	originalEmpID1 := "EMP001"
+	originalEmpID2 := "EMP002"
+	originalGivenName := "John"
+	originalFamilyName := "Doe"
+	originalMiddleName := "Robert"
+
+	// Create a resource with UserTrait; the profile is built first so a NewStruct error fails the test.
+	profile, err := structpb.NewStruct(map[string]any{
+		"department": "Engineering",
+		"manager":    "Jane Smith",
+	})
+	require.NoError(t, err)
+
+	userTrait := &v2.UserTrait{
+		Emails: []*v2.UserTrait_Email{
+			{Address: originalEmail1, IsPrimary: true},
+			{Address: originalEmail2, IsPrimary: false},
+		},
+		Login:        originalLogin,
+		LoginAliases: []string{originalAlias1, originalAlias2},
+		EmployeeIds:  []string{originalEmpID1, originalEmpID2},
+		Profile:      profile,
+		StructuredName: &v2.UserTrait_StructuredName{
+			GivenName:   originalGivenName,
+			FamilyName:  originalFamilyName,
+			MiddleNames: []string{originalMiddleName},
+		},
+	}
+
+	annos := annotations.New(userTrait)
+	r := &v2.Resource{
+		Id: &v2.ResourceId{
+			ResourceType: "user",
+			Resource:     originalEmail1,
+		},
+		DisplayName: "John Doe",
+		Annotations: annos,
+	}
+
+	err = a.AnonymizeResource(r)
+	require.NoError(t, err)
+
+	// Read the UserTrait back out of the anonymized resource's annotations
+	ut := &v2.UserTrait{}
+	updatedAnnos := annotations.Annotations(r.GetAnnotations())
+	found, err := updatedAnnos.Pick(ut)
+	require.NoError(t, err)
+	require.True(t, found)
+
+	// Emails should be anonymized with hash@hash format (33 chars: 16 + @ + 16)
+	require.Len(t, ut.GetEmails(), 2)
+	require.NotEqual(t, originalEmail1, ut.GetEmails()[0].GetAddress())
+	require.Len(t, ut.GetEmails()[0].GetAddress(), 33)
+	require.NotEqual(t, originalEmail2, ut.GetEmails()[1].GetAddress())
+	require.Len(t, ut.GetEmails()[1].GetAddress(), 33)
+
+	// Login should be anonymized with max(32, len) chars
+	require.NotEqual(t, originalLogin, ut.GetLogin())
+	require.Len(t, ut.GetLogin(), max(32, len(originalLogin)))
+
+	// Login aliases should be anonymized with max(32, len) chars
+	require.NotEqual(t, originalAlias1, ut.GetLoginAliases()[0])
+	require.Len(t, ut.GetLoginAliases()[0], max(32, len(originalAlias1)))
+	require.NotEqual(t, originalAlias2, ut.GetLoginAliases()[1])
+	require.Len(t, ut.GetLoginAliases()[1], max(32, len(originalAlias2)))
+
+	// Employee IDs should be anonymized with max(32, len) chars
+	require.NotEqual(t, originalEmpID1, ut.GetEmployeeIds()[0])
+	require.Len(t, ut.GetEmployeeIds()[0], max(32, len(originalEmpID1)))
+	require.NotEqual(t, originalEmpID2, ut.GetEmployeeIds()[1])
+	require.Len(t, ut.GetEmployeeIds()[1], max(32, len(originalEmpID2)))
+
+	// Profile should be cleared
+	require.False(t, ut.HasProfile())
+
+	// Structured name should be anonymized with 16 char hash
+	require.True(t, ut.HasStructuredName())
+	require.NotEqual(t, originalGivenName, ut.GetStructuredName().GetGivenName())
+	require.Len(t, ut.GetStructuredName().GetGivenName(), 16)
+	require.NotEqual(t, originalFamilyName, ut.GetStructuredName().GetFamilyName())
+	require.Len(t, ut.GetStructuredName().GetFamilyName(), 16)
+	require.Len(t, ut.GetStructuredName().GetMiddleNames(), 1)
+	require.NotEqual(t, originalMiddleName, ut.GetStructuredName().GetMiddleNames()[0])
+	require.Len(t, ut.GetStructuredName().GetMiddleNames()[0], 16)
+}
+
+func TestAnonymizeResource_WithGroupTrait(t *testing.T) {
+	a := newWithDefaults()
+
+	originalDisplayName := "Engineering Team"
+
+	// Build the profile first so a NewStruct error fails the test.
+	profile, err := structpb.NewStruct(map[string]any{
+		"department": "Engineering",
+	})
+	require.NoError(t, err)
+	groupTrait := &v2.GroupTrait{
+		Profile: profile,
+	}
+
+	annos := annotations.New(groupTrait)
+	r := &v2.Resource{
+		Id: &v2.ResourceId{
+			ResourceType: "group",
+			Resource:     "engineering-team",
+		},
+		DisplayName: originalDisplayName,
+		Annotations: annos,
+	}
+
+	err = a.AnonymizeResource(r)
+	require.NoError(t, err)
+
+	// Check that GroupTrait profile was cleared
+	gt := &v2.GroupTrait{}
+	updatedAnnos := annotations.Annotations(r.GetAnnotations())
+	found, err := updatedAnnos.Pick(gt)
+	require.NoError(t, err)
+	require.True(t, found)
+	require.False(t, gt.HasProfile())
+
+	// Display name should be anonymized with 16 char hash
+	require.NotEqual(t, originalDisplayName, r.GetDisplayName())
+	require.Len(t, r.GetDisplayName(), 16)
+}
+
+func TestAnonymizeResource_WithAppTrait(t *testing.T) {
+	a := newWithDefaults()
+
+	// Build the profile first so a NewStruct error fails the test.
+	profile, err := structpb.NewStruct(map[string]any{
+		"version": "1.0",
+	})
+	require.NoError(t, err)
+	appTrait := &v2.AppTrait{
+		HelpUrl: "https://help.example.com/docs",
+		Profile: profile,
+	}
+
+	annos := annotations.New(appTrait)
+	r := &v2.Resource{
+		Id: &v2.ResourceId{
+			ResourceType: "app",
+			Resource:     "my-app",
+		},
+		DisplayName: "My Application",
+		Annotations: annos,
+	}
+
+	err = a.AnonymizeResource(r)
+	require.NoError(t, err)
+
+	// Read the AppTrait back out of the anonymized resource's annotations
+	at := &v2.AppTrait{}
+	updatedAnnos := annotations.Annotations(r.GetAnnotations())
+	found, err := updatedAnnos.Pick(at)
+	require.NoError(t, err)
+	require.True(t, found)
+
+	// HelpUrl should be rewritten to an https://example.com/ URL
+	require.Contains(t, at.GetHelpUrl(), "https://example.com/")
+
+	// Profile should be cleared
+	require.False(t, at.HasProfile())
+}
+
+func TestAnonymizeResource_WithExternalID(t *testing.T) {
+	a := newWithDefaults()
+
+	originalExtID := "external-123"
+
+	r := &v2.Resource{
+		Id: &v2.ResourceId{
+			ResourceType: "user",
+			Resource:     "user-123",
+		},
+		DisplayName: "Test User",
+		ExternalId: &v2.ExternalId{
+			Id:          originalExtID,
+			Link:        "https://help.example.com/user/123", // must not already contain https://example.com/, or the Contains check below is vacuous
+			Description: "External user link",
+		},
+	}
+
+	err := a.AnonymizeResource(r)
+	require.NoError(t, err)
+
+	// External ID should be anonymized with max(32, len) chars; link and description are replaced
+	require.NotEqual(t, originalExtID, r.GetExternalId().GetId())
+	require.Len(t, r.GetExternalId().GetId(), max(32, len(originalExtID)))
+	require.Contains(t, r.GetExternalId().GetLink(), "https://example.com/")
+	require.Equal(t, "[ANONYMIZED]", r.GetExternalId().GetDescription())
+}
+
+func TestAnonymizeResource_WithParentResourceID(t *testing.T) {
+	a := newWithDefaults()
+
+	originalParentType := "organization"
+	originalParentResource := "org-456"
+
+	r := &v2.Resource{
+		Id: &v2.ResourceId{
+			ResourceType: "user",
+			Resource:     "user-123",
+		},
+		ParentResourceId: &v2.ResourceId{
+			ResourceType: originalParentType,
+			Resource:     originalParentResource,
+		},
+		DisplayName: "Test User",
+	}
+
+	err := a.AnonymizeResource(r)
+	require.NoError(t, err)
+
+	// Parent resource ID should be fully anonymized: 8-char type, max(32, len)-char resource
+	require.NotEqual(t, originalParentType, r.GetParentResourceId().GetResourceType())
+	require.Len(t, r.GetParentResourceId().GetResourceType(), 8)
+	require.NotEqual(t, originalParentResource, r.GetParentResourceId().GetResource())
+	require.Len(t, r.GetParentResourceId().GetResource(), max(32, len(originalParentResource)))
+}
+
+func TestAnonymizeResource_NilResource(t *testing.T) {
+	a := newWithDefaults()
+
+	err := a.AnonymizeResource(nil)
+	require.NoError(t, err)
+}
+
+func TestAnonymizeResource_Deterministic(t *testing.T) {
+	a := newWithDefaults()
+
+	r1 := &v2.Resource{
+		Id: &v2.ResourceId{
+			ResourceType: "user",
+			Resource:     "john.doe@example.com",
+		},
+		DisplayName: "John Doe",
+	}
+
+	r2 := &v2.Resource{
+		Id: &v2.ResourceId{
+			ResourceType: "user",
+			Resource:     "john.doe@example.com",
+		},
+		DisplayName: "John Doe",
+	}
+
+	err := a.AnonymizeResource(r1)
+	require.NoError(t, err)
+
+	err = a.AnonymizeResource(r2)
+	require.NoError(t, err)
+
+	// Same input should produce same output
+	require.Equal(t, r1.GetDisplayName(), r2.GetDisplayName())
+	require.Equal(t, r1.GetId().GetResource(), r2.GetId().GetResource())
+}
diff --git a/pkg/anonymize/table_coverage_test.go b/pkg/anonymize/table_coverage_test.go
new file mode 100644
index
000000000..49c58bbf2
--- /dev/null
+++ b/pkg/anonymize/table_coverage_test.go
@@ -0,0 +1,57 @@
+package anonymize
+
+import (
+	"testing"
+
+	"github.com/conductorone/baton-sdk/pkg/dotc1z"
+	"github.com/stretchr/testify/require"
+)
+
+// knownAnonymizedTables maps each table name that anonymization accounts for
+// to a short, non-empty note on how that table is handled. Keys carry the
+// schema version prefix (e.g., "v1_resources"). A table added to the database
+// MUST be registered here and given anonymization logic in processor.go.
+var knownAnonymizedTables = map[string]string{
+	"v1_resource_types":     "processResourceTypes - anonymizes all fields",
+	"v1_resources":          "processResources - anonymizes all fields",
+	"v1_entitlements":       "processEntitlements - anonymizes all fields",
+	"v1_grants":             "processGrants - anonymizes all fields",
+	"v1_assets":             "processAssets - deletes all assets",
+	"v1_connector_sessions": "processSessions - clears all sessions",
+	"v1_sync_runs":          "processSyncRuns - clears timestamps",
+}
+
+// TestTableCoverage fails when dotc1z.AllTableNames returns a table that has no
+// entry, or an empty description, in knownAnonymizedTables. To fix a failure:
+// 1. Implement the anonymization logic for the table in processor.go.
+// 2. Register the table in knownAnonymizedTables with a note on how it is handled.
+func TestTableCoverage(t *testing.T) {
+	schemaTables := dotc1z.AllTableNames()
+	require.NotEmpty(t, schemaTables, "Expected AllTableNames() to return table names")
+
+	for _, name := range schemaTables {
+		handling, registered := knownAnonymizedTables[name]
+		require.True(t, registered,
+			"Table %q is not handled by anonymization! "+
+				"Add anonymization logic in processor.go and register it in knownAnonymizedTables",
+			name)
+		require.NotEmpty(t, handling,
+			"Table %q has empty description in knownAnonymizedTables", name)
+	}
+}
+
+// TestNoOrphanedTableHandlers fails when knownAnonymizedTables names a table that AllTableNames does not return.
+func TestNoOrphanedTableHandlers(t *testing.T) {
+	schemaTables := dotc1z.AllTableNames()
+	require.NotEmpty(t, schemaTables, "Expected AllTableNames() to return table names")
+
+	exists := make(map[string]bool)
+	for _, name := range schemaTables {
+		exists[name] = true
+	}
+
+	for handled := range knownAnonymizedTables {
+		require.True(t, exists[handled],
+			"knownAnonymizedTables references non-existent table %q - remove it", handled)
+	}
+}
diff --git a/pkg/dotc1z/anonymize_helpers.go b/pkg/dotc1z/anonymize_helpers.go
new file mode 100644
index 000000000..8ca023380
--- /dev/null
+++ b/pkg/dotc1z/anonymize_helpers.go
@@ -0,0 +1,91 @@
+package dotc1z
+
+import (
+	"context"
+	"time"
+
+	"github.com/doug-martin/goqu/v9"
+)
+
+// ClearAssets deletes every row from the table named by assets.Name().
+// It returns any error from database validation, SQL generation, or
+// execution, and sets dbUpdated only when the delete succeeds.
+func (c *C1File) ClearAssets(ctx context.Context) error {
+	ctx, span := tracer.Start(ctx, "C1File.ClearAssets")
+	defer span.End()
+
+	if err := c.validateDb(ctx); err != nil {
+		return err
+	}
+
+	// No WHERE clause is attached, so the statement targets the whole table.
+	stmt, params, err := c.db.Delete(assets.Name()).ToSQL()
+	if err != nil {
+		return err
+	}
+
+	if _, err = c.db.ExecContext(ctx, stmt, params...); err != nil {
+		return err
+	}
+
+	c.dbUpdated = true
+	return nil
+}
+
+// ClearAllSessions deletes every row from the table named by sessionStore.Name().
+// It returns any error from database validation, SQL generation, or
+// execution, and sets dbUpdated only when the delete succeeds.
+func (c *C1File) ClearAllSessions(ctx context.Context) error {
+	ctx, span := tracer.Start(ctx, "C1File.ClearAllSessions")
+	defer span.End()
+
+	if err := c.validateDb(ctx); err != nil {
+		return err
+	}
+
+	// No WHERE clause is attached, so the statement targets the whole table.
+	stmt, params, err := c.db.Delete(sessionStore.Name()).ToSQL()
+	if err != nil {
+		return err
+	}
+
+	if _, err = c.db.ExecContext(ctx, stmt, params...); err != nil {
+		return err
+	}
+
+	c.dbUpdated = true
+	return nil
+}
+
+// ClearSyncRunTimestamps clears all timestamps from sync runs.
+// This removes timing information that could potentially be used to identify when syncs occurred.
+func (c *C1File) ClearSyncRunTimestamps(ctx context.Context) error {
+	ctx, span := tracer.Start(ctx, "C1File.ClearSyncRunTimestamps")
+	defer span.End()
+
+	if err := c.validateDb(ctx); err != nil {
+		return err
+	}
+
+	// Overwrite both columns with the Unix epoch. UTC() matters: time.Unix
+	// yields a local Time, so formatting it directly would vary by host zone.
+	// NOTE(review): no WHERE clause, so rows with an unset ended_at are stamped too — confirm intended.
+	epoch := time.Unix(0, 0).UTC().Format("2006-01-02 15:04:05.999999999")
+	q := c.db.Update(syncRuns.Name()).Set(goqu.Record{
+		"started_at": epoch,
+		"ended_at":   epoch,
+	})
+
+	query, args, err := q.ToSQL()
+	if err != nil {
+		return err
+	}
+
+	_, err = c.db.ExecContext(ctx, query, args...)
+	if err != nil {
+		return err
+	}
+
+	c.dbUpdated = true
+	return nil
+}
diff --git a/pkg/dotc1z/sql_helpers.go b/pkg/dotc1z/sql_helpers.go
index 8b6e0bfc9..54a1c4e55 100644
--- a/pkg/dotc1z/sql_helpers.go
+++ b/pkg/dotc1z/sql_helpers.go
@@ -39,6 +39,17 @@ var allTableDescriptors = []tableDescriptor{
 	sessionStore,
 }
 
+// AllTableNames returns the names of all tables in the c1z database schema.
+// This is the single source of truth for table names, used by tests to ensure
+// all tables are properly handled during anonymization.
+func AllTableNames() []string {
+	names := make([]string, len(allTableDescriptors))
+	for i, t := range allTableDescriptors {
+		names[i] = t.Name()
+	}
+	return names
+}
+
 type tableDescriptor interface {
 	Name() string
 	Schema() (string, []any)