diff --git a/CLAUDE.md b/CLAUDE.md index 74656da..d1fc134 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -26,9 +26,9 @@ tests/ Go 1.23+: Follow standard conventions ## Recent Changes +- 013-message-include-filter: Added Go 1.25+ + `github.com/emicklei/proto` v1.14.3, `gopkg.in/yaml.v3` - 012-fix-include-unannotated: Added Go 1.25+ + `github.com/emicklei/proto` v1.14.3, `gopkg.in/yaml.v3` - 011-combined-include-exclude: Added Go 1.25+ (existing project) + `github.com/emicklei/proto` v1.14.3, `github.com/emicklei/proto-contrib` v0.18.3, `gopkg.in/yaml.v3` -- 010-substitution-placeholders: Added Go 1.25+ + `github.com/emicklei/proto` v1.14.3, `gopkg.in/yaml.v3` diff --git a/internal/filter/filter.go b/internal/filter/filter.go index 88c01c8..2906131 100644 --- a/internal/filter/filter.go +++ b/internal/filter/filter.go @@ -339,6 +339,165 @@ func IncludeMethodsByAnnotation(def *proto.Proto, annotations []string) int { return removed } +// CollectIncludeMessageRoots returns the FQNs of messages and enums that +// have a matching include annotation. These can be passed to +// RemoveOrphanedDefinitions as pinned roots to prevent them from being +// removed as orphans. +func CollectIncludeMessageRoots(def *proto.Proto, annotations []string) map[string]bool { + if len(annotations) == 0 { + return nil + } + annotSet := make(map[string]bool, len(annotations)) + for _, a := range annotations { + annotSet[a] = true + } + + pkg := "" + for _, elem := range def.Elements { + if p, ok := elem.(*proto.Package); ok { + pkg = p.Name + break + } + } + + roots := make(map[string]bool) + for _, elem := range def.Elements { + switch v := elem.(type) { + case *proto.Message: + for _, a := range ExtractAnnotations(v.Comment) { + if annotSet[a] { + roots[qualifiedName(pkg, v.Name)] = true + break + } + } + case *proto.Enum: + for _, a := range ExtractAnnotations(v.Comment) { + if annotSet[a] { + roots[qualifiedName(pkg, v.Name)] = true + break + } + } + } + } + return roots +} + +// IncludeMessagesByAnnotation removes top-level messages and enums from the +// proto AST whose comments do NOT contain any of the specified include +// annotations and are not referenced (directly or transitively) by an +// annotated message. Non-message/non-enum elements pass through unchanged. +// Returns the number of removed messages/enums. +func IncludeMessagesByAnnotation(def *proto.Proto, annotations []string) int { + if len(annotations) == 0 { + return 0 + } + annotSet := make(map[string]bool, len(annotations)) + for _, a := range annotations { + annotSet[a] = true + } + + // Extract package name for qualified name resolution + pkg := "" + for _, elem := range def.Elements { + if p, ok := elem.(*proto.Package); ok { + pkg = p.Name + break + } + } + + // Phase 1: Identify annotated messages/enums as roots, and collect + // all types referenced by remaining services (services are not filtered here) + roots := make(map[string]bool) + for _, elem := range def.Elements { + switch v := elem.(type) { + case *proto.Message: + annots := ExtractAnnotations(v.Comment) + for _, a := range annots { + if annotSet[a] { + roots[qualifiedName(pkg, v.Name)] = true + break + } + } + case *proto.Enum: + annots := ExtractAnnotations(v.Comment) + for _, a := range annots { + if annotSet[a] { + roots[qualifiedName(pkg, v.Name)] = true + break + } + } + case *proto.Service: + // Services are kept/removed by IncludeServicesByAnnotation. + // Any service still in the AST is kept — collect its references. + for _, svcElem := range v.Elements { + if rpc, ok := svcElem.(*proto.RPC); ok { + if rpc.RequestType != "" { + roots[qualifiedName(pkg, rpc.RequestType)] = true + } + if rpc.ReturnsType != "" { + roots[qualifiedName(pkg, rpc.ReturnsType)] = true + } + } + } + } + } + + // Phase 2: Collect transitive dependencies of roots + keep := make(map[string]bool) + for fqn := range roots { + keep[fqn] = true + } + + // Iteratively collect references from kept messages + for { + refs := make(map[string]bool) + for _, elem := range def.Elements { + if msg, ok := elem.(*proto.Message); ok { + fqn := qualifiedName(pkg, msg.Name) + if keep[fqn] { + collectMessageRefs(refs, pkg, msg) + } + } + } + added := false + for ref := range refs { + if !keep[ref] { + keep[ref] = true + added = true + } + } + if !added { + break + } + } + + // Phase 3: Remove messages/enums not in the keep set + filtered := make([]proto.Visitee, 0, len(def.Elements)) + removed := 0 + for _, elem := range def.Elements { + switch v := elem.(type) { + case *proto.Message: + fqn := qualifiedName(pkg, v.Name) + if keep[fqn] { + filtered = append(filtered, elem) + } else { + removed++ + } + case *proto.Enum: + fqn := qualifiedName(pkg, v.Name) + if keep[fqn] { + filtered = append(filtered, elem) + } else { + removed++ + } + default: + filtered = append(filtered, elem) + } + } + def.Elements = filtered + return removed +} + // FilterFieldsByAnnotation removes individual message fields from the proto // AST whose comments contain any of the specified annotations. Handles // NormalField, MapField, and OneOfField (within Oneof containers). Also @@ -519,11 +678,19 @@ func isUserType(typeName string) bool { // RemoveOrphanedDefinitions iteratively removes messages and enums // that are no longer referenced by any remaining RPC method or message. +// Optional pinned FQNs are always kept (treated as roots). // Returns the total count of removed definitions. -func RemoveOrphanedDefinitions(def *proto.Proto, pkg string) int { +func RemoveOrphanedDefinitions(def *proto.Proto, pkg string, pinned ...map[string]bool) int { + var pinnedSet map[string]bool + if len(pinned) > 0 { + pinnedSet = pinned[0] + } totalRemoved := 0 for { refs := CollectReferencedTypes(def, pkg) + for fqn := range pinnedSet { + refs[fqn] = true + } filtered := make([]proto.Visitee, 0, len(def.Elements)) removed := 0 diff --git a/internal/filter/filter_test.go b/internal/filter/filter_test.go index ab14540..5619d81 100644 --- a/internal/filter/filter_test.go +++ b/internal/filter/filter_test.go @@ -2989,6 +2989,102 @@ func TestCombinedSameAnnotationInBothLists(t *testing.T) { } } +// --- Include Message Filtering Tests (Feature 013) --- + +// T003: Basic include message filtering +func TestIncludeMessagesByAnnotation(t *testing.T) { + def := &proto.Proto{ + Elements: []proto.Visitee{ + &proto.Message{ + Name: "AnnotatedMessage", + Comment: &proto.Comment{ + Lines: []string{" [PublishedApi]"}, + }, + }, + &proto.Message{ + Name: "UnannotatedMessage", + }, + &proto.Enum{ + Name: "AnnotatedEnum", + Comment: &proto.Comment{ + Lines: []string{" [PublishedApi]"}, + }, + }, + }, + } + + removed := IncludeMessagesByAnnotation(def, []string{"PublishedApi"}) + if removed != 1 { + t.Errorf("expected 1 removed (UnannotatedMessage), got %d", removed) + } + + // Check remaining elements + names := make(map[string]bool) + for _, elem := range def.Elements { + switch v := elem.(type) { + case *proto.Message: + names[v.Name] = true + case *proto.Enum: + names[v.Name] = true + } + } + if !names["AnnotatedMessage"] { + t.Error("AnnotatedMessage should remain (has [PublishedApi])") + } + if names["UnannotatedMessage"] { + t.Error("UnannotatedMessage should be removed (no annotations)") + } + if !names["AnnotatedEnum"] { + t.Error("AnnotatedEnum should remain (has [PublishedApi])") + } +} + +// T004: All messages removed when none match +func TestIncludeMessagesByAnnotation_NoAnnotations(t *testing.T) { + def := &proto.Proto{ + Elements: []proto.Visitee{ + &proto.Message{Name: "Msg1"}, + &proto.Message{Name: "Msg2"}, + }, + } + + removed := IncludeMessagesByAnnotation(def, []string{"PublishedApi"}) + if removed != 2 { + t.Errorf("expected 2 removed, got %d", removed) + } + + for _, elem := range def.Elements { + if _, ok := elem.(*proto.Message); ok { + t.Error("no messages should remain") + } + } +} + +// T005: Empty annotation list returns 0 +func TestIncludeMessagesByAnnotation_EmptyList(t *testing.T) { + def := &proto.Proto{ + Elements: []proto.Visitee{ + &proto.Message{Name: "Msg1"}, + &proto.Message{Name: "Msg2"}, + }, + } + + removed := IncludeMessagesByAnnotation(def, []string{}) + if removed != 0 { + t.Errorf("expected 0 removed for empty annotation list, got %d", removed) + } + + count := 0 + for _, elem := range def.Elements { + if _, ok := elem.(*proto.Message); ok { + count++ + } + } + if count != 2 { + t.Errorf("expected 2 messages to remain, got %d", count) + } +} + func testdataDir(t *testing.T, sub string) string { t.Helper() dir, err := filepath.Abs(filepath.Join("..", "..", "testdata", sub)) diff --git a/main.go b/main.go index 0f2016d..4df54cf 100644 --- a/main.go +++ b/main.go @@ -169,11 +169,12 @@ func run() int { // Pass 1: Prune, filter, convert block comments, collect annotations type processedFile struct { - pf parsedFile + pf parsedFile skip bool // true if file has no remaining definitions after filtering } processed := make([]processedFile, 0, len(parsed)) servicesRemoved := 0 + messagesRemoved := 0 methodsRemoved := 0 fieldsRemoved := 0 orphansRemoved := 0 @@ -190,9 +191,12 @@ func run() int { skip := false // Annotation-based filtering if cfg != nil && cfg.HasAnnotations() { - var sr, mr, fr int + var sr, msgr, mr, fr int + var includeRoots map[string]bool if cfg.HasAnnotationInclude() { sr += filter.IncludeServicesByAnnotation(pf.def, cfg.Annotations.Include) + includeRoots = filter.CollectIncludeMessageRoots(pf.def, cfg.Annotations.Include) + msgr += filter.IncludeMessagesByAnnotation(pf.def, cfg.Annotations.Include) if !cfg.HasAnnotationExclude() { // Include-only mode: also filter methods by include annotations mr += filter.IncludeMethodsByAnnotation(pf.def, cfg.Annotations.Include) @@ -204,11 +208,12 @@ func run() int { fr += filter.FilterFieldsByAnnotation(pf.def, cfg.Annotations.Exclude) } servicesRemoved += sr + messagesRemoved += msgr methodsRemoved += mr fieldsRemoved += fr filter.RemoveEmptyServices(pf.def) if sr > 0 || mr > 0 || fr > 0 { - orphansRemoved += filter.RemoveOrphanedDefinitions(pf.def, pf.pkg) + orphansRemoved += filter.RemoveOrphanedDefinitions(pf.def, pf.pkg, includeRoots) } if !filter.HasRemainingDefinitions(pf.def) { @@ -290,7 +295,7 @@ func run() int { fmt.Fprintf(os.Stderr, "proto-filter: processed %d files, %d definitions\n", len(files), totalDefs) fmt.Fprintf(os.Stderr, "proto-filter: included %d definitions, excluded %d\n", includedCount, excludedCount) if cfg != nil && cfg.HasAnnotations() { - fmt.Fprintf(os.Stderr, "proto-filter: removed %d services by annotation, %d methods by annotation, %d fields by annotation, %d orphaned definitions\n", servicesRemoved, methodsRemoved, fieldsRemoved, orphansRemoved) + fmt.Fprintf(os.Stderr, "proto-filter: removed %d services by annotation, %d messages by annotation, %d methods by annotation, %d fields by annotation, %d orphaned definitions\n", servicesRemoved, messagesRemoved, methodsRemoved, fieldsRemoved, orphansRemoved) } if cfg != nil && cfg.HasSubstitutions() { fmt.Fprintf(os.Stderr, "proto-filter: substituted %d annotations\n", substitutionCount) diff --git a/main_test.go b/main_test.go index 3fd5eec..848ffd9 100644 --- a/main_test.go +++ b/main_test.go @@ -1887,3 +1887,204 @@ strict_substitutions: true t.Errorf("stderr should mention 'Unknown', got: %s", stderr) } } + +// --- Message Include Filtering CLI Tests (Feature 013) --- + +// T007: Messages-only file, no matches → empty output +func TestIncludeMessageOnlyCLI(t *testing.T) { + bin := buildBinary(t) + inputDir := t.TempDir() + outDir := t.TempDir() + cfgDir := t.TempDir() + + protoContent := `syntax = "proto3"; +package msgonly; + +message Msg1 { + string name = 1; +} + +message Msg2 { + uint64 id = 1; +} + +message Msg3 { + bool flag = 1; +} +` + os.WriteFile(filepath.Join(inputDir, "messages.proto"), []byte(protoContent), 0o644) + + cfgPath := filepath.Join(cfgDir, "filter.yaml") + os.WriteFile(cfgPath, []byte("annotations:\n include:\n - \"NotUsed\"\n"), 0o644) + + stderr, code := runBinary(t, bin, + "--input", inputDir, + "--output", outDir, + "--config", cfgPath, + ) + if code != 0 { + t.Fatalf("expected exit code 0, got %d; stderr: %s", code, stderr) + } + + // Output file should not exist (no messages match) + outFile := filepath.Join(outDir, "messages.proto") + if _, err := os.Stat(outFile); err == nil { + content, _ := os.ReadFile(outFile) + t.Errorf("output file should not exist (no messages have [NotUsed]), but got:\n%s", string(content)) + } +} + +// T008: Annotated message + dependencies kept, golden file comparison +func TestIncludeAnnotatedMessageCLI(t *testing.T) { + bin := buildBinary(t) + outDir := t.TempDir() + cfgDir := t.TempDir() + + cfgPath := filepath.Join(cfgDir, "filter.yaml") + os.WriteFile(cfgPath, []byte("annotations:\n include:\n - \"PublishedApi\"\n"), 0o644) + + stderr, code := runBinary(t, bin, + "--input", testdataDir(t, "messages"), + "--output", outDir, + "--config", cfgPath, + ) + if code != 0 { + t.Fatalf("expected exit code 0, got %d; stderr: %s", code, stderr) + } + + // Compare output against golden file + actual, err := os.ReadFile(filepath.Join(outDir, "messages_annotated.proto")) + if err != nil { + t.Fatalf("output file missing: %v", err) + } + expected, err := os.ReadFile(filepath.Join(testdataDir(t, "messages"), "expected", "messages_annotated.proto")) + if err != nil { + t.Fatalf("golden file missing: %v", err) + } + if string(actual) != string(expected) { + t.Errorf("output does not match golden file\n--- ACTUAL ---\n%s\n--- EXPECTED ---\n%s", string(actual), string(expected)) + } +} + +// T009: Mixed services + messages, unreferenced unannotated message removed +func TestIncludeMessageMixedCLI(t *testing.T) { + bin := buildBinary(t) + inputDir := t.TempDir() + outDir := t.TempDir() + cfgDir := t.TempDir() + + protoContent := `syntax = "proto3"; +package mixed; + +// [PublicApi] +service OrderService { + // [PublicApi] + rpc GetOrder(GetOrderRequest) returns (GetOrderResponse); +} + +message GetOrderRequest { + uint64 id = 1; +} + +message GetOrderResponse { + string name = 1; +} + +message AuditLog { + string data = 1; +} +` + os.WriteFile(filepath.Join(inputDir, "service.proto"), []byte(protoContent), 0o644) + + cfgPath := filepath.Join(cfgDir, "filter.yaml") + os.WriteFile(cfgPath, []byte("annotations:\n include:\n - \"PublicApi\"\n"), 0o644) + + stderr, code := runBinary(t, bin, + "--input", inputDir, + "--output", outDir, + "--config", cfgPath, + ) + if code != 0 { + t.Fatalf("expected exit code 0, got %d; stderr: %s", code, stderr) + } + + outData, err := os.ReadFile(filepath.Join(outDir, "service.proto")) + if err != nil { + t.Fatalf("output file missing: %v", err) + } + output := string(outData) + + // OrderService and its dependencies should be kept + if !strings.Contains(output, "OrderService") { + t.Errorf("output should contain OrderService, got:\n%s", output) + } + if !strings.Contains(output, "GetOrderRequest") { + t.Errorf("output should contain GetOrderRequest (service dep), got:\n%s", output) + } + if !strings.Contains(output, "GetOrderResponse") { + t.Errorf("output should contain GetOrderResponse (service dep), got:\n%s", output) + } + // AuditLog should be removed (unannotated, unreferenced) + if strings.Contains(output, "AuditLog") { + t.Errorf("output should NOT contain AuditLog (unannotated, unreferenced), got:\n%s", output) + } +} + +// T010: Combined include + exclude with message field removal +func TestIncludeExcludeMessageCLI(t *testing.T) { + bin := buildBinary(t) + inputDir := t.TempDir() + outDir := t.TempDir() + cfgDir := t.TempDir() + + protoContent := `syntax = "proto3"; +package combmsg; + +// [PublishedApi] +message Parameters { + string name = 1; + // [Deprecated] + string old_field = 2; + uint64 value = 3; +} +` + os.WriteFile(filepath.Join(inputDir, "messages.proto"), []byte(protoContent), 0o644) + + cfgPath := filepath.Join(cfgDir, "filter.yaml") + os.WriteFile(cfgPath, []byte("annotations:\n include:\n - \"PublishedApi\"\n exclude:\n - \"Deprecated\"\n"), 0o644) + + stderr, code := runBinary(t, bin, + "--input", inputDir, + "--output", outDir, + "--config", cfgPath, + ) + if code != 0 { + t.Fatalf("expected exit code 0, got %d; stderr: %s", code, stderr) + } + + outData, err := os.ReadFile(filepath.Join(outDir, "messages.proto")) + if err != nil { + t.Fatalf("output file missing: %v", err) + } + output := string(outData) + + // Parameters should be kept (has [PublishedApi]) + if !strings.Contains(output, "Parameters") { + t.Errorf("output should contain Parameters, got:\n%s", output) + } + // old_field should be removed (has [Deprecated]) + if strings.Contains(output, "old_field") { + t.Errorf("output should NOT contain deprecated field 'old_field', got:\n%s", output) + } + // name and value should remain + if !strings.Contains(output, "name") { + t.Errorf("output should contain 'name' field, got:\n%s", output) + } + if !strings.Contains(output, "value") { + t.Errorf("output should contain 'value' field, got:\n%s", output) + } + // [PublishedApi] annotation should be stripped + if strings.Contains(output, "PublishedApi") { + t.Errorf("output should NOT contain PublishedApi annotation (should be stripped), got:\n%s", output) + } +} diff --git a/specs/013-message-include-filter/checklists/requirements.md b/specs/013-message-include-filter/checklists/requirements.md new file mode 100644 index 0000000..7bc07db --- /dev/null +++ b/specs/013-message-include-filter/checklists/requirements.md @@ -0,0 +1,35 @@ +# Specification Quality Checklist: Include Annotation Filtering for Top-Level Messages + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-02-11 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- All items pass. The spec is ready for `/speckit.clarify` or `/speckit.plan`. +- The Assumptions section references internal concepts (ExtractAnnotations, RemoveOrphanedDefinitions) for context, but functional requirements and success criteria remain technology-agnostic. diff --git a/specs/013-message-include-filter/plan.md b/specs/013-message-include-filter/plan.md new file mode 100644 index 0000000..c61f63a --- /dev/null +++ b/specs/013-message-include-filter/plan.md @@ -0,0 +1,158 @@ +# Implementation Plan: Include Annotation Filtering for Top-Level Messages + +**Branch**: `013-message-include-filter` | **Date**: 2026-02-11 | **Spec**: [spec.md](spec.md) +**Input**: Feature specification from `/specs/013-message-include-filter/spec.md` + +## Summary + +When `annotations.include` is configured, top-level messages and enums currently pass through unfiltered — only services are gated by include annotations. This feature adds `IncludeMessagesByAnnotation` to filter top-level messages/enums by include annotations, then relies on existing orphan removal to preserve their transitive dependencies. The orphan removal gate in `main.go` must also include message removal counts. + +## Technical Context + +**Language/Version**: Go 1.25+ +**Primary Dependencies**: `github.com/emicklei/proto` v1.14.3, `gopkg.in/yaml.v3` +**Storage**: N/A (file-based CLI tool) +**Testing**: `go test -race ./...` +**Target Platform**: Cross-platform CLI (CGO_ENABLED=0) +**Project Type**: Single Go module +**Performance Goals**: N/A (feature addition, no performance change) +**Constraints**: Output `.proto` files must remain syntactically valid and compilable by `protoc` +**Scale/Scope**: One new function + main.go pipeline update + tests + +## Constitution Check + +*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.* + +| Principle | Status | Notes | +|-----------|--------|-------| +| I. CLI-First Tool | PASS | No CLI interface changes | +| II. Proto-Aware Filtering | PASS | Extends filtering to messages, consistent with proto structure awareness | +| III. Correctness Over Convenience | PASS | Preserves referential integrity — included messages keep their dependencies | +| IV. Test-Driven Development | PASS | Unit tests + integration tests + golden file tests | +| V. Simplicity and Minimal Dependencies | PASS | Follows existing `IncludeServicesByAnnotation` pattern, no new deps | + +All gates pass. + +## Project Structure + +### Documentation (this feature) + +```text +specs/013-message-include-filter/ +├── plan.md # This file +├── research.md # Phase 0 output +├── spec.md # Feature specification +└── tasks.md # Phase 2 output (/speckit.tasks command) +``` + +### Source Code (repository root) + +```text +internal/filter/ +├── filter.go # IncludeMessagesByAnnotation function +└── filter_test.go # Unit tests for IncludeMessagesByAnnotation + +main.go # Pipeline update: call IncludeMessagesByAnnotation, update orphan gate +main_test.go # CLI integration tests for message include filtering + +testdata/messages/ # New test fixture directory +├── messages_only.proto # Input: messages only, no services +├── messages_annotated.proto # Input: one annotated message with deps +└── expected/ + ├── messages_annotated.proto # Golden: annotated message + deps + └── (messages_only.proto absent) # Empty output = file not written +``` + +**Structure Decision**: Existing single Go module. New `testdata/messages/` directory for message-only test fixtures. + +## Change Analysis + +### Design Approach + +**Add `IncludeMessagesByAnnotation`** following the exact pattern of `IncludeServicesByAnnotation`: + +1. Walk `def.Elements` +2. For each `*proto.Message` and `*proto.Enum`, extract annotations from comment +3. If annotations match the include list → keep +4. If annotations don't match (or no annotations) → remove and increment counter +5. Non-message/non-enum elements (services, syntax, package, imports, options) pass through unchanged + +After this function runs, existing `RemoveOrphanedDefinitions` handles dependencies: +- `CollectReferencedTypes` (line 461) already collects references from **message fields** (NormalField, MapField, OneOfField via `collectMessageRefs`) +- So if `[PublishedApi] Parameters` references `Params1` and `Params2`, and we keep `Parameters`, orphan removal sees `Parameters` still references `Params1` and `Params2` and keeps them +- Messages not referenced by anything are removed as orphans + +### New Function (1 file) + +**`internal/filter/filter.go`** — `IncludeMessagesByAnnotation`: + +``` +func IncludeMessagesByAnnotation(def *proto.Proto, annotations []string) int +``` + +- Follows `IncludeServicesByAnnotation` pattern exactly +- Filters `*proto.Message` and `*proto.Enum` elements +- Uses `ExtractAnnotations(msg.Comment)` — already works on any `*proto.Comment` +- Returns count of removed messages/enums + +### Pipeline Update (1 file) + +**`main.go`** — annotation filtering block (~line 194): + +1. Add call to `filter.IncludeMessagesByAnnotation(pf.def, cfg.Annotations.Include)` inside the `if cfg.HasAnnotationInclude()` block +2. Track result in a new variable (e.g., `msgr`) or add to existing counter +3. Update orphan removal gate from `if sr > 0 || mr > 0 || fr > 0` to also include message removal count + +Current: +```go +if cfg.HasAnnotationInclude() { + sr += filter.IncludeServicesByAnnotation(pf.def, cfg.Annotations.Include) + ... +} +... +if sr > 0 || mr > 0 || fr > 0 { + orphansRemoved += filter.RemoveOrphanedDefinitions(pf.def, pf.pkg) +} +``` + +After: +```go +if cfg.HasAnnotationInclude() { + sr += filter.IncludeServicesByAnnotation(pf.def, cfg.Annotations.Include) + msgr += filter.IncludeMessagesByAnnotation(pf.def, cfg.Annotations.Include) + ... +} +... +if sr > 0 || mr > 0 || fr > 0 || msgr > 0 { + orphansRemoved += filter.RemoveOrphanedDefinitions(pf.def, pf.pkg) +} +``` + +4. Add verbose output for message removals + +### Annotation Stripping + +The existing `StripAnnotations` call at line 219-221 already strips include annotations from all element types (services, messages, enums, fields) via `SubstituteAnnotations`. No additional changes needed for stripping. + +### Test Plan + +**Unit tests** (`internal/filter/filter_test.go`): +- `TestIncludeMessagesByAnnotation` — basic: annotated message kept, unannotated removed, enum handled +- `TestIncludeMessagesByAnnotation_NoAnnotations` — all messages removed when none match +- `TestIncludeMessagesByAnnotation_EmptyList` — empty annotation list returns 0 + +**Golden file tests** (`internal/filter/filter_test.go`): +- `TestGoldenFileMessagesAnnotated` — annotated message + deps kept, unreferenced removed + +**CLI integration tests** (`main_test.go`): +- `TestIncludeMessageOnlyCLI` — messages-only file, no matches → empty output +- `TestIncludeAnnotatedMessageCLI` — user's exact example: `[PublishedApi] Parameters` + `Params1` + `Params2` +- `TestIncludeMessageMixedCLI` — mixed services + messages, unreferenced message removed + +### No Changes Needed + +- `internal/config/` — no config changes, `HasAnnotationInclude()` already works +- `RemoveOrphanedDefinitions` — already handles message field references correctly +- `StripAnnotations` / `SubstituteAnnotations` — already process message comments +- `CollectAnnotationLocations` — already collects from messages +- Exclude-only tests — unaffected (no message include gating in exclude mode) diff --git a/specs/013-message-include-filter/research.md b/specs/013-message-include-filter/research.md new file mode 100644 index 0000000..ebf345a --- /dev/null +++ b/specs/013-message-include-filter/research.md @@ -0,0 +1,30 @@ +# Research: Include Annotation Filtering for Top-Level Messages + +**Date**: 2026-02-11 + +## No NEEDS CLARIFICATION Items + +All technical context is fully resolved. + +## Implementation Approach + +- **Decision**: Add `IncludeMessagesByAnnotation` function following the `IncludeServicesByAnnotation` pattern, then rely on existing orphan removal for dependency preservation +- **Rationale**: `CollectReferencedTypes` already collects references from message fields (NormalField, MapField, OneOfField). When an annotated message is kept, its field references are collected, and orphan removal preserves those referenced types. This avoids building a separate dependency resolution mechanism. +- **Alternatives considered**: (1) Modify `RemoveOrphanedDefinitions` to handle include filtering directly — rejected because it conflates two concerns and the existing function is well-tested. (2) Build a separate dependency graph for messages — rejected because `CollectReferencedTypes` + iterative orphan removal already provides this. + +## Orphan Removal Gate + +- **Decision**: Add message removal count (`msgr`) to the orphan removal gate condition in `main.go` +- **Rationale**: The current gate `if sr > 0 || mr > 0 || fr > 0` skips orphan removal when no services/methods/fields are removed. For messages-only files, `sr == 0` and no gate triggers, so orphan removal never runs. Adding `msgr > 0` ensures orphan removal runs after message-level include filtering. +- **Alternatives considered**: Always run orphan removal when include is configured — rejected because it adds unnecessary work for files where include doesn't remove anything. + +## Enum Handling + +- **Decision**: Handle enums identically to messages in `IncludeMessagesByAnnotation` +- **Rationale**: Enums are top-level definitions that should be subject to the same include gating. The function checks both `*proto.Message` and `*proto.Enum` in the same pass. +- **Alternatives considered**: Separate `IncludeEnumsByAnnotation` function — rejected because enums and messages have the same comment structure and the combined function is simpler. + +## Annotation Stripping + +- **Decision**: No changes needed for annotation stripping on messages +- **Rationale**: `StripAnnotations` calls `SubstituteAnnotations`, which already walks messages and their comments (lines 665-679 in filter.go). Include annotations on kept messages will be automatically stripped. diff --git a/specs/013-message-include-filter/spec.md b/specs/013-message-include-filter/spec.md new file mode 100644 index 0000000..599abca --- /dev/null +++ b/specs/013-message-include-filter/spec.md @@ -0,0 +1,89 @@ +# Feature Specification: Include Annotation Filtering for Top-Level Messages + +**Feature Branch**: `013-message-include-filter` +**Created**: 2026-02-11 +**Status**: Draft +**Input**: User description: "When annotations.include is configured, top-level messages without a matching include annotation should be removed (along with their unreferenced dependencies). Currently, include annotations only gate services — messages pass through unfiltered." + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Unannotated Messages Removed When Include Is Configured (Priority: P1) + +As a user, when I configure `annotations.include: ["PublishedApi"]` and my proto file contains only messages (no services), I expect that only messages explicitly annotated with `[PublishedApi]` appear in the output. Messages without the annotation should be removed. If no messages match, the output should be empty. + +**Why this priority**: This is the core bug. Currently, all messages pass through regardless of include annotations because include filtering only operates on services. This produces incorrect output for proto files that define message types without services. + +**Independent Test**: Create a proto file with only messages (no services), none annotated with `[PublishedApi]`. Configure `annotations.include: ["PublishedApi"]`. Verify the output is empty. + +**Acceptance Scenarios**: + +1. **Given** a proto file with three unannotated messages and no services, **When** the config has `annotations.include: ["NotUsed"]`, **Then** the output is empty because no messages have the required annotation. +2. **Given** a proto file with one message annotated `[PublishedApi]` that references two other messages, **When** the config has `annotations.include: ["PublishedApi"]`, **Then** the annotated message and its referenced dependencies are kept, with the include annotation stripped from the output. +3. **Given** a proto file with one annotated message `[PublishedApi] Parameters` referencing `Params1` and `Params2`, and one unreferenced unannotated message `Unrelated`, **When** the config has `annotations.include: ["PublishedApi"]`, **Then** `Parameters`, `Params1`, and `Params2` are kept, and `Unrelated` is removed. + +--- + +### User Story 2 - Mixed Services and Messages With Include Annotations (Priority: P2) + +As a user, when my proto file contains both services and messages, include annotations should gate both: services without the annotation are removed (existing behavior), and top-level messages without the annotation are also removed — unless they are referenced by a kept service or kept message. + +**Why this priority**: This extends the core fix to mixed files, ensuring consistent behavior across element types. + +**Independent Test**: Create a proto file with one annotated service, one unannotated top-level message (not referenced by the service), and the service's dependency messages. Verify the unreferenced unannotated message is removed. + +**Acceptance Scenarios**: + +1. **Given** a proto file with an annotated service `[PublicApi] OrderService` and an unannotated standalone message `AuditLog`, **When** the config has `annotations.include: ["PublicApi"]`, **Then** `OrderService` and its dependency messages are kept, but `AuditLog` is removed. +2. **Given** a proto file with an annotated service and an annotated message `[PublicApi] SharedConfig`, **When** the config has `annotations.include: ["PublicApi"]`, **Then** both the service and `SharedConfig` (plus its dependencies) are kept. + +--- + +### User Story 3 - Combined Include + Exclude With Messages (Priority: P2) + +As a user, when I configure both `annotations.include` and `annotations.exclude`, include filtering should gate messages first, and then exclude filtering should remove annotated fields from the included messages. + +**Why this priority**: Ensures combined mode works correctly with messages, not just services. + +**Independent Test**: Create a proto file with an annotated message containing a deprecated field. Configure both include and exclude. Verify the message is kept but the deprecated field is removed. + +**Acceptance Scenarios**: + +1. **Given** a proto file with `[PublishedApi] Parameters` containing a `[Deprecated]` field, **When** the config has `annotations.include: ["PublishedApi"]` and `annotations.exclude: ["Deprecated"]`, **Then** `Parameters` is kept with the deprecated field removed. + +--- + +### Edge Cases + +- What happens when a message references another message that also has a matching include annotation? Both are kept — the annotation on the dependency is redundant but not harmful. +- What happens when an included message references a message that itself references further messages? All transitive dependencies are kept (consistent with existing orphan removal behavior). +- What happens when only exclude is configured (no include)? Messages are NOT subject to include gating — exclude-only mode behavior is unchanged. +- What happens with enum types? Enums follow the same logic as messages: if include is configured, only annotated enums (and enums referenced by included elements) are kept. +- What happens with the `oneof` construct inside a message? Oneof fields reference other message types; these references are followed during dependency resolution as usual. + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: When `annotations.include` is configured, top-level messages without a matching include annotation MUST be removed from the output, unless they are referenced (directly or transitively) by a kept service or kept message. +- **FR-002**: When `annotations.include` is configured, top-level messages WITH a matching include annotation MUST be kept, along with all their transitive message/enum dependencies. +- **FR-003**: Include annotation stripping MUST apply to kept messages (consistent with existing service annotation stripping). +- **FR-004**: Exclude-only mode MUST NOT be affected — messages are not subject to include gating when only `annotations.exclude` is configured. +- **FR-005**: Orphan removal MUST run after message-level include filtering to clean up unreferenced dependencies. +- **FR-006**: This behavior MUST apply consistently in include-only mode and combined include+exclude mode. +- **FR-007**: Both `@Name` and `[Name]` annotation syntaxes MUST be recognized on messages (consistent with existing annotation parsing). + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: A proto file containing only unannotated messages produces empty output when `annotations.include` is configured, verified by automated tests. +- **SC-002**: A proto file with one annotated message and its dependencies produces correct output containing only the annotated message and its referenced types, verified by golden file tests. +- **SC-003**: All existing tests for service-level include filtering, exclude-only mode, and combined mode continue to pass. +- **SC-004**: The user's exact example (three messages, one annotated `[PublishedApi]`) produces the expected output, verified by a dedicated test. + +### Assumptions + +- Include filtering for messages follows the same annotation extraction logic used for services (`ExtractAnnotations` on the message's comment). +- Messages kept by include annotations act as "roots" for dependency resolution — their referenced types are kept even if those types lack an include annotation. +- The existing orphan removal mechanism (`RemoveOrphanedDefinitions`) handles transitive dependency resolution and can be reused after message-level include filtering. +- Enum types are treated identically to messages for include gating purposes. diff --git a/specs/013-message-include-filter/tasks.md b/specs/013-message-include-filter/tasks.md new file mode 100644 index 0000000..38aa181 --- /dev/null +++ b/specs/013-message-include-filter/tasks.md @@ -0,0 +1,139 @@ +# Tasks: Include Annotation Filtering for Top-Level Messages + +**Input**: Design documents from `/specs/013-message-include-filter/` +**Prerequisites**: plan.md (required), spec.md (required), research.md + +## Format: `[ID] [P?] [Story] Description` + +- **[P]**: Can run in parallel (different files, no dependencies) +- **[Story]**: Which user story this task belongs to (e.g., US1, US2, US3) +- Include exact file paths in descriptions + +--- + +## Phase 1: Core Implementation + +**Purpose**: Add `IncludeMessagesByAnnotation` function and integrate it into the pipeline + +- [x] T001 [US1] Add `IncludeMessagesByAnnotation` function in internal/filter/filter.go following the `IncludeServicesByAnnotation` pattern. Walk `def.Elements`, check each `*proto.Message` and `*proto.Enum` for matching include annotations using `ExtractAnnotations(msg.Comment)`. Keep elements with matching annotations, remove those without. Return count of removed elements. Non-message/non-enum elements (services, syntax, package, imports, options) pass through unchanged. + +- [x] T002 [US1] Update main.go annotation filtering block (~line 194): declare `msgr` variable, add `msgr += filter.IncludeMessagesByAnnotation(pf.def, cfg.Annotations.Include)` inside the `if cfg.HasAnnotationInclude()` block, update orphan removal gate from `if sr > 0 || mr > 0 || fr > 0` to `if sr > 0 || mr > 0 || fr > 0 || msgr > 0`, and add verbose output for message removals. + +**Checkpoint**: Core function exists and is wired into the pipeline. No tests yet. + +--- + +## Phase 2: User Story 1 — Unannotated Messages Removed When Include Is Configured (Priority: P1) + +**Goal**: Verify that unannotated messages are removed when `annotations.include` is configured in a messages-only file. + +**Independent Test**: `go test -race -run TestIncludeMessagesByAnnotation ./internal/filter/` + +### Implementation for User Story 1 + +- [x] T003 [P] [US1] Add unit test `TestIncludeMessagesByAnnotation` in internal/filter/filter_test.go: parse a proto string with three messages (one annotated `[PublishedApi]`, one unannotated, one enum annotated `[PublishedApi]`). Call `IncludeMessagesByAnnotation(def, []string{"PublishedApi"})`. Assert removed count is 1 (the unannotated message), assert the annotated message and enum are kept. + +- [x] T004 [P] [US1] Add unit test `TestIncludeMessagesByAnnotation_NoAnnotations` in internal/filter/filter_test.go: parse a proto string with two unannotated messages. Call `IncludeMessagesByAnnotation(def, []string{"PublishedApi"})`. Assert removed count is 2. + +- [x] T005 [P] [US1] Add unit test `TestIncludeMessagesByAnnotation_EmptyList` in internal/filter/filter_test.go: parse a proto string with messages. Call `IncludeMessagesByAnnotation(def, []string{})`. Assert removed count is 0 (empty annotation list means no filtering). + +- [x] T006 [P] [US1] Create test fixture testdata/messages/messages_annotated.proto: a proto file with `[PublishedApi] Parameters` message referencing `Params1` and `Params2` messages, plus an unreferenced `Unrelated` message. Create golden file testdata/messages/expected/messages_annotated.proto containing only `Parameters`, `Params1`, `Params2` (with `[PublishedApi]` annotation stripped). + +- [x] T007 [US1] Add CLI integration test `TestIncludeMessageOnlyCLI` in main_test.go: create a proto file with only unannotated messages, configure `annotations.include: ["NotUsed"]`, run the tool, assert the output file is not written (empty output). + +- [x] T008 [US1] Add CLI integration test `TestIncludeAnnotatedMessageCLI` in main_test.go: use the testdata/messages/messages_annotated.proto fixture with `annotations.include: ["PublishedApi"]`, run the tool, compare output against testdata/messages/expected/messages_annotated.proto golden file. + +**Checkpoint**: Messages-only include filtering works end-to-end with unit and integration tests. + +--- + +## Phase 3: User Story 2 — Mixed Services and Messages With Include Annotations (Priority: P2) + +**Goal**: Verify that include annotations gate both services and messages in mixed files. + +**Independent Test**: `go test -race -run TestIncludeMessageMixed ./` + +### Implementation for User Story 2 + +- [x] T009 [US2] Add CLI integration test `TestIncludeMessageMixedCLI` in main_test.go: create a proto file with an annotated service `[PublicApi] OrderService`, the service's dependency messages, and an unannotated standalone message `AuditLog`. Configure `annotations.include: ["PublicApi"]`. Assert that `OrderService` and its dependencies are kept but `AuditLog` is removed from the output. + +**Checkpoint**: Mixed services + messages filtering works correctly. + +--- + +## Phase 4: User Story 3 — Combined Include + Exclude With Messages (Priority: P2) + +**Goal**: Verify that combined include + exclude mode works correctly with messages. + +**Independent Test**: `go test -race -run TestIncludeExcludeMessage ./` + +### Implementation for User Story 3 + +- [x] T010 [US3] Add CLI integration test `TestIncludeExcludeMessageCLI` in main_test.go: create a proto file with `[PublishedApi] Parameters` containing a `[Deprecated]` field. Configure `annotations.include: ["PublishedApi"]` and `annotations.exclude: ["Deprecated"]`. Assert that `Parameters` is kept but the deprecated field is removed from the output. + +**Checkpoint**: Combined mode works correctly with messages. + +--- + +## Phase 5: Verification + +**Purpose**: Full test suite validation + +- [x] T011 Run full test suite with `go test -race ./...` and verify all tests pass, including existing service-level include tests, exclude-only tests, and combined mode tests (confirming FR-004: exclude-only mode unaffected, SC-003: existing tests still pass). + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Phase 1 (Core)**: No dependencies — start immediately +- **Phase 2 (US1)**: Depends on Phase 1 (T001 + T002) +- **Phase 3 (US2)**: Depends on Phase 1 (T001 + T002), independent of Phase 2 +- **Phase 4 (US3)**: Depends on Phase 1 (T001 + T002), independent of Phases 2-3 +- **Phase 5 (Verification)**: Depends on all previous phases + +### Task Dependencies + +- T001 → T002 (function must exist before pipeline integration) +- T003, T004, T005 can run in parallel after T001 (unit tests for the function) +- T006 must complete before T008 (fixture needed for integration test) +- T007, T008 depend on T002 (pipeline must be wired) +- T009 depends on T002 +- T010 depends on T002 +- T011 depends on all previous tasks + +### Parallel Opportunities + +- T003, T004, T005 can all run in parallel after T001 (same file, independent test functions) +- T006 can run in parallel with T003-T005 (different files) +- T007, T009, T010 can run in parallel after T002 (independent CLI tests in main_test.go) + +--- + +## Implementation Strategy + +### MVP First (T001 + T002 + T003 + T007 + T011) + +1. Add `IncludeMessagesByAnnotation` (T001) +2. Wire into pipeline (T002) +3. Add basic unit test (T003) +4. Add messages-only CLI test (T007) +5. Run full suite (T011) + +### Full Delivery + +1. T001 → T002 → Core implementation +2. T003 + T004 + T005 + T006 in parallel → Unit tests + fixtures +3. T007 + T008 + T009 + T010 in parallel → CLI integration tests +4. T011 → Full verification + +--- + +## Notes + +- `IncludeMessagesByAnnotation` follows the exact same pattern as `IncludeServicesByAnnotation` +- Existing `CollectReferencedTypes` already collects message field references — no changes needed +- Existing `StripAnnotations` already handles message comments — no changes needed +- The orphan removal gate update (`|| msgr > 0`) is critical for messages-only files +- Empty annotation list (`[]string{}`) should return 0 — consistent with `IncludeServicesByAnnotation` behavior diff --git a/testdata/messages/expected/messages_annotated.proto b/testdata/messages/expected/messages_annotated.proto new file mode 100644 index 0000000..84874f6 --- /dev/null +++ b/testdata/messages/expected/messages_annotated.proto @@ -0,0 +1,16 @@ +syntax = "proto3"; + +package messages; + +message Parameters { + Params1 p1 = 1; + Params2 p2 = 2; +} + +message Params1 { + string name = 1; +} + +message Params2 { + uint64 value = 1; +} diff --git a/testdata/messages/messages_annotated.proto b/testdata/messages/messages_annotated.proto new file mode 100644 index 0000000..eb1272c --- /dev/null +++ b/testdata/messages/messages_annotated.proto @@ -0,0 +1,21 @@ +syntax = "proto3"; + +package messages; + +// [PublishedApi] +message Parameters { + Params1 p1 = 1; + Params2 p2 = 2; +} + +message Params1 { + string name = 1; +} + +message Params2 { + uint64 value = 1; +} + +message Unrelated { + string data = 1; +}