From e6726d414eb5100ba4ccbdc8e9481f52aaef275c Mon Sep 17 00:00:00 2001 From: Charles Kramer Date: Mon, 12 Jan 2026 22:42:53 -0700 Subject: [PATCH 1/6] Eliminate redundant file reads and cache rebuilding by implementing a thread-safe caching mechanism at the connector level. Changes: - Add cache fields to FileConnector for loaded data and built caches - Implement getCachedData() method with double-check locking pattern - Update fileSyncer to reference connector instead of file path - Modify List(), Entitlements(), and Grants() to use cached data Performance Impact: - File loaded once per connector instance instead of per-method call - Eliminates ~9x redundant file reads during typical sync operations - Expected 8-18x improvement for large files (10k+ rows) --- pkg/connector/connector.go | 78 +++++++++++++++++++++++++++++++++----- pkg/connector/models.go | 14 ++++++- pkg/connector/syncers.go | 67 ++++++++------------------------ 3 files changed, 97 insertions(+), 62 deletions(-) diff --git a/pkg/connector/connector.go b/pkg/connector/connector.go index 64be45df..050d07bb 100644 --- a/pkg/connector/connector.go +++ b/pkg/connector/connector.go @@ -34,28 +34,88 @@ func (fc *FileConnector) Validate(ctx context.Context) (annotations.Annotations, return nil, nil } +// getCachedData returns cached file data and built caches, loading them if not already cached. +// This method is thread-safe and ensures the file is only loaded and processed once. +func (fc *FileConnector) getCachedData(ctx context.Context) (*LoadedData, map[string]*v2.ResourceType, map[string]*v2.Resource, map[string]*v2.Entitlement, error) { + l := ctxzap.Extract(ctx) + + // Fast path: check if cache is already populated using read lock + fc.cacheMutex.RLock() + if fc.cachedData != nil { + loadedData := fc.cachedData + resourceTypes := fc.cachedResourceTypes + resources := fc.cachedResources + entitlements := fc.cachedEntitlements + fc.cacheMutex.RUnlock() + return loadedData, resourceTypes, resources, entitlements, nil + } + fc.cacheMutex.RUnlock() + + // Slow path: need to load and build caches, acquire write lock + fc.cacheMutex.Lock() + defer fc.cacheMutex.Unlock() + + // Double-check after acquiring write lock (another goroutine may have loaded it) + if fc.cachedData != nil { + return fc.cachedData, fc.cachedResourceTypes, fc.cachedResources, fc.cachedEntitlements, nil + } + + l.Info("Loading and caching file data", zap.String("input_file_path", fc.inputFilePath)) + + // Load file data + loadedData, err := LoadFileData(fc.inputFilePath) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("failed to load data file: %w", err) + } + + // Build all caches + resourceTypesCache, err := buildResourceTypeCache(ctx, loadedData.Resources, loadedData.Users) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("failed to build resource type cache: %w", err) + } + + resourceCache, err := buildResourceCache(ctx, loadedData.Users, loadedData.Resources, resourceTypesCache) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("failed to build resource cache: %w", err) + } + + entitlementCache, err := buildEntitlementCache(ctx, loadedData.Entitlements, resourceCache) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("failed to build entitlement cache: %w", err) + } + + // Store in cache + fc.cachedData = loadedData + fc.cachedResourceTypes = resourceTypesCache + fc.cachedResources = resourceCache + fc.cachedEntitlements = entitlementCache + + l.Info("Successfully cached file data", + zap.Int("users", len(loadedData.Users)), + zap.Int("resources", len(loadedData.Resources)), + zap.Int("entitlements", len(loadedData.Entitlements)), + zap.Int("grants", len(loadedData.Grants))) + + return loadedData, resourceTypesCache, resourceCache, entitlementCache, nil +} + // ResourceSyncers returns a list of syncers for the connector. // function is required by the connectorbuilder.Connector interface. // It determines resource types from the input file and creates a syncer instance for each type, enabling the SDK to sync them. -// implementation loads minimal data to find resource types, builds the type cache, and creates simple syncers passing only the file path for per-sync loading. +// implementation loads minimal data to find resource types, builds the type cache, and creates simple syncers passing the connector reference. func (fc *FileConnector) ResourceSyncers(ctx context.Context) []connectorbuilder.ResourceSyncer { l := ctxzap.Extract(ctx) l.Info("ResourceSyncers method called", zap.String("input_file_path", fc.inputFilePath)) - loadedData, err := LoadFileData(fc.inputFilePath) - if err != nil { - l.Error("Failed to load input data file to determine resource types", zap.Error(err)) - return nil - } - resourceTypesCache, err := buildResourceTypeCache(ctx, loadedData.Resources, loadedData.Users) + _, resourceTypesCache, _, _, err := fc.getCachedData(ctx) if err != nil { - l.Error("Failed to build resource type cache", zap.Error(err)) + l.Error("Failed to load and cache data", zap.Error(err)) return nil } rv := make([]connectorbuilder.ResourceSyncer, 0, len(resourceTypesCache)) for _, rt := range resourceTypesCache { - rv = append(rv, newFileSyncer(rt, fc.inputFilePath)) + rv = append(rv, newFileSyncer(rt, fc)) } l.Info("Created resource syncers", zap.Int("count", len(rv))) diff --git a/pkg/connector/models.go b/pkg/connector/models.go index 605892a1..7631efe2 100644 --- a/pkg/connector/models.go +++ b/pkg/connector/models.go @@ -5,15 +5,25 @@ package connector import ( "context" "fmt" + "sync" + + v2 "github.com/conductorone/baton-sdk/pb/c1/connector/v2" ) // FileConnector struct is the main implementation of the Baton connector for file processing. // It is required by the connectorbuilder.Connector interface for defining connector behavior. -// It holds the path to the input data file. -// structure provides the context (file path) needed for loading data during sync operations. +// It holds the path to the input data file and caches loaded data to avoid redundant file reads. +// The file is treated as static during a sync operation - cache is built once and reused. // Instances are created by NewFileConnector. type FileConnector struct { inputFilePath string + + // Cache fields to avoid redundant file loading and cache rebuilding + cacheMutex sync.RWMutex + cachedData *LoadedData + cachedResourceTypes map[string]*v2.ResourceType + cachedResources map[string]*v2.Resource + cachedEntitlements map[string]*v2.Entitlement } // LoadedData holds all the data parsed from the input file. diff --git a/pkg/connector/syncers.go b/pkg/connector/syncers.go index 03a57a46..3390b19b 100644 --- a/pkg/connector/syncers.go +++ b/pkg/connector/syncers.go @@ -16,18 +16,18 @@ import ( ) // fileSyncer implements the ResourceSyncer interface for a specific resource type. -// It holds a reference to the resource type it handles and the path to the data file. -// Data loading and caching is performed within each interface method call (List, Entitlements, Grants). +// It holds a reference to the resource type it handles and the parent FileConnector. +// Data loading and caching is performed once by the connector and reused across all syncer calls. type fileSyncer struct { - resourceType *v2.ResourceType - inputFilePath string + resourceType *v2.ResourceType + connector *FileConnector } // newFileSyncer creates a new fileSyncer instance. -func newFileSyncer(rt *v2.ResourceType, filePath string) *fileSyncer { +func newFileSyncer(rt *v2.ResourceType, connector *FileConnector) *fileSyncer { return &fileSyncer{ - resourceType: rt, - inputFilePath: filePath, + resourceType: rt, + connector: connector, } } @@ -42,20 +42,11 @@ func (fs *fileSyncer) ResourceType(ctx context.Context) *v2.ResourceType { // List method retrieves a paginated list of resources for the syncer's type. // It implements the List method, required by the connectorbuilder.ResourceSyncer interface. -// It loads data, builds resource/type caches, filters for the relevant type, and returns paginated results. +// It uses cached data from the connector, filters for the relevant type, and returns paginated results. func (fs *fileSyncer) List(ctx context.Context, parentResourceID *v2.ResourceId, pToken *pagination.Token) ([]*v2.Resource, string, annotations.Annotations, error) { - loadedData, err := LoadFileData(fs.inputFilePath) + loadedData, _, resourceCache, _, err := fs.connector.getCachedData(ctx) if err != nil { - return nil, "", nil, fmt.Errorf("List: failed to load data file: %w", err) - } - - resourceTypesCache, err := buildResourceTypeCache(ctx, loadedData.Resources, loadedData.Users) - if err != nil { - return nil, "", nil, fmt.Errorf("List: failed to build resource type cache: %w", err) - } - resourceCache, err := buildResourceCache(ctx, loadedData.Users, loadedData.Resources, resourceTypesCache) - if err != nil { - return nil, "", nil, fmt.Errorf("List: failed to build resource cache: %w", err) + return nil, "", nil, fmt.Errorf("List: %w", err) } matchingResources := make([]*v2.Resource, 0) @@ -138,24 +129,11 @@ func (fs *fileSyncer) List(ctx context.Context, parentResourceID *v2.ResourceId, // Entitlements method retrieves a paginated list of entitlements for the syncer's type. // It implements the Entitlements method, required by the connectorbuilder.ResourceSyncer interface. -// It loads data, builds resource/entitlement caches, filters for the relevant resource, and returns paginated results. +// It uses cached data from the connector, filters for the relevant resource, and returns paginated results. func (fs *fileSyncer) Entitlements(ctx context.Context, resource *v2.Resource, pToken *pagination.Token) ([]*v2.Entitlement, string, annotations.Annotations, error) { - loadedData, err := LoadFileData(fs.inputFilePath) + _, _, _, entitlementCache, err := fs.connector.getCachedData(ctx) if err != nil { - return nil, "", nil, fmt.Errorf("Entitlements: failed to load data file: %w", err) - } - - resourceTypesCache, err := buildResourceTypeCache(ctx, loadedData.Resources, loadedData.Users) - if err != nil { - return nil, "", nil, fmt.Errorf("Entitlements: failed to build resource type cache: %w", err) - } - resourceCache, err := buildResourceCache(ctx, loadedData.Users, loadedData.Resources, resourceTypesCache) - if err != nil { - return nil, "", nil, fmt.Errorf("Entitlements: failed to build resource cache: %w", err) - } - entitlementCache, err := buildEntitlementCache(ctx, loadedData.Entitlements, resourceCache) - if err != nil { - return nil, "", nil, fmt.Errorf("Entitlements: failed to build entitlement cache: %w", err) + return nil, "", nil, fmt.Errorf("Entitlements: %w", err) } matchingEntitlements := make([]*v2.Entitlement, 0) @@ -209,26 +187,13 @@ func (fs *fileSyncer) Entitlements(ctx context.Context, resource *v2.Resource, p // Grants method retrieves a paginated list of grants for the syncer's type. // It implements the Grants method, required by the connectorbuilder.ResourceSyncer interface. -// It loads data, builds all caches, filters grants based on the resource context, and returns paginated results. +// It uses cached data from the connector, filters grants based on the resource context, and returns paginated results. func (fs *fileSyncer) Grants(ctx context.Context, resource *v2.Resource, pToken *pagination.Token) ([]*v2.Grant, string, annotations.Annotations, error) { l := ctxzap.Extract(ctx) - loadedData, err := LoadFileData(fs.inputFilePath) - if err != nil { - return nil, "", nil, fmt.Errorf("Grants: failed to load data file: %w", err) - } - - resourceTypesCache, err := buildResourceTypeCache(ctx, loadedData.Resources, loadedData.Users) - if err != nil { - return nil, "", nil, fmt.Errorf("Grants: failed to build resource type cache: %w", err) - } - resourceCache, err := buildResourceCache(ctx, loadedData.Users, loadedData.Resources, resourceTypesCache) - if err != nil { - return nil, "", nil, fmt.Errorf("Grants: failed to build resource cache: %w", err) - } - entitlementCache, err := buildEntitlementCache(ctx, loadedData.Entitlements, resourceCache) + loadedData, resourceTypesCache, resourceCache, entitlementCache, err := fs.connector.getCachedData(ctx) if err != nil { - return nil, "", nil, fmt.Errorf("Grants: failed to build entitlement cache: %w", err) + return nil, "", nil, fmt.Errorf("Grants: %w", err) } matchingGrants := make([]*v2.Grant, 0) From 1fec62bdc8fa680c7b4276f5697809b91174a8e2 Mon Sep 17 00:00:00 2001 From: Charles Kramer Date: Mon, 12 Jan 2026 22:53:34 -0700 Subject: [PATCH 2/6] Changes Made: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Added child types index field in pkg/connector/models.go: - Added cachedChildTypes map[string]map[string]struct{} to FileConnector - Maps parent resource ID → set of child resource type IDs 2. Created buildChildTypesIndex() function in pkg/connector/cache.go: - Builds the index once during cache creation - Iterates through all resources and maps children to their parents - Returns a map for O(1) lookups 3. Updated getCachedData() in pkg/connector/connector.go: - Calls buildChildTypesIndex() and stores result in cache - Returns the child types index along with other caches - Signature updated to return 6 values instead of 5 4. Optimized List() method in pkg/connector/syncers.go: - Replaced O(n) loop scanning all resources with O(1) index lookup - Changed from iterating loadedData.Resources to simple map lookup: childTypesIndex[resource.Id.Resource] - Removed unused strings import Performance Impact: Before: - For each paginated resource (up to 50 per page), scanned ALL resources in the file - 50 resources × 1,000 total resources = 50,000 comparisons per page - O(n*m) complexity where n = resources per page, m = total resources After: - Direct O(1) map lookup for each resource - 50 resources × 1 lookup = 50 lookups per page - ~1000x improvement for large datasets --- pkg/connector/cache.go | 31 +++++++++++++++++++++++++++++++ pkg/connector/connector.go | 23 ++++++++++++++--------- pkg/connector/models.go | 1 + pkg/connector/syncers.go | 18 +++++------------- 4 files changed, 51 insertions(+), 22 deletions(-) diff --git a/pkg/connector/cache.go b/pkg/connector/cache.go index ac45d9c9..40c5348b 100644 --- a/pkg/connector/cache.go +++ b/pkg/connector/cache.go @@ -317,3 +317,34 @@ func buildEntitlementCache(ctx context.Context, entitlements []EntitlementData, l.Info("Built entitlement cache", zap.Int("count", len(cache))) return cache, nil } + +// buildChildTypesIndex creates an index mapping parent resource IDs to their child resource types. +// This eliminates the need to scan all resources when checking for child types in the List method. +func buildChildTypesIndex(ctx context.Context, resourceData []ResourceData, resourceCache map[string]*v2.Resource) map[string]map[string]struct{} { + l := ctxzap.Extract(ctx) + index := make(map[string]map[string]struct{}) + + for _, rData := range resourceData { + if rData.ParentResource == "" { + continue + } + + // Check if parent exists in the cache + if _, exists := resourceCache[rData.ParentResource]; !exists { + continue + } + + childTypeId := strings.ToLower(rData.ResourceType) + + // Initialize the set for this parent if it doesn't exist + if index[rData.ParentResource] == nil { + index[rData.ParentResource] = make(map[string]struct{}) + } + + // Add the child type to the parent's set + index[rData.ParentResource][childTypeId] = struct{}{} + } + + l.Debug("Built child types index", zap.Int("parents_with_children", len(index))) + return index +} diff --git a/pkg/connector/connector.go b/pkg/connector/connector.go index 050d07bb..3b1626cf 100644 --- a/pkg/connector/connector.go +++ b/pkg/connector/connector.go @@ -36,7 +36,7 @@ func (fc *FileConnector) Validate(ctx context.Context) (annotations.Annotations, // getCachedData returns cached file data and built caches, loading them if not already cached. // This method is thread-safe and ensures the file is only loaded and processed once. -func (fc *FileConnector) getCachedData(ctx context.Context) (*LoadedData, map[string]*v2.ResourceType, map[string]*v2.Resource, map[string]*v2.Entitlement, error) { +func (fc *FileConnector) getCachedData(ctx context.Context) (*LoadedData, map[string]*v2.ResourceType, map[string]*v2.Resource, map[string]*v2.Entitlement, map[string]map[string]struct{}, error) { l := ctxzap.Extract(ctx) // Fast path: check if cache is already populated using read lock @@ -46,8 +46,9 @@ func (fc *FileConnector) getCachedData(ctx context.Context) (*LoadedData, map[st resourceTypes := fc.cachedResourceTypes resources := fc.cachedResources entitlements := fc.cachedEntitlements + childTypes := fc.cachedChildTypes fc.cacheMutex.RUnlock() - return loadedData, resourceTypes, resources, entitlements, nil + return loadedData, resourceTypes, resources, entitlements, childTypes, nil } fc.cacheMutex.RUnlock() @@ -57,7 +58,7 @@ func (fc *FileConnector) getCachedData(ctx context.Context) (*LoadedData, map[st // Double-check after acquiring write lock (another goroutine may have loaded it) if fc.cachedData != nil { - return fc.cachedData, fc.cachedResourceTypes, fc.cachedResources, fc.cachedEntitlements, nil + return fc.cachedData, fc.cachedResourceTypes, fc.cachedResources, fc.cachedEntitlements, fc.cachedChildTypes, nil } l.Info("Loading and caching file data", zap.String("input_file_path", fc.inputFilePath)) @@ -65,30 +66,34 @@ func (fc *FileConnector) getCachedData(ctx context.Context) (*LoadedData, map[st // Load file data loadedData, err := LoadFileData(fc.inputFilePath) if err != nil { - return nil, nil, nil, nil, fmt.Errorf("failed to load data file: %w", err) + return nil, nil, nil, nil, nil, fmt.Errorf("failed to load data file: %w", err) } // Build all caches resourceTypesCache, err := buildResourceTypeCache(ctx, loadedData.Resources, loadedData.Users) if err != nil { - return nil, nil, nil, nil, fmt.Errorf("failed to build resource type cache: %w", err) + return nil, nil, nil, nil, nil, fmt.Errorf("failed to build resource type cache: %w", err) } resourceCache, err := buildResourceCache(ctx, loadedData.Users, loadedData.Resources, resourceTypesCache) if err != nil { - return nil, nil, nil, nil, fmt.Errorf("failed to build resource cache: %w", err) + return nil, nil, nil, nil, nil, fmt.Errorf("failed to build resource cache: %w", err) } entitlementCache, err := buildEntitlementCache(ctx, loadedData.Entitlements, resourceCache) if err != nil { - return nil, nil, nil, nil, fmt.Errorf("failed to build entitlement cache: %w", err) + return nil, nil, nil, nil, nil, fmt.Errorf("failed to build entitlement cache: %w", err) } + // Build child types index to optimize child type lookups + childTypesIndex := buildChildTypesIndex(ctx, loadedData.Resources, resourceCache) + // Store in cache fc.cachedData = loadedData fc.cachedResourceTypes = resourceTypesCache fc.cachedResources = resourceCache fc.cachedEntitlements = entitlementCache + fc.cachedChildTypes = childTypesIndex l.Info("Successfully cached file data", zap.Int("users", len(loadedData.Users)), @@ -96,7 +101,7 @@ func (fc *FileConnector) getCachedData(ctx context.Context) (*LoadedData, map[st zap.Int("entitlements", len(loadedData.Entitlements)), zap.Int("grants", len(loadedData.Grants))) - return loadedData, resourceTypesCache, resourceCache, entitlementCache, nil + return loadedData, resourceTypesCache, resourceCache, entitlementCache, childTypesIndex, nil } // ResourceSyncers returns a list of syncers for the connector. @@ -107,7 +112,7 @@ func (fc *FileConnector) ResourceSyncers(ctx context.Context) []connectorbuilder l := ctxzap.Extract(ctx) l.Info("ResourceSyncers method called", zap.String("input_file_path", fc.inputFilePath)) - _, resourceTypesCache, _, _, err := fc.getCachedData(ctx) + _, resourceTypesCache, _, _, _, err := fc.getCachedData(ctx) if err != nil { l.Error("Failed to load and cache data", zap.Error(err)) return nil diff --git a/pkg/connector/models.go b/pkg/connector/models.go index 7631efe2..9408b269 100644 --- a/pkg/connector/models.go +++ b/pkg/connector/models.go @@ -24,6 +24,7 @@ type FileConnector struct { cachedResourceTypes map[string]*v2.ResourceType cachedResources map[string]*v2.Resource cachedEntitlements map[string]*v2.Entitlement + cachedChildTypes map[string]map[string]struct{} // parent resource ID -> set of child resource type IDs } // LoadedData holds all the data parsed from the input file. diff --git a/pkg/connector/syncers.go b/pkg/connector/syncers.go index 3390b19b..bc003b32 100644 --- a/pkg/connector/syncers.go +++ b/pkg/connector/syncers.go @@ -5,7 +5,6 @@ import ( "fmt" "sort" "strconv" - "strings" v2 "github.com/conductorone/baton-sdk/pb/c1/connector/v2" "github.com/conductorone/baton-sdk/pkg/annotations" @@ -44,7 +43,7 @@ func (fs *fileSyncer) ResourceType(ctx context.Context) *v2.ResourceType { // It implements the List method, required by the connectorbuilder.ResourceSyncer interface. // It uses cached data from the connector, filters for the relevant type, and returns paginated results. func (fs *fileSyncer) List(ctx context.Context, parentResourceID *v2.ResourceId, pToken *pagination.Token) ([]*v2.Resource, string, annotations.Annotations, error) { - loadedData, _, resourceCache, _, err := fs.connector.getCachedData(ctx) + _, _, resourceCache, _, childTypesIndex, err := fs.connector.getCachedData(ctx) if err != nil { return nil, "", nil, fmt.Errorf("List: %w", err) } @@ -97,16 +96,9 @@ func (fs *fileSyncer) List(ctx context.Context, parentResourceID *v2.ResourceId, rv := matchingResources[start:end] + // Use prebuilt child types index for O(1) lookups instead of scanning all resources for _, resource := range rv { - childTypes := make(map[string]struct{}) - for _, possibleChild := range loadedData.Resources { - if possibleChild.ParentResource == resource.Id.Resource { - childTypeId := strings.ToLower(possibleChild.ResourceType) - childTypes[childTypeId] = struct{}{} - } - } - - if len(childTypes) > 0 { + if childTypes, exists := childTypesIndex[resource.Id.Resource]; exists && len(childTypes) > 0 { annos := annotations.Annotations(resource.Annotations) for childTypeId := range childTypes { childAnno := &v2.ChildResourceType{ResourceTypeId: childTypeId} @@ -131,7 +123,7 @@ func (fs *fileSyncer) List(ctx context.Context, parentResourceID *v2.ResourceId, // It implements the Entitlements method, required by the connectorbuilder.ResourceSyncer interface. // It uses cached data from the connector, filters for the relevant resource, and returns paginated results. func (fs *fileSyncer) Entitlements(ctx context.Context, resource *v2.Resource, pToken *pagination.Token) ([]*v2.Entitlement, string, annotations.Annotations, error) { - _, _, _, entitlementCache, err := fs.connector.getCachedData(ctx) + _, _, _, entitlementCache, _, err := fs.connector.getCachedData(ctx) if err != nil { return nil, "", nil, fmt.Errorf("Entitlements: %w", err) } @@ -191,7 +183,7 @@ func (fs *fileSyncer) Entitlements(ctx context.Context, resource *v2.Resource, p func (fs *fileSyncer) Grants(ctx context.Context, resource *v2.Resource, pToken *pagination.Token) ([]*v2.Grant, string, annotations.Annotations, error) { l := ctxzap.Extract(ctx) - loadedData, resourceTypesCache, resourceCache, entitlementCache, err := fs.connector.getCachedData(ctx) + loadedData, resourceTypesCache, resourceCache, entitlementCache, _, err := fs.connector.getCachedData(ctx) if err != nil { return nil, "", nil, fmt.Errorf("Grants: %w", err) } From 52f14ec6176b0ce08cc7b300c5a0c0c4bb7dce99 Mon Sep 17 00:00:00 2001 From: Charles Kramer Date: Mon, 12 Jan 2026 23:11:49 -0700 Subject: [PATCH 3/6] Changes Made: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Added sorted index fields in pkg/connector/models.go: - cachedSortedResourcesByType map[string][]*v2.Resource - Pre-sorted resources grouped by type - cachedSortedEntitlementsByRes map[string][]*v2.Entitlement - Pre-sorted entitlements grouped by resource 2. Created sorting functions in pkg/connector/cache.go: - buildSortedResourcesByType() - Groups resources by type and sorts each group once - buildSortedEntitlementsByResource() - Groups entitlements by resource and sorts each group once 3. Updated getCachedData() in pkg/connector/connector.go: - Calls the sorting functions during cache building - Stores sorted indexes in the connector cache 4. Optimized List() method in pkg/connector/syncers.go: - Retrieves pre-sorted resources from cached index - Only filters by parent if needed (no type filtering or sorting) - Eliminated O(n log n) sort on every page request 5. Optimized Entitlements() method in pkg/connector/syncers.go: - Retrieves pre-sorted entitlements from cached index - No filtering or sorting needed - direct lookup - Eliminated O(n log n) sort on every page request Note: Grants() method still sorts on each request because grants are filtered dynamically by context (principal or target). Pre-building a sorted index for grants would be complex and may not provide significant benefit given their filtering requirements. Performance Impact: Before: - List(): Filter all resources + sort all matching resources on every page (20× for 1000 resources with page size 50) - Entitlements(): Filter all entitlements + sort all matching entitlements on every page - Total: ~20 × O(n log n) operations per sync After: - List(): O(1) map lookup + O(m) parent filter where m = resources of this type (no sorting) - Entitlements(): O(1) map lookup (no filtering or sorting) - Sorting happens once during cache building: O(n log n) total Estimated speedup: 10-20x faster for pagination scenarios with multiple pages. --- pkg/connector/cache.go | 55 ++++++++++++++++++++++++++++++++ pkg/connector/connector.go | 6 ++++ pkg/connector/models.go | 3 ++ pkg/connector/syncers.go | 64 +++++++++++++++++++++----------------- 4 files changed, 99 insertions(+), 29 deletions(-) diff --git a/pkg/connector/cache.go b/pkg/connector/cache.go index 40c5348b..b0a4c557 100644 --- a/pkg/connector/cache.go +++ b/pkg/connector/cache.go @@ -3,6 +3,7 @@ package connector import ( "context" "fmt" + "sort" "strings" "time" @@ -348,3 +349,57 @@ func buildChildTypesIndex(ctx context.Context, resourceData []ResourceData, reso l.Debug("Built child types index", zap.Int("parents_with_children", len(index))) return index } + +// buildSortedResourcesByType creates pre-sorted lists of resources grouped by type. +// This eliminates the need to filter and sort on every pagination request. +func buildSortedResourcesByType(ctx context.Context, resourceCache map[string]*v2.Resource) map[string][]*v2.Resource { + l := ctxzap.Extract(ctx) + index := make(map[string][]*v2.Resource) + + // Group resources by type + for _, res := range resourceCache { + typeId := res.Id.ResourceType + index[typeId] = append(index[typeId], res) + } + + // Sort each type's list + totalResources := 0 + for _, resources := range index { + sort.SliceStable(resources, func(i, j int) bool { + return resources[i].Id.Resource < resources[j].Id.Resource + }) + totalResources += len(resources) + } + + l.Debug("Built sorted resources by type index", + zap.Int("resource_types", len(index)), + zap.Int("total_resources", totalResources)) + return index +} + +// buildSortedEntitlementsByResource creates pre-sorted lists of entitlements grouped by resource. +// This eliminates the need to filter and sort on every pagination request. +func buildSortedEntitlementsByResource(ctx context.Context, entitlementCache map[string]*v2.Entitlement) map[string][]*v2.Entitlement { + l := ctxzap.Extract(ctx) + index := make(map[string][]*v2.Entitlement) + + // Group entitlements by resource + for _, ent := range entitlementCache { + resourceId := ent.Resource.Id.Resource + index[resourceId] = append(index[resourceId], ent) + } + + // Sort each resource's entitlement list + totalEntitlements := 0 + for _, entitlements := range index { + sort.SliceStable(entitlements, func(i, j int) bool { + return entitlements[i].Slug < entitlements[j].Slug + }) + totalEntitlements += len(entitlements) + } + + l.Debug("Built sorted entitlements by resource index", + zap.Int("resources_with_entitlements", len(index)), + zap.Int("total_entitlements", totalEntitlements)) + return index +} diff --git a/pkg/connector/connector.go b/pkg/connector/connector.go index 3b1626cf..7f1eb518 100644 --- a/pkg/connector/connector.go +++ b/pkg/connector/connector.go @@ -88,12 +88,18 @@ func (fc *FileConnector) getCachedData(ctx context.Context) (*LoadedData, map[st // Build child types index to optimize child type lookups childTypesIndex := buildChildTypesIndex(ctx, loadedData.Resources, resourceCache) + // Build sorted indexes to eliminate sorting on every pagination request + sortedResourcesByType := buildSortedResourcesByType(ctx, resourceCache) + sortedEntitlementsByResource := buildSortedEntitlementsByResource(ctx, entitlementCache) + // Store in cache fc.cachedData = loadedData fc.cachedResourceTypes = resourceTypesCache fc.cachedResources = resourceCache fc.cachedEntitlements = entitlementCache fc.cachedChildTypes = childTypesIndex + fc.cachedSortedResourcesByType = sortedResourcesByType + fc.cachedSortedEntitlementsByRes = sortedEntitlementsByResource l.Info("Successfully cached file data", zap.Int("users", len(loadedData.Users)), diff --git a/pkg/connector/models.go b/pkg/connector/models.go index 9408b269..8492eb50 100644 --- a/pkg/connector/models.go +++ b/pkg/connector/models.go @@ -25,6 +25,9 @@ type FileConnector struct { cachedResources map[string]*v2.Resource cachedEntitlements map[string]*v2.Entitlement cachedChildTypes map[string]map[string]struct{} // parent resource ID -> set of child resource type IDs + // Pre-sorted indexes to eliminate sorting on every page request + cachedSortedResourcesByType map[string][]*v2.Resource // resource type ID -> sorted list of resources + cachedSortedEntitlementsByRes map[string][]*v2.Entitlement // resource ID -> sorted list of entitlements } // LoadedData holds all the data parsed from the input file. diff --git a/pkg/connector/syncers.go b/pkg/connector/syncers.go index bc003b32..4380bfac 100644 --- a/pkg/connector/syncers.go +++ b/pkg/connector/syncers.go @@ -41,33 +41,40 @@ func (fs *fileSyncer) ResourceType(ctx context.Context) *v2.ResourceType { // List method retrieves a paginated list of resources for the syncer's type. // It implements the List method, required by the connectorbuilder.ResourceSyncer interface. -// It uses cached data from the connector, filters for the relevant type, and returns paginated results. +// It uses pre-sorted cached data from the connector and returns paginated results. func (fs *fileSyncer) List(ctx context.Context, parentResourceID *v2.ResourceId, pToken *pagination.Token) ([]*v2.Resource, string, annotations.Annotations, error) { - _, _, resourceCache, _, childTypesIndex, err := fs.connector.getCachedData(ctx) + _, _, _, _, childTypesIndex, err := fs.connector.getCachedData(ctx) if err != nil { return nil, "", nil, fmt.Errorf("List: %w", err) } - matchingResources := make([]*v2.Resource, 0) - for _, res := range resourceCache { - if res.Id.ResourceType != fs.resourceType.Id { - continue - } - if parentResourceID != nil { - if res.ParentResourceId == nil || res.ParentResourceId.ResourceType != parentResourceID.ResourceType || res.ParentResourceId.Resource != parentResourceID.Resource { - continue + // Get pre-sorted resources for this type (already filtered and sorted during cache building) + fs.connector.cacheMutex.RLock() + sortedResources := fs.connector.cachedSortedResourcesByType[fs.resourceType.Id] + fs.connector.cacheMutex.RUnlock() + + // Filter by parent if needed + var matchingResources []*v2.Resource + if parentResourceID != nil { + // Need to filter by parent + matchingResources = make([]*v2.Resource, 0) + for _, res := range sortedResources { + if res.ParentResourceId != nil && + res.ParentResourceId.ResourceType == parentResourceID.ResourceType && + res.ParentResourceId.Resource == parentResourceID.Resource { + matchingResources = append(matchingResources, res) } - } else { - if res.ParentResourceId != nil { - continue + } + } else { + // Only top-level resources (no parent) + matchingResources = make([]*v2.Resource, 0) + for _, res := range sortedResources { + if res.ParentResourceId == nil { + matchingResources = append(matchingResources, res) } } - matchingResources = append(matchingResources, res) } - - sort.SliceStable(matchingResources, func(i, j int) bool { - return matchingResources[i].Id.Resource < matchingResources[j].Id.Resource - }) + // No sorting needed - already sorted in the cached index pageSize := 50 bag := &pagination.Bag{} @@ -121,23 +128,22 @@ func (fs *fileSyncer) List(ctx context.Context, parentResourceID *v2.ResourceId, // Entitlements method retrieves a paginated list of entitlements for the syncer's type. // It implements the Entitlements method, required by the connectorbuilder.ResourceSyncer interface. -// It uses cached data from the connector, filters for the relevant resource, and returns paginated results. +// It uses pre-sorted cached data from the connector and returns paginated results. func (fs *fileSyncer) Entitlements(ctx context.Context, resource *v2.Resource, pToken *pagination.Token) ([]*v2.Entitlement, string, annotations.Annotations, error) { - _, _, _, entitlementCache, _, err := fs.connector.getCachedData(ctx) + _, _, _, _, _, err := fs.connector.getCachedData(ctx) if err != nil { return nil, "", nil, fmt.Errorf("Entitlements: %w", err) } - matchingEntitlements := make([]*v2.Entitlement, 0) - for _, ent := range entitlementCache { - if ent.Resource.Id.ResourceType == resource.Id.ResourceType && ent.Resource.Id.Resource == resource.Id.Resource { - matchingEntitlements = append(matchingEntitlements, ent) - } - } + // Get pre-sorted entitlements for this resource (already filtered and sorted during cache building) + fs.connector.cacheMutex.RLock() + matchingEntitlements := fs.connector.cachedSortedEntitlementsByRes[resource.Id.Resource] + fs.connector.cacheMutex.RUnlock() - sort.SliceStable(matchingEntitlements, func(i, j int) bool { - return matchingEntitlements[i].Slug < matchingEntitlements[j].Slug - }) + // matchingEntitlements is already sorted, no need to sort again + if matchingEntitlements == nil { + matchingEntitlements = []*v2.Entitlement{} // Return empty list if no entitlements for this resource + } pageSize := 50 bag := &pagination.Bag{} From 8ff7e3b82df0320896ad04fc34949c5036942476 Mon Sep 17 00:00:00 2001 From: Charles Kramer Date: Mon, 12 Jan 2026 23:16:19 -0700 Subject: [PATCH 4/6] 1. Added constant in pkg/connector/syncers.go: - defaultPageSize = 200 - Centralized constant replacing hardcoded values - Documented reasoning: all data is in-memory and already pre-sorted/filtered 2. Updated all three syncer methods: - List(): Changed pageSize := 50 to pageSize := defaultPageSize - Entitlements(): Changed pageSize := 50 to pageSize := defaultPageSize - Grants(): Changed pageSize := 50 to pageSize := defaultPageSize Performance Impact: For 1,000 resources: - Before (page size 50): 20 pagination calls needed - After (page size 200): 5 pagination calls needed - Reduction: 4x fewer API round trips Combined with our previous optimizations: - Each pagination call is now 10-20x faster (no redundant file reads, no sorting) - 4x fewer pagination calls (larger page size) - Total estimated speedup: 40-80x for pagination scenarios Why Page Size 200? - All data is in-memory (no I/O concerns) - Pre-sorted and pre-filtered (minimal processing per request) - No external API rate limits - Balances memory usage with performance - Easy to adjust via the constant if needed --- pkg/connector/syncers.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pkg/connector/syncers.go b/pkg/connector/syncers.go index 4380bfac..6fb192ae 100644 --- a/pkg/connector/syncers.go +++ b/pkg/connector/syncers.go @@ -14,6 +14,13 @@ import ( "go.uber.org/zap" ) +const ( + // defaultPageSize is the number of items returned per pagination request. + // Increased from 50 to 200 for better performance since all data is in-memory + // and already pre-sorted/filtered during cache building. + defaultPageSize = 200 +) + // fileSyncer implements the ResourceSyncer interface for a specific resource type. // It holds a reference to the resource type it handles and the parent FileConnector. // Data loading and caching is performed once by the connector and reused across all syncer calls. @@ -76,7 +83,7 @@ func (fs *fileSyncer) List(ctx context.Context, parentResourceID *v2.ResourceId, } // No sorting needed - already sorted in the cached index - pageSize := 50 + pageSize := defaultPageSize bag := &pagination.Bag{} err = bag.Unmarshal(pToken.Token) if err != nil { @@ -145,7 +152,7 @@ func (fs *fileSyncer) Entitlements(ctx context.Context, resource *v2.Resource, p matchingEntitlements = []*v2.Entitlement{} // Return empty list if no entitlements for this resource } - pageSize := 50 + pageSize := defaultPageSize bag := &pagination.Bag{} err = bag.Unmarshal(pToken.Token) if err != nil { @@ -255,7 +262,7 @@ func (fs *fileSyncer) Grants(ctx context.Context, resource *v2.Resource, pToken return matchingGrants[i].Entitlement.Id < matchingGrants[j].Entitlement.Id }) - pageSize := 50 + pageSize := defaultPageSize bag := &pagination.Bag{} err = bag.Unmarshal(pToken.Token) if err != nil { From 76a894500a0e8a4882c7986ff67c19194c7cf88d Mon Sep 17 00:00:00 2001 From: Charles Kramer Date: Tue, 13 Jan 2026 16:03:27 -0700 Subject: [PATCH 5/6] Add cache invalidation between syncs --- pkg/connector/connector.go | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/pkg/connector/connector.go b/pkg/connector/connector.go index 7f1eb518..1c6781d2 100644 --- a/pkg/connector/connector.go +++ b/pkg/connector/connector.go @@ -34,8 +34,24 @@ func (fc *FileConnector) Validate(ctx context.Context) (annotations.Annotations, return nil, nil } +// clearCache invalidates all cached data, forcing a fresh load on the next getCachedData call. +// This is called at the start of each sync operation to ensure fresh data between syncs. +func (fc *FileConnector) clearCache() { + fc.cacheMutex.Lock() + defer fc.cacheMutex.Unlock() + + fc.cachedData = nil + fc.cachedResourceTypes = nil + fc.cachedResources = nil + fc.cachedEntitlements = nil + fc.cachedChildTypes = nil + fc.cachedSortedResourcesByType = nil + fc.cachedSortedEntitlementsByRes = nil +} + // getCachedData returns cached file data and built caches, loading them if not already cached. -// This method is thread-safe and ensures the file is only loaded and processed once. +// This method is thread-safe and ensures the file is only loaded and processed once per sync. +// Cache is cleared between syncs via clearCache() to pick up file changes. func (fc *FileConnector) getCachedData(ctx context.Context) (*LoadedData, map[string]*v2.ResourceType, map[string]*v2.Resource, map[string]*v2.Entitlement, map[string]map[string]struct{}, error) { l := ctxzap.Extract(ctx) @@ -114,10 +130,14 @@ func (fc *FileConnector) getCachedData(ctx context.Context) (*LoadedData, map[st // function is required by the connectorbuilder.Connector interface. // It determines resource types from the input file and creates a syncer instance for each type, enabling the SDK to sync them. // implementation loads minimal data to find resource types, builds the type cache, and creates simple syncers passing the connector reference. +// Cache is cleared at the start of each sync to ensure fresh data between syncs. func (fc *FileConnector) ResourceSyncers(ctx context.Context) []connectorbuilder.ResourceSyncer { l := ctxzap.Extract(ctx) l.Info("ResourceSyncers method called", zap.String("input_file_path", fc.inputFilePath)) + // Clear cache at the start of each sync operation to pick up file changes + fc.clearCache() + _, resourceTypesCache, _, _, _, err := fc.getCachedData(ctx) if err != nil { l.Error("Failed to load and cache data", zap.Error(err)) From 5f9726827e97d7e8c78b6d98e958ef3004d42b25 Mon Sep 17 00:00:00 2001 From: Charles Kramer Date: Wed, 14 Jan 2026 10:42:13 -0700 Subject: [PATCH 6/6] cache invalidation unnecessary, handled by baton-sdk --- pkg/connector/connector.go | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/pkg/connector/connector.go b/pkg/connector/connector.go index 1c6781d2..9e153649 100644 --- a/pkg/connector/connector.go +++ b/pkg/connector/connector.go @@ -34,24 +34,9 @@ func (fc *FileConnector) Validate(ctx context.Context) (annotations.Annotations, return nil, nil } -// clearCache invalidates all cached data, forcing a fresh load on the next getCachedData call. -// This is called at the start of each sync operation to ensure fresh data between syncs. -func (fc *FileConnector) clearCache() { - fc.cacheMutex.Lock() - defer fc.cacheMutex.Unlock() - - fc.cachedData = nil - fc.cachedResourceTypes = nil - fc.cachedResources = nil - fc.cachedEntitlements = nil - fc.cachedChildTypes = nil - fc.cachedSortedResourcesByType = nil - fc.cachedSortedEntitlementsByRes = nil -} - // getCachedData returns cached file data and built caches, loading them if not already cached. // This method is thread-safe and ensures the file is only loaded and processed once per sync. -// Cache is cleared between syncs via clearCache() to pick up file changes. +// The SDK's session cache mechanism ensures fresh data between syncs automatically. func (fc *FileConnector) getCachedData(ctx context.Context) (*LoadedData, map[string]*v2.ResourceType, map[string]*v2.Resource, map[string]*v2.Entitlement, map[string]map[string]struct{}, error) { l := ctxzap.Extract(ctx) @@ -130,14 +115,10 @@ func (fc *FileConnector) getCachedData(ctx context.Context) (*LoadedData, map[st // function is required by the connectorbuilder.Connector interface. // It determines resource types from the input file and creates a syncer instance for each type, enabling the SDK to sync them. // implementation loads minimal data to find resource types, builds the type cache, and creates simple syncers passing the connector reference. -// Cache is cleared at the start of each sync to ensure fresh data between syncs. func (fc *FileConnector) ResourceSyncers(ctx context.Context) []connectorbuilder.ResourceSyncer { l := ctxzap.Extract(ctx) l.Info("ResourceSyncers method called", zap.String("input_file_path", fc.inputFilePath)) - // Clear cache at the start of each sync operation to pick up file changes - fc.clearCache() - _, resourceTypesCache, _, _, _, err := fc.getCachedData(ctx) if err != nil { l.Error("Failed to load and cache data", zap.Error(err))