Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions pkg/dotc1z/c1file.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package dotc1z
import (
"context"
"database/sql"
"errors"
"fmt"
"math"
"os"
"path/filepath"
"sync"
Expand All @@ -21,6 +23,7 @@ import (

v2 "github.com/conductorone/baton-sdk/pb/c1/connector/v2"
"github.com/conductorone/baton-sdk/pkg/connectorstore"
"github.com/conductorone/baton-sdk/pkg/progress"
)

type pragma struct {
Expand Down Expand Up @@ -209,6 +212,102 @@ func (c *C1File) init(ctx context.Context) error {
return nil
}

// ProgressCounts rebuilds the progress counters for the sync identified by
// syncID from the rows already stored in the file.
//
// For every resource type it records the number of resources of that type,
// and the number of distinct resource IDs that have at least one entitlement
// row and at least one grant row for that sync.
//
// It returns the error from getSync unchanged when the sync lookup fails, any
// error from listing resource types or running the count queries, and an
// error when a count does not fit in an int.
func (c *C1File) ProgressCounts(ctx context.Context, syncID string) (*progress.ProgressCounts, error) {
	ctx, span := tracer.Start(ctx, "C1File.ProgressCounts")
	defer span.End()

	// Check that the sync ID exists before doing any counting.
	if _, err := c.getSync(ctx, syncID); err != nil {
		return nil, err
	}

	counts := progress.NewProgressCounts()

	// Page through every resource type; the list is needed both for its
	// length and to drive the per-type counts below.
	// NOTE(review): the list request carries no sync ID — confirm that
	// ListResourceTypes resolves the same sync as syncID.
	var resourceTypes []*v2.ResourceType
	pageToken := ""
	for {
		resp, err := c.ListResourceTypes(ctx, &v2.ResourceTypesServiceListResourceTypesRequest{PageToken: pageToken})
		if err != nil {
			return nil, err
		}

		resourceTypes = append(resourceTypes, resp.List...)

		if resp.NextPageToken == "" {
			break
		}
		pageToken = resp.NextPageToken
	}
	counts.ResourceTypes = len(resourceTypes)

	for _, rt := range resourceTypes {
		resourceCount, err := c.db.From(resources.Name()).
			Where(goqu.C("resource_type_id").Eq(rt.Id)).
			Where(goqu.C("sync_id").Eq(syncID)).
			CountContext(ctx)
		if err != nil {
			return nil, err
		}
		if resourceCount > math.MaxInt {
			return nil, fmt.Errorf("resource count for %s is too large", rt.Id)
		}
		counts.Resources[rt.Id] = int(resourceCount)

		// Entitlement and grant progress are both measured in distinct
		// resources touched, so they share one query helper. Each count is
		// computed independently: a missing entitlement result no longer
		// skips the grant count for the same resource type.
		entitlementsCount, err := c.countDistinctResourceIDs(ctx, entitlements.Name(), "entitlements", rt.Id, syncID)
		if err != nil {
			return nil, err
		}
		counts.EntitlementsProgress[rt.Id] = entitlementsCount

		grantsCount, err := c.countDistinctResourceIDs(ctx, grants.Name(), "grants", rt.Id, syncID)
		if err != nil {
			return nil, err
		}
		counts.GrantsProgress[rt.Id] = grantsCount
	}

	return counts, nil
}

// countDistinctResourceIDs returns COUNT(DISTINCT resource_id) over the rows
// of table that match resourceTypeID and syncID. kind names the table in the
// overflow error message. A scan that reports sql.ErrNoRows is treated as a
// count of zero; a count larger than math.MaxInt is reported as an error
// instead of being truncated.
func (c *C1File) countDistinctResourceIDs(ctx context.Context, table string, kind string, resourceTypeID string, syncID string) (int, error) {
	query, args, err := c.db.From(table).
		Select(goqu.COUNT(goqu.DISTINCT(goqu.C("resource_id")))).
		Where(goqu.C("resource_type_id").Eq(resourceTypeID)).
		Where(goqu.C("sync_id").Eq(syncID)).
		ToSQL()
	if err != nil {
		return 0, err
	}

	var count int64
	if err := c.db.QueryRowContext(ctx, query, args...).Scan(&count); err != nil {
		if errors.Is(err, sql.ErrNoRows) {
			return 0, nil
		}
		return 0, err
	}

	if count > math.MaxInt {
		return 0, fmt.Errorf("%s count for %s is too large", kind, resourceTypeID)
	}
	return int(count), nil
}
Comment on lines +215 to +309
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Verification agent

🧩 Analysis chain

Potential overflow and repeated queries.

  1. Storing large DB counts (int64) in an int field can lead to overflow on 32-bit systems if counts exceed 2,147,483,647. Consider refactoring the ProgressCounts struct to use int64 instead of int where feasible.
  2. The logic for counting entitlements and grants repeats similar queries. You could optimize by consolidating them (for example, in a single grouped query) or by modularizing the repeated code to reduce duplication.
- counts.Resources[rt.Id] = int(resourceCount)
- ...
- counts.GrantsProgress[rt.Id] = int(grantsCount)
+ counts.Resources[rt.Id] = resourceCount
+ ...
+ counts.GrantsProgress[rt.Id] = grantsCount

I'll begin by gathering additional context regarding the definition of the ProgressCounts struct and its initialization via the NewProgressCounts function to verify the potential overflow issue mentioned. This will help us confirm whether the struct fields are defined as int or int64, and if the repeated query logic can be optimized.

Below is a shell script to search for these definitions in the codebase:


🏁 Script executed:

#!/bin/bash
# Search for the definition of ProgressCounts struct
echo "Searching for ProgressCounts declaration:"
rg "type ProgressCounts" -n .

# Search for the NewProgressCounts function implementation
echo "Searching for NewProgressCounts function:"
rg "func NewProgressCounts" -n .

Length of output: 384


I'll now gather more details about the field types within the ProgressCounts struct to verify if the overflow concern is valid. Specifically, we need to check whether fields like Resources, GrantsProgress, and EntitlementsProgress are declared as type int (which may cause overflows on 32-bit systems) or as int64.

Below is a shell script to inspect the definition of ProgressCounts in the pkg/progress/progress.go file:


🏁 Script executed:

#!/bin/bash
echo "Inspecting ProgressCounts struct details:"
rg -C10 "type ProgressCounts struct" -n pkg/progress/progress.go

Length of output: 887


Action Required: Refactor Integer Types and Consolidate Query Logic

  • The ProgressCounts struct (in pkg/progress/progress.go) defines its count fields as int, while the database queries return int64. The resource and grant counts are checked against math.MaxInt before the int(...) conversion, but the entitlement count is converted without that check and can silently overflow on 32-bit systems. Either add the missing check or refactor these fields (and the corresponding initialization in NewProgressCounts) to use int64 so that larger numbers are handled safely.
  • The SQL query logic for counting entitlements and grants is duplicated. Consolidating these similar queries into a common helper or grouped query would improve maintainability and reduce redundancy.
- counts.Resources[rt.Id] = int(resourceCount)
- ...
- counts.GrantsProgress[rt.Id] = int(grantsCount)
+ counts.Resources[rt.Id] = resourceCount
+ ...
+ counts.GrantsProgress[rt.Id] = grantsCount

Please address these concerns to ensure robustness and cleaner code.

Committable suggestion skipped: line range outside the PR's diff.


// Stats introspects the database and returns the count of objects for the given sync run.
func (c *C1File) Stats(ctx context.Context) (map[string]int64, error) {
ctx, span := tracer.Start(ctx, "C1File.Stats")
Expand Down
144 changes: 144 additions & 0 deletions pkg/progress/progress.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package progress

import (
"context"
"time"

"github.com/conductorone/baton-sdk/pkg/sync/expand"
"github.com/grpc-ecosystem/go-grpc-middleware/logging/zap/ctxzap"
"go.uber.org/zap"
)

// ProgressCounts holds the counters and last-log timestamps used to report
// sync progress. The per-type maps are keyed by resource type id.
type ProgressCounts struct {
	ResourceTypes        int            // count of resource types
	Resources            map[string]int // map of resource type id to resource count
	EntitlementsProgress map[string]int // map of resource type id to entitlement count
	// LastEntitlementLog maps a resource type id to the time entitlement
	// progress was last logged for it.
	LastEntitlementLog map[string]time.Time
	GrantsProgress     map[string]int // map of resource type id to grant count
	// LastGrantLog maps a resource type id to the time grant progress was
	// last logged for it.
	LastGrantLog map[string]time.Time
	// LastActionLog is the time expand progress was last logged.
	LastActionLog time.Time
}
Comment on lines +12 to +20
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Consider using int64 for counts.

Storing counts in the int type can risk overflow on 32-bit builds. Using int64 for fields like ResourceTypes, Resources, EntitlementsProgress, and GrantsProgress is safer if large values are possible.

- type ProgressCounts struct {
-     ResourceTypes        int
-     Resources            map[string]int
-     EntitlementsProgress map[string]int
-     ...
- }
+ type ProgressCounts struct {
+     ResourceTypes        int64
+     Resources            map[string]int64
+     EntitlementsProgress map[string]int64
+     ...
+ }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
type ProgressCounts struct {
ResourceTypes int // count of resource types
Resources map[string]int // map of resource type id to resource count
EntitlementsProgress map[string]int // map of resource type id to entitlement count
LastEntitlementLog map[string]time.Time
GrantsProgress map[string]int // map of resource type id to grant count
LastGrantLog map[string]time.Time
LastActionLog time.Time
}
type ProgressCounts struct {
ResourceTypes int64 // count of resource types
Resources map[string]int64 // map of resource type id to resource count
EntitlementsProgress map[string]int64 // map of resource type id to entitlement count
LastEntitlementLog map[string]time.Time
GrantsProgress map[string]int64 // map of resource type id to grant count
LastGrantLog map[string]time.Time
LastActionLog time.Time
}


// maxLogFrequency is the minimum interval that must elapse between two
// rate-limited progress log lines.
const maxLogFrequency = 10 * time.Second

// NewProgressCounts returns a ProgressCounts with every map allocated and
// empty, a zero ResourceTypes count, and a zero LastActionLog.
//
// TODO: use a mutex or a syncmap for when syncer code becomes parallel
func NewProgressCounts() *ProgressCounts {
	counts := new(ProgressCounts)

	counts.Resources = map[string]int{}
	counts.EntitlementsProgress = map[string]int{}
	counts.GrantsProgress = map[string]int{}

	counts.LastEntitlementLog = map[string]time.Time{}
	counts.LastGrantLog = map[string]time.Time{}

	return counts
}

// LogResourceTypesProgress writes an info-level log line carrying the current
// ResourceTypes count, using the logger extracted from ctx.
func (p *ProgressCounts) LogResourceTypesProgress(ctx context.Context) {
	ctxzap.Extract(ctx).Info(
		"Synced resource types",
		zap.Int("count", p.ResourceTypes),
	)
}

// LogResourcesProgress writes an info-level log line with the resource count
// recorded for resourceType, using the logger extracted from ctx. A resource
// type with no recorded entry is logged with a count of zero.
func (p *ProgressCounts) LogResourcesProgress(ctx context.Context, resourceType string) {
	logger := ctxzap.Extract(ctx)
	logger.Info(
		"Synced resources",
		zap.String("resource_type_id", resourceType),
		zap.Int("count", p.Resources[resourceType]),
	)
}

// LogEntitlementsProgress logs how far entitlement syncing has progressed for
// resourceType, using the logger extracted from ctx.
//
// When no resource count is recorded for the type, only the synced count is
// logged, at most once per maxLogFrequency. Otherwise it logs an error when
// more entitlement resources than resources have been seen, a completion line
// at exactly 100%, or a rate-limited progress line with the percentage.
func (p *ProgressCounts) LogEntitlementsProgress(ctx context.Context, resourceType string) {
	entitlementsProgress := p.EntitlementsProgress[resourceType]
	resources := p.Resources[resourceType]

	l := ctxzap.Extract(ctx)
	if resources == 0 {
		// if resuming sync, resource counts will be zero, so don't calculate percentage. just log every 10 seconds.
		if time.Since(p.LastEntitlementLog[resourceType]) > maxLogFrequency {
			l.Info("Syncing entitlements",
				zap.String("resource_type_id", resourceType),
				zap.Int("synced", entitlementsProgress),
			)
			p.LastEntitlementLog[resourceType] = time.Now()
		}
		return
	}

	// Widen to int64 before multiplying: with a 32-bit int the product
	// progress*100 wraps once progress exceeds roughly 21.4 million, which
	// would yield a bogus percentage.
	percentComplete := int(int64(entitlementsProgress) * 100 / int64(resources))

	switch {
	case entitlementsProgress > resources:
		l.Error("more entitlement resources than resources",
			zap.String("resource_type_id", resourceType),
			zap.Int("synced", entitlementsProgress),
			zap.Int("total", resources),
		)
	case percentComplete == 100:
		l.Info("Synced entitlements",
			zap.String("resource_type_id", resourceType),
			zap.Int("count", entitlementsProgress),
			zap.Int("total", resources),
		)
		// Reset the timestamp so the next progress line for this type is
		// not held back by the rate limit.
		p.LastEntitlementLog[resourceType] = time.Time{}
	case time.Since(p.LastEntitlementLog[resourceType]) > maxLogFrequency:
		l.Info("Syncing entitlements",
			zap.String("resource_type_id", resourceType),
			zap.Int("synced", entitlementsProgress),
			zap.Int("total", resources),
			zap.Int("percent_complete", percentComplete),
		)
		p.LastEntitlementLog[resourceType] = time.Now()
	}
}

// LogGrantsProgress logs how far grant syncing has progressed for
// resourceType, using the logger extracted from ctx.
//
// When no resource count is recorded for the type, only the synced count is
// logged, at most once per maxLogFrequency. Otherwise it logs an error when
// more grant resources than resources have been seen, a completion line at
// exactly 100%, or a rate-limited progress line with the percentage.
func (p *ProgressCounts) LogGrantsProgress(ctx context.Context, resourceType string) {
	grantsProgress := p.GrantsProgress[resourceType]
	resources := p.Resources[resourceType]

	l := ctxzap.Extract(ctx)
	if resources == 0 {
		// if resuming sync, resource counts will be zero, so don't calculate percentage. just log every 10 seconds.
		if time.Since(p.LastGrantLog[resourceType]) > maxLogFrequency {
			l.Info("Syncing grants",
				zap.String("resource_type_id", resourceType),
				zap.Int("synced", grantsProgress),
			)
			p.LastGrantLog[resourceType] = time.Now()
		}
		return
	}

	// Widen to int64 before multiplying: with a 32-bit int the product
	// progress*100 wraps once progress exceeds roughly 21.4 million, which
	// would yield a bogus percentage.
	percentComplete := int(int64(grantsProgress) * 100 / int64(resources))

	switch {
	case grantsProgress > resources:
		l.Error("more grant resources than resources",
			zap.String("resource_type_id", resourceType),
			zap.Int("synced", grantsProgress),
			zap.Int("total", resources),
		)
	case percentComplete == 100:
		l.Info("Synced grants",
			zap.String("resource_type_id", resourceType),
			zap.Int("count", grantsProgress),
			zap.Int("total", resources),
		)
		// Reset the timestamp so the next progress line for this type is
		// not held back by the rate limit.
		p.LastGrantLog[resourceType] = time.Time{}
	case time.Since(p.LastGrantLog[resourceType]) > maxLogFrequency:
		l.Info("Syncing grants",
			zap.String("resource_type_id", resourceType),
			zap.Int("synced", grantsProgress),
			zap.Int("total", resources),
			zap.Int("percent_complete", percentComplete),
		)
		p.LastGrantLog[resourceType] = time.Now()
	}
}

// LogExpandProgress logs the number of expand actions still pending, using
// the logger extracted from ctx. It is rate limited: nothing is logged, and
// LastActionLog is left alone, while less than maxLogFrequency has elapsed
// since the previous log line.
func (p *ProgressCounts) LogExpandProgress(ctx context.Context, actions []*expand.EntitlementGraphAction) {
	remaining := len(actions)

	if time.Since(p.LastActionLog) >= maxLogFrequency {
		p.LastActionLog = time.Now()
		ctxzap.Extract(ctx).Info("Expanding grants", zap.Int("actions_remaining", remaining))
	}
}
Loading
Loading