Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,11 @@ require (
github.com/wasilibs/go-pgquery v0.0.0-20250409022910-10ac41983c07
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0
go.opentelemetry.io/contrib/propagators/b3 v1.20.0
go.opentelemetry.io/contrib/propagators/ot v1.20.0
go.opentelemetry.io/otel v1.43.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0
go.opentelemetry.io/otel/exporters/prometheus v0.62.0
go.opentelemetry.io/otel/sdk v1.43.0
go.opentelemetry.io/otel/sdk/metric v1.43.0
Expand Down Expand Up @@ -252,11 +256,7 @@ require (
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
go.opentelemetry.io/contrib/detectors/gcp v1.43.0 // indirect
go.opentelemetry.io/contrib/propagators/b3 v1.20.0 // indirect
go.opentelemetry.io/contrib/propagators/ot v1.20.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.34.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 // indirect
go.opentelemetry.io/otel/metric v1.43.0 // indirect
go.opentelemetry.io/proto/otlp v1.10.0 // indirect
go.uber.org/automaxprocs v1.6.0 // indirect
Expand Down
20 changes: 13 additions & 7 deletions pkg/cmd/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@

"github.com/fatih/color"
"github.com/jzelinskie/cobrautil/v2"
"github.com/jzelinskie/cobrautil/v2/cobraotel"
"github.com/spf13/cobra"

log "github.com/authzed/spicedb/internal/logging"
"github.com/authzed/spicedb/internal/telemetry"
"github.com/authzed/spicedb/pkg/cmd/datastore"
"github.com/authzed/spicedb/pkg/cmd/server"
Expand Down Expand Up @@ -219,11 +219,8 @@
return fmt.Errorf("could not register stored schema cache flags: %w", err)
}

tracingFlags := nfs.FlagSet(BoldBlue("Tracing"))
// Flags for tracing
// NOTE: cobraotel.New takes service name as an arg rather than command name.
otel := cobraotel.New("spicedb")
otel.RegisterFlags(tracingFlags)
server.RegisterOTelFlags(cmd)

loggingFlagSet := nfs.FlagSet(BoldBlue("Logging"))
loggingFlagSet.BoolVar(&config.EnableRequestLogs, "grpc-log-requests-enabled", false, "enable logging of API request payloads")
Expand Down Expand Up @@ -265,14 +262,23 @@
Long: "start a SpiceDB server",
PreRunE: server.DefaultPreRunE(programName),
RunE: termination.PublishError(func(cmd *cobra.Command, args []string) error {
server, err := config.Complete(cmd.Context())
srv, err := config.Complete(cmd.Context())

Check warning on line 265 in pkg/cmd/serve.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/serve.go#L265

Added line #L265 was not covered by tests
if err != nil {
return err
}
signalctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
defer stop()

return server.Run(signalctx)
defer func() {
// Shutdown OTel provider to ensure all traces are flushed
if provider := server.OTelProviderFromContext(cmd.Context()); provider != nil {
if err := server.ShutdownOTelProvider(context.Background(), provider); err != nil {
log.Warn().Err(err).Msg("failed to cleanly shutdown OpenTelemetry provider")
}

Check warning on line 277 in pkg/cmd/serve.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/serve.go#L272-L277

Added lines #L272 - L277 were not covered by tests
}
}()

return srv.Run(signalctx)

Check warning on line 281 in pkg/cmd/serve.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/serve.go#L281

Added line #L281 was not covered by tests
}),
Example: server.ServeExample(programName),
}
Expand Down
5 changes: 1 addition & 4 deletions pkg/cmd/server/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
grpclog "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging"
"github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/selector"
"github.com/jzelinskie/cobrautil/v2"
"github.com/jzelinskie/cobrautil/v2/cobraotel"
"github.com/jzelinskie/cobrautil/v2/cobraproclimits"
"github.com/jzelinskie/cobrautil/v2/cobrazerolog"
"github.com/prometheus/client_golang/prometheus"
Expand Down Expand Up @@ -82,9 +81,7 @@ func DefaultPreRunE(programName string) cobrautil.CobraRunFunc {
// and zero under the same load and 0.9
cobraproclimits.SetMemLimitRunE(memlimit.WithRatio(0.9)),
cobraproclimits.SetProcLimitRunE(),
cobraotel.New("spicedb",
cobraotel.WithLogger(zerologr.New(&logging.Logger)),
).RunE(),
OTelPreRunE,
releases.CheckAndLogRunE(),
runtime.RunE(),
)
Expand Down
252 changes: 252 additions & 0 deletions pkg/cmd/server/otel.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
// pkg/cmd/server/otel.go
//
// This file replicates the OpenTelemetry provider initialization previously
// provided by github.com/jzelinskie/cobrautil/v2/cobraotel.
//
// Motivation:
// - Issue #712: otel-provider defaults to "none" so OTEL_* env vars alone
// cannot activate tracing. Native ownership lets us address this properly.
// - Issue #3095: cobraotel owns the TracerProvider with no way for the
// signal handler to call Shutdown/ForceFlush. Traces are dropped on
// SIGTERM. Native ownership closes this lifecycle gap.

package server

import (
"context"
"fmt"
"strings"
"time"

"github.com/spf13/cobra"
"go.opentelemetry.io/contrib/propagators/b3"
"go.opentelemetry.io/contrib/propagators/ot"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.7.0"
)

// otelProviderContextKey is the unexported context key for the TracerProvider.
type otelProviderContextKey struct{}

// OTelShutdownTimeout is the maximum time for flushing spans on shutdown.
const OTelShutdownTimeout = 30 * time.Second

// otelShutdowner is the interface satisfied by *sdktrace.TracerProvider.
// Using a local interface instead of the concrete type makes
// ShutdownOTelProvider testable with a mock without importing sdktrace.
type otelShutdowner interface {
Shutdown(ctx context.Context) error
ForceFlush(ctx context.Context) error
}

// RegisterOTelFlags registers all OpenTelemetry flags on cmd.
// The flags registered here are identical in name and default value to those
// previously registered by cobraotel.RegisterFlags.
func RegisterOTelFlags(cmd *cobra.Command) {
f := cmd.Flags()
f.String("otel-provider", "none",
`OpenTelemetry provider for tracing ("none", "otlpgrpc", "otlphttp")`)
f.String("otel-endpoint", "",
`OpenTelemetry collector endpoint - the endpoint can also be set by using enviroment variables`)
f.String("otel-service-name", "spicedb",
`service name for trace data`)
f.String("otel-trace-propagator", "w3c",
`OpenTelemetry trace propagation format ("b3", "w3c", "ottrace"). Add multiple propagators separated by comma.`)
f.Bool("otel-insecure", false,
`connect to the OpenTelemetry collector in plaintext`)
f.StringToString("otel-headers", nil,
`key=value pairs sent as headers to the OTel provider`)
f.Float64("otel-sample-ratio", 0.01,
`ratio of traces that are sampled`)

// Legacy flags
f.String("otel-jaeger-endpoint", "", "OpenTelemetry collector endpoint - the endpoint can also be set by using enviroment variables")
_ = f.MarkHidden("otel-jaeger-endpoint")
f.String("otel-jaeger-service-name", "spicedb", "service name for trace data")
_ = f.MarkHidden("otel-jaeger-service-name")
}

// InitOTelProvider reads otel-* flags from cmd, builds a TracerProvider, sets
// it as the global OTel provider, and returns it for lifecycle management.
//
// Returns (nil, nil) when otel-provider is "none". Callers must handle nil.
func InitOTelProvider(cmd *cobra.Command) (otelShutdowner, error) {
flags := cmd.Flags()

// cobra commands may have no context when called outside Execute()
ctx := cmd.Context()
if ctx == nil {
ctx = context.Background()
}

Check warning on line 85 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L84-L85

Added lines #L84 - L85 were not covered by tests

providerName, err := flags.GetString("otel-provider")
if err != nil {
return nil, fmt.Errorf("reading otel-provider: %w", err)
}

Check warning on line 90 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L89-L90

Added lines #L89 - L90 were not covered by tests
providerName = strings.TrimSpace(strings.ToLower(providerName))

if providerName == "none" || providerName == "" {
return nil, nil
}

endpoint, err := flags.GetString("otel-endpoint")
if err != nil {
return nil, fmt.Errorf("reading otel-endpoint: %w", err)
}

Check warning on line 100 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L99-L100

Added lines #L99 - L100 were not covered by tests

serviceName, err := flags.GetString("otel-service-name")
if err != nil {
return nil, fmt.Errorf("reading otel-service-name: %w", err)
}

Check warning on line 105 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L104-L105

Added lines #L104 - L105 were not covered by tests

propagatorName, err := flags.GetString("otel-trace-propagator")
if err != nil {
return nil, fmt.Errorf("reading otel-trace-propagator: %w", err)
}

Check warning on line 110 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L109-L110

Added lines #L109 - L110 were not covered by tests

insecureConn, err := flags.GetBool("otel-insecure")
if err != nil {
return nil, fmt.Errorf("reading otel-insecure: %w", err)
}

Check warning on line 115 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L114-L115

Added lines #L114 - L115 were not covered by tests

headers, err := flags.GetStringToString("otel-headers")
if err != nil {
return nil, fmt.Errorf("reading otel-headers: %w", err)
}

Check warning on line 120 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L119-L120

Added lines #L119 - L120 were not covered by tests

sampleRatio, err := flags.GetFloat64("otel-sample-ratio")
if err != nil {
return nil, fmt.Errorf("reading otel-sample-ratio: %w", err)
}

Check warning on line 125 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L124-L125

Added lines #L124 - L125 were not covered by tests

res, err := resource.New(ctx,
resource.WithAttributes(semconv.ServiceNameKey.String(serviceName)),
resource.WithProcess(),
resource.WithOS(),
resource.WithHost(),
)
if err != nil {
return nil, fmt.Errorf("building OTel resource: %w", err)
}

Check warning on line 135 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L134-L135

Added lines #L134 - L135 were not covered by tests

var exporter sdktrace.SpanExporter

switch providerName {
case "otlpgrpc":
opts := []otlptracegrpc.Option{}
if endpoint != "" {
opts = append(opts, otlptracegrpc.WithEndpoint(endpoint))
}
if insecureConn {
opts = append(opts, otlptracegrpc.WithInsecure())
}
if len(headers) > 0 {
opts = append(opts, otlptracegrpc.WithHeaders(headers))
}

Check warning on line 150 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L149-L150

Added lines #L149 - L150 were not covered by tests
exp, err := otlptracegrpc.New(ctx, opts...)
if err != nil {
return nil, fmt.Errorf("creating otlpgrpc exporter: %w", err)
}

Check warning on line 154 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L153-L154

Added lines #L153 - L154 were not covered by tests
exporter = exp

case "otlphttp":
opts := []otlptracehttp.Option{}
if endpoint != "" {
opts = append(opts, otlptracehttp.WithEndpoint(endpoint))
}
if insecureConn {
opts = append(opts, otlptracehttp.WithInsecure())
}
if len(headers) > 0 {
opts = append(opts, otlptracehttp.WithHeaders(headers))
}

Check warning on line 167 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L166-L167

Added lines #L166 - L167 were not covered by tests
exp, err := otlptracehttp.New(ctx, opts...)
if err != nil {
return nil, fmt.Errorf("creating otlphttp exporter: %w", err)
}

Check warning on line 171 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L170-L171

Added lines #L170 - L171 were not covered by tests
exporter = exp

default:
return nil, fmt.Errorf(
"unknown otel-provider %q: must be one of: none, otlpgrpc, otlphttp",
providerName,
)
}

tp := sdktrace.NewTracerProvider(
sdktrace.WithSampler(sdktrace.ParentBased(sdktrace.TraceIDRatioBased(sampleRatio))),
sdktrace.WithBatcher(exporter),
sdktrace.WithResource(res),
)

otel.SetTracerProvider(tp)
otel.SetTextMapPropagator(buildPropagator(propagatorName))

return tp, nil
}

// ShutdownOTelProvider flushes all pending spans then shuts the provider down.
// ForceFlush is always called before Shutdown. Safe to call with nil (no-op).
func ShutdownOTelProvider(ctx context.Context, provider otelShutdowner) error {
if provider == nil {
return nil
}

flushCtx, flushCancel := context.WithTimeout(ctx, OTelShutdownTimeout)
defer flushCancel()
if err := provider.ForceFlush(flushCtx); err != nil {
// Log but continue — Shutdown must still be attempted.
fmt.Printf("otel: ForceFlush error (continuing to Shutdown): %v\n", err)
}

shutCtx, shutCancel := context.WithTimeout(ctx, OTelShutdownTimeout)
defer shutCancel()
return provider.Shutdown(shutCtx)
}

// OTelPreRunE is a cobra.PreRunE function that initializes the OTel provider
// and stores it on the command context so the shutdown handler can retrieve it.
func OTelPreRunE(cmd *cobra.Command, _ []string) error {
provider, err := InitOTelProvider(cmd)
if err != nil {
return fmt.Errorf("initializing OTel provider: %w", err)
}

Check warning on line 218 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L217-L218

Added lines #L217 - L218 were not covered by tests
parent := cmd.Context()
if parent == nil {
parent = context.Background()
}

Check warning on line 222 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L221-L222

Added lines #L221 - L222 were not covered by tests
ctx := context.WithValue(parent, otelProviderContextKey{}, provider)
cmd.SetContext(ctx)
return nil
}

// OTelProviderFromContext retrieves the otelShutdowner stored by OTelPreRunE.
// Returns nil if no provider was initialized (e.g. otel-provider was "none").
func OTelProviderFromContext(ctx context.Context) otelShutdowner {
v, _ := ctx.Value(otelProviderContextKey{}).(otelShutdowner)
return v
}

// buildPropagator returns the TextMapPropagator for the given name.
func buildPropagator(names string) propagation.TextMapPropagator {
var tmPropagators []propagation.TextMapPropagator
for _, p := range strings.Split(names, ",") {
switch strings.ToLower(strings.TrimSpace(p)) {
case "b3":
tmPropagators = append(tmPropagators, b3.New())
case "ottrace":
tmPropagators = append(tmPropagators, ot.OT{})

Check warning on line 243 in pkg/cmd/server/otel.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/otel.go#L240-L243

Added lines #L240 - L243 were not covered by tests
case "w3c":
fallthrough
default:
tmPropagators = append(tmPropagators, propagation.Baggage{})
tmPropagators = append(tmPropagators, propagation.TraceContext{})
}
}
return propagation.NewCompositeTextMapPropagator(tmPropagators...)
}
Loading
Loading