diff --git a/chart/Chart.yaml b/chart/Chart.yaml
index 98ad6c5..ca61ec8 100644
--- a/chart/Chart.yaml
+++ b/chart/Chart.yaml
@@ -5,7 +5,7 @@ description: A Gradle Build Cache server with Redis backend for Theia IDE deploy
 type: application
 
 # Chart version - bump for breaking changes
-version: 0.3.1
+version: 0.4.0
 
 # Application version - matches the cache server version
 appVersion: "0.1.0"
diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml
index bef45c2..2ca153b 100644
--- a/chart/templates/configmap.yaml
+++ b/chart/templates/configmap.yaml
@@ -23,7 +23,8 @@ data:
       addr: "{{ .Release.Name }}-redis:6379"
 
     cache:
-      max_entry_size_mb: 100
+      max_entry_size_mb: {{ .Values.cache.maxEntrySizeMB }}
+      verify_cas_hash: {{ .Values.cache.verifyCASHash }}
 
     auth:
       enabled: {{ .Values.auth.enabled }}
diff --git a/chart/values.yaml b/chart/values.yaml
index b87db7a..0269a8a 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -43,6 +43,13 @@ resources:
     memory: "2Gi"
     cpu: "1000m"
 
+# Cache settings (shared by Gradle and Bazel)
+cache:
+  # Maximum entry size in MB
+  maxEntrySizeMB: 100
+  # Verify SHA-256 hash of Bazel CAS blobs on PUT
+  verifyCASHash: true
+
 tls:
   # Enable TLS for the cache server
   enabled: false
diff --git a/src/configs/config.yaml b/src/configs/config.yaml
index aa5217e..223e24e 100644
--- a/src/configs/config.yaml
+++ b/src/configs/config.yaml
@@ -10,6 +10,7 @@ storage:
 
 cache:
   max_entry_size_mb: 100
+  verify_cas_hash: true
 
 auth:
   enabled: true
diff --git a/src/internal/config/config.go b/src/internal/config/config.go
index d797ce3..5079439 100644
--- a/src/internal/config/config.go
+++ b/src/internal/config/config.go
@@ -39,6 +39,7 @@ type StorageConfig struct {
 
 type CacheConfig struct {
 	MaxEntrySizeMB int64 `mapstructure:"max_entry_size_mb"`
+	VerifyCASHash  bool  `mapstructure:"verify_cas_hash"`
 }
 
 type AuthConfig struct {
@@ -82,6 +83,7 @@ func Load(configPath string) (*Config, error) {
 	v.SetDefault("storage.db", 0)
 
 	v.SetDefault("cache.max_entry_size_mb", 100)
+	v.SetDefault("cache.verify_cas_hash", true)
 
 	v.SetDefault("auth.enabled", true)
 
diff --git a/src/internal/handler/bazel_get.go b/src/internal/handler/bazel_get.go
new file mode 100644
index 0000000..8f58b2c
--- /dev/null
+++ b/src/internal/handler/bazel_get.go
@@ -0,0 +1,50 @@
+package handler
+
+import (
+	"errors"
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+	"github.com/kevingruber/gradle-cache/internal/storage"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/metric"
+)
+
+// GetAC handles GET requests for Bazel action cache entries.
+func (h *BazelHandler) GetAC(c *gin.Context) {
+	h.get(c, h.acStorage, "ac")
+}
+
+// GetCAS handles GET requests for Bazel content-addressable storage entries.
+func (h *BazelHandler) GetCAS(c *gin.Context) {
+	h.get(c, h.casStorage, "cas")
+}
+
+// get serves one entry from store. It answers 400 for a malformed hash, 404
+// (counting a miss) when store reports storage.ErrNotFound, 500 on any other
+// storage error, and otherwise streams the blob with status 200 (counting a hit).
+func (h *BazelHandler) get(c *gin.Context, store storage.Storage, cacheType string) {
+	hash := c.Param("hash")
+	if !isValidSHA256Hex(hash) {
+		c.Status(http.StatusBadRequest)
+		return
+	}
+
+	attrs := metric.WithAttributes(attribute.String("cache_type", cacheType))
+
+	reader, size, err := store.Get(c.Request.Context(), hash)
+	if err != nil {
+		if errors.Is(err, storage.ErrNotFound) {
+			h.metrics.CacheMisses.Add(c.Request.Context(), 1, attrs)
+			c.Status(http.StatusNotFound)
+			return
+		}
+		h.logger.Error().Err(err).Str("hash", hash).Str("cache_type", cacheType).Msg("failed to get bazel cache entry")
+		c.Status(http.StatusInternalServerError)
+		return
+	}
+	defer reader.Close()
+
+	h.metrics.CacheHits.Add(c.Request.Context(), 1, attrs)
+	c.DataFromReader(http.StatusOK, size, "application/octet-stream", reader, nil)
+}
diff --git a/src/internal/handler/bazel_handler.go b/src/internal/handler/bazel_handler.go
new file mode 100644
index 0000000..53b5cb8
--- /dev/null
+++ b/src/internal/handler/bazel_handler.go
@@ -0,0 +1,35 @@
+package handler
+
+import (
+	"github.com/kevingruber/gradle-cache/internal/storage"
+	"github.com/rs/zerolog"
+)
+
+// BazelHandler handles Bazel HTTP remote cache requests.
+// Bazel uses two namespaces: /ac/ (action cache) and /cas/ (content-addressable storage).
+type BazelHandler struct {
+	acStorage    storage.Storage
+	casStorage   storage.Storage
+	maxEntrySize int64
+	verifyCAS    bool
+	logger       zerolog.Logger
+	metrics      *BazelMetrics
+}
+
+// NewBazelHandler creates a new Bazel cache handler.
+// The store must implement NamespacedStorage to isolate AC and CAS keys.
+func NewBazelHandler(store storage.NamespacedStorage, maxEntrySize int64, verifyCAS bool, logger zerolog.Logger) (*BazelHandler, error) {
+	metrics, err := NewBazelMetrics()
+	if err != nil {
+		return nil, err
+	}
+
+	return &BazelHandler{
+		acStorage:    store.WithNamespace("bazel:ac"),
+		casStorage:   store.WithNamespace("bazel:cas"),
+		maxEntrySize: maxEntrySize,
+		verifyCAS:    verifyCAS,
+		logger:       logger,
+		metrics:      metrics,
+	}, nil
+}
diff --git a/src/internal/handler/bazel_metrics.go b/src/internal/handler/bazel_metrics.go
new file mode 100644
index 0000000..fd9b23f
--- /dev/null
+++ b/src/internal/handler/bazel_metrics.go
@@ -0,0 +1,55 @@
+package handler
+
+import (
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/metric"
+)
+
+// BazelMetrics groups the OpenTelemetry instruments used by BazelHandler.
+type BazelMetrics struct {
+	CacheHits      metric.Int64Counter
+	CacheMisses    metric.Int64Counter
+	HashMismatches metric.Int64Counter
+	EntrySize      metric.Float64Histogram
+}
+
+// NewBazelMetrics creates the Bazel cache instruments on the "bazel-cache"
+// meter and returns them, or the first error reported while creating one.
+func NewBazelMetrics() (*BazelMetrics, error) {
+	meter := otel.Meter("bazel-cache")
+
+	cacheHits, err := meter.Int64Counter(
+		"bazel_cache.cache_hits",
+		metric.WithDescription("Total number of Bazel cache hits"))
+	if err != nil {
+		return nil, err
+	}
+
+	cacheMisses, err := meter.Int64Counter(
+		"bazel_cache.cache_misses",
+		metric.WithDescription("Total number of Bazel cache misses"))
+	if err != nil {
+		return nil, err
+	}
+
+	hashMismatches, err := meter.Int64Counter(
+		"bazel_cache.hash_mismatches",
+		metric.WithDescription("Total number of CAS hash verification failures"))
+	if err != nil {
+		return nil, err
+	}
+
+	entrySize, err := meter.Float64Histogram(
+		"bazel_cache.entry_size",
+		metric.WithDescription("Size of Bazel cache entries in bytes"))
+	if err != nil {
+		return nil, err
+	}
+
+	return &BazelMetrics{
+		CacheHits:      cacheHits,
+		CacheMisses:    cacheMisses,
+		HashMismatches: hashMismatches,
+		EntrySize:      entrySize,
+	}, nil
+}
diff --git a/src/internal/handler/bazel_put.go b/src/internal/handler/bazel_put.go
new file mode 100644
index 0000000..34c34f5
--- /dev/null
+++ b/src/internal/handler/bazel_put.go
@@ -0,0 +1,165 @@
+package handler
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+
+	"github.com/gin-gonic/gin"
+	"github.com/kevingruber/gradle-cache/internal/storage"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/metric"
+)
+
+// PutAC handles PUT requests to store Bazel action cache entries.
+func (h *BazelHandler) PutAC(c *gin.Context) {
+	h.put(c, h.acStorage, "ac", false)
+}
+
+// PutCAS handles PUT requests to store Bazel CAS entries.
+// If verifyCAS is enabled, the content hash is verified against the URL hash.
+func (h *BazelHandler) PutCAS(c *gin.Context) {
+	h.put(c, h.casStorage, "cas", h.verifyCAS)
+}
+
+// put validates the hash path parameter and the declared Content-Length, then
+// stores the request body in store, verifying its SHA-256 first when
+// verifyHash is set.
+func (h *BazelHandler) put(c *gin.Context, store storage.Storage, cacheType string, verifyHash bool) {
+	hash := c.Param("hash")
+	if !isValidSHA256Hex(hash) {
+		c.Status(http.StatusBadRequest)
+		return
+	}
+
+	attrs := metric.WithAttributes(attribute.String("cache_type", cacheType))
+
+	// Early rejection if Content-Length is known and too large
+	contentLength := c.Request.ContentLength
+	if contentLength > h.maxEntrySize {
+		h.rejectTooLarge(c, hash, cacheType, contentLength)
+		return
+	}
+
+	if verifyHash {
+		h.putWithVerify(c, store, hash, cacheType, attrs)
+		return
+	}
+	h.putDirect(c, store, hash, cacheType, contentLength, attrs)
+}
+
+// putDirect streams the request body to storage without hash verification.
+// If Content-Length is known, streams directly. Otherwise spools to a temp file.
+func (h *BazelHandler) putDirect(c *gin.Context, store storage.Storage, hash, cacheType string, contentLength int64, attrs metric.MeasurementOption) {
+	if contentLength >= 0 {
+		// Content-Length known: stream directly to storage
+		limited := io.LimitReader(c.Request.Body, contentLength)
+		h.storeEntry(c, store, hash, cacheType, limited, contentLength, attrs)
+		return
+	}
+
+	// Chunked transfer: spool to temp file to determine size
+	size, reader, cleanup, err := h.spoolToTempFile(c.Request.Body)
+	if err != nil {
+		h.logger.Error().Err(err).Str("hash", hash).Str("cache_type", cacheType).Msg("failed to read request body")
+		c.Status(http.StatusInternalServerError)
+		return
+	}
+	defer cleanup()
+
+	if size > h.maxEntrySize {
+		h.rejectTooLarge(c, hash, cacheType, size)
+		return
+	}
+	h.storeEntry(c, store, hash, cacheType, reader, size, attrs)
+}
+
+// putWithVerify spools the upload to a temp file while computing the SHA-256 hash,
+// then verifies the hash before storing.
+func (h *BazelHandler) putWithVerify(c *gin.Context, store storage.Storage, hash, cacheType string, attrs metric.MeasurementOption) {
+	// The tee sits underneath the size limit applied by spoolToTempFile, so the
+	// hasher sees exactly the bytes that reach the temp file.
+	hasher := sha256.New()
+	size, reader, cleanup, err := h.spoolToTempFile(io.TeeReader(c.Request.Body, hasher))
+	if err != nil {
+		h.logger.Error().Err(err).Str("hash", hash).Str("cache_type", cacheType).Msg("failed to read request body")
+		c.Status(http.StatusInternalServerError)
+		return
+	}
+	defer cleanup()
+
+	if size > h.maxEntrySize {
+		h.rejectTooLarge(c, hash, cacheType, size)
+		return
+	}
+
+	computedHex := hex.EncodeToString(hasher.Sum(nil))
+	if computedHex != hash {
+		h.metrics.HashMismatches.Add(c.Request.Context(), 1, attrs)
+		h.logger.Warn().
+			Str("expected", hash).
+			Str("computed", computedHex).
+			Str("cache_type", cacheType).
+			Msg("bazel CAS hash mismatch")
+		c.Status(http.StatusBadRequest)
+		return
+	}
+	h.storeEntry(c, store, hash, cacheType, reader, size, attrs)
+}
+
+// storeEntry records the entry size metric, writes size bytes from r to store
+// under hash, and answers 200, or 500 if the storage write fails.
+func (h *BazelHandler) storeEntry(c *gin.Context, store storage.Storage, hash, cacheType string, r io.Reader, size int64, attrs metric.MeasurementOption) {
+	h.metrics.EntrySize.Record(c.Request.Context(), float64(size), attrs)
+
+	if err := store.Put(c.Request.Context(), hash, r, size); err != nil {
+		h.logger.Error().Err(err).Str("hash", hash).Str("cache_type", cacheType).Msg("failed to store bazel cache entry")
+		c.Status(http.StatusInternalServerError)
+		return
+	}
+	c.Status(http.StatusOK)
+}
+
+// rejectTooLarge logs an oversized upload and answers 413. For spooled uploads
+// size is capped at maxEntrySize+1, so it is a lower bound on the real size.
+func (h *BazelHandler) rejectTooLarge(c *gin.Context, hash, cacheType string, size int64) {
+	h.logger.Warn().
+		Str("hash", hash).
+		Str("cache_type", cacheType).
+		Int64("size", size).
+		Int64("max_size", h.maxEntrySize).
+		Msg("bazel cache entry too large")
+	c.Status(http.StatusRequestEntityTooLarge)
+}
+
+// spoolToTempFile copies from r (limited to maxEntrySize+1) into a temp file
+// and returns the written size, a reader seeked to start, and a cleanup function.
+// The cleanup function closes and removes the temp file; it is non-nil only when
+// the returned error is nil, because on failure the file is cleaned up here.
+func (h *BazelHandler) spoolToTempFile(r io.Reader) (int64, io.Reader, func(), error) {
+	f, err := os.CreateTemp("", "bazel-spool-*")
+	if err != nil {
+		return 0, nil, nil, fmt.Errorf("create temp file: %w", err)
+	}
+	cleanup := func() {
+		f.Close()
+		os.Remove(f.Name())
+	}
+
+	limited := io.LimitReader(r, h.maxEntrySize+1)
+	written, err := io.Copy(f, limited)
+	if err != nil {
+		cleanup()
+		return 0, nil, nil, fmt.Errorf("spool to temp file: %w", err)
+	}
+
+	if _, err := f.Seek(0, io.SeekStart); err != nil {
+		cleanup()
+		return 0, nil, nil, fmt.Errorf("seek temp file: %w", err)
+	}
+
+	return written, f, cleanup, nil
+}
diff --git a/src/internal/handler/validation.go b/src/internal/handler/validation.go
new file mode 100644
index 0000000..f1fc98a
--- /dev/null
+++ b/src/internal/handler/validation.go
@@ -0,0 +1,14 @@
+package handler
+
+// isValidSHA256Hex returns true if s is a valid lowercase hex-encoded SHA-256 hash (64 characters).
+func isValidSHA256Hex(s string) bool {
+	if len(s) != 64 {
+		return false
+	}
+	for _, c := range s {
+		if !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')) {
+			return false
+		}
+	}
+	return true
+}
diff --git a/src/internal/server/server.go b/src/internal/server/server.go
index e92a2f8..7edd1b3 100644
--- a/src/internal/server/server.go
+++ b/src/internal/server/server.go
@@ -86,12 +86,33 @@ func (s *Server) setupRoutes() {
 		s.logger.Fatal().Err(err).Msg("Failed to initialize cache")
 	}
 
-	// Create cache group with optional auth
-	cacheGroup := s.router.Group("/cache")
+	// Gradle cache endpoints
+	gradleGroup := s.router.Group("/gradle")
+	gradleGroup.GET("/:key", s.cacheAuth(false), cacheHandler.Get)
+	gradleGroup.HEAD("/:key", s.cacheAuth(false), cacheHandler.Head)
+	gradleGroup.PUT("/:key", s.cacheAuth(true), cacheHandler.Put)
+
+	// Bazel HTTP remote cache endpoints
+	nsStorage, ok := s.storage.(storage.NamespacedStorage)
+	if !ok {
+		s.logger.Fatal().Msg("Storage backend does not support namespaces, required for Bazel cache")
+	}
+
+	bazelHandler, err := handler.NewBazelHandler(
+		nsStorage,
+		s.cfg.MaxEntrySizeBytes(),
+		s.cfg.Cache.VerifyCASHash,
+		s.logger,
+	)
+	if err != nil {
+		s.logger.Fatal().Err(err).Msg("Failed to initialize Bazel cache handler")
+	}
 
-	cacheGroup.GET("/:key", s.cacheAuth(false), cacheHandler.Get)
-	cacheGroup.HEAD("/:key", s.cacheAuth(false), cacheHandler.Head)
-	cacheGroup.PUT("/:key", s.cacheAuth(true), cacheHandler.Put)
+	bazelGroup := s.router.Group("/bazel")
+	bazelGroup.GET("/ac/:hash", s.cacheAuth(false), bazelHandler.GetAC)
+	bazelGroup.PUT("/ac/:hash", s.cacheAuth(true), bazelHandler.PutAC)
+	bazelGroup.GET("/cas/:hash", s.cacheAuth(false), bazelHandler.GetCAS)
+	bazelGroup.PUT("/cas/:hash", s.cacheAuth(true), bazelHandler.PutCAS)
 }
 
 func (s *Server) cacheAuth(requireWriter bool) gin.HandlerFunc {