diff --git a/cachew.hcl b/cachew.hcl index cc217cf..6ab6c7f 100644 --- a/cachew.hcl +++ b/cachew.hcl @@ -44,3 +44,5 @@ gomod { } hermit { } + +proxy { } diff --git a/cmd/cachewd/main.go b/cmd/cachewd/main.go index 18151e3..3e8a2b9 100644 --- a/cmd/cachewd/main.go +++ b/cmd/cachewd/main.go @@ -119,6 +119,7 @@ func newRegistries(scheduler jobscheduler.Provider, cloneManagerProvider gitclon strategy.RegisterGitHubReleases(sr, tokenManagerProvider) strategy.RegisterHermit(sr) strategy.RegisterHost(sr) + strategy.RegisterHTTPProxy(sr) git.Register(sr, scheduler, cloneManagerProvider, tokenManagerProvider) gomod.Register(sr, cloneManagerProvider) @@ -138,7 +139,7 @@ func printSchema(kctx *kong.Context, cr *cache.Registry, sr *strategy.Registry) } } -func newMux(ctx context.Context, cr *cache.Registry, sr *strategy.Registry, providersConfigHCL *hcl.AST, vars map[string]string) (*http.ServeMux, error) { +func newMux(ctx context.Context, cr *cache.Registry, sr *strategy.Registry, providersConfigHCL *hcl.AST, vars map[string]string) (http.Handler, error) { mux := http.NewServeMux() mux.HandleFunc("GET /_liveness", func(w http.ResponseWriter, _ *http.Request) { @@ -171,11 +172,12 @@ func newMux(ctx context.Context, cr *cache.Registry, sr *strategy.Registry, prov http.DefaultServeMux.ServeHTTP(w, r) })) - if err := config.Load(ctx, cr, sr, providersConfigHCL, mux, vars); err != nil { + handler, err := config.Load(ctx, cr, sr, providersConfigHCL, mux, vars) + if err != nil { return nil, errors.Errorf("load config: %w", err) } - return mux, nil + return handler, nil } // extractPathPrefix extracts the strategy name, path prefix from a request path. @@ -189,13 +191,13 @@ func extractPathPrefix(path string) string { return prefix } -func newServer(ctx context.Context, mux *http.ServeMux, bind string, metricsConfig metrics.Config) *http.Server { +func newServer(ctx context.Context, muxHandler http.Handler, bind string, metricsConfig metrics.Config) *http.Server { logger := logging.FromContext(ctx) var handler http.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { labeler, _ := otelhttp.LabelerFromContext(r.Context()) labeler.Add(attribute.String("cachew.http.path.prefix", extractPathPrefix(r.URL.Path))) - mux.ServeHTTP(w, r) + muxHandler.ServeHTTP(w, r) }) // Add standard otelhttp middleware diff --git a/internal/config/config.go b/internal/config/config.go index 0c512ea..67792f2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -88,6 +88,9 @@ func Split[GlobalConfig any](ast *hcl.AST) (global, providers *hcl.AST) { } // Load HCL configuration and use that to construct the cache backend, and proxy strategies. +// It returns an http.Handler that wraps mux — any loaded strategies that implement +// strategy.Interceptor are applied as middleware before ServeMux route matching, so +// that they can inspect r.RequestURI rather than the path-only r.URL.Path. func Load( ctx context.Context, cr *cache.Registry, @@ -95,7 +98,7 @@ func Load( ast *hcl.AST, mux *http.ServeMux, vars map[string]string, -) error { +) (http.Handler, error) { logger := logging.FromContext(ctx) expandVars(ast, vars) @@ -114,16 +117,16 @@ func Load( strategyCandidates = append(strategyCandidates, node) continue } else if err != nil { - return errors.Errorf("%s: %w", node.Pos, err) + return nil, errors.Errorf("%s: %w", node.Pos, err) } caches = append(caches, c) case *hcl.Attribute: - return errors.Errorf("%s: attributes are not allowed", node.Pos) + return nil, errors.Errorf("%s: attributes are not allowed", node.Pos) } } if len(caches) == 0 { - return errors.Errorf("%s: expected at least one cache backend", ast.Pos) + return nil, errors.Errorf("%s: expected at least one cache backend", ast.Pos) } cache := cache.MaybeNewTiered(ctx, caches) @@ -131,16 +134,29 @@ func Load( logger.DebugContext(ctx, "Cache backend", "cache", cache) // Second pass, instantiate strategies and bind them to the mux. + // Collect strategies that implement Interceptor separately — they need + // to run before ServeMux route matching, not as mux routes. + var interceptors []strategy.Interceptor for _, block := range strategyCandidates { - strategy := block.Name - logger := logger.With("strategy", strategy) - mlog := &loggingMux{logger: logger, mux: mux} - _, err := sr.Create(ctx, strategy, block, cache, mlog, vars) + name := block.Name + slogger := logger.With("strategy", name) + mlog := &loggingMux{logger: slogger, mux: mux} + s, err := sr.Create(ctx, name, block, cache, mlog, vars) if err != nil { - return errors.Errorf("%s: %w", block.Pos, err) + return nil, errors.Errorf("%s: %w", block.Pos, err) + } + if interceptor, ok := s.(strategy.Interceptor); ok { + interceptors = append(interceptors, interceptor) } } - return nil + + // Wrap the mux with interceptors. The last-registered interceptor runs + // outermost so that registration order matches interception order. + var h http.Handler = mux + for i := len(interceptors) - 1; i >= 0; i-- { + h = interceptors[i].Intercept(h) + } + return h, nil } // ExpandVars expands environment variable references in HCL strings and heredocs. diff --git a/internal/strategy/api.go b/internal/strategy/api.go index c2730c1..4bd59f8 100644 --- a/internal/strategy/api.go +++ b/internal/strategy/api.go @@ -97,3 +97,15 @@ func (r *Registry) Create( type Strategy interface { String() string } + +// Interceptor is an optional interface a Strategy may implement to intercept +// incoming HTTP requests before ServeMux route matching. This is necessary for +// strategies like the HTTP proxy that need to inspect r.RequestURI rather than +// r.URL.Path — registering a "/" catch-all on the mux is insufficient because +// more-specific routes (e.g. /api/v1/) still win for overlapping paths. +type Interceptor interface { + Strategy + // Intercept wraps next, returning a handler that intercepts matching + // requests and delegates all others to next. + Intercept(next http.Handler) http.Handler +} diff --git a/internal/strategy/proxy.go b/internal/strategy/proxy.go new file mode 100644 index 0000000..953fa41 --- /dev/null +++ b/internal/strategy/proxy.go @@ -0,0 +1,117 @@ +package strategy + +import ( + "context" + "log/slog" + "net/http" + "net/url" + "strings" + + "github.com/block/cachew/internal/cache" + "github.com/block/cachew/internal/logging" + "github.com/block/cachew/internal/strategy/handler" +) + +// RegisterHTTPProxy registers a caching HTTP proxy strategy. It intercepts +// absolute-form proxy requests (e.g. from sdkmanager with --proxy_host / +// --proxy_port) where the client sends: +// +// GET http://dl.google.com/some/path HTTP/1.1 +// +// The request URI is upgraded to HTTPS and the response is fetched and cached. +// Only GET requests are intercepted; other methods are passed through. +func RegisterHTTPProxy(r *Registry) { + Register(r, "proxy", "Caching HTTP proxy for absolute-form proxy requests.", func(ctx context.Context, _ ProxyConfig, c cache.Cache, mux Mux) (*HTTPProxy, error) { + return NewHTTPProxy(ctx, c, mux) + }) +} + +// ProxyConfig holds configuration for the HTTP proxy strategy. +// Currently no options are required. +type ProxyConfig struct{} + +// HTTPProxy is a caching HTTP proxy strategy that handles standard HTTP proxy +// requests in absolute form (GET http://host/path HTTP/1.1). +// +// It implements the Interceptor interface rather than registering on the mux +// directly, so that absolute-form request detection happens before ServeMux +// route matching. This prevents overlap with more-specific routes such as +// /api/v1/ or /admin/ when the proxied upstream path happens to match them. +type HTTPProxy struct { + logger *slog.Logger + handler http.Handler +} + +var ( + _ Strategy = (*HTTPProxy)(nil) + _ Interceptor = (*HTTPProxy)(nil) +) + +func NewHTTPProxy(ctx context.Context, c cache.Cache, _ Mux) (*HTTPProxy, error) { + logger := logging.FromContext(ctx) + client := &http.Client{} + p := &HTTPProxy{logger: logger} + + p.handler = handler.New(client, c). + CacheKey(func(r *http.Request) string { + target := p.parseProxyURI(r) + if target == nil { + return "" + } + return target.String() + }). + Transform(func(r *http.Request) (*http.Request, error) { + target := p.parseProxyURI(r) + if target == nil { + return r, nil + } + return http.NewRequestWithContext(r.Context(), http.MethodGet, target.String(), nil) + }). + OnError(func(err error, w http.ResponseWriter, r *http.Request) { + target := p.parseProxyURI(r) + if target == nil { + http.NotFound(w, r) + return + } + p.logger.ErrorContext(r.Context(), "Proxy request failed", + slog.String("url", target.String()), + slog.String("error", err.Error())) + http.Error(w, "proxy error: "+err.Error(), http.StatusBadGateway) + }) + + logger.InfoContext(ctx, "HTTP proxy strategy initialized") + return p, nil +} + +func (p *HTTPProxy) String() string { return "proxy" } + +// Intercept returns an http.Handler that intercepts absolute-form GET proxy +// requests before they reach the ServeMux, delegating all other requests to +// next. This ensures that a proxied path like /api/v1/... is not accidentally +// routed to cachew's own API handler. +func (p *HTTPProxy) Intercept(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Only intercept absolute-form GET requests. Non-GET requests + // (HEAD, POST, …) are not cached and are passed through. + if r.Method == http.MethodGet && strings.HasPrefix(r.RequestURI, "http://") { + p.handler.ServeHTTP(w, r) + return + } + next.ServeHTTP(w, r) + }) +} + +// parseProxyURI returns the HTTPS upstream URL for an absolute-form proxy +// request, or nil if the request is not a proxy request. +func (p *HTTPProxy) parseProxyURI(r *http.Request) *url.URL { + if !strings.HasPrefix(r.RequestURI, "http://") { + return nil + } + target, err := url.Parse(r.RequestURI) + if err != nil || target.Host == "" { + return nil + } + // Upgrade to HTTPS for the upstream fetch. + target.Scheme = "https" + return target +} diff --git a/internal/strategy/proxy_test.go b/internal/strategy/proxy_test.go new file mode 100644 index 0000000..dd0fb24 --- /dev/null +++ b/internal/strategy/proxy_test.go @@ -0,0 +1,290 @@ +package strategy_test + +import ( + "context" + "log/slog" + "net/http" + "net/http/httptest" + "sync" + "testing" + "time" + + "github.com/alecthomas/assert/v2" + + "github.com/block/cachew/internal/cache" + "github.com/block/cachew/internal/logging" + "github.com/block/cachew/internal/strategy" +) + +// httpTransportMutexProxy prevents concurrent modification of http.DefaultTransport +// across proxy tests, mirroring the same pattern used in hermit_test.go. +var httpTransportMutexProxy sync.Mutex //nolint:gochecknoglobals + +// setupProxyTest creates an HTTPProxy and returns the handler that results from +// wrapping a fresh ServeMux via proxy.Intercept. This mirrors how config.Load +// wires up interceptor strategies in production. +func setupProxyTest(t *testing.T) (http.Handler, context.Context, cache.Cache) { + t.Helper() + + _, ctx := logging.Configure(context.Background(), logging.Config{Level: slog.LevelError}) + memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{MaxTTL: time.Hour}) + assert.NoError(t, err) + t.Cleanup(func() { memCache.Close() }) + + mux := http.NewServeMux() + p, err := strategy.NewHTTPProxy(ctx, memCache, mux) + assert.NoError(t, err) + + // Wrap the mux with the proxy interceptor, just as config.Load does. + return p.Intercept(mux), ctx, memCache +} + +// TestHTTPProxyCaching verifies that a second identical proxy request is served +// from cache without hitting the upstream server. +func TestHTTPProxyCaching(t *testing.T) { + httpTransportMutexProxy.Lock() + defer httpTransportMutexProxy.Unlock() + + callCount := 0 + backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + callCount++ + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("android-repository-content")) + })) + defer backend.Close() + + originalTransport := http.DefaultTransport + defer func() { http.DefaultTransport = originalTransport }() //nolint:reassign + http.DefaultTransport = &mockTransport{backend: backend, originalTransport: originalTransport} //nolint:reassign + + handler, ctx, _ := setupProxyTest(t) + + // Simulate an absolute-form proxy request: GET http://dl.google.com/... HTTP/1.1 + req1 := httptest.NewRequestWithContext(ctx, http.MethodGet, "http://dl.google.com/android/repository/addons_list-5.xml", nil) + w1 := httptest.NewRecorder() + handler.ServeHTTP(w1, req1) + + assert.Equal(t, http.StatusOK, w1.Code) + assert.Equal(t, "android-repository-content", w1.Body.String()) + assert.Equal(t, 1, callCount) + + // Second request — must be a cache hit. + req2 := httptest.NewRequestWithContext(ctx, http.MethodGet, "http://dl.google.com/android/repository/addons_list-5.xml", nil) + w2 := httptest.NewRecorder() + handler.ServeHTTP(w2, req2) + + assert.Equal(t, http.StatusOK, w2.Code) + assert.Equal(t, "android-repository-content", w2.Body.String()) + assert.Equal(t, 1, callCount, "second request should be served from cache") +} + +// TestHTTPProxyNonAbsoluteRequest verifies that requests without an absolute +// http:// URI (i.e. normal relative-path requests to cachew itself) are passed +// through to the next handler (returning 404 from the empty mux in tests). +func TestHTTPProxyNonAbsoluteRequest(t *testing.T) { + handler, ctx, _ := setupProxyTest(t) + + req := httptest.NewRequestWithContext(ctx, http.MethodGet, "/some/local/path", nil) + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + + assert.Equal(t, http.StatusNotFound, w.Code) +} + +// TestHTTPProxyNonGETNotIntercepted verifies that absolute-form requests with +// methods other than GET are passed through to the next handler, not cached. +func TestHTTPProxyNonGETNotIntercepted(t *testing.T) { + handler, ctx, _ := setupProxyTest(t) + + // POST with an absolute-form URI should NOT be intercepted by the proxy. + req := httptest.NewRequestWithContext(ctx, http.MethodPost, "http://dl.google.com/some/path", nil) + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + + // Falls through to the empty mux → 404 (not 502 bad gateway from the proxy). + assert.Equal(t, http.StatusNotFound, w.Code) +} + +// TestHTTPProxyDoesNotShadowSpecificRoutes verifies that more-specific mux +// routes still win for relative-path requests even when the proxy intercepts. +func TestHTTPProxyDoesNotShadowSpecificRoutes(t *testing.T) { + _, ctx := logging.Configure(context.Background(), logging.Config{Level: slog.LevelError}) + memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{MaxTTL: time.Hour}) + assert.NoError(t, err) + defer memCache.Close() + + mux := http.NewServeMux() + mux.HandleFunc("GET /api/v1/health", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("healthy")) + }) + + p, err := strategy.NewHTTPProxy(ctx, memCache, mux) + assert.NoError(t, err) + handler := p.Intercept(mux) + + // A normal relative-path request to the API route should still be served. + req := httptest.NewRequestWithContext(ctx, http.MethodGet, "/api/v1/health", nil) + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + + assert.Equal(t, http.StatusOK, w.Code) + assert.Equal(t, "healthy", w.Body.String()) +} + +// TestHTTPProxyNonOKStatus verifies that non-200 responses from the upstream +// are proxied back to the client but are NOT stored in the cache. +func TestHTTPProxyNonOKStatus(t *testing.T) { + httpTransportMutexProxy.Lock() + defer httpTransportMutexProxy.Unlock() + + backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusNotFound) + _, _ = w.Write([]byte("resource not found")) + })) + defer backend.Close() + + originalTransport := http.DefaultTransport + defer func() { http.DefaultTransport = originalTransport }() //nolint:reassign + http.DefaultTransport = &mockTransport{backend: backend, originalTransport: originalTransport} //nolint:reassign + + handler, ctx, memCache := setupProxyTest(t) + + req := httptest.NewRequestWithContext(ctx, http.MethodGet, "http://dl.google.com/android/repository/missing.xml", nil) + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + + assert.Equal(t, http.StatusNotFound, w.Code) + assert.Equal(t, "resource not found", w.Body.String()) + + // Nothing should have been written to the cache. + key := cache.NewKey("https://dl.google.com/android/repository/missing.xml") + _, _, err := memCache.Open(context.Background(), key) + assert.Error(t, err, "non-OK responses should not be cached") +} + +// TestHTTPProxyHTTPSUpgrade verifies that incoming http:// proxy requests are +// fetched from upstream over HTTPS (not HTTP). +func TestHTTPProxyHTTPSUpgrade(t *testing.T) { + httpTransportMutexProxy.Lock() + defer httpTransportMutexProxy.Unlock() + + backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("ok")) + })) + defer backend.Close() + + originalTransport := http.DefaultTransport + defer func() { http.DefaultTransport = originalTransport }() //nolint:reassign + http.DefaultTransport = &mockTransport{backend: backend, originalTransport: originalTransport} //nolint:reassign + + handler, ctx, memCache := setupProxyTest(t) + + req := httptest.NewRequestWithContext(ctx, http.MethodGet, "http://dl.google.com/android/repository/sdkmanager.jar", nil) + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + + assert.Equal(t, http.StatusOK, w.Code) + + // The cache key should use the HTTPS-upgraded URL, not the original http:// one. + httpsKey := cache.NewKey("https://dl.google.com/android/repository/sdkmanager.jar") + cr, _, err := memCache.Open(context.Background(), httpsKey) + assert.NoError(t, err, "response should be cached under the HTTPS URL key") + if cr != nil { + cr.Close() + } + + // Verify the original http:// key is NOT used for caching. + httpKey := cache.NewKey("http://dl.google.com/android/repository/sdkmanager.jar") + _, _, err = memCache.Open(context.Background(), httpKey) + assert.Error(t, err, "cache key should use HTTPS, not HTTP") +} + +// TestHTTPProxyDifferentURLs verifies that requests to distinct URLs are cached +// independently — each unique URL results in exactly one upstream call. +func TestHTTPProxyDifferentURLs(t *testing.T) { + httpTransportMutexProxy.Lock() + defer httpTransportMutexProxy.Unlock() + + callCount := 0 + backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + callCount++ + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("content for " + r.URL.Path)) + })) + defer backend.Close() + + originalTransport := http.DefaultTransport + defer func() { http.DefaultTransport = originalTransport }() //nolint:reassign + http.DefaultTransport = &mockTransport{backend: backend, originalTransport: originalTransport} //nolint:reassign + + handler, ctx, _ := setupProxyTest(t) + + urls := []string{ + "http://dl.google.com/android/repository/addons_list-5.xml", + "http://dl.google.com/android/repository/repository2-3.xml", + "http://dl.google.com/android/repository/sys-img/android/sys-img2-1.xml", + } + + for _, u := range urls { + req := httptest.NewRequestWithContext(ctx, http.MethodGet, u, nil) + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + assert.Equal(t, http.StatusOK, w.Code) + } + + assert.Equal(t, len(urls), callCount, "each distinct URL should hit upstream exactly once") + + // Repeat all requests — all should be cache hits now. + for _, u := range urls { + req := httptest.NewRequestWithContext(ctx, http.MethodGet, u, nil) + w := httptest.NewRecorder() + handler.ServeHTTP(w, req) + assert.Equal(t, http.StatusOK, w.Code) + } + + assert.Equal(t, len(urls), callCount, "repeated requests should be served from cache") +} + +// TestHTTPProxyString verifies the strategy identifier is "proxy". +func TestHTTPProxyString(t *testing.T) { + _, ctx := logging.Configure(context.Background(), logging.Config{Level: slog.LevelError}) + memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{MaxTTL: time.Hour}) + assert.NoError(t, err) + defer memCache.Close() + + mux := http.NewServeMux() + p, err := strategy.NewHTTPProxy(ctx, memCache, mux) + assert.NoError(t, err) + assert.Equal(t, "proxy", p.String()) +} + +// TestHTTPProxyQueryStringInCacheKey verifies that query parameters are included +// in the cache key so requests for different query strings are cached separately. +func TestHTTPProxyQueryStringInCacheKey(t *testing.T) { + httpTransportMutexProxy.Lock() + defer httpTransportMutexProxy.Unlock() + + callCount := 0 + backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + callCount++ + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("ok")) + })) + defer backend.Close() + + originalTransport := http.DefaultTransport + defer func() { http.DefaultTransport = originalTransport }() //nolint:reassign + http.DefaultTransport = &mockTransport{backend: backend, originalTransport: originalTransport} //nolint:reassign + + handler, ctx, _ := setupProxyTest(t) + + req1 := httptest.NewRequestWithContext(ctx, http.MethodGet, "http://dl.google.com/path?channel=stable", nil) + handler.ServeHTTP(httptest.NewRecorder(), req1) + + req2 := httptest.NewRequestWithContext(ctx, http.MethodGet, "http://dl.google.com/path?channel=beta", nil) + handler.ServeHTTP(httptest.NewRecorder(), req2) + + assert.Equal(t, 2, callCount, "different query strings should result in separate upstream fetches") +}