From c62b34c167e7b32d0485ed8cdecfa4a9f20fa406 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Fri, 30 Jan 2026 20:43:32 -0500 Subject: [PATCH 1/6] feat(cdp-proxy): add /json endpoint for Playwright connectOverCDP support The CDP proxy on port 9222 previously only implemented /json/version, which returned the WebSocket URL but didn't support target discovery. Playwright's connectOverCDP() fetches /json to discover browser targets before establishing a WebSocket connection. Without this endpoint, using `http://127.0.0.1:9222` with agent-browser or Playwright would fail, even though direct WebSocket connections (ws://127.0.0.1:9222) worked fine. This change: - Adds /json and /json/list endpoints that proxy to Chrome's /json - Rewrites webSocketDebuggerUrl and devtoolsFrontendUrl in the response to use the proxy's host instead of Chrome's internal host - Enables `agent-browser --cdp http://127.0.0.1:9222` to work correctly --- server/cmd/api/main.go | 74 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 3 deletions(-) diff --git a/server/cmd/api/main.go b/server/cmd/api/main.go index b5db5cf6..bb60350c 100644 --- a/server/cmd/api/main.go +++ b/server/cmd/api/main.go @@ -157,9 +157,9 @@ func main() { }, scaletozero.Middleware(stz), ) - // Expose a minimal /json/version endpoint so clients that attempt to - // resolve a browser websocket URL via HTTP can succeed. We map the - // upstream path onto this proxy's host:port so clients connect back to us. + // Expose /json/version endpoint so clients that attempt to resolve a browser + // websocket URL via HTTP can succeed. We map the upstream path onto this + // proxy's host:port so clients connect back to us. rDevtools.Get("/json/version", func(w http.ResponseWriter, r *http.Request) { current := upstreamMgr.Current() if current == "" { @@ -172,6 +172,61 @@ func main() { "webSocketDebuggerUrl": proxyWSURL, }) }) + + // Handler for /json and /json/list - proxies to Chrome and rewrites URLs. + // This is needed for Playwright's connectOverCDP which fetches /json for target discovery. + jsonTargetHandler := func(w http.ResponseWriter, r *http.Request) { + current := upstreamMgr.Current() + if current == "" { + http.Error(w, "upstream not ready", http.StatusServiceUnavailable) + return + } + + // Parse upstream URL to get Chrome's host (e.g., ws://127.0.0.1:9223/...) + parsed, err := url.Parse(current) + if err != nil { + http.Error(w, "invalid upstream URL", http.StatusInternalServerError) + return + } + + // Fetch /json from Chrome + chromeJSONURL := fmt.Sprintf("http://%s/json", parsed.Host) + resp, err := http.Get(chromeJSONURL) + if err != nil { + slogger.Error("failed to fetch /json from Chrome", "err", err, "url", chromeJSONURL) + http.Error(w, "failed to fetch target list from browser", http.StatusBadGateway) + return + } + defer resp.Body.Close() + + // Read and parse the JSON response + var targets []map[string]interface{} + if err := json.NewDecoder(resp.Body).Decode(&targets); err != nil { + slogger.Error("failed to decode /json response", "err", err) + http.Error(w, "failed to parse target list", http.StatusBadGateway) + return + } + + // Rewrite URLs to use this proxy's host instead of Chrome's + proxyHost := r.Host + chromeHost := parsed.Host + for i := range targets { + // Rewrite webSocketDebuggerUrl + if wsURL, ok := targets[i]["webSocketDebuggerUrl"].(string); ok { + targets[i]["webSocketDebuggerUrl"] = rewriteWSURL(wsURL, chromeHost, proxyHost) + } + // Rewrite devtoolsFrontendUrl if present + if frontendURL, ok := targets[i]["devtoolsFrontendUrl"].(string); ok { + targets[i]["devtoolsFrontendUrl"] = rewriteWSURL(frontendURL, chromeHost, proxyHost) + } + } + + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(targets) + } + rDevtools.Get("/json", jsonTargetHandler) + rDevtools.Get("/json/list", jsonTargetHandler) + rDevtools.Get("/*", func(w http.ResponseWriter, r *http.Request) { devtoolsproxy.WebSocketProxyHandler(upstreamMgr, slogger, config.LogCDPMessages, stz).ServeHTTP(w, r) }) @@ -227,3 +282,16 @@ func mustFFmpeg() { panic(fmt.Errorf("ffmpeg not found or not executable: %w", err)) } } + +// rewriteWSURL replaces the Chrome host with the proxy host in WebSocket URLs. +// e.g., "ws://127.0.0.1:9223/devtools/page/..." -> "ws://127.0.0.1:9222/devtools/page/..." +func rewriteWSURL(urlStr, chromeHost, proxyHost string) string { + parsed, err := url.Parse(urlStr) + if err != nil { + return urlStr + } + if parsed.Host == chromeHost { + parsed.Host = proxyHost + } + return parsed.String() +} From a1171a167bf8624d8b483dfd66d3f90b4dec2ef8 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Sun, 1 Feb 2026 17:20:34 -0500 Subject: [PATCH 2/6] test(e2e): add agent-browser CDP proxy connectivity tests Add comprehensive e2e tests verifying that agent-browser can connect to Chrome through the CDP proxy on port 9222. Tests validate: - /json endpoint returns targets with URLs rewritten to proxy port - /json/list endpoint works correctly with URL rewriting - /json/version endpoint continues to work - agent-browser works with various --cdp argument formats: - port only (9222) - http URL (http://127.0.0.1:9222) - localhost:port - 127.0.0.1:port - agent-browser snapshot and navigation commands work via proxy --- server/e2e/e2e_agent_browser_cdp_test.go | 231 +++++++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100644 server/e2e/e2e_agent_browser_cdp_test.go diff --git a/server/e2e/e2e_agent_browser_cdp_test.go b/server/e2e/e2e_agent_browser_cdp_test.go new file mode 100644 index 00000000..df689a98 --- /dev/null +++ b/server/e2e/e2e_agent_browser_cdp_test.go @@ -0,0 +1,231 @@ +package e2e + +import ( + "context" + "encoding/base64" + "net/http" + "os/exec" + "strings" + "testing" + "time" + + instanceoapi "github.com/onkernel/kernel-images/server/lib/oapi" + "github.com/stretchr/testify/require" +) + +// TestAgentBrowserCDPProxy tests that agent-browser can connect to Chrome via the CDP proxy on port 9222. +// This validates the /json and /json/list endpoints that the proxy exposes for target discovery, +// which is required for tools like agent-browser and Playwright's connectOverCDP. +func TestAgentBrowserCDPProxy(t *testing.T) { + t.Parallel() + + if _, err := exec.LookPath("docker"); err != nil { + t.Skipf("docker not available: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + + c := NewTestContainer(t, headlessImage) + require.NoError(t, c.Start(ctx, ContainerConfig{}), "failed to start container") + defer c.Stop(ctx) + + require.NoError(t, c.WaitReady(ctx), "api not ready") + require.NoError(t, c.WaitDevTools(ctx), "devtools not ready") + + client, err := c.APIClient() + require.NoError(t, err) + + // Install agent-browser globally inside the container + t.Log("Installing agent-browser...") + timeoutSec := 120 // npm install can take a while + installResult := execCommandWithTimeout(t, ctx, client, "npm", []string{"install", "-g", "agent-browser"}, &timeoutSec) + require.Zero(t, installResult.exitCode, "failed to install agent-browser: %s", installResult.output) + t.Log("agent-browser installed successfully") + + // First test the /json endpoints via curl to verify the proxy is working correctly + // before we test agent-browser + + // Test that /json endpoint returns proper target list with rewritten URLs + t.Run("json endpoint returns targets with rewritten URLs", func(t *testing.T) { + t.Log("Testing /json endpoint via curl") + + result := execCommand(t, ctx, client, "curl", []string{"-s", "http://127.0.0.1:9222/json"}) + require.Zero(t, result.exitCode, "curl /json failed: %s", result.output) + + // The response should be a JSON array containing targets + require.True(t, strings.HasPrefix(strings.TrimSpace(result.output), "["), + "expected JSON array from /json, got: %s", result.output) + + // The URLs should point to the proxy (port 9222), not Chrome directly (port 9223) + require.Contains(t, result.output, "9222", + "expected target URLs to be rewritten to proxy port 9222, got: %s", result.output) + require.NotContains(t, result.output, "9223", + "target URLs should not contain Chrome port 9223, got: %s", result.output) + }) + + // Test that /json/list endpoint also works + t.Run("json/list endpoint returns targets with rewritten URLs", func(t *testing.T) { + t.Log("Testing /json/list endpoint via curl") + + result := execCommand(t, ctx, client, "curl", []string{"-s", "http://127.0.0.1:9222/json/list"}) + require.Zero(t, result.exitCode, "curl /json/list failed: %s", result.output) + + // The response should be a JSON array containing targets + require.True(t, strings.HasPrefix(strings.TrimSpace(result.output), "["), + "expected JSON array from /json/list, got: %s", result.output) + + // The URLs should point to the proxy (port 9222), not Chrome directly (port 9223) + require.Contains(t, result.output, "9222", + "expected target URLs to be rewritten to proxy port 9222, got: %s", result.output) + require.NotContains(t, result.output, "9223", + "target URLs should not contain Chrome port 9223, got: %s", result.output) + }) + + // Test that /json/version endpoint works (this was already there) + t.Run("json/version endpoint works", func(t *testing.T) { + t.Log("Testing /json/version endpoint via curl") + + result := execCommand(t, ctx, client, "curl", []string{"-s", "http://127.0.0.1:9222/json/version"}) + require.Zero(t, result.exitCode, "curl /json/version failed: %s", result.output) + + // The response should be a JSON object with browser info + require.True(t, strings.HasPrefix(strings.TrimSpace(result.output), "{"), + "expected JSON object from /json/version, got: %s", result.output) + + // Should contain webSocketDebuggerUrl pointing to proxy + require.Contains(t, result.output, "webSocketDebuggerUrl", + "expected webSocketDebuggerUrl in response, got: %s", result.output) + require.Contains(t, result.output, "9222", + "expected webSocketDebuggerUrl to point to proxy port 9222, got: %s", result.output) + }) + + // Now test agent-browser with different CDP connection variations + // Each test connects to the browser, navigates, and gets the URL to verify connectivity + + testCases := []struct { + name string + cdpArg string + }{ + { + name: "port only (9222)", + cdpArg: "9222", + }, + { + name: "http URL", + cdpArg: "http://127.0.0.1:9222", + }, + { + name: "localhost:port", + cdpArg: "localhost:9222", + }, + { + name: "127.0.0.1:port", + cdpArg: "127.0.0.1:9222", + }, + } + + for _, tc := range testCases { + tc := tc // capture range variable + t.Run("agent-browser connect "+tc.name, func(t *testing.T) { + t.Logf("Testing agent-browser with --cdp %s", tc.cdpArg) + + // Navigate to example.com - this is the key test that verifies: + // 1. agent-browser can connect to the CDP proxy on port 9222 + // 2. The proxy's /json endpoint works for target discovery + // 3. The WebSocket connection through the proxy works + navResult := execCommand(t, ctx, client, "agent-browser", []string{"--cdp", tc.cdpArg, "open", "https://example.com"}) + t.Logf("Navigate result: exit=%d, output=%s", navResult.exitCode, navResult.output) + require.Zero(t, navResult.exitCode, "agent-browser --cdp %s open failed: %s", tc.cdpArg, navResult.output) + + // Get the current URL to verify we're connected and navigation worked + urlResult := execCommand(t, ctx, client, "agent-browser", []string{"--cdp", tc.cdpArg, "get", "url", "--json"}) + t.Logf("Get URL result: exit=%d, output=%s", urlResult.exitCode, urlResult.output) + require.Zero(t, urlResult.exitCode, "agent-browser --cdp %s get url failed: %s", tc.cdpArg, urlResult.output) + + // Verify we got a valid response containing example.com + require.Contains(t, urlResult.output, "example.com", + "expected URL to contain example.com, got: %s", urlResult.output) + }) + } + + // Test agent-browser snapshot command which uses /json for target discovery + t.Run("agent-browser snapshot via proxy", func(t *testing.T) { + t.Log("Testing agent-browser snapshot via CDP proxy") + + // Get snapshot - this exercises the /json endpoint to discover targets + result := execCommand(t, ctx, client, "agent-browser", []string{"--cdp", "9222", "snapshot", "-i", "--json"}) + t.Logf("Snapshot exit code: %d, Output length: %d", result.exitCode, len(result.output)) + + require.Zero(t, result.exitCode, "agent-browser snapshot failed: %s", result.output) + // Verify we got a valid snapshot response + require.True(t, strings.Contains(result.output, "success") || strings.Contains(result.output, "snapshot") || strings.Contains(result.output, "data"), + "expected valid snapshot response, got: %s", result.output) + }) + + // Test agent-browser get title to further verify connectivity + t.Run("agent-browser get title via proxy", func(t *testing.T) { + t.Log("Testing agent-browser get title via CDP proxy") + + result := execCommand(t, ctx, client, "agent-browser", []string{"--cdp", "9222", "get", "title", "--json"}) + t.Logf("Get title result: exit=%d, output=%s", result.exitCode, result.output) + + require.Zero(t, result.exitCode, "agent-browser get title failed: %s", result.output) + // Should contain "Example Domain" from previous navigation + require.Contains(t, result.output, "Example", + "expected title to contain 'Example', got: %s", result.output) + }) + + t.Log("All agent-browser CDP proxy tests passed") +} + +// execResult holds the result of a command execution +type execResult struct { + exitCode int + output string +} + +// execCommand runs a command via the container's process exec API and returns the result +func execCommand(t *testing.T, ctx context.Context, client *instanceoapi.ClientWithResponses, command string, args []string) execResult { + t.Helper() + return execCommandWithTimeout(t, ctx, client, command, args, nil) +} + +// execCommandWithTimeout runs a command with an optional timeout +func execCommandWithTimeout(t *testing.T, ctx context.Context, client *instanceoapi.ClientWithResponses, command string, args []string, timeoutSec *int) execResult { + t.Helper() + + req := instanceoapi.ProcessExecJSONRequestBody{ + Command: command, + Args: &args, + TimeoutSec: timeoutSec, + } + + rsp, err := client.ProcessExecWithResponse(ctx, req) + require.NoError(t, err, "process exec request error for %s", command) + require.Equal(t, http.StatusOK, rsp.StatusCode(), "unexpected status for %s: %s body=%s", + command, rsp.Status(), string(rsp.Body)) + require.NotNil(t, rsp.JSON200, "expected JSON200 response for %s", command) + + var stdout, stderr string + if rsp.JSON200.StdoutB64 != nil && *rsp.JSON200.StdoutB64 != "" { + if b, decErr := base64.StdEncoding.DecodeString(*rsp.JSON200.StdoutB64); decErr == nil { + stdout = string(b) + } + } + if rsp.JSON200.StderrB64 != nil && *rsp.JSON200.StderrB64 != "" { + if b, decErr := base64.StdEncoding.DecodeString(*rsp.JSON200.StderrB64); decErr == nil { + stderr = string(b) + } + } + + exitCode := 0 + if rsp.JSON200.ExitCode != nil { + exitCode = *rsp.JSON200.ExitCode + } + + return execResult{ + exitCode: exitCode, + output: stdout + stderr, + } +} From 3ae8d73b16f41bcb804093fa8fa4aebabb94aa70 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Sun, 1 Feb 2026 17:45:03 -0500 Subject: [PATCH 3/6] fix(e2e): rewrite test to properly validate CDP proxy JSON endpoints Rewrite the test to focus on validating the /json and /json/list endpoints added by the PR: - Test that /json returns targets with webSocketDebuggerUrl rewritten from port 9223 (Chrome) to port 9222 (proxy) - Test that /json/list works the same way - Test that /json/version continues to work - Add comparison test showing Chrome's direct /json on 9223 has unrewritten URLs Remove agent-browser tests since they required additional setup and the core functionality is validated by the JSON endpoint tests. --- server/e2e/e2e_agent_browser_cdp_test.go | 171 +++++++++-------------- 1 file changed, 66 insertions(+), 105 deletions(-) diff --git a/server/e2e/e2e_agent_browser_cdp_test.go b/server/e2e/e2e_agent_browser_cdp_test.go index df689a98..6c92afbe 100644 --- a/server/e2e/e2e_agent_browser_cdp_test.go +++ b/server/e2e/e2e_agent_browser_cdp_test.go @@ -3,6 +3,7 @@ package e2e import ( "context" "encoding/base64" + "encoding/json" "net/http" "os/exec" "strings" @@ -13,17 +14,18 @@ import ( "github.com/stretchr/testify/require" ) -// TestAgentBrowserCDPProxy tests that agent-browser can connect to Chrome via the CDP proxy on port 9222. -// This validates the /json and /json/list endpoints that the proxy exposes for target discovery, -// which is required for tools like agent-browser and Playwright's connectOverCDP. -func TestAgentBrowserCDPProxy(t *testing.T) { +// TestCDPProxyJSONEndpoints tests that the CDP proxy's /json and /json/list endpoints +// correctly return target information with URLs rewritten to point to the proxy (port 9222) +// instead of Chrome directly (port 9223). This is required for tools like agent-browser +// and Playwright's connectOverCDP to work through the proxy. +func TestCDPProxyJSONEndpoints(t *testing.T) { t.Parallel() if _, err := exec.LookPath("docker"); err != nil { t.Skipf("docker not available: %v", err) } - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) defer cancel() c := NewTestContainer(t, headlessImage) @@ -36,18 +38,8 @@ func TestAgentBrowserCDPProxy(t *testing.T) { client, err := c.APIClient() require.NoError(t, err) - // Install agent-browser globally inside the container - t.Log("Installing agent-browser...") - timeoutSec := 120 // npm install can take a while - installResult := execCommandWithTimeout(t, ctx, client, "npm", []string{"install", "-g", "agent-browser"}, &timeoutSec) - require.Zero(t, installResult.exitCode, "failed to install agent-browser: %s", installResult.output) - t.Log("agent-browser installed successfully") - - // First test the /json endpoints via curl to verify the proxy is working correctly - // before we test agent-browser - - // Test that /json endpoint returns proper target list with rewritten URLs - t.Run("json endpoint returns targets with rewritten URLs", func(t *testing.T) { + // Test that /json endpoint returns proper target list with webSocketDebuggerUrl rewritten + t.Run("json endpoint returns targets with rewritten webSocketDebuggerUrl", func(t *testing.T) { t.Log("Testing /json endpoint via curl") result := execCommand(t, ctx, client, "curl", []string{"-s", "http://127.0.0.1:9222/json"}) @@ -57,15 +49,27 @@ func TestAgentBrowserCDPProxy(t *testing.T) { require.True(t, strings.HasPrefix(strings.TrimSpace(result.output), "["), "expected JSON array from /json, got: %s", result.output) - // The URLs should point to the proxy (port 9222), not Chrome directly (port 9223) - require.Contains(t, result.output, "9222", - "expected target URLs to be rewritten to proxy port 9222, got: %s", result.output) - require.NotContains(t, result.output, "9223", - "target URLs should not contain Chrome port 9223, got: %s", result.output) + // Parse the response and verify webSocketDebuggerUrl is rewritten + var targets []map[string]interface{} + err := json.Unmarshal([]byte(result.output), &targets) + require.NoError(t, err, "failed to parse /json response: %s", result.output) + require.NotEmpty(t, targets, "expected at least one target") + + // Check that webSocketDebuggerUrl points to port 9222 (proxy), not 9223 (Chrome) + for i, target := range targets { + wsURL, ok := target["webSocketDebuggerUrl"].(string) + if ok && wsURL != "" { + require.Contains(t, wsURL, "9222", + "target %d: webSocketDebuggerUrl should contain proxy port 9222, got: %s", i, wsURL) + require.NotContains(t, wsURL, "9223", + "target %d: webSocketDebuggerUrl should not contain Chrome port 9223, got: %s", i, wsURL) + } + } + t.Logf("Verified %d targets have correctly rewritten webSocketDebuggerUrl", len(targets)) }) // Test that /json/list endpoint also works - t.Run("json/list endpoint returns targets with rewritten URLs", func(t *testing.T) { + t.Run("json/list endpoint returns targets with rewritten webSocketDebuggerUrl", func(t *testing.T) { t.Log("Testing /json/list endpoint via curl") result := execCommand(t, ctx, client, "curl", []string{"-s", "http://127.0.0.1:9222/json/list"}) @@ -75,11 +79,21 @@ func TestAgentBrowserCDPProxy(t *testing.T) { require.True(t, strings.HasPrefix(strings.TrimSpace(result.output), "["), "expected JSON array from /json/list, got: %s", result.output) - // The URLs should point to the proxy (port 9222), not Chrome directly (port 9223) - require.Contains(t, result.output, "9222", - "expected target URLs to be rewritten to proxy port 9222, got: %s", result.output) - require.NotContains(t, result.output, "9223", - "target URLs should not contain Chrome port 9223, got: %s", result.output) + // Parse and verify webSocketDebuggerUrl + var targets []map[string]interface{} + err := json.Unmarshal([]byte(result.output), &targets) + require.NoError(t, err, "failed to parse /json/list response") + require.NotEmpty(t, targets, "expected at least one target") + + for i, target := range targets { + wsURL, ok := target["webSocketDebuggerUrl"].(string) + if ok && wsURL != "" { + require.Contains(t, wsURL, "9222", + "target %d: webSocketDebuggerUrl should contain proxy port 9222", i) + require.NotContains(t, wsURL, "9223", + "target %d: webSocketDebuggerUrl should not contain Chrome port 9223", i) + } + } }) // Test that /json/version endpoint works (this was already there) @@ -93,90 +107,37 @@ func TestAgentBrowserCDPProxy(t *testing.T) { require.True(t, strings.HasPrefix(strings.TrimSpace(result.output), "{"), "expected JSON object from /json/version, got: %s", result.output) - // Should contain webSocketDebuggerUrl pointing to proxy - require.Contains(t, result.output, "webSocketDebuggerUrl", - "expected webSocketDebuggerUrl in response, got: %s", result.output) - require.Contains(t, result.output, "9222", - "expected webSocketDebuggerUrl to point to proxy port 9222, got: %s", result.output) - }) - - // Now test agent-browser with different CDP connection variations - // Each test connects to the browser, navigates, and gets the URL to verify connectivity - - testCases := []struct { - name string - cdpArg string - }{ - { - name: "port only (9222)", - cdpArg: "9222", - }, - { - name: "http URL", - cdpArg: "http://127.0.0.1:9222", - }, - { - name: "localhost:port", - cdpArg: "localhost:9222", - }, - { - name: "127.0.0.1:port", - cdpArg: "127.0.0.1:9222", - }, - } - - for _, tc := range testCases { - tc := tc // capture range variable - t.Run("agent-browser connect "+tc.name, func(t *testing.T) { - t.Logf("Testing agent-browser with --cdp %s", tc.cdpArg) - - // Navigate to example.com - this is the key test that verifies: - // 1. agent-browser can connect to the CDP proxy on port 9222 - // 2. The proxy's /json endpoint works for target discovery - // 3. The WebSocket connection through the proxy works - navResult := execCommand(t, ctx, client, "agent-browser", []string{"--cdp", tc.cdpArg, "open", "https://example.com"}) - t.Logf("Navigate result: exit=%d, output=%s", navResult.exitCode, navResult.output) - require.Zero(t, navResult.exitCode, "agent-browser --cdp %s open failed: %s", tc.cdpArg, navResult.output) - - // Get the current URL to verify we're connected and navigation worked - urlResult := execCommand(t, ctx, client, "agent-browser", []string{"--cdp", tc.cdpArg, "get", "url", "--json"}) - t.Logf("Get URL result: exit=%d, output=%s", urlResult.exitCode, urlResult.output) - require.Zero(t, urlResult.exitCode, "agent-browser --cdp %s get url failed: %s", tc.cdpArg, urlResult.output) - - // Verify we got a valid response containing example.com - require.Contains(t, urlResult.output, "example.com", - "expected URL to contain example.com, got: %s", urlResult.output) - }) - } + // Parse and verify webSocketDebuggerUrl + var version map[string]interface{} + err := json.Unmarshal([]byte(result.output), &version) + require.NoError(t, err, "failed to parse /json/version response") - // Test agent-browser snapshot command which uses /json for target discovery - t.Run("agent-browser snapshot via proxy", func(t *testing.T) { - t.Log("Testing agent-browser snapshot via CDP proxy") - - // Get snapshot - this exercises the /json endpoint to discover targets - result := execCommand(t, ctx, client, "agent-browser", []string{"--cdp", "9222", "snapshot", "-i", "--json"}) - t.Logf("Snapshot exit code: %d, Output length: %d", result.exitCode, len(result.output)) - - require.Zero(t, result.exitCode, "agent-browser snapshot failed: %s", result.output) - // Verify we got a valid snapshot response - require.True(t, strings.Contains(result.output, "success") || strings.Contains(result.output, "snapshot") || strings.Contains(result.output, "data"), - "expected valid snapshot response, got: %s", result.output) + wsURL, ok := version["webSocketDebuggerUrl"].(string) + require.True(t, ok, "expected webSocketDebuggerUrl in response") + require.Contains(t, wsURL, "9222", + "webSocketDebuggerUrl should point to proxy port 9222, got: %s", wsURL) }) - // Test agent-browser get title to further verify connectivity - t.Run("agent-browser get title via proxy", func(t *testing.T) { - t.Log("Testing agent-browser get title via CDP proxy") + // Test that Chrome's /json endpoint on 9223 returns unrewritten URLs (for comparison) + t.Run("chrome direct json has port 9223", func(t *testing.T) { + t.Log("Testing Chrome's /json endpoint directly on port 9223") + + result := execCommand(t, ctx, client, "curl", []string{"-s", "http://127.0.0.1:9223/json"}) + require.Zero(t, result.exitCode, "curl /json on 9223 failed: %s", result.output) - result := execCommand(t, ctx, client, "agent-browser", []string{"--cdp", "9222", "get", "title", "--json"}) - t.Logf("Get title result: exit=%d, output=%s", result.exitCode, result.output) + var targets []map[string]interface{} + err := json.Unmarshal([]byte(result.output), &targets) + require.NoError(t, err, "failed to parse Chrome's /json response") + require.NotEmpty(t, targets, "expected at least one target") - require.Zero(t, result.exitCode, "agent-browser get title failed: %s", result.output) - // Should contain "Example Domain" from previous navigation - require.Contains(t, result.output, "Example", - "expected title to contain 'Example', got: %s", result.output) + // Chrome's direct response should have port 9223 + wsURL, ok := targets[0]["webSocketDebuggerUrl"].(string) + require.True(t, ok && wsURL != "", "expected webSocketDebuggerUrl in first target") + require.Contains(t, wsURL, "9223", + "Chrome's webSocketDebuggerUrl should contain port 9223, got: %s", wsURL) }) - t.Log("All agent-browser CDP proxy tests passed") + t.Log("All CDP proxy JSON endpoint tests passed") } // execResult holds the result of a command execution From bcba715cacfef4666d962816a9253a0b07d94d2e Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Sun, 1 Feb 2026 18:04:11 -0500 Subject: [PATCH 4/6] fix(test): use stdbuf to prevent flaky UpstreamManager test The TestUpstreamManagerDetectsChromiumAndRestart test was flaky in CI because Chromium's stderr output was fully buffered when connected to a file. The "DevTools listening on ws://..." line would sit in a buffer and never be flushed until the buffer filled or the process exited, causing the test to timeout after 20 seconds. Fix by using stdbuf -oL -eL to force line buffering on both stdout and stderr, ensuring each line is flushed immediately after the newline. This also improves test speed from ~17-20s to ~0.5s since we no longer wait for buffered output. --- server/lib/devtoolsproxy/proxy_test.go | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/server/lib/devtoolsproxy/proxy_test.go b/server/lib/devtoolsproxy/proxy_test.go index e58bbc66..3092891f 100644 --- a/server/lib/devtoolsproxy/proxy_test.go +++ b/server/lib/devtoolsproxy/proxy_test.go @@ -189,7 +189,7 @@ func TestUpstreamManagerDetectsChromiumAndRestart(t *testing.T) { startChromium := func(port int) (*exec.Cmd, error) { userDir := t.TempDir() - args := []string{ + chromiumArgs := []string{ "--headless=new", "--remote-debugging-address=127.0.0.1", fmt.Sprintf("--remote-debugging-port=%d", port), @@ -203,8 +203,22 @@ func TestUpstreamManagerDetectsChromiumAndRestart(t *testing.T) { fmt.Sprintf("--user-data-dir=%s", userDir), "about:blank", } - t.Logf("starting chromium: %s %v", browser, args) - cmd := exec.Command(browser, args...) + + // Use stdbuf to force line-buffering on stderr so the "DevTools listening" + // line is flushed immediately. Without this, output to a file may be fully + // buffered and the line might not appear until the buffer fills or the + // process exits, causing the test to flake in CI. + var cmd *exec.Cmd + if stdbufPath, err := exec.LookPath("stdbuf"); err == nil { + // stdbuf -oL -eL: line-buffer stdout (-oL) and stderr (-eL) + args := append([]string{"-oL", "-eL", browser}, chromiumArgs...) + t.Logf("starting chromium via stdbuf: %s %v", stdbufPath, args) + cmd = exec.Command(stdbufPath, args...) + } else { + t.Logf("stdbuf not found, starting chromium directly: %s %v", browser, chromiumArgs) + cmd = exec.Command(browser, chromiumArgs...) + } + cmd.Stdout = logFile cmd.Stderr = logFile if err := cmd.Start(); err != nil { From 1cfe0c18e5fa2b341cd3cf69e911376c9f813c9b Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Sun, 1 Feb 2026 18:09:42 -0500 Subject: [PATCH 5/6] fix(cdp-proxy): address bot review comments 1. Add HTTP status code check before decoding JSON response from Chrome's /json endpoint. Previously, 4xx/5xx responses would result in confusing JSON decode errors like "invalid character '<'". 2. Fix devtoolsFrontendUrl rewriting for URLs with ws= query parameter. Chrome's devtoolsFrontendUrl often has the format: https://chrome-devtools-frontend.appspot.com/.../inspector.html?ws=127.0.0.1:9223/... The previous code only replaced the URL's host field, but in this format the Chrome host appears in the ws= query parameter. Now we handle both cases: direct host replacement and ws= query param replacement. --- server/cmd/api/main.go | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/server/cmd/api/main.go b/server/cmd/api/main.go index bb60350c..9deee677 100644 --- a/server/cmd/api/main.go +++ b/server/cmd/api/main.go @@ -10,6 +10,7 @@ import ( "os" "os/exec" "os/signal" + "strings" "syscall" "time" @@ -199,6 +200,13 @@ func main() { } defer resp.Body.Close() + // Verify Chrome returned a successful response + if resp.StatusCode != http.StatusOK { + slogger.Error("Chrome /json returned non-200 status", "status", resp.StatusCode, "url", chromeJSONURL) + http.Error(w, fmt.Sprintf("browser returned status %d", resp.StatusCode), http.StatusBadGateway) + return + } + // Read and parse the JSON response var targets []map[string]interface{} if err := json.NewDecoder(resp.Body).Decode(&targets); err != nil { @@ -284,14 +292,32 @@ func mustFFmpeg() { } // rewriteWSURL replaces the Chrome host with the proxy host in WebSocket URLs. -// e.g., "ws://127.0.0.1:9223/devtools/page/..." -> "ws://127.0.0.1:9222/devtools/page/..." +// It handles two cases: +// 1. Direct WebSocket URLs: "ws://127.0.0.1:9223/devtools/page/..." -> "ws://127.0.0.1:9222/devtools/page/..." +// 2. DevTools frontend URLs with ws= query param: "https://...?ws=127.0.0.1:9223/..." -> "https://...?ws=127.0.0.1:9222/..." func rewriteWSURL(urlStr, chromeHost, proxyHost string) string { parsed, err := url.Parse(urlStr) if err != nil { return urlStr } + + // Case 1: Direct replacement if the URL's host matches Chrome's host if parsed.Host == chromeHost { parsed.Host = proxyHost } + + // Case 2: Check for ws= query parameter (used in devtoolsFrontendUrl) + // e.g., https://chrome-devtools-frontend.appspot.com/.../inspector.html?ws=127.0.0.1:9223/devtools/page/... + if wsParam := parsed.Query().Get("ws"); wsParam != "" { + // The ws param value is like "127.0.0.1:9223/devtools/page/..." + // We need to replace the host portion + if strings.HasPrefix(wsParam, chromeHost) { + newWsParam := strings.Replace(wsParam, chromeHost, proxyHost, 1) + q := parsed.Query() + q.Set("ws", newWsParam) + parsed.RawQuery = q.Encode() + } + } + return parsed.String() } From d2ec4e36018f224189b8751ea8ae13b463da98c2 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Sun, 1 Feb 2026 22:39:14 -0500 Subject: [PATCH 6/6] Add trailing slash support for CDP JSON endpoints and agent-browser e2e test Playwright's connectOverCDP requests /json/version/ with a trailing slash, which was falling through to the WebSocket handler and returning a 426 "Upgrade Required" error. This prevented agent-browser (and other Playwright-based tools) from connecting via the CDP proxy on port 9222. - Register trailing-slash variants for /json, /json/, /json/list, /json/list/, /json/version, and /json/version/ endpoints - Add comprehensive e2e test for agent-browser that: - Installs agent-browser globally in the container - Tests CDP connection with port number format (--cdp 9222) - Tests CDP connection with http:// URL format (--cdp http://127.0.0.1:9222) - Verifies navigation, snapshot, get url, and get title commands This enables natural usage of agent-browser within containers: agent-browser --cdp 9222 open https://example.com agent-browser --cdp 9222 snapshot --json --- server/cmd/api/main.go | 10 ++- server/e2e/e2e_agent_browser_cdp_test.go | 96 ++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 2 deletions(-) diff --git a/server/cmd/api/main.go b/server/cmd/api/main.go index 9deee677..7e47a0b6 100644 --- a/server/cmd/api/main.go +++ b/server/cmd/api/main.go @@ -161,7 +161,9 @@ func main() { // Expose /json/version endpoint so clients that attempt to resolve a browser // websocket URL via HTTP can succeed. We map the upstream path onto this // proxy's host:port so clients connect back to us. - rDevtools.Get("/json/version", func(w http.ResponseWriter, r *http.Request) { + // Note: Playwright's connectOverCDP requests /json/version/ with trailing slash, + // so we register both variants to avoid 426 errors from the WebSocket handler. + jsonVersionHandler := func(w http.ResponseWriter, r *http.Request) { current := upstreamMgr.Current() if current == "" { http.Error(w, "upstream not ready", http.StatusServiceUnavailable) @@ -172,7 +174,9 @@ func main() { _ = json.NewEncoder(w).Encode(map[string]string{ "webSocketDebuggerUrl": proxyWSURL, }) - }) + } + rDevtools.Get("/json/version", jsonVersionHandler) + rDevtools.Get("/json/version/", jsonVersionHandler) // Handler for /json and /json/list - proxies to Chrome and rewrites URLs. // This is needed for Playwright's connectOverCDP which fetches /json for target discovery. @@ -233,7 +237,9 @@ func main() { _ = json.NewEncoder(w).Encode(targets) } rDevtools.Get("/json", jsonTargetHandler) + rDevtools.Get("/json/", jsonTargetHandler) rDevtools.Get("/json/list", jsonTargetHandler) + rDevtools.Get("/json/list/", jsonTargetHandler) rDevtools.Get("/*", func(w http.ResponseWriter, r *http.Request) { devtoolsproxy.WebSocketProxyHandler(upstreamMgr, slogger, config.LogCDPMessages, stz).ServeHTTP(w, r) diff --git a/server/e2e/e2e_agent_browser_cdp_test.go b/server/e2e/e2e_agent_browser_cdp_test.go index 6c92afbe..0c4ef086 100644 --- a/server/e2e/e2e_agent_browser_cdp_test.go +++ b/server/e2e/e2e_agent_browser_cdp_test.go @@ -140,6 +140,102 @@ func TestCDPProxyJSONEndpoints(t *testing.T) { t.Log("All CDP proxy JSON endpoint tests passed") } +// TestAgentBrowserCDPProxy tests that agent-browser can connect to Chrome via the CDP proxy on port 9222. +// This is the primary use case for the /json endpoint - enabling tools like agent-browser to work +// naturally within the container using `agent-browser --cdp 9222` instead of having to use port 9223. +func TestAgentBrowserCDPProxy(t *testing.T) { + t.Parallel() + + if _, err := exec.LookPath("docker"); err != nil { + t.Skipf("docker not available: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + + c := NewTestContainer(t, headlessImage) + require.NoError(t, c.Start(ctx, ContainerConfig{}), "failed to start container") + defer c.Stop(ctx) + + require.NoError(t, c.WaitReady(ctx), "api not ready") + require.NoError(t, c.WaitDevTools(ctx), "devtools not ready") + + client, err := c.APIClient() + require.NoError(t, err) + + // Install agent-browser globally inside the container + // agent-browser is a CLI tool that uses playwright-core's connectOverCDP under the hood + t.Log("Installing agent-browser...") + timeoutSec := 120 // npm install can take a while + installResult := execCommandWithTimeout(t, ctx, client, "npm", []string{"install", "-g", "agent-browser"}, &timeoutSec) + require.Zero(t, installResult.exitCode, "failed to install agent-browser: %s", installResult.output) + t.Log("agent-browser installed successfully") + + // Test agent-browser with different CDP connection formats + // All of these should work through the proxy on port 9222 + + // Test 1: Using just the port number (most common usage) + t.Run("agent-browser --cdp 9222 snapshot", func(t *testing.T) { + t.Log("Testing agent-browser snapshot with --cdp 9222") + + // Get a snapshot of the current page - this exercises: + // 1. CDP connection via connectOverCDP + // 2. Fetching /json to discover targets + // 3. WebSocket connection through the proxy + result := execCommand(t, ctx, client, "agent-browser", []string{"--cdp", "9222", "snapshot", "--json"}) + t.Logf("Snapshot result: exit=%d, output_length=%d", result.exitCode, len(result.output)) + + require.Zero(t, result.exitCode, "agent-browser snapshot failed: %s", result.output) + // The output should be valid JSON containing the snapshot + require.True(t, strings.Contains(result.output, "{") || strings.Contains(result.output, "["), + "expected JSON output from snapshot, got: %s", result.output) + }) + + // Test 2: Using http:// URL format + t.Run("agent-browser --cdp http://127.0.0.1:9222 snapshot", func(t *testing.T) { + t.Log("Testing agent-browser snapshot with --cdp http://127.0.0.1:9222") + + result := execCommand(t, ctx, client, "agent-browser", []string{"--cdp", "http://127.0.0.1:9222", "snapshot", "--json"}) + t.Logf("Snapshot result: exit=%d, output_length=%d", result.exitCode, len(result.output)) + + require.Zero(t, result.exitCode, "agent-browser snapshot with http URL failed: %s", result.output) + }) + + // Test 3: Navigate to a URL and verify it works + t.Run("agent-browser --cdp 9222 navigate and get url", func(t *testing.T) { + t.Log("Testing agent-browser navigation via CDP proxy") + + // Navigate to example.com + navResult := execCommand(t, ctx, client, "agent-browser", []string{"--cdp", "9222", "open", "https://example.com"}) + t.Logf("Navigate result: exit=%d, output=%s", navResult.exitCode, navResult.output) + require.Zero(t, navResult.exitCode, "agent-browser open failed: %s", navResult.output) + + // Get the current URL to verify navigation worked + urlResult := execCommand(t, ctx, client, "agent-browser", []string{"--cdp", "9222", "get", "url", "--json"}) + t.Logf("Get URL result: exit=%d, output=%s", urlResult.exitCode, urlResult.output) + require.Zero(t, urlResult.exitCode, "agent-browser get url failed: %s", urlResult.output) + + // The URL should contain example.com + require.Contains(t, urlResult.output, "example.com", + "expected URL to contain example.com, got: %s", urlResult.output) + }) + + // Test 4: Get page title to verify page loaded correctly + t.Run("agent-browser --cdp 9222 get title", func(t *testing.T) { + t.Log("Testing agent-browser get title via CDP proxy") + + result := execCommand(t, ctx, client, "agent-browser", []string{"--cdp", "9222", "get", "title", "--json"}) + t.Logf("Get title result: exit=%d, output=%s", result.exitCode, result.output) + + require.Zero(t, result.exitCode, "agent-browser get title failed: %s", result.output) + // Should contain "Example" from example.com's title "Example Domain" + require.Contains(t, result.output, "Example", + "expected title to contain 'Example', got: %s", result.output) + }) + + t.Log("All agent-browser CDP proxy tests passed") +} + // execResult holds the result of a command execution type execResult struct { exitCode int