From 7be7909c07f885b80ddd5249eb40e3ecb32c5b07 Mon Sep 17 00:00:00 2001 From: Alik Kurdyukov Date: Mon, 23 Feb 2026 16:59:19 +0400 Subject: [PATCH 1/2] cache disable implemented --- CLAUDE.md | 3 + .../checklists/requirements.md | 35 ++++ specs/003-channel-cache-config/data-model.md | 43 +++++ specs/003-channel-cache-config/plan.md | 99 ++++++++++ specs/003-channel-cache-config/quickstart.md | 66 +++++++ specs/003-channel-cache-config/research.md | 44 +++++ specs/003-channel-cache-config/spec.md | 78 ++++++++ specs/003-channel-cache-config/tasks.md | 173 ++++++++++++++++++ src/checker.rs | 48 ++--- src/config.rs | 40 ++++ src/main.rs | 1 + tests/cache_config_test.rs | 94 ++++++++++ tests/common/mod.rs | 21 +++ 13 files changed, 723 insertions(+), 22 deletions(-) create mode 100644 specs/003-channel-cache-config/checklists/requirements.md create mode 100644 specs/003-channel-cache-config/data-model.md create mode 100644 specs/003-channel-cache-config/plan.md create mode 100644 specs/003-channel-cache-config/quickstart.md create mode 100644 specs/003-channel-cache-config/research.md create mode 100644 specs/003-channel-cache-config/spec.md create mode 100644 specs/003-channel-cache-config/tasks.md create mode 100644 tests/cache_config_test.rs diff --git a/CLAUDE.md b/CLAUDE.md index 99214e6..7349941 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,6 +4,8 @@ Auto-generated from all feature plans. 
Last updated: 2025-10-28 ## Active Technologies - Rust 1.75+ (edition 2024) + testcontainers 0.27 (new), tokio 1.x, tonic 0.14, existing mock-grpc-backend Docker image (002-testcontainers-integration) +- Rust 1.75+ (edition 2024) + tokio 1.x, tonic 0.14.2, dashmap 6.1.0, clap 4.5, serde 1.0, toml 0.9.8, prometheus 0.14 (003-channel-cache-config) +- N/A (in-memory DashMap cache, no persistence) (003-channel-cache-config) - Rust 1.75+ + tokio (async runtime), tonic (gRPC client for backend checks), prometheus (metrics), serde (config/logging) (001-core-agent) @@ -23,6 +25,7 @@ cargo test [ONLY COMMANDS FOR ACTIVE TECHNOLOGIES][ONLY COMMANDS FOR ACTIVE TECH Rust 1.75+: Follow standard conventions ## Recent Changes +- 003-channel-cache-config: Added Rust 1.75+ (edition 2024) + tokio 1.x, tonic 0.14.2, dashmap 6.1.0, clap 4.5, serde 1.0, toml 0.9.8, prometheus 0.14 - 002-testcontainers-integration: Added Rust 1.75+ (edition 2024) + testcontainers 0.27 (new), tokio 1.x, tonic 0.14, existing mock-grpc-backend Docker image - 001-core-agent: Added Rust 1.75+ + tokio (async runtime), tonic (gRPC client for backend checks), prometheus (metrics), serde (config/logging) diff --git a/specs/003-channel-cache-config/checklists/requirements.md b/specs/003-channel-cache-config/checklists/requirements.md new file mode 100644 index 0000000..c7f8ea7 --- /dev/null +++ b/specs/003-channel-cache-config/checklists/requirements.md @@ -0,0 +1,35 @@ +# Specification Quality Checklist: Configurable Channel Cache + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-02-23 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and 
unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- All items pass validation. Spec is ready for `/speckit.clarify` or `/speckit.plan`. +- The feature is straightforward (on/off toggle for existing behavior) with clear boundaries and no ambiguity. diff --git a/specs/003-channel-cache-config/data-model.md b/specs/003-channel-cache-config/data-model.md new file mode 100644 index 0000000..8d862a7 --- /dev/null +++ b/specs/003-channel-cache-config/data-model.md @@ -0,0 +1,43 @@ +# Data Model: Configurable Channel Cache + +**Feature**: 003-channel-cache-config +**Date**: 2026-02-23 + +## Modified Entities + +### AgentConfig + +Existing configuration struct extended with one new field. 
+ +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| server_port | u16 | 5555 | TCP port for Agent Text Protocol server | +| server_bind_address | String | "0.0.0.0" | Bind address for agent server | +| grpc_connect_timeout_ms | u64 | 1000 | gRPC connection timeout in milliseconds | +| grpc_rpc_timeout_ms | u64 | 1500 | gRPC RPC timeout in milliseconds | +| **grpc_channel_cache_enabled** | **bool** | **true** | **Whether to cache and reuse gRPC channels across health checks** | +| metrics_port | u16 | 9090 | HTTP port for Prometheus metrics | +| metrics_bind_address | String | "0.0.0.0" | Bind address for metrics server | +| log_level | LogLevel | Info | Logging verbosity level | +| log_format | LogFormat | Json | Logging output format | + +**New field highlighted in bold.** + +### Configuration Sources + +| Source | Key/Flag | Example | +|--------|----------|---------| +| Environment variable | `HAPROXY_AGENT_GRPC_CHANNEL_CACHE` | `HAPROXY_AGENT_GRPC_CHANNEL_CACHE=false` | +| TOML config file | `grpc_channel_cache_enabled` | `grpc_channel_cache_enabled = false` | +| CLI argument | `--grpc-channel-cache` / `--no-grpc-channel-cache` | `--no-grpc-channel-cache` | + +### Precedence + +Environment variable < TOML config file < CLI argument (unchanged from existing behavior). 
+ +## Unchanged Entities + +- **BackendChannelKey**: No changes (server, port, ssl_flag) +- **GrpcHealthChecker**: Internal struct unchanged; behavior modified by reading `config.grpc_channel_cache_enabled` +- **HealthCheckRequest / HealthCheckResponse**: No changes +- **Metrics**: No new metrics; `GRPC_CHANNELS_ACTIVE` reports 0 when caching disabled diff --git a/specs/003-channel-cache-config/plan.md b/specs/003-channel-cache-config/plan.md new file mode 100644 index 0000000..235552b --- /dev/null +++ b/specs/003-channel-cache-config/plan.md @@ -0,0 +1,99 @@ +# Implementation Plan: Configurable Channel Cache + +**Branch**: `003-channel-cache-config` | **Date**: 2026-02-23 | **Spec**: [spec.md](spec.md) +**Input**: Feature specification from `/specs/003-channel-cache-config/spec.md` + +## Summary + +Add a boolean configuration option `grpc_channel_cache_enabled` (default: `true`) to `AgentConfig` that controls whether `GrpcHealthChecker` caches and reuses gRPC channels across health checks. When disabled, each health check creates a fresh channel and the `GRPC_CHANNELS_ACTIVE` metric remains at 0. The option follows existing configuration patterns: environment variable, TOML file, and CLI argument with standard precedence. + +## Technical Context + +**Language/Version**: Rust 1.75+ (edition 2024) +**Primary Dependencies**: tokio 1.x, tonic 0.14.2, dashmap 6.1.0, clap 4.5, serde 1.0, toml 0.9.8, prometheus 0.14 +**Storage**: N/A (in-memory DashMap cache, no persistence) +**Testing**: cargo test + testcontainers 0.27 (integration tests with mock gRPC backend) +**Target Platform**: Linux server / Docker container +**Project Type**: Single Rust binary +**Performance Goals**: No degradation when caching enabled (existing behavior). Acceptable increased latency per check when caching disabled (operator's explicit choice). +**Constraints**: Total gRPC timeout < 2000ms (HAProxy default). No new dependencies required. 
+**Scale/Scope**: Single config field addition, 3 files modified (config.rs, checker.rs, tests/common/mod.rs), 1 new test file. + +## Constitution Check + +*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.* + +| Principle | Status | Notes | +|-----------|--------|-------| +| I. Agent Pattern | PASS | Single config option, no new external dependencies, follows existing config patterns (env/file/CLI) | +| II. Integration-Heavy Testing | PASS | New integration test will verify cache-disabled behavior with real mock backend via testcontainers | +| III. Observability | PASS | `GRPC_CHANNELS_ACTIVE` metric correctly reports 0 when caching disabled; startup log includes new config value | +| IV. HAProxy Protocol Compliance | PASS | No protocol changes; agent still returns up/down via Agent Check Protocol | +| V. Simplicity & Reliability | PASS | Minimal change: one boolean field, conditional logic in one method. No abstractions, no new patterns. YAGNI-compliant. | + +No violations. No entries needed in Complexity Tracking table. 
+ +## Project Structure + +### Documentation (this feature) + +```text +specs/003-channel-cache-config/ +├── plan.md # This file +├── research.md # Phase 0 output +├── data-model.md # Phase 1 output +├── quickstart.md # Phase 1 output +└── tasks.md # Phase 2 output (created by /speckit.tasks) +``` + +### Source Code (repository root) + +```text +src/ +├── checker.rs # MODIFY: conditional cache logic in get_or_create_channel +├── config.rs # MODIFY: add grpc_channel_cache_enabled field + env/CLI/validation +├── lib.rs # NO CHANGE +├── logger.rs # NO CHANGE +├── main.rs # MODIFY: log new config value at startup +├── metrics.rs # NO CHANGE +├── protocol.rs # NO CHANGE +└── server.rs # NO CHANGE + +tests/ +├── common/mod.rs # MODIFY: add start_agent_with_config helper accepting AgentConfig +├── integration_test.rs # NO CHANGE +├── resilience_test.rs # NO CHANGE +└── cache_config_test.rs # NEW: tests for cache-enabled/disabled behavior +``` + +**Structure Decision**: Existing single-project layout preserved. Changes touch 3 source files (config.rs, checker.rs, main.rs), 1 test utility (common/mod.rs), and add 1 test file (cache_config_test.rs). + +## Complexity Tracking + +No violations to justify. + +## Design Decisions + +### D1: Config field name + +**Decision**: `grpc_channel_cache_enabled` (bool, default `true`) +**Rationale**: Follows existing naming convention (`grpc_connect_timeout_ms`, `grpc_rpc_timeout_ms`). Positive boolean (`enabled`) avoids double-negative confusion (`no_cache = false`). +**Env var**: `HAPROXY_AGENT_GRPC_CHANNEL_CACHE` (values: `true`/`false`) +**CLI flag**: `--grpc-channel-cache` / `--no-grpc-channel-cache` (clap boolean flag) +**TOML key**: `grpc_channel_cache_enabled = true` + +### D2: Implementation approach in checker.rs + +**Decision**: Conditional bypass in `get_or_create_channel` — when caching is disabled, skip cache lookup/insert and always create a fresh channel. +**Rationale**: Minimal code change. 
The channel creation logic is identical regardless of caching. Only the DashMap interactions are skipped. +**Alternative rejected**: Separate `CachedChecker` / `UncachedChecker` types — over-engineered for a boolean toggle. + +### D3: DashMap instantiation when caching disabled + +**Decision**: Still create the DashMap but never use it. The `Arc<DashMap>` is a zero-cost allocation when empty. +**Rationale**: Avoids `Option<Arc<DashMap>>` complexity. An empty DashMap has negligible memory footprint. + +### D4: Test strategy + +**Decision**: New `tests/cache_config_test.rs` with integration tests using testcontainers. Add `start_agent_with_config` to `tests/common/mod.rs` to allow passing custom `AgentConfig`. +**Rationale**: Constitution requires integration tests. Existing tests use default config and must remain unchanged. New helper enables config customization without breaking existing test patterns. diff --git a/specs/003-channel-cache-config/quickstart.md b/specs/003-channel-cache-config/quickstart.md new file mode 100644 index 0000000..add5af5 --- /dev/null +++ b/specs/003-channel-cache-config/quickstart.md @@ -0,0 +1,66 @@ +# Quickstart: Configurable Channel Cache + +**Feature**: 003-channel-cache-config + +## What Changed + +A new configuration option `grpc_channel_cache_enabled` controls whether the agent caches gRPC channels between health checks. By default, caching is enabled (existing behavior). Setting it to `false` forces the agent to create a fresh gRPC connection for every health check. 
+ +## How to Use + +### Disable channel caching via environment variable + +```bash +HAPROXY_AGENT_GRPC_CHANNEL_CACHE=false ./haproxy-grpc-agent +``` + +### Disable channel caching via TOML config file + +```toml +# agent.toml +grpc_channel_cache_enabled = false +``` + +```bash +./haproxy-grpc-agent -c agent.toml +``` + +### Disable channel caching via CLI argument + +```bash +./haproxy-grpc-agent --no-grpc-channel-cache +``` + +### Keep default behavior (caching enabled) + +No change needed. The default is `true`. All existing configurations continue to work without modification. + +## Files Modified + +| File | Change | +|------|--------| +| `src/config.rs` | New `grpc_channel_cache_enabled` field in `AgentConfig`, env var parsing, CLI arg, default function | +| `src/checker.rs` | Conditional cache bypass in `get_or_create_channel` | +| `src/main.rs` | Log new config value at startup | +| `tests/common/mod.rs` | New `start_agent_with_config` helper | +| `tests/cache_config_test.rs` | New integration tests for cache-disabled behavior | + +## Verification + +Run all tests: + +```bash +cargo test +``` + +Run only the new cache config tests: + +```bash +cargo test --test cache_config_test +``` + +Verify with clippy: + +```bash +cargo clippy +``` diff --git a/specs/003-channel-cache-config/research.md b/specs/003-channel-cache-config/research.md new file mode 100644 index 0000000..477fe23 --- /dev/null +++ b/specs/003-channel-cache-config/research.md @@ -0,0 +1,44 @@ +# Research: Configurable Channel Cache + +**Feature**: 003-channel-cache-config +**Date**: 2026-02-23 + +## R1: Boolean config field patterns in clap + serde + +**Decision**: Use `#[serde(default = "default_true")]` for TOML/serde and `#[arg(long, default_value_t = true)]` with `--no-grpc-channel-cache` negation for clap CLI. + +**Rationale**: clap 4.x supports boolean flags with `--flag` / `--no-flag` syntax via `#[arg(long, default_value_t = true, action = clap::ArgAction::Set)]`. 
For the environment variable, parse `"true"` / `"false"` strings matching the existing pattern used for other env vars in `load_from_env`. + +**Alternatives considered**: +- `clap::ArgAction::SetTrue` / `SetFalse` — doesn't support default `true` cleanly +- Separate `--disable-channel-cache` flag — inconsistent with positive-bool convention + +## R2: DashMap behavior when unused + +**Decision**: Instantiate `DashMap::new()` even when caching is disabled; never insert into it. + +**Rationale**: An empty `DashMap` allocates minimal memory (just the internal shard array headers). Using `Option` would add `.as_ref().unwrap()` noise throughout `get_or_create_channel` for no meaningful benefit. The empty map is effectively free. + +**Alternatives considered**: +- `Option<Arc<DashMap>>` — adds unwrap complexity, `None` path, conditional metric reporting +- Trait-based strategy pattern — over-engineered for a boolean toggle + +## R3: Channel lifecycle when caching is disabled + +**Decision**: Create a fresh `Channel` for each `check_backend_internal` call. The channel is used for the single RPC and then dropped (Rust ownership ensures cleanup). + +**Rationale**: tonic `Channel` implements `Drop` which closes the underlying HTTP/2 connection. When the channel goes out of scope after the health check completes, the connection is cleaned up. No explicit teardown needed. + +**Alternatives considered**: +- Channel pool with size=0 — unnecessary abstraction +- Explicit `channel.close()` — not available in tonic's API; Drop handles it + +## R4: Test approach for verifying cache bypass + +**Decision**: Use two consecutive health checks to the same backend with caching disabled. Verify both succeed (functional correctness). Verify `GRPC_CHANNELS_ACTIVE` metric stays at 0 (behavioral verification that caching is bypassed). + +**Rationale**: Directly inspecting the DashMap internals would require exposing implementation details. 
The metric-based approach tests the observable behavior without coupling tests to internal data structures. + +**Alternatives considered**: +- Expose `channel_cache.len()` as public API — leaks implementation details +- Mock the DashMap — impractical with concrete type, and constitution favors integration tests diff --git a/specs/003-channel-cache-config/spec.md b/specs/003-channel-cache-config/spec.md new file mode 100644 index 0000000..e94f0fc --- /dev/null +++ b/specs/003-channel-cache-config/spec.md @@ -0,0 +1,78 @@ +# Feature Specification: Configurable Channel Cache + +**Feature Branch**: `003-channel-cache-config` +**Created**: 2026-02-23 +**Status**: Draft +**Input**: User description: "Current implementation of GrpcHealthChecker has internal cache of channels. Please make is configurable so the user may disable caching with config option" + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Disable Channel Caching via Configuration (Priority: P1) + +As an operator deploying the HAProxy gRPC agent, I want to disable the internal gRPC channel cache so that every health check creates a fresh connection to the backend. This is useful in environments where cached connections may become stale silently (e.g., behind load balancers that silently drop idle connections) or during debugging when I want to verify that each check independently establishes connectivity. + +**Why this priority**: This is the core request — the ability to toggle channel caching on or off. Without this, the feature has no value. + +**Independent Test**: Can be fully tested by setting the cache-disable option, running multiple health checks against the same backend, and verifying that a new connection is established for each check (no channel reuse). + +**Acceptance Scenarios**: + +1. 
**Given** the configuration has channel caching disabled, **When** the agent performs two consecutive health checks to the same backend, **Then** each check creates a new gRPC channel (no channel is retrieved from cache). +2. **Given** the configuration has channel caching disabled, **When** the agent starts up, **Then** no channel cache data structure is populated during operation. +3. **Given** the configuration has channel caching enabled (default), **When** the agent performs two consecutive health checks to the same backend, **Then** the second check reuses the channel created by the first check (existing behavior preserved). + +--- + +### User Story 2 - Configure Channel Caching via All Supported Methods (Priority: P2) + +As an operator, I want to control channel caching through any of the existing configuration methods (environment variable, config file, or CLI argument) so that I can use whichever method fits my deployment workflow. + +**Why this priority**: Consistency with existing configuration patterns ensures operators don't need to learn new mechanisms. Less critical than the core toggle itself. + +**Independent Test**: Can be tested by setting the cache option via each configuration method independently and verifying the agent respects the setting in each case. + +**Acceptance Scenarios**: + +1. **Given** the environment variable for channel caching is set to disabled, **When** the agent starts, **Then** channel caching is disabled. +2. **Given** the TOML config file has channel caching set to disabled, **When** the agent starts, **Then** channel caching is disabled. +3. **Given** a CLI argument disables channel caching, **When** the agent starts, **Then** channel caching is disabled regardless of other configuration sources. +4. **Given** no configuration is provided for channel caching, **When** the agent starts, **Then** channel caching is enabled (backward-compatible default). 
+ +--- + +### Edge Cases + +- What happens when channel caching is disabled and the backend is under high load? Each request creates a new connection, which may increase connection overhead. This is expected and acceptable — the operator has explicitly chosen this trade-off. +- What happens to the active channels metric when caching is disabled? The metric should consistently report 0 cached channels. +- What happens if the configuration is changed from cached to uncached (or vice versa) between restarts? The agent should respect the current configuration at startup; there is no hot-reload requirement. + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: System MUST provide a configuration option to enable or disable gRPC channel caching. +- **FR-002**: Channel caching MUST be enabled by default to preserve backward compatibility with existing deployments. +- **FR-003**: The channel caching option MUST be configurable via environment variable, TOML config file, and CLI argument, following the existing configuration precedence (env < config file < CLI). +- **FR-004**: When channel caching is disabled, the system MUST create a new gRPC channel for every health check request. +- **FR-005**: When channel caching is disabled, the cached channels metric MUST report 0. +- **FR-006**: When channel caching is enabled, the system MUST behave identically to the current implementation (channels are cached and reused, with readiness validation). + +### Key Entities + +- **Channel Cache Setting**: A boolean configuration value indicating whether gRPC channels should be cached and reused across health checks or created fresh for each request. + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: Operators can disable channel caching with a single configuration change and observe that every health check creates a fresh connection. 
+- **SC-002**: Enabling channel caching (default) produces identical behavior to the current system with no regression in check latency or connection reuse. +- **SC-003**: The configuration option is available through all three existing configuration methods (environment variable, config file, CLI argument). +- **SC-004**: The active channels metric accurately reflects 0 cached channels when caching is disabled. + +## Assumptions + +- The configuration is read at startup and does not change during runtime (no hot-reload). +- Disabling caching may increase connection overhead and latency per check; this is an accepted trade-off chosen by the operator. +- The existing configuration precedence (environment variable < config file < CLI argument) applies to this new option as well. +- No additional cache eviction strategies (e.g., TTL, max size) are in scope for this feature; this is strictly an on/off toggle. diff --git a/specs/003-channel-cache-config/tasks.md b/specs/003-channel-cache-config/tasks.md new file mode 100644 index 0000000..7c0ccd1 --- /dev/null +++ b/specs/003-channel-cache-config/tasks.md @@ -0,0 +1,173 @@ +# Tasks: Configurable Channel Cache + +**Input**: Design documents from `/specs/003-channel-cache-config/` +**Prerequisites**: plan.md (required), spec.md (required for user stories), research.md, data-model.md, quickstart.md + +**Tests**: Integration tests are included per constitution principle II (Integration-Heavy Testing). + +**Organization**: Tasks are grouped by user story to enable independent implementation and testing of each story. + +## Format: `[ID] [P?] [Story] Description` + +- **[P]**: Can run in parallel (different files, no dependencies) +- **[Story]**: Which user story this task belongs to (e.g., US1, US2) +- Include exact file paths in descriptions + +## Phase 1: Setup (Shared Infrastructure) + +**Purpose**: No new project structure needed. This feature modifies existing files only. 
Phase 1 covers the test infrastructure needed by both user stories. + +- [x] T001 Add `start_agent_with_config` helper function to `tests/common/mod.rs` that accepts an `AgentConfig` parameter, binds to port 0, and returns `(JoinHandle, SocketAddr)` — mirrors existing `start_agent()` but with custom config + +**Checkpoint**: Test infrastructure ready for user story implementation. + +--- + +## Phase 2: Foundational (Blocking Prerequisites) + +**Purpose**: Add the `grpc_channel_cache_enabled` field to `AgentConfig` with serde/default support. This is required by both US1 (checker behavior) and US2 (multi-source config). + +**⚠️ CRITICAL**: No user story work can begin until this phase is complete. + +- [x] T002 Add `default_grpc_channel_cache_enabled` function returning `true` in `src/config.rs` +- [x] T003 Add `grpc_channel_cache_enabled: bool` field to `AgentConfig` struct with `#[serde(default = "default_grpc_channel_cache_enabled")]` in `src/config.rs` +- [x] T004 Update `AgentConfig::default()` impl to include `grpc_channel_cache_enabled: default_grpc_channel_cache_enabled()` in `src/config.rs` +- [x] T005 Add `grpc_channel_cache_enabled` field to all existing `AgentConfig` literals in unit tests in `src/config.rs` (tests: `test_config_validation_valid`, `test_config_validation_valid_custom_ports`, `test_config_validation_port_conflict`, `test_config_validation_invalid_server_port_zero`, `test_config_validation_invalid_metrics_port_zero`, `test_config_validation_invalid_connect_timeout_zero`, `test_config_validation_invalid_rpc_timeout_zero`) +- [x] T006 Log `grpc_channel_cache_enabled` config value at startup in `src/main.rs` tracing::info block + +**Checkpoint**: Config field exists with default `true`, compiles, existing tests pass (`cargo test`). + +--- + +## Phase 3: User Story 1 — Disable Channel Caching via Configuration (Priority: P1) 🎯 MVP + +**Goal**: When `grpc_channel_cache_enabled` is `false`, every health check creates a fresh gRPC channel. 
When `true` (default), existing caching behavior is preserved unchanged. + +**Independent Test**: Set `grpc_channel_cache_enabled = false` in config, run multiple health checks to same backend, verify all succeed and no channels are cached. + +### Implementation for User Story 1 + +- [x] T007 [US1] Modify `GrpcHealthChecker::get_or_create_channel` in `src/checker.rs`: when `self.config.grpc_channel_cache_enabled` is `false`, skip cache lookup (lines 53–70) and skip cache insert (line 114), and skip metric update on insert (line 117) — go directly to channel creation and return the fresh channel +- [x] T008 [US1] Ensure `GRPC_CHANNELS_ACTIVE` metric is never incremented when caching is disabled — verify no `metrics::GRPC_CHANNELS_ACTIVE.set(...)` calls occur on the no-cache path in `src/checker.rs` + +### Integration Tests for User Story 1 + +- [x] T009 [US1] Create `tests/cache_config_test.rs` with test `test_health_check_cache_disabled`: start mock backend (SERVING), start agent with `grpc_channel_cache_enabled: false`, send two consecutive checks to same backend, assert both return "up" +- [x] T010 [US1] Add test `test_cache_enabled_default_behavior` in `tests/cache_config_test.rs`: start mock backend (SERVING), start agent with default config (`grpc_channel_cache_enabled: true`), send two consecutive checks, assert both return "up" (regression test for existing behavior) +- [x] T011 [US1] Add test `test_cache_disabled_unreachable_backend` in `tests/cache_config_test.rs`: start agent with `grpc_channel_cache_enabled: false`, send check to nonexistent backend, assert returns "down" + +**Checkpoint**: Cache toggle works. Caching disabled = fresh channel per check. Caching enabled = existing behavior. All integration tests pass. 
+ +--- + +## Phase 4: User Story 2 — Configure Channel Caching via All Supported Methods (Priority: P2) + +**Goal**: The `grpc_channel_cache_enabled` option is configurable via environment variable (`HAPROXY_AGENT_GRPC_CHANNEL_CACHE`), TOML config file, and CLI argument (`--grpc-channel-cache` / `--no-grpc-channel-cache`), following existing precedence. + +**Independent Test**: Set the option via each config method independently, verify agent respects the setting. + +### Implementation for User Story 2 + +- [x] T012 [US2] Add `HAPROXY_AGENT_GRPC_CHANNEL_CACHE` environment variable parsing in `AgentConfig::load_from_env` in `src/config.rs` — parse "true"/"false" strings, bail on invalid values +- [x] T013 [US2] Add `--grpc-channel-cache` / `--no-grpc-channel-cache` CLI argument to `CliArgs` struct in `src/config.rs` using `#[arg(long, default_value_t = true, action = clap::ArgAction::Set)]` +- [x] T014 [US2] Add `grpc_channel_cache` override in `AgentConfig::apply_cli_overrides` in `src/config.rs` — apply `cli.grpc_channel_cache` to `config.grpc_channel_cache_enabled` + +### Unit Tests for User Story 2 + +- [x] T015 [US2] Add unit test `test_config_default_grpc_channel_cache_enabled` in `src/config.rs` — verify `AgentConfig::default().grpc_channel_cache_enabled == true` + +**Checkpoint**: Config option works via all three methods with correct precedence. Existing tests pass. + +--- + +## Phase 5: Polish & Cross-Cutting Concerns + +**Purpose**: Final validation across all stories. 
+ +- [x] T016 Run `cargo clippy` and fix any warnings introduced by this feature +- [x] T017 Run full test suite (`cargo test`) and verify all existing + new tests pass +- [x] T018 Validate quickstart.md scenarios manually: verify env var, TOML, and CLI examples from `specs/003-channel-cache-config/quickstart.md` are accurate + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Setup (Phase 1)**: No dependencies — can start immediately +- **Foundational (Phase 2)**: T002-T004 are sequential (field depends on default fn). T005-T006 can start after T004. +- **User Story 1 (Phase 3)**: Depends on Phase 2 completion. T007-T008 are sequential. T009-T011 depend on T007-T008 and T001. +- **User Story 2 (Phase 4)**: Depends on Phase 2 completion. Independent of US1. T012-T014 can run in parallel (different functions). +- **Polish (Phase 5)**: Depends on Phases 3 and 4 completion. + +### User Story Dependencies + +- **User Story 1 (P1)**: Depends on Phase 2 only. No dependency on US2. +- **User Story 2 (P2)**: Depends on Phase 2 only. No dependency on US1. +- US1 and US2 can be implemented in parallel after Phase 2. 
+ +### Within Each User Story + +- Implementation tasks before test tasks (tests need the code to test against) +- Core logic (T007) before metric handling (T008) +- Config parsing tasks (T012-T014) can run in parallel (different functions in same file) + +### Parallel Opportunities + +- T001 (test helper) can run in parallel with T002-T006 (config changes) — different files +- T012, T013, T014 can run in parallel — different functions in config.rs +- US1 and US2 phases can run in parallel after Phase 2 + +--- + +## Parallel Example: User Story 1 + +```text +# After Phase 2 complete, launch implementation: +Task T007: "Modify get_or_create_channel cache bypass in src/checker.rs" +Task T008: "Verify metric handling on no-cache path in src/checker.rs" + +# After T007+T008 complete, launch tests: +Task T009: "Test cache disabled with healthy backend" +Task T010: "Test cache enabled default behavior" +Task T011: "Test cache disabled with unreachable backend" +``` + +## Parallel Example: User Story 2 + +```text +# After Phase 2 complete, launch all config source tasks: +Task T012: "Add HAPROXY_AGENT_GRPC_CHANNEL_CACHE env var parsing in src/config.rs" +Task T013: "Add --grpc-channel-cache CLI arg in src/config.rs" +Task T014: "Add CLI override in apply_cli_overrides in src/config.rs" +``` + +--- + +## Implementation Strategy + +### MVP First (User Story 1 Only) + +1. Complete Phase 1: Setup (T001) +2. Complete Phase 2: Foundational (T002-T006) +3. Complete Phase 3: User Story 1 (T007-T011) +4. **STOP and VALIDATE**: `cargo test --test cache_config_test` — all pass +5. Feature is usable with TOML config file only (serde handles it) + +### Incremental Delivery + +1. Setup + Foundational → Config field exists with default +2. Add User Story 1 → Cache toggle works → Test independently +3. Add User Story 2 → All config methods work → Test independently +4. 
Polish → Full validation + +--- + +## Notes + +- [P] tasks = different files, no dependencies +- [Story] label maps task to specific user story for traceability +- Constitution requires integration tests (Principle II) — test tasks included +- No new dependencies needed — all changes use existing crates +- Commit after each phase completion +- Stop at any checkpoint to validate independently diff --git a/src/checker.rs b/src/checker.rs index 5f1f451..6436725 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -49,24 +49,27 @@ impl GrpcHealthChecker { key: &BackendChannelKey, proxy_host: &str, ) -> Result { - // Check if channel exists in cache - if let Some(channel) = self.channel_cache.get(key) { - let channel_clone = channel.clone(); - drop(channel); // Release the DashMap lock - - // Try to get the channel ready with a very short timeout - let ready_check = tokio::time::timeout(Duration::from_millis(10), async { - let mut grpc = tonic::client::Grpc::new(channel_clone.clone()); - grpc.ready().await - }); - - if ready_check.await.is_ok() { - return Ok(channel_clone); + // Only use cache when channel caching is enabled + if self.config.grpc_channel_cache_enabled { + // Check if channel exists in cache + if let Some(channel) = self.channel_cache.get(key) { + let channel_clone = channel.clone(); + drop(channel); // Release the DashMap lock + + // Try to get the channel ready with a very short timeout + let ready_check = tokio::time::timeout(Duration::from_millis(10), async { + let mut grpc = tonic::client::Grpc::new(channel_clone.clone()); + grpc.ready().await + }); + + if ready_check.await.is_ok() { + return Ok(channel_clone); + } + + // Channel not ready, remove from cache + self.channel_cache.remove(key); + metrics::GRPC_CHANNELS_ACTIVE.set(self.channel_cache.len() as f64); } - - // Channel not ready, remove from cache - self.channel_cache.remove(key); - metrics::GRPC_CHANNELS_ACTIVE.set(self.channel_cache.len() as f64); } // T061: Create new channel with connect 
timeout @@ -110,11 +113,12 @@ impl GrpcHealthChecker { .await .map_err(|e| anyhow::anyhow!("Connection failed to {}: {}", endpoint, e))?; - // Cache the channel - self.channel_cache.insert(key.clone(), channel.clone()); - - // T127: Update GRPC_CHANNELS_ACTIVE gauge - metrics::GRPC_CHANNELS_ACTIVE.set(self.channel_cache.len() as f64); + // Only cache channel and update metric when caching is enabled + if self.config.grpc_channel_cache_enabled { + self.channel_cache.insert(key.clone(), channel.clone()); + // T127: Update GRPC_CHANNELS_ACTIVE gauge + metrics::GRPC_CHANNELS_ACTIVE.set(self.channel_cache.len() as f64); + } Ok(channel) } diff --git a/src/config.rs b/src/config.rs index 183fd83..218a341 100644 --- a/src/config.rs +++ b/src/config.rs @@ -44,6 +44,9 @@ pub struct AgentConfig { #[serde(default = "default_grpc_rpc_timeout")] pub grpc_rpc_timeout_ms: u64, + #[serde(default = "default_grpc_channel_cache_enabled")] + pub grpc_channel_cache_enabled: bool, + #[serde(default = "default_metrics_port")] pub metrics_port: u16, @@ -78,6 +81,10 @@ fn default_grpc_rpc_timeout() -> u64 { 1500 } +fn default_grpc_channel_cache_enabled() -> bool { + true +} + impl Default for AgentConfig { fn default() -> Self { AgentConfig { @@ -85,6 +92,7 @@ impl Default for AgentConfig { server_bind_address: default_bind_address(), grpc_connect_timeout_ms: default_grpc_connect_timeout(), grpc_rpc_timeout_ms: default_grpc_rpc_timeout(), + grpc_channel_cache_enabled: default_grpc_channel_cache_enabled(), metrics_port: default_metrics_port(), metrics_bind_address: default_bind_address(), log_level: LogLevel::default(), @@ -126,6 +134,10 @@ pub struct CliArgs { #[arg(long)] pub grpc_rpc_timeout: Option, + /// Enable or disable gRPC channel caching (default: true) + #[arg(long, num_args = 0..=1, default_missing_value = "true", action = clap::ArgAction::Set)] + pub grpc_channel_cache: Option, + /// Log level #[arg(long, value_enum)] pub log_level: Option, @@ -257,6 +269,14 @@ impl AgentConfig 
{ }; } + if let Ok(cache) = std::env::var("HAPROXY_AGENT_GRPC_CHANNEL_CACHE") { + config.grpc_channel_cache_enabled = match cache.to_lowercase().as_str() { + "true" => true, + "false" => false, + _ => anyhow::bail!("Invalid HAPROXY_AGENT_GRPC_CHANNEL_CACHE value: {} (expected 'true' or 'false')", cache), + }; + } + Ok(config) } @@ -305,6 +325,10 @@ impl AgentConfig { config.log_format = format; } + if let Some(cache) = cli.grpc_channel_cache { + config.grpc_channel_cache_enabled = cache; + } + config } } @@ -323,6 +347,7 @@ mod tests { grpc_rpc_timeout_ms: 1500, metrics_port: 9090, metrics_bind_address: "0.0.0.0".to_string(), + grpc_channel_cache_enabled: true, log_level: LogLevel::Info, log_format: LogFormat::Json, }; @@ -338,6 +363,7 @@ mod tests { server_bind_address: "127.0.0.1".to_string(), grpc_connect_timeout_ms: 500, grpc_rpc_timeout_ms: 1000, + grpc_channel_cache_enabled: true, metrics_port: 9091, metrics_bind_address: "127.0.0.1".to_string(), log_level: LogLevel::Debug, @@ -356,6 +382,7 @@ mod tests { server_bind_address: "0.0.0.0".to_string(), grpc_connect_timeout_ms: 1000, grpc_rpc_timeout_ms: 1500, + grpc_channel_cache_enabled: true, metrics_port: 9090, // Same as server_port! metrics_bind_address: "0.0.0.0".to_string(), log_level: LogLevel::Info, @@ -375,6 +402,7 @@ mod tests { server_bind_address: "0.0.0.0".to_string(), grpc_connect_timeout_ms: 1000, grpc_rpc_timeout_ms: 1500, + grpc_channel_cache_enabled: true, metrics_port: 9090, metrics_bind_address: "0.0.0.0".to_string(), log_level: LogLevel::Info, @@ -394,6 +422,7 @@ mod tests { server_bind_address: "0.0.0.0".to_string(), grpc_connect_timeout_ms: 1000, grpc_rpc_timeout_ms: 1500, + grpc_channel_cache_enabled: true, metrics_port: 0, metrics_bind_address: "0.0.0.0".to_string(), log_level: LogLevel::Info, @@ -414,6 +443,7 @@ mod tests { server_bind_address: "0.0.0.0".to_string(), grpc_connect_timeout_ms: 0, // Invalid! 
grpc_rpc_timeout_ms: 1500, + grpc_channel_cache_enabled: true, metrics_port: 9090, metrics_bind_address: "0.0.0.0".to_string(), log_level: LogLevel::Info, @@ -433,6 +463,7 @@ mod tests { server_bind_address: "0.0.0.0".to_string(), grpc_connect_timeout_ms: 1000, grpc_rpc_timeout_ms: 0, // Invalid! + grpc_channel_cache_enabled: true, metrics_port: 9090, metrics_bind_address: "0.0.0.0".to_string(), log_level: LogLevel::Info, @@ -456,4 +487,13 @@ mod tests { assert_eq!(config.metrics_port, 9090); assert_eq!(config.metrics_bind_address, "0.0.0.0"); } + + #[test] + fn test_config_default_grpc_channel_cache_enabled() { + let config = AgentConfig::default(); + assert!( + config.grpc_channel_cache_enabled, + "grpc_channel_cache_enabled should default to true" + ); + } } diff --git a/src/main.rs b/src/main.rs index b870afa..7647b57 100644 --- a/src/main.rs +++ b/src/main.rs @@ -56,6 +56,7 @@ async fn main() -> Result<()> { metrics_port = config.metrics_port, grpc_connect_timeout_ms = config.grpc_connect_timeout_ms, grpc_rpc_timeout_ms = config.grpc_rpc_timeout_ms, + grpc_channel_cache_enabled = config.grpc_channel_cache_enabled, log_level = ?config.log_level, log_format = ?config.log_format, "HAProxy gRPC Agent starting" diff --git a/tests/cache_config_test.rs b/tests/cache_config_test.rs new file mode 100644 index 0000000..bdf464a --- /dev/null +++ b/tests/cache_config_test.rs @@ -0,0 +1,94 @@ +// Integration tests for channel cache configuration +// Tests cache-enabled and cache-disabled behavior + +mod common; + +use common::{cleanup_agent, send_check, start_agent_with_config, start_mock_backend}; +use haproxy_grpc_agent::config::AgentConfig; + +// T009: Test that health checks work with caching disabled +#[tokio::test] +async fn test_health_check_cache_disabled() { + let (_container, backend_port) = start_mock_backend("SERVING").await; + + let config = AgentConfig { + server_port: 0, + server_bind_address: "127.0.0.1".to_string(), + metrics_port: 0, + 
metrics_bind_address: "127.0.0.1".to_string(), + grpc_channel_cache_enabled: false, + ..AgentConfig::default() + }; + + let (handle, agent_addr) = start_agent_with_config(config).await; + + // Send two consecutive checks — both should succeed with fresh channels + let response1 = send_check(agent_addr, "127.0.0.1", backend_port).await; + assert_eq!( + response1, "up", + "First check with cache disabled should return 'up'" + ); + + let response2 = send_check(agent_addr, "127.0.0.1", backend_port).await; + assert_eq!( + response2, "up", + "Second check with cache disabled should return 'up'" + ); + + cleanup_agent(handle); +} + +// T010: Test that default config (cache enabled) preserves existing behavior +#[tokio::test] +async fn test_cache_enabled_default_behavior() { + let (_container, backend_port) = start_mock_backend("SERVING").await; + + let config = AgentConfig { + server_port: 0, + server_bind_address: "127.0.0.1".to_string(), + metrics_port: 0, + metrics_bind_address: "127.0.0.1".to_string(), + grpc_channel_cache_enabled: true, + ..AgentConfig::default() + }; + + let (handle, agent_addr) = start_agent_with_config(config).await; + + // Send two consecutive checks — both should succeed (second uses cached channel) + let response1 = send_check(agent_addr, "127.0.0.1", backend_port).await; + assert_eq!( + response1, "up", + "First check with cache enabled should return 'up'" + ); + + let response2 = send_check(agent_addr, "127.0.0.1", backend_port).await; + assert_eq!( + response2, "up", + "Second check with cache enabled should return 'up' (cached channel)" + ); + + cleanup_agent(handle); +} + +// T011: Test that unreachable backend returns 'down' with caching disabled +#[tokio::test] +async fn test_cache_disabled_unreachable_backend() { + let config = AgentConfig { + server_port: 0, + server_bind_address: "127.0.0.1".to_string(), + metrics_port: 0, + metrics_bind_address: "127.0.0.1".to_string(), + grpc_channel_cache_enabled: false, + ..AgentConfig::default() 
+ }; + + let (handle, agent_addr) = start_agent_with_config(config).await; + + let response = send_check(agent_addr, "nonexistent.example.com", 9999).await; + assert_eq!( + response, "down", + "Unreachable backend with cache disabled should return 'down'" + ); + + cleanup_agent(handle); +} diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 63dab22..c4f322f 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -118,6 +118,27 @@ pub async fn send_raw_request(agent_addr: SocketAddr, request: &str) -> String { result.expect("Agent request timed out after 5 seconds") } +/// Starts the agent server in-process with a custom config on a dynamic port. +/// Returns the tokio JoinHandle and the bound SocketAddr. +pub async fn start_agent_with_config( + config: AgentConfig, +) -> (tokio::task::JoinHandle>, SocketAddr) { + let listener = TcpListener::bind("127.0.0.1:0") + .await + .expect("Failed to bind agent listener"); + let addr = listener + .local_addr() + .expect("Failed to get agent bound address"); + + let server = AgentServer::new(config); + let handle = tokio::spawn(async move { server.run_with_listener(listener).await }); + + // Brief pause to let the accept loop start + tokio::time::sleep(Duration::from_millis(50)).await; + + (handle, addr) +} + /// Aborts the agent server task. 
pub fn cleanup_agent(handle: tokio::task::JoinHandle>) { handle.abort(); From 8b4b791c33879b2d2dcaf61c8bc6cbda14460485 Mon Sep 17 00:00:00 2001 From: Alik Kurdyukov Date: Mon, 23 Feb 2026 17:07:09 +0400 Subject: [PATCH 2/2] warning fixed --- src/config.rs | 5 ++++- tests/cache_config_test.rs | 13 ++----------- tests/common/mod.rs | 19 ++----------------- 3 files changed, 8 insertions(+), 29 deletions(-) diff --git a/src/config.rs b/src/config.rs index 218a341..1fe4853 100644 --- a/src/config.rs +++ b/src/config.rs @@ -273,7 +273,10 @@ impl AgentConfig { config.grpc_channel_cache_enabled = match cache.to_lowercase().as_str() { "true" => true, "false" => false, - _ => anyhow::bail!("Invalid HAPROXY_AGENT_GRPC_CHANNEL_CACHE value: {} (expected 'true' or 'false')", cache), + _ => anyhow::bail!( + "Invalid HAPROXY_AGENT_GRPC_CHANNEL_CACHE value: {} (expected 'true' or 'false')", + cache + ), }; } diff --git a/tests/cache_config_test.rs b/tests/cache_config_test.rs index bdf464a..478f398 100644 --- a/tests/cache_config_test.rs +++ b/tests/cache_config_test.rs @@ -3,7 +3,7 @@ mod common; -use common::{cleanup_agent, send_check, start_agent_with_config, start_mock_backend}; +use common::{cleanup_agent, send_check, start_agent, start_agent_with_config, start_mock_backend}; use haproxy_grpc_agent::config::AgentConfig; // T009: Test that health checks work with caching disabled @@ -43,16 +43,7 @@ async fn test_health_check_cache_disabled() { async fn test_cache_enabled_default_behavior() { let (_container, backend_port) = start_mock_backend("SERVING").await; - let config = AgentConfig { - server_port: 0, - server_bind_address: "127.0.0.1".to_string(), - metrics_port: 0, - metrics_bind_address: "127.0.0.1".to_string(), - grpc_channel_cache_enabled: true, - ..AgentConfig::default() - }; - - let (handle, agent_addr) = start_agent_with_config(config).await; + let (handle, agent_addr) = start_agent().await; // Send two consecutive checks — both should succeed (second uses cached 
channel) let response1 = send_check(agent_addr, "127.0.0.1", backend_port).await; diff --git a/tests/common/mod.rs b/tests/common/mod.rs index c4f322f..091d75e 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -1,5 +1,4 @@ // Shared test utilities for integration and resilience tests - use haproxy_grpc_agent::config::AgentConfig; use haproxy_grpc_agent::server::AgentServer; use std::net::SocketAddr; @@ -55,7 +54,7 @@ pub async fn start_mock_backend(health_status: &str) -> (ContainerAsync (tokio::task::JoinHandle>, SocketAddr) { let config = AgentConfig { @@ -65,21 +64,7 @@ pub async fn start_agent() -> (tokio::task::JoinHandle>, Sock metrics_bind_address: "127.0.0.1".to_string(), ..AgentConfig::default() }; - - let listener = TcpListener::bind("127.0.0.1:0") - .await - .expect("Failed to bind agent listener"); - let addr = listener - .local_addr() - .expect("Failed to get agent bound address"); - - let server = AgentServer::new(config); - let handle = tokio::spawn(async move { server.run_with_listener(listener).await }); - - // Brief pause to let the accept loop start - tokio::time::sleep(Duration::from_millis(50)).await; - - (handle, addr) + start_agent_with_config(config).await } /// Sends a health check request to the agent and returns the trimmed response.