From 49b30cb9a4063ff2bdf8837690f1645f2b75deb5 Mon Sep 17 00:00:00 2001
From: Rahul Shetty <rashetty@redhat.com>
Date: Thu, 29 Jan 2026 21:11:59 +0530
Subject: [PATCH 1/5] integrate cursor-cli agent

Signed-off-by: Rahul Shetty <rashetty@redhat.com>
---
 src/agentready/cli/benchmark.py               | 23 ++++++--
 .../services/eval_harness/harbor_config.py    | 10 ++++
 .../services/eval_harness/tbench_runner.py    | 59 ++++++++++++-------
 3 files changed, 66 insertions(+), 26 deletions(-)

diff --git a/src/agentready/cli/benchmark.py b/src/agentready/cli/benchmark.py
index b74cea14..c41f29ab 100644
--- a/src/agentready/cli/benchmark.py
+++ b/src/agentready/cli/benchmark.py
@@ -27,6 +27,12 @@
     default=None,
     help="Benchmark subset (tbench: smoketest/full)",
 )
+@click.option(
+    "--agent",
+    type=click.Choice(["claude-code", "cursor-cli"]),
+    default="claude-code",
+    help="Agent for evaluation",
+)
 @click.option(
     "--model",
     type=click.Choice(["claude-haiku-4-5", "claude-sonnet-4-5"]),
@@ -53,7 +59,7 @@
     help="Skip dependency checks (for advanced users)",
 )
 def benchmark(
-    repository, harness, subset, model, verbose, timeout, output_dir, skip_preflight
+    repository, harness, subset, agent, model, verbose, timeout, output_dir, skip_preflight
 ):
     """Run agent coding benchmarks.
 
@@ -81,14 +87,14 @@ def benchmark(
     # Route to appropriate harness
     if harness == "tbench":
         _run_tbench(
-            repo_path, subset, model, verbose, timeout, output_dir, skip_preflight
+            repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight
         )
     else:
         click.echo(f"Unknown harness: {harness}", err=True)
         raise click.Abort()
 
 
-def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_preflight):
+def _run_tbench(repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight):
     """Run Terminal-Bench evaluation."""
     # Default subset to 'full' if not specified
     if subset is None:
@@ -107,6 +113,7 @@ def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_pre
         click.echo("AgentReady Terminal-Bench Benchmark")
         click.echo(f"{'=' * 50}\n")
         click.echo(f"Repository: {repo_path}")
+        click.echo(f"Agent: {agent}")
         click.echo(f"Model: {model}")
         click.echo(f"Subset: {subset} ({'1-2 tasks' if smoketest else '89 tasks'})")
         click.echo(f"Timeout: {timeout}s\n")
@@ -135,7 +142,11 @@ def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_pre
             raise click.Abort()
 
     # Validate API key BEFORE creating HarborConfig
-    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
+    if agent == "claude-code":
+        api_key = os.environ.get("ANTHROPIC_API_KEY", "")
+    elif agent == "cursor-cli":
+        api_key = os.environ.get("CURSOR_API_KEY", "")
+
     if not api_key:
         click.echo(
             "Error: ANTHROPIC_API_KEY environment variable not set.\n"
@@ -146,8 +157,8 @@ def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_pre
 
     # Create HarborConfig (will not raise ValueError now)
     harbor_config = HarborConfig(
-        model=f"anthropic/{model}",
-        agent="claude-code",
+        model=model,
+        agent=agent,
         jobs_dir=Path(tempfile.mkdtemp()),
         api_key=api_key,
         timeout=timeout,
diff --git a/src/agentready/services/eval_harness/harbor_config.py b/src/agentready/services/eval_harness/harbor_config.py
index 3befc010..4c422cd7 100644
--- a/src/agentready/services/eval_harness/harbor_config.py
+++ b/src/agentready/services/eval_harness/harbor_config.py
@@ -12,11 +12,21 @@
 ALLOWED_MODELS = {
     "anthropic/claude-haiku-4-5",
     "anthropic/claude-sonnet-4-5",
+    "cursor/composer-1",
+    "cursor/gpt-5.2-codex",
+    "cursor/gpt-5.2-codex-fast",
+    "cursor/gemini-3-pro",
+    "cursor/opus-4.5",
+    "cursor/sonnet-4.5",
+    "cursor/sonnet-4.5-thinking",
+    "cursor/gpt-5.1-high",
+    "cursor/gemini-3-flash",
 }
 
 # Allowed agents (excludes oracle as it's not relevant for real-world assessment)
 ALLOWED_AGENTS = {
     "claude-code",
+    "cursor-cli",
 }
 
 
diff --git a/src/agentready/services/eval_harness/tbench_runner.py b/src/agentready/services/eval_harness/tbench_runner.py
index 11d1c513..0e31f781 100644
--- a/src/agentready/services/eval_harness/tbench_runner.py
+++ b/src/agentready/services/eval_harness/tbench_runner.py
@@ -125,31 +125,50 @@ def _real_tbench_result(repo_path: Path, config: HarborConfig) -> TbenchResult:
     # Pass through current environment but ensure API key is set
     # Harbor's claude-code agent has MiniMax API hardcoded - override it
     clean_env = os.environ.copy()
-    clean_env["ANTHROPIC_API_KEY"] = config.api_key
-    clean_env["ANTHROPIC_AUTH_TOKEN"] = config.api_key  # Harbor uses this
-    clean_env["ANTHROPIC_BASE_URL"] = "https://api.anthropic.com"  # Override MiniMax
-    clean_env["ANTHROPIC_API_BASE"] = "https://api.anthropic.com"  # Alternative var
+
+    # Define agent-specific environment variable configurations
+    # Structure: (Env Key, Env Value, Is Sensitive)
+    agent_env_configs = {
+        "claude-code": [
+            ("ANTHROPIC_API_KEY", config.api_key, True),
+            ("ANTHROPIC_AUTH_TOKEN", config.api_key, True),
+            ("ANTHROPIC_BASE_URL", "https://api.anthropic.com", False),
+            ("ANTHROPIC_API_BASE", "https://api.anthropic.com", False),
+        ],
+        "cursor-cli": [
+            ("CURSOR_API_KEY", config.api_key, True),
+        ],
+    }
+
+    if config.agent not in agent_env_configs:
+        raise ValueError(f"Invalid agent: {config.agent}")
+
+    # Set environment variables and build display/copyable lists
+    env_vars_display = []
+    env_vars_copyable = []
+
+    for var_name, var_value, is_sensitive in agent_env_configs[config.agent]:
+        clean_env[var_name] = var_value
+
+        # Build display string (truncate sensitive values)
+        if is_sensitive:
+            display_value = f"{var_value[:20]}..."
+        else:
+            display_value = var_value
+        env_vars_display.append(f"{var_name}={display_value}")
+
+        # Build copyable string (use variable reference for sensitive values)
+        if is_sensitive:
+            copyable_value = f"${var_name}"
+        else:
+            copyable_value = var_value
+        env_vars_copyable.append(f"{var_name}={copyable_value}")
+
     # Clear MiniMax settings if present
     clean_env.pop("MINIMAX_API_KEY", None)
 
     # Print Harbor command for debugging and manual execution
     shell_cmd = " ".join(shlex.quote(arg) for arg in cmd)
-
-    # Prepare environment variable strings (truncate API key for security in display)
-    env_vars_display = [
-        f"ANTHROPIC_API_KEY={config.api_key[:20]}...",  # Truncated for display
-        f"ANTHROPIC_AUTH_TOKEN={config.api_key[:20]}...",
-        f"ANTHROPIC_BASE_URL={clean_env['ANTHROPIC_BASE_URL']}",
-        f"ANTHROPIC_API_BASE={clean_env['ANTHROPIC_API_BASE']}",
-    ]
-
-    # Full command for copy/paste (use $ANTHROPIC_API_KEY to avoid exposing key)
-    env_vars_copyable = [
-        "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY",
-        "ANTHROPIC_AUTH_TOKEN=$ANTHROPIC_API_KEY",
-        f"ANTHROPIC_BASE_URL={clean_env['ANTHROPIC_BASE_URL']}",
-        f"ANTHROPIC_API_BASE={clean_env['ANTHROPIC_API_BASE']}",
-    ]
     full_cmd_copyable = " ".join(env_vars_copyable) + " " + shell_cmd
 
     print(f"\n{'=' * 70}")

From a32ba5d0c7cc946328cf0b8f78269366d1d14b00 Mon Sep 17 00:00:00 2001
From: Rahul Shetty <rashetty@redhat.com>
Date: Thu, 29 Jan 2026 21:20:07 +0530
Subject: [PATCH 2/5] derive --model choices from ALLOWED_MODELS in benchmark command

Signed-off-by: Rahul Shetty <rashetty@redhat.com>
---
 src/agentready/cli/benchmark.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/agentready/cli/benchmark.py b/src/agentready/cli/benchmark.py
index c41f29ab..9b9cc132 100644
--- a/src/agentready/cli/benchmark.py
+++ b/src/agentready/cli/benchmark.py
@@ -7,7 +7,7 @@
 
 import click
 
-from ..services.eval_harness.harbor_config import HarborConfig
+from ..services.eval_harness.harbor_config import ALLOWED_MODELS, HarborConfig
 from ..services.eval_harness.tbench_runner import _real_tbench_result
 from ..services.harbor.agent_toggler import AssessorStateToggler
 from ..services.harbor.comparer import compare_assessor_impact
@@ -35,7 +35,7 @@
 )
 @click.option(
     "--model",
-    type=click.Choice(["claude-haiku-4-5", "claude-sonnet-4-5"]),
+    type=click.Choice(list(ALLOWED_MODELS)),
     default="claude-haiku-4-5",
     help="Model for evaluation",
 )

From 867ae6348ac912b655f4685987f9f0d20dc15518 Mon Sep 17 00:00:00 2001
From: Rahul Shetty <rashetty@redhat.com>
Date: Thu, 5 Feb 2026 12:39:02 +0530
Subject: [PATCH 3/5] fix benchmark default model and API key error message; update tests

Signed-off-by: Rahul Shetty <rashetty@redhat.com>
---
 src/agentready/cli/benchmark.py               |  7 +-
 .../services/eval_harness/harbor_config.py    |  3 +
 tests/unit/test_cli_benchmark.py              | 81 +++++++++++++++----
 tests/unit/test_harbor_config.py              | 36 +++++++++
 4 files changed, 110 insertions(+), 17 deletions(-)

diff --git a/src/agentready/cli/benchmark.py b/src/agentready/cli/benchmark.py
index 9b9cc132..d964a747 100644
--- a/src/agentready/cli/benchmark.py
+++ b/src/agentready/cli/benchmark.py
@@ -36,7 +36,7 @@
 @click.option(
     "--model",
     type=click.Choice(list(ALLOWED_MODELS)),
-    default="claude-haiku-4-5",
+    default="anthropic/claude-haiku-4-5",
     help="Model for evaluation",
 )
 @click.option("--verbose", "-v", is_flag=True, help="Enable verbose output")
@@ -148,9 +148,10 @@ def _run_tbench(repo_path, subset, agent, model, verbose, timeout, output_dir, s
         api_key = os.environ.get("CURSOR_API_KEY", "")
 
     if not api_key:
+        key_name = "ANTHROPIC_API_KEY" if agent == "claude-code" else "CURSOR_API_KEY"
         click.echo(
-            "Error: ANTHROPIC_API_KEY environment variable not set.\n"
-            "Set it with: export ANTHROPIC_API_KEY=your-key-here",
+            f"Error: {key_name} environment variable not set.\n"
+            f"Set it with: export {key_name}=your-key-here",
             err=True,
         )
         raise click.Abort()
diff --git a/src/agentready/services/eval_harness/harbor_config.py b/src/agentready/services/eval_harness/harbor_config.py
index 4c422cd7..2753dd91 100644
--- a/src/agentready/services/eval_harness/harbor_config.py
+++ b/src/agentready/services/eval_harness/harbor_config.py
@@ -9,6 +9,8 @@
 from typing import Optional
 
 # Allowed models (excludes opus due to cost)
+# Anthropic models: https://platform.claude.com/docs/en/about-claude/models/overview
+# Cursor models: https://cursor.com/docs/models (NOTE: cursor/opus-4.5 is allowed here despite the opus exclusion)
 ALLOWED_MODELS = {
     "anthropic/claude-haiku-4-5",
     "anthropic/claude-sonnet-4-5",
@@ -24,6 +26,7 @@
 }
 
 # Allowed agents (excludes oracle as it's not relevant for real-world assessment)
+# Harbor supported agents: https://github.com/laude-institute/harbor/blob/main/src/harbor/agents/factory.py
 ALLOWED_AGENTS = {
     "claude-code",
     "cursor-cli",
diff --git a/tests/unit/test_cli_benchmark.py b/tests/unit/test_cli_benchmark.py
index 7797c5c2..55b89717 100644
--- a/tests/unit/test_cli_benchmark.py
+++ b/tests/unit/test_cli_benchmark.py
@@ -161,8 +161,8 @@ def test_benchmark_with_verbose_flag(self, mock_run, runner, temp_repo):
         )
 
         assert result.exit_code == 0
-        # Verbose flag passed to _run_tbench
-        _, _, _, verbose, _, _, _ = mock_run.call_args[0]
+        # Verbose flag passed to _run_tbench (repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight)
+        _, _, _, _, verbose, _, _, _ = mock_run.call_args[0]
         assert verbose is True
 
     @patch("agentready.cli.benchmark._run_tbench")
@@ -174,7 +174,7 @@ def test_benchmark_with_custom_timeout(self, mock_run, runner, temp_repo):
         )
 
         assert result.exit_code == 0
-        _, _, _, _, timeout, _, _ = mock_run.call_args[0]
+        _, _, _, _, _, timeout, _, _ = mock_run.call_args[0]
         assert timeout == 7200
 
     @patch("agentready.cli.benchmark._run_tbench")
@@ -192,7 +192,7 @@ def test_benchmark_with_output_dir(self, mock_run, runner, temp_repo):
         )
 
         assert result.exit_code == 0
-        _, _, _, _, _, output_dir, _ = mock_run.call_args[0]
+        _, _, _, _, _, _, output_dir, _ = mock_run.call_args[0]
         assert output_dir == "/custom/output"
 
     @patch("agentready.cli.benchmark._run_tbench")
@@ -204,7 +204,7 @@ def test_benchmark_skip_preflight(self, mock_run, runner, temp_repo):
         )
 
         assert result.exit_code == 0
-        _, _, _, _, _, _, skip_preflight = mock_run.call_args[0]
+        _, _, _, _, _, _, _, skip_preflight = mock_run.call_args[0]
         assert skip_preflight is True
 
     def test_benchmark_unknown_harness(self, runner, temp_repo):
@@ -225,15 +225,62 @@ def test_benchmark_with_model_selection(self, mock_run, runner, temp_repo):
             [
                 str(temp_repo),
                 "--model",
-                "claude-sonnet-4-5",
+                "anthropic/claude-sonnet-4-5",
                 "--subset",
                 "smoketest",
             ],
         )
 
         assert result.exit_code == 0
-        _, _, model, _, _, _, _ = mock_run.call_args[0]
-        assert model == "claude-sonnet-4-5"
+        _, _, _, model, _, _, _, _ = mock_run.call_args[0]
+        assert model == "anthropic/claude-sonnet-4-5"
+
+    @patch.dict("os.environ", {}, clear=True)
+    def test_benchmark_cursor_cli_agent_requires_cursor_api_key(
+        self, runner, temp_repo
+    ):
+        """Test that cursor-cli agent requires CURSOR_API_KEY."""
+        result = runner.invoke(
+            benchmark,
+            [
+                str(temp_repo),
+                "--agent",
+                "cursor-cli",
+                "--model",
+                "cursor/sonnet-4.5",
+                "--subset",
+                "smoketest",
+                "--skip-preflight",
+            ],
+        )
+
+        assert result.exit_code != 0
+        assert "CURSOR_API_KEY" in result.output
+
+    @patch("agentready.cli.benchmark._run_tbench")
+    @patch.dict("os.environ", {"CURSOR_API_KEY": "test-cursor-key"})
+    def test_benchmark_cursor_cli_with_valid_cursor_model(
+        self, mock_run, runner, temp_repo
+    ):
+        """Test cursor-cli works with cursor/ prefixed models."""
+        result = runner.invoke(
+            benchmark,
+            [
+                str(temp_repo),
+                "--agent",
+                "cursor-cli",
+                "--model",
+                "cursor/sonnet-4.5",
+                "--subset",
+                "smoketest",
+            ],
+        )
+
+        assert result.exit_code == 0
+        mock_run.assert_called_once()
+        _, _, agent, model, _, _, _, _ = mock_run.call_args[0]
+        assert agent == "cursor-cli"
+        assert model == "cursor/sonnet-4.5"
 
 
 class TestRunTbench:
@@ -253,7 +300,8 @@ def test_run_tbench_smoketest(self, mock_result, tmp_path, mock_tbench_result):
         _run_tbench(
             repo_path=repo_path,
             subset="smoketest",
-            model="claude-haiku-4-5",
+            agent="claude-code",
+            model="anthropic/claude-haiku-4-5",
             verbose=False,
             timeout=3600,
             output_dir=None,
@@ -275,7 +323,8 @@ def test_run_tbench_full_subset(self, mock_result, tmp_path, mock_tbench_result)
         _run_tbench(
             repo_path=repo_path,
             subset="full",
-            model="claude-haiku-4-5",
+            agent="claude-code",
+            model="anthropic/claude-haiku-4-5",
             verbose=False,
             timeout=3600,
             output_dir=None,
@@ -295,7 +344,8 @@ def test_run_tbench_invalid_subset(self, mock_abort, mock_echo, tmp_path):
             _run_tbench(
                 repo_path=repo_path,
                 subset="invalid",
-                model="claude-haiku-4-5",
+                agent="claude-code",
+                model="anthropic/claude-haiku-4-5",
                 verbose=False,
                 timeout=3600,
                 output_dir=None,
@@ -314,7 +364,8 @@ def test_run_tbench_missing_api_key(self, mock_abort, mock_echo, tmp_path):
             _run_tbench(
                 repo_path=repo_path,
                 subset="smoketest",
-                model="claude-haiku-4-5",
+                agent="claude-code",
+                model="anthropic/claude-haiku-4-5",
                 verbose=False,
                 timeout=3600,
                 output_dir=None,
@@ -335,7 +386,8 @@ def test_run_tbench_defaults_to_full(
         _run_tbench(
             repo_path=repo_path,
             subset=None,  # Should default to 'full'
-            model="claude-haiku-4-5",
+            agent="claude-code",
+            model="anthropic/claude-haiku-4-5",
             verbose=False,
             timeout=3600,
             output_dir=None,
@@ -361,7 +413,8 @@ def test_run_tbench_exception_handling(self, mock_echo, mock_result, tmp_path):
             _run_tbench(
                 repo_path=repo_path,
                 subset="smoketest",
-                model="claude-haiku-4-5",
+                agent="claude-code",
+                model="anthropic/claude-haiku-4-5",
                 verbose=False,
                 timeout=3600,
                 output_dir=None,
diff --git a/tests/unit/test_harbor_config.py b/tests/unit/test_harbor_config.py
index 58f22f04..7f54d1c2 100644
--- a/tests/unit/test_harbor_config.py
+++ b/tests/unit/test_harbor_config.py
@@ -222,3 +222,39 @@ def test_allowed_models_is_set(self):
     def test_allowed_agents_is_set(self):
         """Test that ALLOWED_AGENTS is a set (not list)"""
         assert isinstance(ALLOWED_AGENTS, set)
+
+
+class TestHarborConfigCursorModels:
+    """Test cursor/* model acceptance"""
+
+    def test_harbor_config_cursor_models_accepted(self):
+        """Test that cursor/* models are accepted"""
+        config = HarborConfig(
+            model="cursor/sonnet-4.5",
+            agent="cursor-cli",
+            jobs_dir=Path("/tmp/test"),
+            api_key="test-key",
+        )
+        assert config.model == "cursor/sonnet-4.5"
+
+        config_gemini = HarborConfig(
+            model="cursor/gemini-3-pro",
+            agent="cursor-cli",
+            jobs_dir=Path("/tmp/test"),
+            api_key="test-key",
+        )
+        assert config_gemini.model == "cursor/gemini-3-pro"
+
+
+class TestHarborConfigCursorAgent:
+    """Test cursor-cli agent acceptance"""
+
+    def test_harbor_config_cursor_agent_accepted(self):
+        """Test that cursor-cli agent is accepted"""
+        config = HarborConfig(
+            model="anthropic/claude-haiku-4-5",
+            agent="cursor-cli",
+            jobs_dir=Path("/tmp/test"),
+            api_key="test-key",
+        )
+        assert config.agent == "cursor-cli"

From b644a35232ac0ad53d98e8793e0cc02f2fc9a6d5 Mon Sep 17 00:00:00 2001
From: Rahul Shetty <rashetty@redhat.com>
Date: Thu, 5 Feb 2026 12:47:04 +0530
Subject: [PATCH 4/5] format code (wrap long signatures, collapse brackets around multi-line strings)

Signed-off-by: Rahul Shetty <rashetty@redhat.com>
---
 src/agentready/assessors/documentation.py   |  6 +--
 src/agentready/assessors/testing.py         |  6 +--
 src/agentready/cli/benchmark.py             | 23 ++++++++--
 src/agentready/services/assessment_cache.py | 18 +++-----
 tests/e2e/test_critical_paths.py            |  6 +--
 tests/e2e/test_critical_paths_simplified.py |  6 +--
 tests/unit/cli/test_main.py                 |  6 +--
 tests/unit/test_assessors_code_quality.py   | 48 +++++++--------------
 tests/unit/test_assessors_containers.py     | 24 ++++-------
 tests/unit/test_assessors_security.py       | 24 ++++-------
 tests/unit/test_assessors_stub.py           | 42 ++++++------------
 11 files changed, 82 insertions(+), 127 deletions(-)

diff --git a/src/agentready/assessors/documentation.py b/src/agentready/assessors/documentation.py
index 95e25e9d..4c4125c8 100644
--- a/src/agentready/assessors/documentation.py
+++ b/src/agentready/assessors/documentation.py
@@ -453,8 +453,7 @@ def _create_remediation(self) -> Remediation:
             ],
             tools=[],
             commands=[],
-            examples=[
-                """# Project Name
+            examples=["""# Project Name
 
 ## Overview
 What this project does and why it exists.
@@ -477,8 +476,7 @@ def _create_remediation(self) -> Remediation:
 # Format code
 black .
 ```
-"""
-            ],
+"""],
             citations=[
                 Citation(
                     source="GitHub",
diff --git a/src/agentready/assessors/testing.py b/src/agentready/assessors/testing.py
index 3ba3b2ba..09eb31b6 100644
--- a/src/agentready/assessors/testing.py
+++ b/src/agentready/assessors/testing.py
@@ -286,8 +286,7 @@ def _create_remediation(self) -> Remediation:
                 "pre-commit install",
                 "pre-commit run --all-files",
             ],
-            examples=[
-                """# .pre-commit-config.yaml
+            examples=["""# .pre-commit-config.yaml
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.4.0
@@ -306,8 +305,7 @@ def _create_remediation(self) -> Remediation:
     rev: 5.12.0
     hooks:
       - id: isort
-"""
-            ],
+"""],
             citations=[
                 Citation(
                     source="pre-commit.com",
diff --git a/src/agentready/cli/benchmark.py b/src/agentready/cli/benchmark.py
index d964a747..09be4232 100644
--- a/src/agentready/cli/benchmark.py
+++ b/src/agentready/cli/benchmark.py
@@ -59,7 +59,15 @@
     help="Skip dependency checks (for advanced users)",
 )
 def benchmark(
-    repository, harness, subset, agent, model, verbose, timeout, output_dir, skip_preflight
+    repository,
+    harness,
+    subset,
+    agent,
+    model,
+    verbose,
+    timeout,
+    output_dir,
+    skip_preflight,
 ):
     """Run agent coding benchmarks.
 
@@ -87,14 +95,23 @@ def benchmark(
     # Route to appropriate harness
     if harness == "tbench":
         _run_tbench(
-            repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight
+            repo_path,
+            subset,
+            agent,
+            model,
+            verbose,
+            timeout,
+            output_dir,
+            skip_preflight,
         )
     else:
         click.echo(f"Unknown harness: {harness}", err=True)
         raise click.Abort()
 
 
-def _run_tbench(repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight):
+def _run_tbench(
+    repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight
+):
     """Run Terminal-Bench evaluation."""
     # Default subset to 'full' if not specified
     if subset is None:
diff --git a/src/agentready/services/assessment_cache.py b/src/agentready/services/assessment_cache.py
index 886787c3..d820e9e6 100644
--- a/src/agentready/services/assessment_cache.py
+++ b/src/agentready/services/assessment_cache.py
@@ -33,8 +33,7 @@ def _initialize_db(self) -> None:
         """Initialize database schema."""
         try:
             with sqlite3.connect(self.db_path) as conn:
-                conn.execute(
-                    """
+                conn.execute("""
                     CREATE TABLE IF NOT EXISTS assessments (
                         id INTEGER PRIMARY KEY AUTOINCREMENT,
                         repository_url TEXT NOT NULL,
@@ -45,23 +44,18 @@ def _initialize_db(self) -> None:
                         expires_at TIMESTAMP,
                         UNIQUE(repository_url, commit_hash)
                     )
-                    """
-                )
+                    """)
 
                 # Create index for faster queries
-                conn.execute(
-                    """
+                conn.execute("""
                     CREATE INDEX IF NOT EXISTS idx_repo_commit
                     ON assessments(repository_url, commit_hash)
-                    """
-                )
+                    """)
 
-                conn.execute(
-                    """
+                conn.execute("""
                     CREATE INDEX IF NOT EXISTS idx_expires_at
                     ON assessments(expires_at)
-                    """
-                )
+                    """)
 
                 conn.commit()
         except sqlite3.Error as e:
diff --git a/tests/e2e/test_critical_paths.py b/tests/e2e/test_critical_paths.py
index ad49a76d..ae94e278 100644
--- a/tests/e2e/test_critical_paths.py
+++ b/tests/e2e/test_critical_paths.py
@@ -276,14 +276,12 @@ def test_assess_with_valid_config(self):
         with tempfile.TemporaryDirectory() as tmp_dir:
             # Create valid config file
             config_file = Path(tmp_dir) / "config.yaml"
-            config_file.write_text(
-                """
+            config_file.write_text("""
 weights:
   claude_md: 2.0
 excluded_attributes:
   - repomix_config
-"""
-            )
+""")
 
             output_dir = Path(tmp_dir) / "output"
 
diff --git a/tests/e2e/test_critical_paths_simplified.py b/tests/e2e/test_critical_paths_simplified.py
index c0cdca8d..3dced950 100644
--- a/tests/e2e/test_critical_paths_simplified.py
+++ b/tests/e2e/test_critical_paths_simplified.py
@@ -219,14 +219,12 @@ def test_valid_config_application(self, temp_output_dir):
         with tempfile.TemporaryDirectory() as tmp_dir:
             # Create valid config
             config_file = Path(tmp_dir) / "config.yaml"
-            config_file.write_text(
-                """
+            config_file.write_text("""
 weights:
   claude_md: 2.0
 excluded_attributes:
   - repomix_config
-"""
-            )
+""")
 
             # Run assessment with config
             result = helper.run_assessment(
diff --git a/tests/unit/cli/test_main.py b/tests/unit/cli/test_main.py
index 3398ee38..2ffb9160 100644
--- a/tests/unit/cli/test_main.py
+++ b/tests/unit/cli/test_main.py
@@ -355,14 +355,12 @@ class TestConfigLoading:
     def test_load_config_valid_yaml(self, tmp_path):
         """Test loading valid config file."""
         config_file = tmp_path / "config.yaml"
-        config_file.write_text(
-            """
+        config_file.write_text("""
 weights:
   claude_md_file: 2.0
 excluded_attributes:
   - test_attribute
-"""
-        )
+""")
 
         config = load_config(config_file)
 
diff --git a/tests/unit/test_assessors_code_quality.py b/tests/unit/test_assessors_code_quality.py
index aec492bc..149e9efa 100644
--- a/tests/unit/test_assessors_code_quality.py
+++ b/tests/unit/test_assessors_code_quality.py
@@ -69,14 +69,12 @@ def test_python_pylint_configured(self, tmp_path):
 
         # Create .pylintrc
         pylintrc = tmp_path / ".pylintrc"
-        pylintrc.write_text(
-            """[MASTER]
+        pylintrc.write_text("""[MASTER]
 max-line-length=100
 
 [MESSAGES CONTROL]
 disable=C0111
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -105,11 +103,9 @@ def test_python_ruff_configured(self, tmp_path):
 
         # Create ruff.toml
         ruff_toml = tmp_path / "ruff.toml"
-        ruff_toml.write_text(
-            """line-length = 100
+        ruff_toml.write_text("""line-length = 100
 select = ["E", "F", "W"]
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -135,14 +131,12 @@ def test_python_pyproject_toml(self, tmp_path):
 
         # Create pyproject.toml with both tools
         pyproject = tmp_path / "pyproject.toml"
-        pyproject.write_text(
-            """[tool.pylint]
+        pyproject.write_text("""[tool.pylint]
 max-line-length = 100
 
 [tool.ruff]
 line-length = 100
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -171,15 +165,13 @@ def test_javascript_eslint_configured(self, tmp_path):
 
         # Create .eslintrc.json
         eslintrc = tmp_path / ".eslintrc.json"
-        eslintrc.write_text(
-            """{
+        eslintrc.write_text("""{
   "extends": "eslint:recommended",
   "rules": {
     "no-console": "warn"
   }
 }
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -234,14 +226,12 @@ def test_ruby_rubocop_configured(self, tmp_path):
 
         # Create .rubocop.yml
         rubocop = tmp_path / ".rubocop.yml"
-        rubocop.write_text(
-            """AllCops:
+        rubocop.write_text("""AllCops:
   TargetRubyVersion: 3.0
 
 Style/StringLiterals:
   EnforcedStyle: double_quotes
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -269,14 +259,12 @@ def test_go_golangci_lint_configured(self, tmp_path):
 
         # Create .golangci.yml
         golangci = tmp_path / ".golangci.yml"
-        golangci.write_text(
-            """linters:
+        golangci.write_text("""linters:
   enable:
     - gofmt
     - golint
     - govet
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -311,14 +299,12 @@ def test_actionlint_in_precommit(self, tmp_path):
 
         # Create .pre-commit-config.yaml with actionlint
         precommit = tmp_path / ".pre-commit-config.yaml"
-        precommit.write_text(
-            """repos:
+        precommit.write_text("""repos:
   - repo: https://github.com/rhysd/actionlint
     rev: v1.6.0
     hooks:
       - id: actionlint
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -344,13 +330,11 @@ def test_markdownlint_configured(self, tmp_path):
 
         # Create .markdownlint.json
         markdownlint = tmp_path / ".markdownlint.json"
-        markdownlint.write_text(
-            """{
+        markdownlint.write_text("""{
   "default": true,
   "MD013": false
 }
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
diff --git a/tests/unit/test_assessors_containers.py b/tests/unit/test_assessors_containers.py
index 15af3d87..7b159c3a 100644
--- a/tests/unit/test_assessors_containers.py
+++ b/tests/unit/test_assessors_containers.py
@@ -98,8 +98,7 @@ def test_multi_stage_build(self, tmp_path):
 
         # Create multi-stage Dockerfile
         dockerfile = tmp_path / "Dockerfile"
-        dockerfile.write_text(
-            """FROM node:18 AS builder
+        dockerfile.write_text("""FROM node:18 AS builder
 WORKDIR /app
 COPY . .
 RUN npm ci && npm run build
@@ -108,8 +107,7 @@ def test_multi_stage_build(self, tmp_path):
 WORKDIR /app
 COPY --from=builder /app/dist ./dist
 CMD ["node", "dist/index.js"]
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -138,15 +136,13 @@ def test_docker_compose(self, tmp_path):
 
         # Create docker-compose.yml
         compose = tmp_path / "docker-compose.yml"
-        compose.write_text(
-            """version: '3.8'
+        compose.write_text("""version: '3.8'
 services:
   app:
     build: .
     ports:
       - "8000:8000"
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -177,15 +173,13 @@ def test_dockerignore_file(self, tmp_path):
 
         # Create .dockerignore
         dockerignore = tmp_path / ".dockerignore"
-        dockerignore.write_text(
-            """.git
+        dockerignore.write_text(""".git
 .venv
 __pycache__
 *.pyc
 .env
 node_modules
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -239,14 +233,12 @@ def test_comprehensive_container_setup(self, tmp_path):
         subprocess.run(["git", "init"], cwd=tmp_path, capture_output=True, check=True)
 
         # Multi-stage Dockerfile
-        (tmp_path / "Dockerfile").write_text(
-            """FROM python:3.12 AS builder
+        (tmp_path / "Dockerfile").write_text("""FROM python:3.12 AS builder
 RUN pip install build
 
 FROM python:3.12-slim
 COPY --from=builder /app /app
-"""
-        )
+""")
 
         # docker-compose.yml
         (tmp_path / "docker-compose.yml").write_text(
diff --git a/tests/unit/test_assessors_security.py b/tests/unit/test_assessors_security.py
index 975bd7fa..2ced2f62 100644
--- a/tests/unit/test_assessors_security.py
+++ b/tests/unit/test_assessors_security.py
@@ -45,15 +45,13 @@ def test_dependabot_configured(self, tmp_path):
         github_dir = tmp_path / ".github"
         github_dir.mkdir()
         dependabot_file = github_dir / "dependabot.yml"
-        dependabot_file.write_text(
-            """version: 2
+        dependabot_file.write_text("""version: 2
 updates:
   - package-ecosystem: pip
     directory: /
     schedule:
       interval: weekly
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -109,12 +107,10 @@ def test_python_security_tools(self, tmp_path):
 
         # Create pyproject.toml with security tools
         pyproject = tmp_path / "pyproject.toml"
-        pyproject.write_text(
-            """[tool.poetry.dev-dependencies]
+        pyproject.write_text("""[tool.poetry.dev-dependencies]
 pip-audit = "^2.0.0"
 bandit = "^1.7.0"
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -143,14 +139,12 @@ def test_secret_detection(self, tmp_path):
 
         # Create .pre-commit-config.yaml with detect-secrets
         precommit = tmp_path / ".pre-commit-config.yaml"
-        precommit.write_text(
-            """repos:
+        precommit.write_text("""repos:
   - repo: https://github.com/Yelp/detect-secrets
     rev: v1.4.0
     hooks:
       - id: detect-secrets
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -261,8 +255,7 @@ def test_javascript_security_tools(self, tmp_path):
 
         # Create package.json with audit script
         package_json = tmp_path / "package.json"
-        package_json.write_text(
-            """{
+        package_json.write_text("""{
   "scripts": {
     "audit": "npm audit",
     "test": "jest"
@@ -271,8 +264,7 @@ def test_javascript_security_tools(self, tmp_path):
     "snyk": "^1.0.0"
   }
 }
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
diff --git a/tests/unit/test_assessors_stub.py b/tests/unit/test_assessors_stub.py
index 7decdf1e..2a274e62 100644
--- a/tests/unit/test_assessors_stub.py
+++ b/tests/unit/test_assessors_stub.py
@@ -102,12 +102,10 @@ def test_requirements_txt_all_pinned(self, tmp_path):
 
         # Create requirements.txt with exact versions
         requirements = tmp_path / "requirements.txt"
-        requirements.write_text(
-            """requests==2.28.1
+        requirements.write_text("""requests==2.28.1
 flask==2.3.0
 pytest==7.4.0
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -134,13 +132,11 @@ def test_requirements_txt_unpinned_dependencies(self, tmp_path):
 
         # Create requirements.txt with mix of pinned and unpinned
         requirements = tmp_path / "requirements.txt"
-        requirements.write_text(
-            """requests==2.28.1
+        requirements.write_text("""requests==2.28.1
 flask>=2.0.0
 pytest~=7.0
 numpy
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -296,8 +292,7 @@ def test_python_patterns(self, tmp_path):
 
         # Create .gitignore with Python patterns
         gitignore = tmp_path / ".gitignore"
-        gitignore.write_text(
-            """# Python
+        gitignore.write_text("""# Python
 __pycache__/
 *.py[cod]
 *.egg-info/
@@ -311,8 +306,7 @@ def test_python_patterns(self, tmp_path):
 .vscode/
 .idea/
 *.swp
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -340,8 +334,7 @@ def test_javascript_patterns(self, tmp_path):
 
         # Create .gitignore with JavaScript patterns
         gitignore = tmp_path / ".gitignore"
-        gitignore.write_text(
-            """# JavaScript
+        gitignore.write_text("""# JavaScript
 node_modules/
 dist/
 build/
@@ -351,8 +344,7 @@ def test_javascript_patterns(self, tmp_path):
 # General
 .DS_Store
 .vscode/
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -380,12 +372,10 @@ def test_missing_patterns(self, tmp_path):
 
         # Create .gitignore with only general patterns
         gitignore = tmp_path / ".gitignore"
-        gitignore.write_text(
-            """# General only
+        gitignore.write_text("""# General only
 .DS_Store
 .vscode/
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -414,8 +404,7 @@ def test_multi_language_patterns(self, tmp_path):
 
         # Create .gitignore with Python and JavaScript patterns
         gitignore = tmp_path / ".gitignore"
-        gitignore.write_text(
-            """# Python
+        gitignore.write_text("""# Python
 __pycache__/
 *.py[cod]
 *.egg-info/
@@ -435,8 +424,7 @@ def test_multi_language_patterns(self, tmp_path):
 .DS_Store
 .vscode/
 .idea/
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,
@@ -463,13 +451,11 @@ def test_pattern_with_trailing_slash(self, tmp_path):
 
         # Create .gitignore with mixed slash usage
         gitignore = tmp_path / ".gitignore"
-        gitignore.write_text(
-            """__pycache__
+        gitignore.write_text("""__pycache__
 venv
 .venv/
 .DS_Store
-"""
-        )
+""")
 
         repo = Repository(
             path=tmp_path,

From 3c872d85106ccdac8bf0228b6aa91d6d247212b7 Mon Sep 17 00:00:00 2001
From: Rahul Shetty <rashetty@redhat.com>
Date: Thu, 5 Feb 2026 13:37:33 +0530
Subject: [PATCH 5/5] remove sensitive env vars from printing

Signed-off-by: Rahul Shetty <rashetty@redhat.com>
---
 src/agentready/services/eval_harness/tbench_runner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/agentready/services/eval_harness/tbench_runner.py b/src/agentready/services/eval_harness/tbench_runner.py
index 0e31f781..429368e8 100644
--- a/src/agentready/services/eval_harness/tbench_runner.py
+++ b/src/agentready/services/eval_harness/tbench_runner.py
@@ -151,9 +151,9 @@ def _real_tbench_result(repo_path: Path, config: HarborConfig) -> TbenchResult:
         clean_env[var_name] = var_value
 
-        # Build display string (truncate sensitive values)
+        # Build display string; sensitive values are never printed, not even partially
         if is_sensitive:
-            display_value = f"{var_value[:20]}..."
+            display_value = "<redacted>"
         else:
             display_value = var_value
         env_vars_display.append(f"{var_name}={display_value}")