From 49b30cb9a4063ff2bdf8837690f1645f2b75deb5 Mon Sep 17 00:00:00 2001 From: Rahul Shetty Date: Thu, 29 Jan 2026 21:11:59 +0530 Subject: [PATCH 1/5] integrate cursor-cli agent Signed-off-by: Rahul Shetty --- src/agentready/cli/benchmark.py | 23 ++++++-- .../services/eval_harness/harbor_config.py | 10 ++++ .../services/eval_harness/tbench_runner.py | 59 ++++++++++++------- 3 files changed, 66 insertions(+), 26 deletions(-) diff --git a/src/agentready/cli/benchmark.py b/src/agentready/cli/benchmark.py index b74cea14..c41f29ab 100644 --- a/src/agentready/cli/benchmark.py +++ b/src/agentready/cli/benchmark.py @@ -27,6 +27,12 @@ default=None, help="Benchmark subset (tbench: smoketest/full)", ) +@click.option( + "--agent", + type=click.Choice(["claude-code", "cursor-cli"]), + default="claude-code", + help="Agent for evaluation", +) @click.option( "--model", type=click.Choice(["claude-haiku-4-5", "claude-sonnet-4-5"]), @@ -53,7 +59,7 @@ help="Skip dependency checks (for advanced users)", ) def benchmark( - repository, harness, subset, model, verbose, timeout, output_dir, skip_preflight + repository, harness, subset, agent, model, verbose, timeout, output_dir, skip_preflight ): """Run agent coding benchmarks. @@ -81,14 +87,14 @@ def benchmark( # Route to appropriate harness if harness == "tbench": _run_tbench( - repo_path, subset, model, verbose, timeout, output_dir, skip_preflight + repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight ) else: click.echo(f"Unknown harness: {harness}", err=True) raise click.Abort() -def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_preflight): +def _run_tbench(repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight): """Run Terminal-Bench evaluation.""" # Default subset to 'full' if not specified if subset is None: @@ -107,6 +113,7 @@ def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_pre click.echo("AgentReady Terminal-Bench Benchmark") click.echo(f"{'=' * 50}\n") click.echo(f"Repository: {repo_path}") + click.echo(f"Agent: {agent}") click.echo(f"Model: {model}") click.echo(f"Subset: {subset} ({'1-2 tasks' if smoketest else '89 tasks'})") click.echo(f"Timeout: {timeout}s\n") @@ -135,7 +142,11 @@ def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_pre raise click.Abort() # Validate API key BEFORE creating HarborConfig - api_key = os.environ.get("ANTHROPIC_API_KEY", "") + if agent == "claude-code": + api_key = os.environ.get("ANTHROPIC_API_KEY", "") + elif agent == "cursor-cli": + api_key = os.environ.get("CURSOR_API_KEY", "") + if not api_key: click.echo( "Error: ANTHROPIC_API_KEY environment variable not set.\n" @@ -146,8 +157,8 @@ def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_pre # Create HarborConfig (will not raise ValueError now) harbor_config = HarborConfig( - model=f"anthropic/{model}", - agent="claude-code", + model=model, + agent=agent, jobs_dir=Path(tempfile.mkdtemp()), api_key=api_key, timeout=timeout, diff --git a/src/agentready/services/eval_harness/harbor_config.py b/src/agentready/services/eval_harness/harbor_config.py index 3befc010..4c422cd7 100644 --- a/src/agentready/services/eval_harness/harbor_config.py +++ b/src/agentready/services/eval_harness/harbor_config.py @@ -12,11 +12,21 @@ ALLOWED_MODELS = { "anthropic/claude-haiku-4-5", "anthropic/claude-sonnet-4-5", + "cursor/composer-1", + "cursor/gpt-5.2-codex", + "cursor/gpt-5.2-codex-fast", + "cursor/gemini-3-pro", + "cursor/opus-4.5", + "cursor/sonnet-4.5", + "cursor/sonnet-4.5-thinking", + "cursor/gpt-5.1-high", + "cursor/gemini-3-flash", } # Allowed agents (excludes oracle as it's not relevant for real-world assessment) ALLOWED_AGENTS = { "claude-code", + "cursor-cli", } diff --git a/src/agentready/services/eval_harness/tbench_runner.py b/src/agentready/services/eval_harness/tbench_runner.py index 11d1c513..0e31f781 100644 --- a/src/agentready/services/eval_harness/tbench_runner.py +++ b/src/agentready/services/eval_harness/tbench_runner.py @@ -125,31 +125,50 @@ def _real_tbench_result(repo_path: Path, config: HarborConfig) -> TbenchResult: # Pass through current environment but ensure API key is set # Harbor's claude-code agent has MiniMax API hardcoded - override it clean_env = os.environ.copy() - clean_env["ANTHROPIC_API_KEY"] = config.api_key - clean_env["ANTHROPIC_AUTH_TOKEN"] = config.api_key # Harbor uses this - clean_env["ANTHROPIC_BASE_URL"] = "https://api.anthropic.com" # Override MiniMax - clean_env["ANTHROPIC_API_BASE"] = "https://api.anthropic.com" # Alternative var + + # Define agent-specific environment variable configurations + # Structure: (Env Key, Env Value, Is Sensitive) + agent_env_configs = { + "claude-code": [ + ("ANTHROPIC_API_KEY", config.api_key, True), + ("ANTHROPIC_AUTH_TOKEN", config.api_key, True), + ("ANTHROPIC_BASE_URL", "https://api.anthropic.com", False), + ("ANTHROPIC_API_BASE", "https://api.anthropic.com", False), + ], + "cursor-cli": [ + ("CURSOR_API_KEY", config.api_key, True), + ], + } + + if config.agent not in agent_env_configs: + raise ValueError(f"Invalid agent: {config.agent}") + + # Set environment variables and build display/copyable lists + env_vars_display = [] + env_vars_copyable = [] + + for var_name, var_value, is_sensitive in agent_env_configs[config.agent]: + clean_env[var_name] = var_value + + # Build display string (truncate sensitive values) + if is_sensitive: + display_value = f"{var_value[:20]}..." + else: + display_value = var_value + env_vars_display.append(f"{var_name}={display_value}") + + # Build copyable string (use variable reference for sensitive values) + if is_sensitive: + copyable_value = f"${var_name}" + else: + copyable_value = var_value + env_vars_copyable.append(f"{var_name}={copyable_value}") + # Clear MiniMax settings if present clean_env.pop("MINIMAX_API_KEY", None) # Print Harbor command for debugging and manual execution shell_cmd = " ".join(shlex.quote(arg) for arg in cmd) - - # Prepare environment variable strings (truncate API key for security in display) - env_vars_display = [ - f"ANTHROPIC_API_KEY={config.api_key[:20]}...", # Truncated for display - f"ANTHROPIC_AUTH_TOKEN={config.api_key[:20]}...", - f"ANTHROPIC_BASE_URL={clean_env['ANTHROPIC_BASE_URL']}", - f"ANTHROPIC_API_BASE={clean_env['ANTHROPIC_API_BASE']}", - ] - - # Full command for copy/paste (use $ANTHROPIC_API_KEY to avoid exposing key) - env_vars_copyable = [ - "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY", - "ANTHROPIC_AUTH_TOKEN=$ANTHROPIC_API_KEY", - f"ANTHROPIC_BASE_URL={clean_env['ANTHROPIC_BASE_URL']}", - f"ANTHROPIC_API_BASE={clean_env['ANTHROPIC_API_BASE']}", - ] full_cmd_copyable = " ".join(env_vars_copyable) + " " + shell_cmd print(f"\n{'=' * 70}") From a32ba5d0c7cc946328cf0b8f78269366d1d14b00 Mon Sep 17 00:00:00 2001 From: Rahul Shetty Date: Thu, 29 Jan 2026 21:20:07 +0530 Subject: [PATCH 2/5] update agent model list in benchmark command Signed-off-by: Rahul Shetty --- src/agentready/cli/benchmark.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/agentready/cli/benchmark.py b/src/agentready/cli/benchmark.py index c41f29ab..9b9cc132 100644 --- a/src/agentready/cli/benchmark.py +++ b/src/agentready/cli/benchmark.py @@ -7,7 +7,7 @@ import click -from ..services.eval_harness.harbor_config import HarborConfig +from ..services.eval_harness.harbor_config import ALLOWED_MODELS, HarborConfig from ..services.eval_harness.tbench_runner import _real_tbench_result from ..services.harbor.agent_toggler import AssessorStateToggler from ..services.harbor.comparer import compare_assessor_impact @@ -35,7 +35,7 @@ ) @click.option( "--model", - type=click.Choice(["claude-haiku-4-5", "claude-sonnet-4-5"]), + type=click.Choice(list(ALLOWED_MODELS)), default="claude-haiku-4-5", help="Model for evaluation", ) From 867ae6348ac912b655f4685987f9f0d20dc15518 Mon Sep 17 00:00:00 2001 From: Rahul Shetty Date: Thu, 5 Feb 2026 12:39:02 +0530 Subject: [PATCH 3/5] update tests for benchmark command Signed-off-by: Rahul Shetty --- src/agentready/cli/benchmark.py | 7 +- .../services/eval_harness/harbor_config.py | 3 + tests/unit/test_cli_benchmark.py | 81 +++++++++++++++---- tests/unit/test_harbor_config.py | 36 +++++++++ 4 files changed, 110 insertions(+), 17 deletions(-) diff --git a/src/agentready/cli/benchmark.py b/src/agentready/cli/benchmark.py index 9b9cc132..d964a747 100644 --- a/src/agentready/cli/benchmark.py +++ b/src/agentready/cli/benchmark.py @@ -36,7 +36,7 @@ @click.option( "--model", type=click.Choice(list(ALLOWED_MODELS)), - default="claude-haiku-4-5", + default="anthropic/claude-haiku-4-5", help="Model for evaluation", ) @click.option("--verbose", "-v", is_flag=True, help="Enable verbose output") @@ -148,9 +148,10 @@ def _run_tbench(repo_path, subset, agent, model, verbose, timeout, output_dir, s api_key = os.environ.get("CURSOR_API_KEY", "") if not api_key: + key_name = "ANTHROPIC_API_KEY" if agent == "claude-code" else "CURSOR_API_KEY" click.echo( - "Error: ANTHROPIC_API_KEY environment variable not set.\n" - "Set it with: export ANTHROPIC_API_KEY=your-key-here", + f"Error: {key_name} environment variable not set.\n" + f"Set it with: export {key_name}=your-key-here", err=True, ) raise click.Abort() diff --git a/src/agentready/services/eval_harness/harbor_config.py b/src/agentready/services/eval_harness/harbor_config.py index 4c422cd7..2753dd91 100644 --- a/src/agentready/services/eval_harness/harbor_config.py +++ b/src/agentready/services/eval_harness/harbor_config.py @@ -9,6 +9,8 @@ from typing import Optional # Allowed models (excludes opus due to cost) +# Anthropic models: https://platform.claude.com/docs/en/about-claude/models/overview +# Cursor models: https://cursor.com/docs/models ALLOWED_MODELS = { "anthropic/claude-haiku-4-5", "anthropic/claude-sonnet-4-5", @@ -24,6 +26,7 @@ } # Allowed agents (excludes oracle as it's not relevant for real-world assessment) +# Harbor supported agents: https://github.com/laude-institute/harbor/blob/main/src/harbor/agents/factory.py ALLOWED_AGENTS = { "claude-code", "cursor-cli", diff --git a/tests/unit/test_cli_benchmark.py b/tests/unit/test_cli_benchmark.py index 7797c5c2..55b89717 100644 --- a/tests/unit/test_cli_benchmark.py +++ b/tests/unit/test_cli_benchmark.py @@ -161,8 +161,8 @@ def test_benchmark_with_verbose_flag(self, mock_run, runner, temp_repo): ) assert result.exit_code == 0 - # Verbose flag passed to _run_tbench - _, _, _, verbose, _, _, _ = mock_run.call_args[0] + # Verbose flag passed to _run_tbench (repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight) + _, _, _, _, verbose, _, _, _ = mock_run.call_args[0] assert verbose is True @patch("agentready.cli.benchmark._run_tbench") @@ -174,7 +174,7 @@ def test_benchmark_with_custom_timeout(self, mock_run, runner, temp_repo): ) assert result.exit_code == 0 - _, _, _, _, timeout, _, _ = mock_run.call_args[0] + _, _, _, _, _, timeout, _, _ = mock_run.call_args[0] assert timeout == 7200 @patch("agentready.cli.benchmark._run_tbench") @@ -192,7 +192,7 @@ def test_benchmark_with_output_dir(self, mock_run, runner, temp_repo): ) assert result.exit_code == 0 - _, _, _, _, _, output_dir, _ = mock_run.call_args[0] + _, _, _, _, _, _, output_dir, _ = mock_run.call_args[0] assert output_dir == "/custom/output" @patch("agentready.cli.benchmark._run_tbench") @@ -204,7 +204,7 @@ def test_benchmark_skip_preflight(self, mock_run, runner, temp_repo): ) assert result.exit_code == 0 - _, _, _, _, _, _, skip_preflight = mock_run.call_args[0] + _, _, _, _, _, _, _, skip_preflight = mock_run.call_args[0] assert skip_preflight is True def test_benchmark_unknown_harness(self, runner, temp_repo): @@ -225,15 +225,62 @@ def test_benchmark_with_model_selection(self, mock_run, runner, temp_repo): [ str(temp_repo), "--model", - "claude-sonnet-4-5", + "anthropic/claude-sonnet-4-5", "--subset", "smoketest", ], ) assert result.exit_code == 0 - _, _, model, _, _, _, _ = mock_run.call_args[0] - assert model == "claude-sonnet-4-5" + _, _, _, model, _, _, _, _ = mock_run.call_args[0] + assert model == "anthropic/claude-sonnet-4-5" + + @patch.dict("os.environ", {}, clear=True) + def test_benchmark_cursor_cli_agent_requires_cursor_api_key( + self, runner, temp_repo + ): + """Test that cursor-cli agent requires CURSOR_API_KEY.""" + result = runner.invoke( + benchmark, + [ + str(temp_repo), + "--agent", + "cursor-cli", + "--model", + "cursor/sonnet-4.5", + "--subset", + "smoketest", + "--skip-preflight", + ], + ) + + assert result.exit_code != 0 + assert "CURSOR_API_KEY" in result.output + + @patch("agentready.cli.benchmark._run_tbench") + @patch.dict("os.environ", {"CURSOR_API_KEY": "test-cursor-key"}) + def test_benchmark_cursor_cli_with_valid_cursor_model( + self, mock_run, runner, temp_repo + ): + """Test cursor-cli works with cursor/ prefixed models.""" + result = runner.invoke( + benchmark, + [ + str(temp_repo), + "--agent", + "cursor-cli", + "--model", + "cursor/sonnet-4.5", + "--subset", + "smoketest", + ], + ) + + assert result.exit_code == 0 + mock_run.assert_called_once() + _, _, agent, model, _, _, _, _ = mock_run.call_args[0] + assert agent == "cursor-cli" + assert model == "cursor/sonnet-4.5" class TestRunTbench: @@ -253,7 +300,8 @@ def test_run_tbench_smoketest(self, mock_result, tmp_path, mock_tbench_result): _run_tbench( repo_path=repo_path, subset="smoketest", - model="claude-haiku-4-5", + agent="claude-code", + model="anthropic/claude-haiku-4-5", verbose=False, timeout=3600, output_dir=None, @@ -275,7 +323,8 @@ def test_run_tbench_full_subset(self, mock_result, tmp_path, mock_tbench_result) _run_tbench( repo_path=repo_path, subset="full", - model="claude-haiku-4-5", + agent="claude-code", + model="anthropic/claude-haiku-4-5", verbose=False, timeout=3600, output_dir=None, @@ -295,7 +344,8 @@ def test_run_tbench_invalid_subset(self, mock_abort, mock_echo, tmp_path): _run_tbench( repo_path=repo_path, subset="invalid", - model="claude-haiku-4-5", + agent="claude-code", + model="anthropic/claude-haiku-4-5", verbose=False, timeout=3600, output_dir=None, @@ -314,7 +364,8 @@ def test_run_tbench_missing_api_key(self, mock_abort, mock_echo, tmp_path): _run_tbench( repo_path=repo_path, subset="smoketest", - model="claude-haiku-4-5", + agent="claude-code", + model="anthropic/claude-haiku-4-5", verbose=False, timeout=3600, output_dir=None, @@ -335,7 +386,8 @@ def test_run_tbench_defaults_to_full( _run_tbench( repo_path=repo_path, subset=None, # Should default to 'full' - model="claude-haiku-4-5", + agent="claude-code", + model="anthropic/claude-haiku-4-5", verbose=False, timeout=3600, output_dir=None, @@ -361,7 +413,8 @@ def test_run_tbench_exception_handling(self, mock_echo, mock_result, tmp_path): _run_tbench( repo_path=repo_path, subset="smoketest", - model="claude-haiku-4-5", + agent="claude-code", + model="anthropic/claude-haiku-4-5", verbose=False, timeout=3600, output_dir=None, diff --git a/tests/unit/test_harbor_config.py b/tests/unit/test_harbor_config.py index 58f22f04..7f54d1c2 100644 --- a/tests/unit/test_harbor_config.py +++ b/tests/unit/test_harbor_config.py @@ -222,3 +222,39 @@ def test_allowed_models_is_set(self): def test_allowed_agents_is_set(self): """Test that ALLOWED_AGENTS is a set (not list)""" assert isinstance(ALLOWED_AGENTS, set) + + +class TestHarborConfigCursorModels: + """Test cursor/* model acceptance""" + + def test_harbor_config_cursor_models_accepted(self): + """Test that cursor/* models are accepted""" + config = HarborConfig( + model="cursor/sonnet-4.5", + agent="cursor-cli", + jobs_dir=Path("/tmp/test"), + api_key="test-key", + ) + assert config.model == "cursor/sonnet-4.5" + + config_gemini = HarborConfig( + model="cursor/gemini-3-pro", + agent="cursor-cli", + jobs_dir=Path("/tmp/test"), + api_key="test-key", + ) + assert config_gemini.model == "cursor/gemini-3-pro" + + +class TestHarborConfigCursorAgent: + """Test cursor-cli agent acceptance""" + + def test_harbor_config_cursor_agent_accepted(self): + """Test that cursor-cli agent is accepted""" + config = HarborConfig( + model="anthropic/claude-haiku-4-5", + agent="cursor-cli", + jobs_dir=Path("/tmp/test"), + api_key="test-key", + ) + assert config.agent == "cursor-cli" From b644a35232ac0ad53d98e8793e0cc02f2fc9a6d5 Mon Sep 17 00:00:00 2001 From: Rahul Shetty Date: Thu, 5 Feb 2026 12:47:04 +0530 Subject: [PATCH 4/5] format code Signed-off-by: Rahul Shetty --- src/agentready/assessors/documentation.py | 6 +-- src/agentready/assessors/testing.py | 6 +-- src/agentready/cli/benchmark.py | 23 ++++++++-- src/agentready/services/assessment_cache.py | 18 +++----- tests/e2e/test_critical_paths.py | 6 +-- tests/e2e/test_critical_paths_simplified.py | 6 +-- tests/unit/cli/test_main.py | 6 +-- tests/unit/test_assessors_code_quality.py | 48 +++++++-------------- tests/unit/test_assessors_containers.py | 24 ++++------- tests/unit/test_assessors_security.py | 24 ++++------- tests/unit/test_assessors_stub.py | 42 ++++++------------ 11 files changed, 82 insertions(+), 127 deletions(-) diff --git a/src/agentready/assessors/documentation.py b/src/agentready/assessors/documentation.py index 95e25e9d..4c4125c8 100644 --- a/src/agentready/assessors/documentation.py +++ b/src/agentready/assessors/documentation.py @@ -453,8 +453,7 @@ def _create_remediation(self) -> Remediation: ], tools=[], commands=[], - examples=[ - """# Project Name + examples=["""# Project Name ## Overview What this project does and why it exists. @@ -477,8 +476,7 @@ def _create_remediation(self) -> Remediation: # Format code black . ``` -""" - ], +"""], citations=[ Citation( source="GitHub", diff --git a/src/agentready/assessors/testing.py b/src/agentready/assessors/testing.py index 3ba3b2ba..09eb31b6 100644 --- a/src/agentready/assessors/testing.py +++ b/src/agentready/assessors/testing.py @@ -286,8 +286,7 @@ def _create_remediation(self) -> Remediation: "pre-commit install", "pre-commit run --all-files", ], - examples=[ - """# .pre-commit-config.yaml + examples=["""# .pre-commit-config.yaml repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 @@ -306,8 +305,7 @@ def _create_remediation(self) -> Remediation: rev: 5.12.0 hooks: - id: isort -""" - ], +"""], citations=[ Citation( source="pre-commit.com", diff --git a/src/agentready/cli/benchmark.py b/src/agentready/cli/benchmark.py index d964a747..09be4232 100644 --- a/src/agentready/cli/benchmark.py +++ b/src/agentready/cli/benchmark.py @@ -59,7 +59,15 @@ help="Skip dependency checks (for advanced users)", ) def benchmark( - repository, harness, subset, agent, model, verbose, timeout, output_dir, skip_preflight + repository, + harness, + subset, + agent, + model, + verbose, + timeout, + output_dir, + skip_preflight, ): """Run agent coding benchmarks. @@ -87,14 +95,23 @@ def benchmark( # Route to appropriate harness if harness == "tbench": _run_tbench( - repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight + repo_path, + subset, + agent, + model, + verbose, + timeout, + output_dir, + skip_preflight, ) else: click.echo(f"Unknown harness: {harness}", err=True) raise click.Abort() -def _run_tbench(repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight): +def _run_tbench( + repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight +): """Run Terminal-Bench evaluation.""" # Default subset to 'full' if not specified if subset is None: diff --git a/src/agentready/services/assessment_cache.py b/src/agentready/services/assessment_cache.py index 886787c3..d820e9e6 100644 --- a/src/agentready/services/assessment_cache.py +++ b/src/agentready/services/assessment_cache.py @@ -33,8 +33,7 @@ def _initialize_db(self) -> None: """Initialize database schema.""" try: with sqlite3.connect(self.db_path) as conn: - conn.execute( - """ + conn.execute(""" CREATE TABLE IF NOT EXISTS assessments ( id INTEGER PRIMARY KEY AUTOINCREMENT, repository_url TEXT NOT NULL, @@ -45,23 +44,18 @@ def _initialize_db(self) -> None: expires_at TIMESTAMP, UNIQUE(repository_url, commit_hash) ) - """ - ) + """) # Create index for faster queries - conn.execute( - """ + conn.execute(""" CREATE INDEX IF NOT EXISTS idx_repo_commit ON assessments(repository_url, commit_hash) - """ - ) + """) - conn.execute( - """ + conn.execute(""" CREATE INDEX IF NOT EXISTS idx_expires_at ON assessments(expires_at) - """ - ) + """) conn.commit() except sqlite3.Error as e: diff --git a/tests/e2e/test_critical_paths.py b/tests/e2e/test_critical_paths.py index ad49a76d..ae94e278 100644 --- a/tests/e2e/test_critical_paths.py +++ b/tests/e2e/test_critical_paths.py @@ -276,14 +276,12 @@ def test_assess_with_valid_config(self): with tempfile.TemporaryDirectory() as tmp_dir: # Create valid config file config_file = Path(tmp_dir) / "config.yaml" - config_file.write_text( - """ + config_file.write_text(""" weights: claude_md: 2.0 excluded_attributes: - repomix_config -""" - ) +""") output_dir = Path(tmp_dir) / "output" diff --git a/tests/e2e/test_critical_paths_simplified.py b/tests/e2e/test_critical_paths_simplified.py index c0cdca8d..3dced950 100644 --- a/tests/e2e/test_critical_paths_simplified.py +++ b/tests/e2e/test_critical_paths_simplified.py @@ -219,14 +219,12 @@ def test_valid_config_application(self, temp_output_dir): with tempfile.TemporaryDirectory() as tmp_dir: # Create valid config config_file = Path(tmp_dir) / "config.yaml" - config_file.write_text( - """ + config_file.write_text(""" weights: claude_md: 2.0 excluded_attributes: - repomix_config -""" - ) +""") # Run assessment with config result = helper.run_assessment( diff --git a/tests/unit/cli/test_main.py b/tests/unit/cli/test_main.py index 3398ee38..2ffb9160 100644 --- a/tests/unit/cli/test_main.py +++ b/tests/unit/cli/test_main.py @@ -355,14 +355,12 @@ class TestConfigLoading: def test_load_config_valid_yaml(self, tmp_path): """Test loading valid config file.""" config_file = tmp_path / "config.yaml" - config_file.write_text( - """ + config_file.write_text(""" weights: claude_md_file: 2.0 excluded_attributes: - test_attribute -""" - ) +""") config = load_config(config_file) diff --git a/tests/unit/test_assessors_code_quality.py b/tests/unit/test_assessors_code_quality.py index aec492bc..149e9efa 100644 --- a/tests/unit/test_assessors_code_quality.py +++ b/tests/unit/test_assessors_code_quality.py @@ -69,14 +69,12 @@ def test_python_pylint_configured(self, tmp_path): # Create .pylintrc pylintrc = tmp_path / ".pylintrc" - pylintrc.write_text( - """[MASTER] + pylintrc.write_text("""[MASTER] max-line-length=100 [MESSAGES CONTROL] disable=C0111 -""" - ) +""") repo = Repository( path=tmp_path, @@ -105,11 +103,9 @@ def test_python_ruff_configured(self, tmp_path): # Create ruff.toml ruff_toml = tmp_path / "ruff.toml" - ruff_toml.write_text( - """line-length = 100 + ruff_toml.write_text("""line-length = 100 select = ["E", "F", "W"] -""" - ) +""") repo = Repository( path=tmp_path, @@ -135,14 +131,12 @@ def test_python_pyproject_toml(self, tmp_path): # Create pyproject.toml with both tools pyproject = tmp_path / "pyproject.toml" - pyproject.write_text( - """[tool.pylint] + pyproject.write_text("""[tool.pylint] max-line-length = 100 [tool.ruff] line-length = 100 -""" - ) +""") repo = Repository( path=tmp_path, @@ -171,15 +165,13 @@ def test_javascript_eslint_configured(self, tmp_path): # Create .eslintrc.json eslintrc = tmp_path / ".eslintrc.json" - eslintrc.write_text( - """{ + eslintrc.write_text("""{ "extends": "eslint:recommended", "rules": { "no-console": "warn" } } -""" - ) +""") repo = Repository( path=tmp_path, @@ -234,14 +226,12 @@ def test_ruby_rubocop_configured(self, tmp_path): # Create .rubocop.yml rubocop = tmp_path / ".rubocop.yml" - rubocop.write_text( - """AllCops: + rubocop.write_text("""AllCops: TargetRubyVersion: 3.0 Style/StringLiterals: EnforcedStyle: double_quotes -""" - ) +""") repo = Repository( path=tmp_path, @@ -269,14 +259,12 @@ def test_go_golangci_lint_configured(self, tmp_path): # Create .golangci.yml golangci = tmp_path / ".golangci.yml" - golangci.write_text( - """linters: + golangci.write_text("""linters: enable: - gofmt - golint - govet -""" - ) +""") repo = Repository( path=tmp_path, @@ -311,14 +299,12 @@ def test_actionlint_in_precommit(self, tmp_path): # Create .pre-commit-config.yaml with actionlint precommit = tmp_path / ".pre-commit-config.yaml" - precommit.write_text( - """repos: + precommit.write_text("""repos: - repo: https://github.com/rhysd/actionlint rev: v1.6.0 hooks: - id: actionlint -""" - ) +""") repo = Repository( path=tmp_path, @@ -344,13 +330,11 @@ def test_markdownlint_configured(self, tmp_path): # Create .markdownlint.json markdownlint = tmp_path / ".markdownlint.json" - markdownlint.write_text( - """{ + markdownlint.write_text("""{ "default": true, "MD013": false } -""" - ) +""") repo = Repository( path=tmp_path, diff --git a/tests/unit/test_assessors_containers.py b/tests/unit/test_assessors_containers.py index 15af3d87..7b159c3a 100644 --- a/tests/unit/test_assessors_containers.py +++ b/tests/unit/test_assessors_containers.py @@ -98,8 +98,7 @@ def test_multi_stage_build(self, tmp_path): # Create multi-stage Dockerfile dockerfile = tmp_path / "Dockerfile" - dockerfile.write_text( - """FROM node:18 AS builder + dockerfile.write_text("""FROM node:18 AS builder WORKDIR /app COPY . . RUN npm ci && npm run build @@ -108,8 +107,7 @@ def test_multi_stage_build(self, tmp_path): WORKDIR /app COPY --from=builder /app/dist ./dist CMD ["node", "dist/index.js"] -""" - ) +""") repo = Repository( path=tmp_path, @@ -138,15 +136,13 @@ def test_docker_compose(self, tmp_path): # Create docker-compose.yml compose = tmp_path / "docker-compose.yml" - compose.write_text( - """version: '3.8' + compose.write_text("""version: '3.8' services: app: build: . ports: - "8000:8000" -""" - ) +""") repo = Repository( path=tmp_path, @@ -177,15 +173,13 @@ def test_dockerignore_file(self, tmp_path): # Create .dockerignore dockerignore = tmp_path / ".dockerignore" - dockerignore.write_text( - """.git + dockerignore.write_text(""".git .venv __pycache__ *.pyc .env node_modules -""" - ) +""") repo = Repository( path=tmp_path, @@ -239,14 +233,12 @@ def test_comprehensive_container_setup(self, tmp_path): subprocess.run(["git", "init"], cwd=tmp_path, capture_output=True, check=True) # Multi-stage Dockerfile - (tmp_path / "Dockerfile").write_text( - """FROM python:3.12 AS builder + (tmp_path / "Dockerfile").write_text("""FROM python:3.12 AS builder RUN pip install build FROM python:3.12-slim COPY --from=builder /app /app -""" - ) +""") # docker-compose.yml (tmp_path / "docker-compose.yml").write_text( diff --git a/tests/unit/test_assessors_security.py b/tests/unit/test_assessors_security.py index 975bd7fa..2ced2f62 100644 --- a/tests/unit/test_assessors_security.py +++ b/tests/unit/test_assessors_security.py @@ -45,15 +45,13 @@ def test_dependabot_configured(self, tmp_path): github_dir = tmp_path / ".github" github_dir.mkdir() dependabot_file = github_dir / "dependabot.yml" - dependabot_file.write_text( - """version: 2 + dependabot_file.write_text("""version: 2 updates: - package-ecosystem: pip directory: / schedule: interval: weekly -""" - ) +""") repo = Repository( path=tmp_path, @@ -109,12 +107,10 @@ def test_python_security_tools(self, tmp_path): # Create pyproject.toml with security tools pyproject = tmp_path / "pyproject.toml" - pyproject.write_text( - """[tool.poetry.dev-dependencies] + pyproject.write_text("""[tool.poetry.dev-dependencies] pip-audit = "^2.0.0" bandit = "^1.7.0" -""" - ) +""") repo = Repository( path=tmp_path, @@ -143,14 +139,12 @@ def test_secret_detection(self, tmp_path): # Create .pre-commit-config.yaml with detect-secrets precommit = tmp_path / ".pre-commit-config.yaml" - precommit.write_text( - """repos: + precommit.write_text("""repos: - repo: https://github.com/Yelp/detect-secrets rev: v1.4.0 hooks: - id: detect-secrets -""" - ) +""") repo = Repository( path=tmp_path, @@ -261,8 +255,7 @@ def test_javascript_security_tools(self, tmp_path): # Create package.json with audit script package_json = tmp_path / "package.json" - package_json.write_text( - """{ + package_json.write_text("""{ "scripts": { "audit": "npm audit", "test": "jest" @@ -271,8 +264,7 @@ def test_javascript_security_tools(self, tmp_path): "snyk": "^1.0.0" } } -""" - ) +""") repo = Repository( path=tmp_path, diff --git a/tests/unit/test_assessors_stub.py b/tests/unit/test_assessors_stub.py index 7decdf1e..2a274e62 100644 --- a/tests/unit/test_assessors_stub.py +++ b/tests/unit/test_assessors_stub.py @@ -102,12 +102,10 @@ def test_requirements_txt_all_pinned(self, tmp_path): # Create requirements.txt with exact versions requirements = tmp_path / "requirements.txt" - requirements.write_text( - """requests==2.28.1 + requirements.write_text("""requests==2.28.1 flask==2.3.0 pytest==7.4.0 -""" - ) +""") repo = Repository( path=tmp_path, @@ -134,13 +132,11 @@ def test_requirements_txt_unpinned_dependencies(self, tmp_path): # Create requirements.txt with mix of pinned and unpinned requirements = tmp_path / "requirements.txt" - requirements.write_text( - """requests==2.28.1 + requirements.write_text("""requests==2.28.1 flask>=2.0.0 pytest~=7.0 numpy -""" - ) +""") repo = Repository( path=tmp_path, @@ -296,8 +292,7 @@ def test_python_patterns(self, tmp_path): # Create .gitignore with Python patterns gitignore = tmp_path / ".gitignore" - gitignore.write_text( - """# Python + gitignore.write_text("""# Python __pycache__/ *.py[cod] *.egg-info/ @@ -311,8 +306,7 @@ def test_python_patterns(self, tmp_path): .vscode/ .idea/ *.swp -""" - ) +""") repo = Repository( path=tmp_path, @@ -340,8 +334,7 @@ def test_javascript_patterns(self, tmp_path): # Create .gitignore with JavaScript patterns gitignore = tmp_path / ".gitignore" - gitignore.write_text( - """# JavaScript + gitignore.write_text("""# JavaScript node_modules/ dist/ build/ @@ -351,8 +344,7 @@ def test_javascript_patterns(self, tmp_path): # General .DS_Store .vscode/ -""" - ) +""") repo = Repository( path=tmp_path, @@ -380,12 +372,10 @@ def test_missing_patterns(self, tmp_path): # Create .gitignore with only general patterns gitignore = tmp_path / ".gitignore" - gitignore.write_text( - """# General only + gitignore.write_text("""# General only .DS_Store .vscode/ -""" - ) +""") repo = Repository( path=tmp_path, @@ -414,8 +404,7 @@ def test_multi_language_patterns(self, tmp_path): # Create .gitignore with Python and JavaScript patterns gitignore = tmp_path / ".gitignore" - gitignore.write_text( - """# Python + gitignore.write_text("""# Python __pycache__/ *.py[cod] *.egg-info/ @@ -435,8 +424,7 @@ def test_multi_language_patterns(self, tmp_path): .DS_Store .vscode/ .idea/ -""" - ) +""") repo = Repository( path=tmp_path, @@ -463,13 +451,11 @@ def test_pattern_with_trailing_slash(self, tmp_path): # Create .gitignore with mixed slash usage gitignore = tmp_path / ".gitignore" - gitignore.write_text( - """__pycache__ + gitignore.write_text("""__pycache__ venv .venv/ .DS_Store -""" - ) +""") repo = Repository( path=tmp_path, From 3c872d85106ccdac8bf0228b6aa91d6d247212b7 Mon Sep 17 00:00:00 2001 From: Rahul Shetty Date: Thu, 5 Feb 2026 13:37:33 +0530 Subject: [PATCH 5/5] remove sensitive env vars from printing Signed-off-by: Rahul Shetty --- src/agentready/services/eval_harness/tbench_runner.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/agentready/services/eval_harness/tbench_runner.py b/src/agentready/services/eval_harness/tbench_runner.py index 0e31f781..429368e8 100644 --- a/src/agentready/services/eval_harness/tbench_runner.py +++ b/src/agentready/services/eval_harness/tbench_runner.py @@ -151,9 +151,7 @@ def _real_tbench_result(repo_path: Path, config: HarborConfig) -> TbenchResult: clean_env[var_name] = var_value # Build display string (truncate sensitive values) - if is_sensitive: - display_value = f"{var_value[:20]}..." - else: + if not is_sensitive: display_value = var_value env_vars_display.append(f"{var_name}={display_value}")