diff --git a/benchmarks/followup-bench/runner.py b/benchmarks/followup-bench/runner.py
index e3558d51..d942ea6c 100644
--- a/benchmarks/followup-bench/runner.py
+++ b/benchmarks/followup-bench/runner.py
@@ -28,7 +28,7 @@
 from benchflow._acp_run import connect_acp, execute_prompts
 from benchflow._agent_setup import install_agent
 from benchflow._scene import Role, Scene
-from benchflow.agents.registry import AGENTS, AGENT_LAUNCH
+from benchflow.agents.registry import AGENT_LAUNCH, AGENTS
 from benchflow.runtime import Environment
 
 logger = logging.getLogger(__name__)
diff --git a/docs/notebooks/nanofirm-task/tests/evaluate.py b/docs/notebooks/nanofirm-task/tests/evaluate.py
index 2f4fd75f..254f70d3 100644
--- a/docs/notebooks/nanofirm-task/tests/evaluate.py
+++ b/docs/notebooks/nanofirm-task/tests/evaluate.py
@@ -1,6 +1,5 @@
 """Evaluate contract risk analysis quality."""
 import json
-import sys
 
 ANALYSIS_PATH = "/app/analysis.json"
 REWARD_PATH = "/logs/verifier/reward.txt"
diff --git a/docs/notebooks/scene-patterns.ipynb b/docs/notebooks/scene-patterns.ipynb
index 9f1400c9..488869f2 100644
--- a/docs/notebooks/scene-patterns.ipynb
+++ b/docs/notebooks/scene-patterns.ipynb
@@ -6,16 +6,16 @@
    "source": [
     "# BenchFlow Scene Patterns\n",
     "\n",
-    "Three evaluation patterns \u2014 baseline, self-review (multi-turn), coder-reviewer (multi-round) \u2014 run end-to-end with `bf.run()`.\n",
+    "Three evaluation patterns — baseline, self-review (multi-turn), coder-reviewer (multi-round) — run end-to-end with `bf.run()`.\n",
     "\n",
     "| Term | Definition | Example |\n",
     "|------|-----------|--------|\n",
     "| **Turn** | One prompt in one ACP session | Coder writes code |\n",
-    "| **Multi-turn** | Same role, multiple turns | Self-review: agent \u2192 agent |\n",
-    "| **Round** | One A\u2192B exchange between roles | Coder \u2192 Reviewer |\n",
-    "| **Multi-round** | Different roles exchanging turns | Coder \u2192 Reviewer \u2192 Coder |\n",
+    "| **Multi-turn** | Same role, multiple turns | Self-review: agent → agent |\n",
+    "| **Round** | One A→B exchange between roles | Coder → Reviewer |\n",
+    "| **Multi-round** | Different roles exchanging turns | Coder → Reviewer → Coder |\n",
     "| **Scene** | Interaction region with roles + turns | A code-review scene |\n",
-    "| **Trial** | Sequence of scenes in a shared sandbox | Skill-gen \u2192 Solve |\n",
+    "| **Trial** | Sequence of scenes in a shared sandbox | Skill-gen → Solve |\n",
     "\n",
     "**Prerequisites:**\n",
     "- `pip install benchflow`\n",
@@ -30,15 +30,14 @@
    "metadata": {},
    "outputs": [
     {
-     "output_type": "stream",
      "name": "stdout",
+     "output_type": "stream",
      "text": [
       "Task: .ref/terminal-bench-2/regex-log  Env: daytona  Agent: gemini  Model: gemini-3.1-flash-lite-preview\n"
      ]
     }
    ],
    "source": [
-    "import asyncio\n",
     "import logging\n",
     "from pathlib import Path\n",
     "\n",
@@ -71,8 +70,8 @@
    "metadata": {},
    "outputs": [
     {
-     "output_type": "stream",
      "name": "stdout",
+     "output_type": "stream",
      "text": [
       "Reward:     1.0\n",
       "Tool calls: 3\n",
@@ -102,7 +101,7 @@
    "source": [
     "## Pattern 2: Multi-Turn Self-Review\n",
     "\n",
-    "Same agent gets a second turn to re-examine its own work. This is **multi-turn** \u2014 one role, multiple prompts.\n",
+    "Same agent gets a second turn to re-examine its own work. This is **multi-turn** — one role, multiple prompts.\n",
     "\n",
     "Use when: a second pass catches what the first missed. Cost: 2x baseline."
    ]
@@ -113,8 +112,8 @@
    "metadata": {},
    "outputs": [
     {
-     "output_type": "stream",
      "name": "stdout",
+     "output_type": "stream",
      "text": [
       "Reward:     1.0\n",
       "Tool calls: 7\n",
@@ -152,7 +151,7 @@
    "source": [
     "## Pattern 3: Coder + Reviewer (Multi-Round)\n",
     "\n",
-    "Two roles \u2014 coder and reviewer \u2014 in a shared sandbox. This is **multi-round** \u2014 different roles exchanging turns.\n",
+    "Two roles — coder and reviewer — in a shared sandbox. This is **multi-round** — different roles exchanging turns.\n",
     "\n",
     "Communication uses the **outbox convention**:\n",
     "1. Reviewer writes feedback to `/app/.outbox/coder.json` as `{\"to\": \"coder\", \"content\": \"...\"}`\n",
@@ -168,8 +167,8 @@
    "metadata": {},
    "outputs": [
     {
-     "output_type": "stream",
      "name": "stdout",
+     "output_type": "stream",
      "text": [
       "Reward:     0.0\n",
       "Tool calls: 13\n",
@@ -195,7 +194,7 @@
     "                 '{\"to\": \"coder\", \"content\": \"Your specific feedback here.\"}'),\n",
     "            Turn(\"coder\",\n",
     "                 \"Read the reviewer's feedback and fix the issues. \"\n",
-    "                 \"Focus only on what was flagged \u2014 don't start over.\"),\n",
+    "                 \"Focus only on what was flagged — don't start over.\"),\n",
     "        ],\n",
     "    )],\n",
     "    environment=ENV,\n",
@@ -223,14 +222,14 @@
    "metadata": {},
    "outputs": [
     {
-     "output_type": "stream",
      "name": "stdout",
+     "output_type": "stream",
      "text": [
       "Pattern                Reward  Tools Error\n",
       "------------------------------------------------------------\n",
-      "baseline                  1.0      3 \u2014\n",
-      "self_review               1.0      7 \u2014\n",
-      "coder_reviewer            0.0     13 \u2014\n"
+      "baseline                  1.0      3 —\n",
+      "self_review               1.0      7 —\n",
+      "coder_reviewer            0.0     13 —\n"
      ]
     }
    ],
@@ -238,11 +237,11 @@
     "print(f\"{'Pattern':<20} {'Reward':>8} {'Tools':>6} {'Error'}\")\n",
     "print(\"-\" * 60)\n",
     "for name, r in results.items():\n",
-    "    reward = (r.rewards or {}).get(\"reward\", \"\u2014\")\n",
-    "    err = r.error or \"\u2014\"\n",
+    "    reward = (r.rewards or {}).get(\"reward\", \"—\")\n",
+    "    err = r.error or \"—\"\n",
     "    if len(err) > 30:\n",
     "        err = err[:27] + \"...\"\n",
-    "    print(f\"{name:<20} {str(reward):>8} {r.n_tool_calls:>6} {err}\")"
+    "    print(f\"{name:<20} {reward!s:>8} {r.n_tool_calls:>6} {err}\")"
    ]
   },
   {
@@ -255,12 +254,12 @@
     "\n",
     "| Harbor (PR #1462) | BenchFlow 0.3 | Status |\n",
     "|-------------------|---------------|--------|\n",
-    "| `BaseUser` | `Role` \u2014 any role can be a user, reviewer, or custom agent | **Shipped** |\n",
-    "| `User.run() \u2192 str` | `Turn` with a prompt \u2014 each turn sends one prompt to one role | **Shipped** |\n",
+    "| `BaseUser` | `Role` — any role can be a user, reviewer, or custom agent | **Shipped** |\n",
+    "| `User.run() → str` | `Turn` with a prompt — each turn sends one prompt to one role | **Shipped** |\n",
     "| Per-round message passing | Outbox files + scheduler injection into next role's prompt | **Shipped** |\n",
     "| Per-round archiving | `scene_messages.jsonl` in trial directory | **Shipped** |\n",
     "| `--user` CLI flag | YAML scene config (`scenes:` key in trial config) | **Shipped** |\n",
-    "| `User.run() \u2192 None` (stop) | Fixed turn count only \u2014 no dynamic termination | **Gap** |\n",
+    "| `User.run() → None` (stop) | Fixed turn count only — no dynamic termination | **Gap** |\n",
     "| Oracle access (`/solution`) | Not available to user roles during setup | **Gap** |\n",
     "| Inter-round verification | `verify()` runs once after all scenes | **Gap** |\n",
     "| User inspects trajectory | User role cannot read prior agent trajectory between rounds | **Gap** |\n",
@@ -270,7 +269,7 @@
     "\n",
     "**What it does NOT deliver yet:** Dynamic termination (user decides when to stop), oracle access\n",
     "for user roles, per-round verification, or inter-round trajectory inspection. These require\n",
-    "extending the Scene scheduler with callbacks \u2014 tracked for 0.4."
+    "extending the Scene scheduler with callbacks — tracked for 0.4."
    ]
   }
  ],
diff --git a/docs/notebooks/scene-patterns.py b/docs/notebooks/scene-patterns.py
index a95369b8..961bd09a 100644
--- a/docs/notebooks/scene-patterns.py
+++ b/docs/notebooks/scene-patterns.py
@@ -18,9 +18,6 @@
 """
 
 import os
-import sys
-import json
-from pathlib import Path
 
 # ── The contract (constructed here, no external files) ──────────────
 
diff --git a/experiments/reviewer_ablation.py b/experiments/reviewer_ablation.py
index 5acaecab..8ab0caec 100644
--- a/experiments/reviewer_ablation.py
+++ b/experiments/reviewer_ablation.py
@@ -17,25 +17,26 @@
 import os
 import sys
 import time
-from datetime import datetime
 from pathlib import Path
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 
 sys.path.insert(0, str(Path(__file__).resolve().parents[0].parent / "src"))
 
+import contextlib
+
+from harbor.models.task.task import Task
+from harbor.models.trial.paths import TrialPaths
+from harbor.verifier.verifier import Verifier
+
 from benchflow._acp_run import connect_acp, execute_prompts
 from benchflow._agent_env import resolve_agent_env
 from benchflow._agent_setup import install_agent
 from benchflow._credentials import upload_subscription_auth, write_credential_files
 from benchflow._env_setup import _create_environment
 from benchflow._sandbox import setup_sandbox_user
-from benchflow._scene import Role, Scene
-from benchflow.agents.registry import AGENTS, AGENT_LAUNCH
+from benchflow.agents.registry import AGENT_LAUNCH, AGENTS
 from benchflow.sdk import SDK
-from harbor.models.task.task import Task
-from harbor.verifier.verifier import Verifier
-from harbor.models.trial.paths import TrialPaths
 
 logger = logging.getLogger(__name__)
 
@@ -141,10 +142,8 @@ async def _run_acp(env, prompt: str, trial_dir: Path, timeout: int = 600) -> tup
     try:
         _, n_tools = await execute_prompts(acp_client, session, [prompt], timeout=timeout)
     finally:
-        try:
+        with contextlib.suppress(Exception):
             await acp_client.close()
-        except Exception:
-            pass
     return n_tools, int(time.time() - t0)
 
 
@@ -204,7 +203,6 @@ async def run_reviewer(task_path: Path, task_name: str, condition: str) -> dict:
         total_tools += n_tools
 
-        # Read coder's outbox
-        outbox_result = await env.exec("cat /app/.outbox/reviewer.json 2>/dev/null || echo '{}'")
+        # Clear the outbox before the reviewer phase; its contents are not consumed here.
         await env.exec("rm -rf /app/.outbox/*")
 
         # Phase 2: Reviewer
diff --git a/experiments/validate_multi_scene_lifecycle.py b/experiments/validate_multi_scene_lifecycle.py
index e55eea26..4e6bf877 100644
--- a/experiments/validate_multi_scene_lifecycle.py
+++ b/experiments/validate_multi_scene_lifecycle.py
@@ -162,7 +162,7 @@ async def main():
     r3 = await test_followup_bench()
     results["followup"] = r3.rewards
 
-    logger.info(f"\n=== DOGFOOD RESULTS ===")
+    logger.info("\n=== DOGFOOD RESULTS ===")
     for name, rewards in results.items():
         reward = (rewards or {}).get("reward", "N/A")
         logger.info(f"  {name}: reward={reward}")
diff --git a/labs/benchjack-sandbox-hardening/pattern1_conftest_hook/tests/test_outputs.py b/labs/benchjack-sandbox-hardening/pattern1_conftest_hook/tests/test_outputs.py
index 1c85417f..df887aa9 100644
--- a/labs/benchjack-sandbox-hardening/pattern1_conftest_hook/tests/test_outputs.py
+++ b/labs/benchjack-sandbox-hardening/pattern1_conftest_hook/tests/test_outputs.py
@@ -6,7 +6,7 @@ def test_arithmetic_is_broken():
 
 
 def test_false_is_true():
-    assert False
+    raise AssertionError()
 
 
 def test_pi_is_rational():
diff --git a/labs/benchjack-sandbox-hardening/pattern7_pth_injection/tests/test_outputs.py b/labs/benchjack-sandbox-hardening/pattern7_pth_injection/tests/test_outputs.py
index 6debbc31..a4742c25 100644
--- a/labs/benchjack-sandbox-hardening/pattern7_pth_injection/tests/test_outputs.py
+++ b/labs/benchjack-sandbox-hardening/pattern7_pth_injection/tests/test_outputs.py
@@ -3,7 +3,7 @@ def test_arithmetic_is_broken():
 
 
 def test_false_is_true():
-    assert False
+    raise AssertionError()
 
 
 def test_pi_is_rational():
diff --git a/labs/benchjack-sandbox-hardening/run_comparison.py b/labs/benchjack-sandbox-hardening/run_comparison.py
index e01b1df8..04963bb8 100755
--- a/labs/benchjack-sandbox-hardening/run_comparison.py
+++ b/labs/benchjack-sandbox-hardening/run_comparison.py
@@ -136,7 +136,7 @@ def main() -> int:
         return 1
 
     n = len(PATTERNS)
-    total_steps = 2 + 2 * n  # 2 venvs + 2 versions × n patterns
+    total_steps = 2 + 2 * n  # 2 venvs + 2 versions x n patterns
 
     print(f"[1/{total_steps}] venv: benchflow==0.2.0 (PyPI)")
     _create_venv(VENVS_DIR / "bf-0.2.0", ["benchflow==0.2.0"])
diff --git a/labs/reward-hack-matrix/exploits/conftest_payload.py b/labs/reward-hack-matrix/exploits/conftest_payload.py
index 30fe23f7..7397066a 100644
--- a/labs/reward-hack-matrix/exploits/conftest_payload.py
+++ b/labs/reward-hack-matrix/exploits/conftest_payload.py
@@ -16,7 +16,7 @@
 terminal-bench-2 if its verifier ever touches conftest.py).
 """
 
-import pytest  # noqa: F401  — pytest must see this as a real plugin module
+import pytest
 
 
 def pytest_collection_modifyitems(config, items):
diff --git a/labs/reward-hack-matrix/run_matrix.py b/labs/reward-hack-matrix/run_matrix.py
index d5dfc439..53a86b04 100644
--- a/labs/reward-hack-matrix/run_matrix.py
+++ b/labs/reward-hack-matrix/run_matrix.py
@@ -30,6 +30,7 @@
 
 import argparse
 import asyncio
+import contextlib
 import json
 import os
 import re
@@ -447,13 +448,11 @@ async def shutdown(self) -> None:
         if self.proc is None:
             return
         if self.proc.stdin and not self.proc.stdin.is_closing():
-            try:
+            with contextlib.suppress(OSError):  # BrokenPipeError is an OSError subclass
                 self.proc.stdin.close()
-            except (OSError, BrokenPipeError):
-                pass
         try:
             await asyncio.wait_for(self.proc.wait(), timeout=30)
-        except asyncio.TimeoutError:
+        except TimeoutError:
             self.proc.kill()
             await self.proc.wait()
         if self.reader_task:
@@ -717,7 +716,7 @@ def main() -> int:
     if args.sweep:
         summary_path = Path(args.summary_path) if args.summary_path else (JOBS_DIR / "matrix_sweep.json")
         print(
-            f"[sweep] {len(cells)} cells × {len(VERSIONS)} versions = "
+            f"[sweep] {len(cells)} cells x {len(VERSIONS)} versions = "
             f"{len(cells) * len(VERSIONS)} trials, concurrency={args.concurrency}"
         )
         results = asyncio.run(
diff --git a/pyproject.toml b/pyproject.toml
index 0b032ab2..306e71c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -96,6 +96,14 @@ ignore = [
     "RUF022", # __all__ unsorted — grouped by section for agent-friendliness
 ]
 
+[tool.ruff.lint.per-file-ignores]
+# Standalone scripts — sys.path manipulation before imports is intentional
+"experiments/*.py" = ["E402"]
+"tests/conformance/*.py" = ["E402"]
+# Annotation-only names that ruff cannot resolve. They are never evaluated at runtime
+# under `from __future__ import annotations`, but typing.get_type_hints() may fail on them.
+"src/benchflow/runtime.py" = ["F821"]
+
 [tool.ty.environment]
 python-version = "3.12"
 
diff --git a/src/benchflow/__init__.py b/src/benchflow/__init__.py
index 809292d7..35388b58 100644
--- a/src/benchflow/__init__.py
+++ b/src/benchflow/__init__.py
@@ -19,13 +19,14 @@
     ExecResult,
     Task,
     TaskConfig,
-    Trial,
     Verifier,
     VerifierResult,
 )
 
 # benchflow's additions
 from benchflow._env_setup import stage_dockerfile_deps
+from benchflow._scene import MailboxTransport, Message, MessageTransport, Role, Scene
+from benchflow._snapshot import list_snapshots, restore, snapshot
 from benchflow.acp.client import ACPClient
 from benchflow.acp.session import ACPSession
 from benchflow.agents.registry import (
@@ -53,16 +54,15 @@
     RuntimeResult,
     run,  # bf.run(agent, env) — the primary 0.3 API
 )
-from benchflow._scene import MailboxTransport, Message, MessageTransport, Role, Scene
-from benchflow._snapshot import list_snapshots, restore, snapshot
 from benchflow.sdk import SDK
-from benchflow.trial import Trial, TrialConfig
-from benchflow.trial import Role as TrialRole, Scene as TrialScene, Turn
-from benchflow.trial_yaml import trial_config_from_yaml
 from benchflow.skills import SkillInfo, discover_skills, install_skill, parse_skill
 from benchflow.trajectories.otel import OTelCollector
 from benchflow.trajectories.proxy import TrajectoryProxy
 from benchflow.trajectories.types import Trajectory
+from benchflow.trial import Role as TrialRole
+from benchflow.trial import Scene as TrialScene
+from benchflow.trial import Trial, TrialConfig, Turn
+from benchflow.trial_yaml import trial_config_from_yaml
 
 # Public API surface. Anything not in this list is implementation detail and
 # may change without notice. Names are grouped by source module to match the
@@ -123,6 +123,7 @@
     "TrialRole",
     "TrialScene",
     "Turn",
+    "trial_config_from_yaml",
     # SDK (backwards compat)
     "SDK",
     # Environments / dep staging
diff --git a/src/benchflow/_acp_run.py b/src/benchflow/_acp_run.py
index 8b64288c..56020721 100644
--- a/src/benchflow/_acp_run.py
+++ b/src/benchflow/_acp_run.py
@@ -18,6 +18,7 @@
 """
 
 import asyncio
+import contextlib
 import logging
 from pathlib import Path
 
@@ -66,7 +67,6 @@ async def connect_acp(
         agent_launch = build_priv_drop_cmd(agent_launch, sandbox_user)
         logger.info(f"Agent sandboxed as: {sandbox_user}")
 
-    last_err: Exception | None = None
     acp_client: ACPClient | None = None
     for attempt in range(_ACP_CONNECT_MAX_RETRIES + 1):
         if attempt > 0:
@@ -106,12 +106,9 @@ async def connect_acp(
         except ConnectionError as e:
             # Close the failed client before retrying
             if acp_client:
-                try:
+                with contextlib.suppress(Exception):
                     await acp_client.close()
-                except Exception:
-                    pass
                 acp_client = None
-            last_err = e
             if attempt == _ACP_CONNECT_MAX_RETRIES:
                 raise
             logger.warning(f"ACP connect failed (attempt {attempt + 1}): {e}")
@@ -119,10 +116,8 @@ async def connect_acp(
         except Exception:
             # Non-retryable error — close client to prevent leak
             if acp_client:
-                try:
+                with contextlib.suppress(Exception):
                     await acp_client.close()
-                except Exception:
-                    pass
             raise
 
     if model:
diff --git a/src/benchflow/acp/client.py b/src/benchflow/acp/client.py
index abd4743a..57a55eb9 100644
--- a/src/benchflow/acp/client.py
+++ b/src/benchflow/acp/client.py
@@ -1,6 +1,5 @@
 """ACP client — benchflow acts as the client, agents are ACP servers."""
 
-import json
 import logging
 from typing import Any
 
diff --git a/src/benchflow/cli/eval.py b/src/benchflow/cli/eval.py
index 58b410fc..17cea1dd 100644
--- a/src/benchflow/cli/eval.py
+++ b/src/benchflow/cli/eval.py
@@ -31,7 +31,8 @@
 from rich.console import Console
 from rich.table import Table
 
-from benchflow.job import DEFAULT_AGENT, effective_model as _effective_model
+from benchflow.job import DEFAULT_AGENT
+from benchflow.job import effective_model as _effective_model
 
 console = Console()
 
diff --git a/src/benchflow/cli/main.py b/src/benchflow/cli/main.py
index 96611bbd..e9acc235 100644
--- a/src/benchflow/cli/main.py
+++ b/src/benchflow/cli/main.py
@@ -433,9 +433,9 @@ def skills_eval(
         typer.Argument(help="Path to skill directory containing evals/evals.json"),
     ],
     agent: Annotated[
-        list[str],
+        list[str] | None,
         typer.Option("--agent", "-a", help="Agent(s) to evaluate (repeatable)"),
-    ] = ["claude-agent-acp"],
+    ] = None,
     model: Annotated[
         list[str] | None,
         typer.Option("--model", "-m", help="Model(s) (matched 1:1 with agents)"),
@@ -473,6 +473,8 @@ def skills_eval(
     """
     from benchflow.skill_eval import SkillEvaluator, export_gepa_traces
 
+    if agent is None:
+        agent = ["claude-agent-acp"]
     if not (skill_dir / "evals" / "evals.json").exists():
         console.print(
             f"[red]No evals/evals.json found in {skill_dir}[/red]\n"
@@ -757,7 +759,7 @@ def eval_create(
         eff_model = effective_model(agent, model)
         # Smart detection: if tasks_dir has task.toml, it's a single task
         if (tasks_dir / "task.toml").exists():
-            from benchflow.trial import Trial, TrialConfig, Scene
+            from benchflow.trial import Scene, Trial, TrialConfig
 
             config = TrialConfig(
                 task_path=tasks_dir,
diff --git a/src/benchflow/job.py b/src/benchflow/job.py
index 5429add2..70dfb560 100644
--- a/src/benchflow/job.py
+++ b/src/benchflow/job.py
@@ -126,9 +126,7 @@ def should_retry(self, error: str | None) -> bool:
             return True
         if self.retry_on_pipe and category == PIPE_CLOSED:
             return True
-        if self.retry_on_acp and category == ACP_ERROR:
-            return True
-        return False
+        return bool(self.retry_on_acp and category == ACP_ERROR)
 
     def backoff_delay(self, attempt: int) -> float:
         """Exponential backoff delay for retry attempt."""
diff --git a/src/benchflow/mcp/reviewer_server.py b/src/benchflow/mcp/reviewer_server.py
index 9c2dedcc..210746b9 100644
--- a/src/benchflow/mcp/reviewer_server.py
+++ b/src/benchflow/mcp/reviewer_server.py
@@ -24,8 +24,6 @@
 import json
 import logging
 import os
-import subprocess
-import sys
 from pathlib import Path
 
 logger = logging.getLogger(__name__)
@@ -52,11 +50,11 @@ def create_reviewer_server(
     """
     try:
         from fastmcp import FastMCP
-    except ImportError:
+    except ImportError as e:
         raise ImportError(
             "fastmcp required for MCP reviewer server. "
             "Install with: pip install fastmcp"
-        )
+        ) from e
 
     mcp = FastMCP("benchflow-reviewer")
     prompt = review_prompt or os.environ.get("REVIEWER_PROMPT", DEFAULT_REVIEW_PROMPT)
diff --git a/src/benchflow/process.py b/src/benchflow/process.py
index 4a0e4be2..197daa1c 100644
--- a/src/benchflow/process.py
+++ b/src/benchflow/process.py
@@ -313,7 +313,7 @@ async def start(
             # Use the full compose base command (with -p, --project-directory,
             # and -f flags) so that exec can find the running project.
             if self._compose_cmd_base:
-                inner_parts = shlex.split(self._compose_cmd_base) + ["exec", "-i", "-T"]
+                inner_parts = [*shlex.split(self._compose_cmd_base), "exec", "-i", "-T"]
             else:
                 inner_parts = ["docker", "compose", "exec", "-i", "-T"]
             if cwd:
@@ -453,7 +453,7 @@ async def start(
         logger.info(f"DaytonaPtyProcess: PTY connected (session={session_id})")
 
         compose_parts = shlex.split(self._compose_cmd_base) if self._compose_cmd_base else ["docker", "compose"]
-        exec_parts = compose_parts + ["exec", "-i", "-T"]
+        exec_parts = [*compose_parts, "exec", "-i", "-T"]
         if cwd:
             exec_parts.extend(["-w", cwd])
         # Write env vars to a file inside the container (not visible in ps aux),
@@ -475,7 +475,7 @@ async def start(
         marker = f"__BENCHFLOW_ACP_{session_id}__"
         setup = f"stty -echo 2>/dev/null; echo '{marker}'; exec {exec_cmd}\n"
         await self._pty.send_input(setup)
-        logger.info(f"DaytonaPtyProcess: sent setup, waiting for marker...")
+        logger.info("DaytonaPtyProcess: sent setup, waiting for marker...")
 
         while True:
             try:
@@ -484,10 +484,10 @@ async def start(
                 logger.debug(f"DaytonaPtyProcess drain: {decoded[:120]}")
                 if marker in decoded:
                     break
-            except TimeoutError:
-                raise ConnectionError("DaytonaPtyProcess: timeout waiting for agent start marker")
+            except TimeoutError as e:
+                raise ConnectionError("DaytonaPtyProcess: timeout waiting for agent start marker") from e
 
-        logger.info(f"DaytonaPtyProcess: marker seen, agent starting")
+        logger.info("DaytonaPtyProcess: marker seen, agent starting")
 
     async def readline(self) -> bytes:
         if self._closed:
@@ -495,10 +495,10 @@ async def readline(self) -> bytes:
         try:
             line = await asyncio.wait_for(self._line_buffer.get(), timeout=300)
             return line
-        except TimeoutError:
-            raise ConnectionError("PTY readline timeout (300s)")
+        except TimeoutError as e:
+            raise ConnectionError("PTY readline timeout (300s)") from e
         except Exception as e:
-            raise ConnectionError(f"PTY readline error: {e}")
+            raise ConnectionError(f"PTY readline error: {e}") from e
 
     async def writeline(self, data: str) -> None:
         if not self._pty or self._closed:
@@ -508,14 +508,10 @@ async def writeline(self, data: str) -> None:
     async def close(self) -> None:
         self._closed = True
         if self._pty:
-            try:
+            with contextlib.suppress(Exception):
                 await self._pty.kill()
-            except Exception:
-                pass
-            try:
+            with contextlib.suppress(Exception):
                 await self._pty.disconnect()
-            except Exception:
-                pass
             logger.info("DaytonaPtyProcess terminated")
 
     @property
diff --git a/src/benchflow/runtime.py b/src/benchflow/runtime.py
index 1e55f0c7..14a785ea 100644
--- a/src/benchflow/runtime.py
+++ b/src/benchflow/runtime.py
@@ -291,13 +291,13 @@ async def execute(self) -> RuntimeResult:
 
 
 async def run(
-    subject: "Agent | TrialConfig | str",
-    env: "Environment | str | None" = None,
+    subject: Agent | TrialConfig | str,
+    env: Environment | str | None = None,
     config: RuntimeConfig | None = None,
     *,
-    task_path: "str | Path | None" = None,
+    task_path: str | Path | None = None,
     model: str | None = None,
-) -> "RuntimeResult | RunResult":
+) -> RuntimeResult | RunResult:
     """Primary user-facing API — multiple calling conventions.
 
     Usage::
@@ -313,7 +313,6 @@ async def run(
         # 3. Agent name string (simplest)
         result = await bf.run("gemini", task_path="tasks/X")
     """
-    from benchflow.models import RunResult
     from benchflow.trial import Scene, Trial, TrialConfig
 
     if isinstance(subject, TrialConfig):
diff --git a/src/benchflow/sdk.py b/src/benchflow/sdk.py
index bd10a2d0..28fa82d4 100644
--- a/src/benchflow/sdk.py
+++ b/src/benchflow/sdk.py
@@ -98,33 +98,12 @@
 from harbor.models.trial.paths import TrialPaths
 from harbor.verifier.verifier import Verifier
 
-from benchflow._acp_run import connect_acp, execute_prompts
-from benchflow._agent_env import resolve_agent_env
-from benchflow._agent_setup import deploy_skills, install_agent
-from benchflow._credentials import (
-    upload_subscription_auth,
-    write_credential_files,
-)
 from benchflow._env_setup import (
-    _create_environment,
-    _inject_skills_into_dockerfile,
     _patch_harbor_dind,
-    stage_dockerfile_deps,
 )
 from benchflow._sandbox import (
-    _resolve_locked_paths,
-    _seed_verifier_workspace,
-    _snapshot_build_config,
     harden_before_verify,
-    lockdown_paths,
-    setup_sandbox_user,
-)
-from benchflow._trajectory import (
-    _capture_session_trajectory,
-    _scrape_agent_trajectory,
 )
-from benchflow.acp.client import ACPClient, ACPError
-from benchflow.agents.registry import AGENT_LAUNCH
 from benchflow.models import RunResult, TrajectorySource
 
 logger = logging.getLogger(__name__)
diff --git a/src/benchflow/skill_eval.py b/src/benchflow/skill_eval.py
index b848135e..82e96869 100644
--- a/src/benchflow/skill_eval.py
+++ b/src/benchflow/skill_eval.py
@@ -6,6 +6,7 @@
     result = await evaluator.run(agents=["claude-agent-acp"], environment="docker")
 """
 
+import contextlib
 import json
 import logging
 import shutil
@@ -431,7 +432,7 @@ async def run(
             all_results: list[CaseResult] = []
 
             # Run each agent
-            for agent, model in zip(agents, models):
+            for agent, model in zip(agents, models, strict=False):
                 agent_label = agent.split("/")[-1] if "/" in agent else agent
 
                 # With-skill run
@@ -486,9 +487,9 @@ async def _run_job(
         with_skill: bool,
     ) -> list[CaseResult]:
         """Run a batch of tasks using Job for concurrency and retries."""
-        from benchflow.job import Job, JobConfig, RetryConfig
-
         import os
+
+        from benchflow.job import Job, JobConfig, RetryConfig
         judge_env = {}
         for key in ("ANTHROPIC_API_KEY", "OPENAI_API_KEY", "GOOGLE_API_KEY", "GEMINI_API_KEY"):
             if os.environ.get(key):
@@ -506,7 +507,7 @@ async def _run_job(
                 agent_env=judge_env,
             ),
         )
-        job_result = await j.run()
+        await j.run()
 
         results = []
         # Walk the jobs directory to collect per-case results
@@ -539,10 +540,8 @@ async def _run_job(
                 # Read judge rubric details if available
                 judge_file = trial_dir / "verifier" / "judge_result.json"
                 if judge_file.exists():
-                    try:
+                    with contextlib.suppress(json.JSONDecodeError, KeyError):
                         rubric_results = json.loads(judge_file.read_text()).get("items")
-                    except (json.JSONDecodeError, KeyError):
-                        pass
 
                 results.append(
                     CaseResult(
@@ -579,7 +578,7 @@ def _compute_lifts(
     ) -> list[AgentLift]:
         """Compute per-agent lift from case results."""
         lifts = []
-        for agent, model in zip(agents, models):
+        for agent, model in zip(agents, models, strict=False):
             with_results = [r for r in all_results if r.agent == agent and r.with_skill]
             baseline_results = [
                 r for r in all_results if r.agent == agent and not r.with_skill
diff --git a/src/benchflow/trial.py b/src/benchflow/trial.py
index d2af9ae0..d2f1570e 100644
--- a/src/benchflow/trial.py
+++ b/src/benchflow/trial.py
@@ -35,7 +35,7 @@
 
 from __future__ import annotations
 
-import asyncio
+import contextlib
 import json
 import logging
 import shlex
@@ -57,7 +57,6 @@
     _resolve_locked_paths,
     _seed_verifier_workspace,
     _snapshot_build_config,
-    harden_before_verify,
     lockdown_paths,
     setup_sandbox_user,
 )
@@ -66,7 +65,7 @@
     _scrape_agent_trajectory,
 )
 from benchflow.acp.client import ACPClient, ACPError
-from benchflow.agents.registry import AGENT_LAUNCH, AGENTS
+from benchflow.agents.registry import AGENT_LAUNCH
 from benchflow.models import RunResult, TrajectorySource
 
 logger = logging.getLogger(__name__)
@@ -110,7 +109,7 @@ def single(
         prompts: list[str | None] | None = None,
         role_name: str = "agent",
         skills_dir: str | Path | None = None,
-    ) -> "Scene":
+    ) -> Scene:
         """Shortcut for single-agent, single-role scene."""
         prompts = prompts or [None]
         return cls(
@@ -157,7 +156,7 @@ def from_legacy(
         prompts: list[str | None] | None = None,
         skills_dir: str | Path | None = None,
         **kwargs,
-    ) -> "TrialConfig":
+    ) -> TrialConfig:
         """Construct from flat SDK.run()-style args."""
         return cls(
             task_path=task_path,
@@ -440,10 +439,8 @@ async def disconnect(self) -> None:
         # Kill any lingering agent processes to prevent context bleed between scenes
         if self._env and self._agent_launch.strip():
             agent_cmd = self._agent_launch.split()[0].split("/")[-1]
-            try:
+            with contextlib.suppress(Exception):
                 await self._env.exec(f"pkill -f '{agent_cmd}' || true", timeout_sec=10)
-            except Exception:
-                pass
         self._session_tool_count = 0
         self._session_traj_count = 0
         self._phase = "installed"
@@ -630,7 +627,7 @@ async def _run_scene(self, scene: Scene) -> None:
         inbox: dict[str, list[str]] = {r.name: [] for r in scene.roles}
         turn_counter = 0
 
-        for i, turn in enumerate(scene.turns):
+        for _i, turn in enumerate(scene.turns):
             role = role_map.get(turn.role)
             if not role:
                 raise ValueError(f"Turn references unknown role {turn.role!r}")
diff --git a/tests/conformance/proof_multi_agent.py b/tests/conformance/proof_multi_agent.py
index 902d2ca3..9aa2729c 100644
--- a/tests/conformance/proof_multi_agent.py
+++ b/tests/conformance/proof_multi_agent.py
@@ -18,6 +18,8 @@
 
 sys.path.insert(0, str(Path(__file__).resolve().parents[2] / "src"))
 
+import contextlib
+
 from harbor.models.task.task import Task
 
 from benchflow._acp_run import connect_acp, execute_prompts
@@ -73,7 +75,7 @@ async def role_runner(env, role: Role, prompt: str) -> None:
     trial_dir = Path(f"/tmp/multi-agent-proof/{role.name}")
     trial_dir.mkdir(parents=True, exist_ok=True)
     launch_cmd = AGENT_LAUNCH.get(role.agent, role.agent)
-    acp_client, session, agent_name = await connect_acp(
+    acp_client, session, _agent_name = await connect_acp(
         env=env,
         agent=role.agent,
         agent_launch=launch_cmd,
@@ -85,7 +87,7 @@ async def role_runner(env, role: Role, prompt: str) -> None:
         agent_cwd="/app",
     )
     try:
-        trajectory, n_tools = await execute_prompts(
+        _trajectory, n_tools = await execute_prompts(
             acp_client,
             session,
             [prompt],
@@ -93,10 +95,8 @@ async def role_runner(env, role: Role, prompt: str) -> None:
         )
         logging.info(f"[{role.name}] finished: {n_tools} tool calls")
     finally:
-        try:
+        with contextlib.suppress(Exception):
             await acp_client.close()
-        except Exception:
-            pass
 
 
 async def main() -> None:
diff --git a/tests/conformance/run_conformance.py b/tests/conformance/run_conformance.py
index b8aa5038..1f758a13 100644
--- a/tests/conformance/run_conformance.py
+++ b/tests/conformance/run_conformance.py
@@ -53,9 +53,7 @@ def has_creds(agent_name: str) -> bool:
     if any(os.environ.get(k) for k in keys):
         return True
     sub_file = SUBSCRIPTION_AUTH_FILES.get(agent_name)
-    if sub_file and Path(sub_file).expanduser().exists():
-        return True
-    return False
+    return bool(sub_file and Path(sub_file).expanduser().exists())
 
 
 async def run_one(agent_name: str) -> dict:
diff --git a/tests/test_oracle_chokepoint.py b/tests/test_oracle_chokepoint.py
index 142c0634..a61333da 100644
--- a/tests/test_oracle_chokepoint.py
+++ b/tests/test_oracle_chokepoint.py
@@ -21,11 +21,9 @@
 
 from __future__ import annotations
 
-import importlib
 from pathlib import Path
 from unittest.mock import AsyncMock, patch
 
-import pytest
 from typer.testing import CliRunner
 
 
diff --git a/tests/test_rewards_jsonl.py b/tests/test_rewards_jsonl.py
index e81f3164..bbc029df 100644
--- a/tests/test_rewards_jsonl.py
+++ b/tests/test_rewards_jsonl.py
@@ -93,7 +93,7 @@ def test_rubric_plus_terminal_no_rubric_in_meta(tmp_path: Path) -> None:
         json.loads(ln)
         for ln in (tmp_path / "rewards.jsonl").read_text().strip().splitlines()
     ]
-    terminal = [ln for ln in lines if ln["type"] == "terminal"][0]
+    terminal = next(ln for ln in lines if ln["type"] == "terminal")
     assert "rubric" not in terminal["meta"]
 
 
diff --git a/tests/test_scene_outbox_trial.py b/tests/test_scene_outbox_trial.py
index 4e6d4780..5b6f07ad 100644
--- a/tests/test_scene_outbox_trial.py
+++ b/tests/test_scene_outbox_trial.py
@@ -11,8 +11,7 @@
 import json
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any
-from unittest.mock import AsyncMock, patch
+from unittest.mock import AsyncMock
 
 import pytest
 
@@ -325,7 +324,6 @@ async def test_heterogeneous_agent_install(coder_reviewer_scene: Scene) -> None:
     trial._resolved_prompts = ["Solve the task"]
 
     installed_agents: list[str] = []
-    original_connect_as = Trial.connect_as
 
     async def tracking_connect_as(self_inner, role):
         if role.agent != config.primary_agent:
diff --git a/tests/test_skill_eval.py b/tests/test_skill_eval.py
index c1f2b3e6..0c8c3edc 100644
--- a/tests/test_skill_eval.py
+++ b/tests/test_skill_eval.py
@@ -133,7 +133,7 @@ def test_minimal_dataset(self, minimal_skill_dir):
     def test_missing_evals_json(self, tmp_path):
         skill = tmp_path / "no-evals"
         skill.mkdir()
-        with pytest.raises(FileNotFoundError, match="evals.json"):
+        with pytest.raises(FileNotFoundError, match=r"evals\.json"):
             load_eval_dataset(skill)
 
     def test_empty_cases(self, tmp_path):
diff --git a/tests/test_skill_eval_dryrun.py b/tests/test_skill_eval_dryrun.py
index 39b6a57e..931a8e96 100644
--- a/tests/test_skill_eval_dryrun.py
+++ b/tests/test_skill_eval_dryrun.py
@@ -218,7 +218,7 @@ def test_gepa_export_roundtrip(self, mock_skill):
 
         # Verify trace files
         traces = list((gepa_dir / "traces").iterdir())
-        assert len(traces) == 4  # 2 cases × 2 modes
+        assert len(traces) == 4  # 2 cases x 2 modes
 
         # Verify trace content
         trace = json.loads(traces[0].read_text())
@@ -228,6 +228,7 @@ def test_gepa_export_roundtrip(self, mock_skill):
 
     def test_cli_dryrun_loads_dataset(self, mock_skill):
         from typer.testing import CliRunner
+
         from benchflow.cli.main import app
 
         runner = CliRunner()