dereknorrbom · dereknorrbom · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -64,11 +64,54 @@ Before pushing a branch or marking a PR ready for review, run the following loca
 ```sh
 make fix        # auto-format (black + ruff --fix)
 make check      # verify fmt + lint are clean (CI-equivalent)
-make test       # all tests must pass
+make test       # all tests must pass (unit + BDD)
 ```
 
 CI runs the same checks. A PR with a failing lint or test step will not be merged.
 
+## Behavior-Driven Development (BDD)
+
+This project uses [pytest-bdd](https://pytest-bdd.readthedocs.io/) to make behavior specifications executable. BDD is mandatory for all new user-facing behavior, following the outside-in process prescribed in `CONTRIBUTING_AGENT.md`.
+
+### Directory layout
+
+```
+tests/
+  features/       # Gherkin .feature files — one file per feature area
+  steps/          # Step definition files — one test_<area>.py per feature file
+```
+
+### The process (per CONTRIBUTING_AGENT.md)
+
+1. Write the Gherkin scenario in a `.feature` file before any production code
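+2. Run `make test` — confirm the scenario is collected and **fails** (pytest-bdd only collects scenarios that a step file registers, so add the `scenarios(...)` call first)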
+3. Write the minimum step definitions and production code to make it pass
+4. Refactor while all scenarios stay green
+
+### Writing scenarios
+
+- Feature files live in `tests/features/<area>.feature`
+- Step definitions live in `tests/steps/test_<area>.py`
+- Each step file must call `scenarios("../features/<area>.feature")` to register all scenarios
+- Use `parsers.parse(...)` for steps with quoted parameters, e.g.:
+
+```python
+@then(parsers.parse('the report is named "{name}"'))
+def report_named(ctx, name): ...
+```
+
+- State is shared between the steps of a scenario via a `ctx` fixture (a plain dict) rather than module-level state
+- BDD scenarios test observable behavior (CLI output, file names, command arguments); they do not test internal implementation details
+
+### Running BDD tests
+
+```sh
+make test                              # runs everything including BDD
+poetry run pytest tests/steps/ -v     # BDD only
+```
+
+All scenarios must be green before a PR is opened.
+
 ## Commands
 
 ```sh

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -41,6 +41,7 @@ packages = [{ include = "run_codeql" }]
 
 [tool.poetry.group.dev.dependencies]
 pytest = ">=8.0"
+pytest-bdd = ">=7.0"
 pytest-cov = ">=6.0"
 black = ">=25.0"
 ruff = ">=0.9"

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -0,0 +1,79 @@
+"""Shared pytest fixtures and helpers for both unit and BDD tests."""
+
+import json
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+FIXTURES = Path(__file__).parent / "fixtures"
+
+
+def run_rcql(args: list[str], cwd: Path) -> subprocess.CompletedProcess:
+    return subprocess.run(
+        [sys.executable, "-m", "run_codeql"] + args,
+        cwd=cwd,
+        capture_output=True,
+        text=True,
+    )
+
+
+def make_report_dir(tmp_path: Path, *sarif_names: str) -> Path:
+    report_dir = tmp_path / ".codeql" / "reports"
+    report_dir.mkdir(parents=True, exist_ok=True)
+    for name in sarif_names:
+        shutil.copy(FIXTURES / name, report_dir / name)
+    return report_dir
+
+
+def write_sarif_with_paths(tmp_path: Path, paths: list[str], lang: str = "python") -> None:
+    report_dir = tmp_path / ".codeql" / "reports"
+    report_dir.mkdir(parents=True, exist_ok=True)
+    sarif = {
+        "runs": [
+            {
+                "tool": {
+                    "driver": {
+                        "rules": [
+                            {
+                                "id": "py/unused-import",
+                                "shortDescription": {"text": "Unused import"},
+                            }
+                        ]
+                    }
+                },
+                "results": [
+                    {
+                        "ruleId": "py/unused-import",
+                        "level": "warning",
+                        "message": {"text": f"finding-{idx}"},
+                        "locations": [
+                            {
+                                "physicalLocation": {
+                                    "artifactLocation": {"uri": uri},
+                                    "region": {"startLine": idx + 1},
+                                }
+                            }
+                        ],
+                    }
+                    for idx, uri in enumerate(paths)
+                ],
+            }
+        ]
+    }
+    (report_dir / f"{lang}-code-quality.sarif").write_text(json.dumps(sarif), encoding="utf-8")
+
+
+def write_repo_config(tmp_path: Path, payload: dict) -> None:
+    (tmp_path / ".rcql.json").write_text(json.dumps(payload), encoding="utf-8")
+
+
+@pytest.fixture()
+def cli_ctx(tmp_path):
+    """Shared mutable context used by BDD CLI step definitions."""
+    return {
+        "tmp_path": tmp_path,
+        "result": None,
+    }
diff --git a/tests/features/cli_output.feature b/tests/features/cli_output.feature
@@ -0,0 +1,57 @@
+Feature: CLI output modes
+  As an AI agent consuming rcql output,
+  I want predictable stdout/stderr output and exit codes,
+  So that I can reliably parse results and decide next steps.
+
+  Scenario: Report-only with findings exits non-zero
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only"
+    Then the exit code is non-zero
+
+  Scenario: Report-only with no findings exits zero
+    Given an empty Python SARIF report exists
+    When I run rcql with "--report-only"
+    Then the exit code is zero
+
+  Scenario: --no-fail forces exit zero even with findings
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --no-fail"
+    Then the exit code is zero
+
+  Scenario: Report-only output includes language block
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --no-fail"
+    Then stdout contains "[python]"
+
+  Scenario: Report-only output includes finding count
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --no-fail"
+    Then stdout contains "Total: 3"
+
+  Scenario: Verbose mode includes rule IDs
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --verbose --no-fail"
+    Then stdout contains "py/sql-injection"
+
+  Scenario: Quiet mode suppresses log lines from stdout
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --quiet --no-fail"
+    Then stdout does not contain "[codeql-local]"
+
+  Scenario: Quiet mode prints mode message to stderr
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --quiet --no-fail"
+    Then stderr contains "quiet mode"
+
+  Scenario: Missing SARIF reports exit non-zero with helpful message
+    Given no SARIF reports exist
+    When I run rcql with "--report-only"
+    Then the exit code is non-zero
+    And stderr contains "No SARIF files found"
+
+  Scenario: --lang filter shows only requested language
+    Given a Python SARIF report with findings exists
+    And an empty Rust SARIF report exists
+    When I run rcql with "--report-only --lang=python --no-fail"
+    Then stdout contains "[python]"
+    And stdout does not contain "[rust]"
diff --git a/tests/features/findings_filtering.feature b/tests/features/findings_filtering.feature
@@ -0,0 +1,81 @@
+Feature: Findings filtering
+  As an AI agent using rcql to investigate specific files or rules,
+  I want to filter findings by file path, rule ID, and pagination,
+  So that I can retrieve exactly the findings relevant to my current task.
+
+  Scenario: --files filters to matching path
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --no-fail --files src/db.py"
+    Then stdout contains "Shown: 1"
+    And stdout contains "matched: 1"
+
+  Scenario: --files with glob matches multiple paths
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --no-fail --files src/*.py"
+    Then stdout contains "Shown: 3"
+
+  Scenario: --files with no match suppresses language block
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --no-fail --files nonexistent.py"
+    Then stdout does not contain "[python]"
+
+  Scenario: --rule filters to matching rule ID
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --no-fail --rule py/unused-import"
+    Then stdout contains "Shown: 2"
+    And stdout contains "matched: 2"
+
+  Scenario: --rule with glob matches all rules for a language
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --no-fail --rule py/*"
+    Then stdout contains "Shown: 3"
+
+  Scenario: --rule with no match suppresses language block
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --no-fail --rule js/something"
+    Then stdout does not contain "[python]"
+
+  Scenario: --files and --rule combined filter findings
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --no-fail --files src/utils.py --rule py/unused-import"
+    Then stdout contains "Shown: 2"
+
+  Scenario: --limit caps the number of shown findings
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --no-fail --limit 1"
+    Then stdout contains "Shown: 1"
+    And stdout contains "matched: 3"
+
+  Scenario: --offset skips leading findings
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --no-fail --offset 2"
+    Then stdout contains "Shown: 1"
+    And stdout contains "matched: 3"
+
+  Scenario: Pagination - page one
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --no-fail --limit 2 --offset 0"
+    Then stdout contains "Shown: 2"
+    And stdout contains "matched: 3"
+
+  Scenario: Pagination - page two
+    Given a Python SARIF report with findings exists
+    When I run rcql with "--report-only --no-fail --limit 2 --offset 2"
+    Then stdout contains "Shown: 1"
+    And stdout contains "matched: 3"
+
+  Scenario: Default excludes suppress node_modules findings
+    Given a SARIF report exists with findings in "src/app.py" and "app/node_modules/pkg/index.py"
+    When I run rcql with "--report-only --no-fail"
+    Then stdout contains "Total: 1"
+    And stdout does not contain "node_modules"
+
+  Scenario: --include-third-party restores suppressed paths
+    Given a SARIF report exists with findings in "src/app.py" and "node_modules/pkg/index.py"
+    When I run rcql with "--report-only --no-fail --include-third-party"
+    Then stdout contains "Total: 2"
+
+  Scenario: --exclude-files hides matching paths
+    Given a SARIF report exists with findings in "src/app.py" and "src/generated/foo.py"
+    When I run rcql with "--report-only --no-fail --exclude-files src/generated/**"
+    Then stdout contains "Total: 1"
diff --git a/tests/features/language_detection.feature b/tests/features/language_detection.feature
@@ -0,0 +1,61 @@
+Feature: Language detection
+  As an AI agent running rcql on a repository,
+  I want rcql to automatically detect which languages are present,
+  So that I don't need to know the repo's tech stack in advance.
+
+  Scenario: Detects Python from .py files
+    Given the repo contains "src/main.py"
+    When I run language detection
+    Then the detected languages include "python"
+
+  Scenario: Detects Rust from .rs files
+    Given the repo contains "src/main.rs"
+    When I run language detection
+    Then the detected languages include "rust"
+
+  Scenario: Detects JavaScript/TypeScript from .ts files
+    Given the repo contains "src/index.ts"
+    When I run language detection
+    Then the detected languages include "javascript-typescript"
+
+  Scenario: Detects multiple languages
+    Given the repo contains "app.py"
+    And the repo contains "main.rs"
+    When I run language detection
+    Then the detected languages include "python"
+    And the detected languages include "rust"
+
+  Scenario: Detects GitHub Actions from workflow files
+    Given the repo contains ".github/workflows/ci.yml"
+    When I run language detection
+    Then the detected languages include "actions"
+
+  Scenario: Does not detect Actions without workflow directory
+    Given the repo contains "src/app.py"
+    When I run language detection
+    Then the detected languages do not include "actions"
+
+  Scenario: Ignores files inside node_modules
+    Given the repo contains "node_modules/lib/index.js"
+    And the repo contains "src/app.py"
+    When I run language detection
+    Then the detected languages include "python"
+    And the detected languages do not include "javascript-typescript"
+
+  Scenario: Empty repo detects no languages
+    Given the repo is empty
+    When I run language detection
+    Then no languages are detected
+
+  Scenario: Unknown file extensions are ignored
+    Given the repo contains "README.md"
+    And the repo contains "data.csv"
+    When I run language detection
+    Then no languages are detected
+
+  Scenario: Detection results are sorted alphabetically
+    Given the repo contains "a.rs"
+    And the repo contains "b.py"
+    And the repo contains "c.go"
+    When I run language detection
+    Then the detected languages are sorted alphabetically
diff --git a/tests/features/repo_config.feature b/tests/features/repo_config.feature
@@ -0,0 +1,44 @@
+Feature: Repository configuration
+  As an AI agent working in a repo with a .rcql.json config,
+  I want rcql to honour the repo config as a baseline
+  and let CLI flags override it,
+  So that repo-level defaults don't require flags on every invocation.
+
+  Scenario: Repo config files filter is applied automatically
+    Given a Python SARIF report with findings exists
+    And the repo config sets files to "src/utils.py"
+    When I run rcql with "--report-only --no-fail"
+    Then stdout contains "Shown: 2"
+    And stdout contains "matched: 2"
+
+  Scenario: CLI --files overrides repo config files filter
+    Given a Python SARIF report with findings exists
+    And the repo config sets files to "src/db.py"
+    When I run rcql with "--report-only --no-fail --files src/utils.py"
+    Then stdout contains "Shown: 2"
+    And stdout contains "matched: 2"
+
+  Scenario: Repo config exclude_files is applied automatically
+    Given a SARIF report exists with findings in "src/app.py" and "src/generated/foo.py"
+    And the repo config sets exclude_files to "src/generated/**"
+    When I run rcql with "--report-only --no-fail"
+    Then stdout contains "Total: 1"
+
+  Scenario: Repo config include_third_party opt-in
+    Given a SARIF report exists with findings in "src/app.py" and "node_modules/pkg/index.py"
+    And the repo config sets include_third_party to true
+    When I run rcql with "--report-only --no-fail"
+    Then stdout contains "Total: 2"
+
+  Scenario: Missing config file is silently ignored
+    Given no repo config file exists
+    And a Python SARIF report with findings exists
+    When I run rcql with "--report-only --no-fail"
+    Then the exit code is zero
+    And stdout contains "[python]"
+
+  Scenario: --config empty string disables config loading
+    Given a Python SARIF report with findings exists
+    And the repo config sets files to "src/db.py"
+    When I run rcql with "--report-only --no-fail --config ''"
+    Then stdout contains "Total: 3"