diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 00000000..0f34f2be --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,67 @@ +# ============================================================================= +# CodeQL Workflow — static analysis for Rust extension code +# +# Uses build-mode: none (the only mode supported for Rust) so CodeQL analyses +# the source directly. This complements clippy by looking for higher-level +# security and dataflow issues. +# ============================================================================= +name: CodeQL + +on: + push: + branches: [main] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - 'PLAN*.md' + - 'REPORT*.md' + - 'adrs/**' + - 'plans/**' + - 'coverage/**' + pull_request: + branches: [main] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + - 'PLAN*.md' + - 'REPORT*.md' + - 'adrs/**' + - 'plans/**' + - 'coverage/**' + schedule: + - cron: '0 9 * * 1' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + actions: read + contents: read + security-events: write + +env: + CARGO_TERM_COLOR: always + PG_VERSION: '18' + +jobs: + analyze: + name: CodeQL (Rust) + runs-on: ubuntu-latest + timeout-minutes: 25 + steps: + - uses: actions/checkout@v4 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v4 + with: + languages: rust + build-mode: none + + - name: Perform CodeQL analysis + uses: github/codeql-action/analyze@v4 diff --git a/.github/workflows/dependency-policy.yml b/.github/workflows/dependency-policy.yml new file mode 100644 index 00000000..7c0ab1b6 --- /dev/null +++ b/.github/workflows/dependency-policy.yml @@ -0,0 +1,56 @@ +# ============================================================================= +# Dependency Policy Workflow — cargo-deny checks for advisory and supply-chain +# issues in the Rust dependency graph. +# ============================================================================= +name: Dependency policy + +on: + push: + branches: [main] + paths: + - 'Cargo.toml' + - 'Cargo.lock' + - 'deny.toml' + - '.github/workflows/dependency-policy.yml' + pull_request: + branches: [main] + paths: + - 'Cargo.toml' + - 'Cargo.lock' + - 'deny.toml' + - '.github/workflows/dependency-policy.yml' + schedule: + - cron: '30 9 * * 1' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +env: + CARGO_TERM_COLOR: always + +jobs: + cargo-deny: + name: cargo deny + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: stable + + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 + + - name: Install cargo-deny + run: cargo install --locked cargo-deny + + - name: Run cargo-deny policy checks + run: cargo deny check advisories bans sources diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml new file mode 100644 index 00000000..5b865024 --- /dev/null +++ b/.github/workflows/semgrep.yml @@ -0,0 +1,60 @@ +# ============================================================================= +# Semgrep Workflow — repository-specific static checks for extension security +# hotspots such as dynamic SPI SQL and SECURITY DEFINER SQL. +# +# This first pass is advisory: findings are uploaded as SARIF but do not fail +# CI yet. That keeps the initial rollout useful without immediately blocking on +# the existing dynamic-SPI surface area. +# ============================================================================= +name: Semgrep + +on: + push: + branches: [main] + paths: + - 'src/**' + - 'sql/**' + - '.semgrep/**' + - '.github/workflows/semgrep.yml' + pull_request: + branches: [main] + paths: + - 'src/**' + - 'sql/**' + - '.semgrep/**' + - '.github/workflows/semgrep.yml' + schedule: + - cron: '0 10 * * 1' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + security-events: write + +jobs: + semgrep: + name: Semgrep + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install Semgrep + run: python -m pip install --upgrade semgrep + + - name: Run Semgrep ruleset + run: semgrep scan --config .semgrep/pg_trickle.yml --sarif --output semgrep.sarif + + - name: Upload SARIF report + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: semgrep.sarif diff --git a/.semgrep/pg_trickle.yml b/.semgrep/pg_trickle.yml new file mode 100644 index 00000000..8e9ad6f1 --- /dev/null +++ b/.semgrep/pg_trickle.yml @@ -0,0 +1,44 @@ +rules: + - id: rust.spi.run.dynamic-format + languages: + - rust + severity: WARNING + message: >- + Dynamic SQL passed to Spi::run() should be reviewed for quoting, + privilege context, and user-controlled interpolation. Prefer fixed SQL + with parameters when possible; otherwise ensure identifiers and literals + are escaped with the repo's quoting helpers. + patterns: + - pattern-either: + - pattern: Spi::run(&format!($FMT, ...)) + - pattern: pgrx::Spi::run(&format!($FMT, ...)) + + - id: rust.spi.query.dynamic-format + languages: + - rust + severity: WARNING + message: >- + Dynamic SQL passed to SPI query helpers should be reviewed for SQL + injection risk, quoting discipline, and object-name construction. + patterns: + - pattern-either: + - pattern: Spi::get_one::<$T>(&format!($FMT, ...)) + - pattern: Spi::get_two::<$T1, $T2>(&format!($FMT, ...)) + - pattern: Spi::get_three::<$T1, $T2, $T3>(&format!($FMT, ...)) + - pattern: pgrx::Spi::get_one::<$T>(&format!($FMT, ...)) + - pattern: pgrx::Spi::get_two::<$T1, $T2>(&format!($FMT, ...)) + - pattern: pgrx::Spi::get_three::<$T1, $T2, $T3>(&format!($FMT, ...)) + + - id: sql.security-definer.present + languages: + - generic + severity: INFO + message: >- + SECURITY DEFINER requires explicit review. Pin search_path and verify the + function body cannot be influenced by attacker-controlled identifiers or + SQL fragments. + paths: + include: + - src/** + - sql/** + pattern-regex: SECURITY\s+DEFINER diff --git a/CHANGELOG.md b/CHANGELOG.md index a99f1be7..e055b2c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,29 @@ For future plans and release milestones, see [ROADMAP.md](ROADMAP.md). ### Added +- **SAST / static security analysis baseline** — new security tooling on the + `codeql-workflow` branch, now merged: + - **GitHub CodeQL** (`.github/workflows/codeql.yml`) — runs 16 Rust security + queries (SQL injection, path injection, SSRF, cleartext logging, weak + crypto, uncontrolled allocation, invalid pointer access, and more) on every + PR and push to `main`. First scan completed with **zero findings** across + all 115 Rust source files. Uses `build-mode: none` (the only mode supported + for Rust) and the `v4` action. + - **`cargo deny`** (`.github/workflows/dependency-policy.yml` + `deny.toml`) + — enforces an explicit license allow-list (Apache-2.0, MIT, BSD-2-Clause, + BSD-3-Clause, ISC, Unlicense, Zlib, BSL-1.0, Unicode-3.0), blocks unknown + registries and git sources, and surfaces unmaintained/yanked crates as + warnings. Duplicate-version skews from upstream pgrx / testcontainers + version mismatches are suppressed via `skip` entries. + - **Semgrep** (`.github/workflows/semgrep.yml` + `.semgrep/pg_trickle.yml`) + — repo-specific rules flagging dynamic SQL passed to `Spi::run` / + `Spi::get_*` and `SECURITY DEFINER` occurrences. Advisory-only (SARIF + upload, no CI failure) until rules are tuned. + - **`plans/testing/PLAN_SAST.md`** — full SAST strategy document including + threat model, five-phase rollout plan, Semgrep rules roadmap, unsafe/FFI + review policy, CI posture table, and a **Security Newbie Checklist** for + reviewers unfamiliar with extension-specific security patterns. + - **TPC-H test suite enhancements (T1–T6)** — second wave of TPC-H correctness coverage, building on the 22/22 passing DIFFERENTIAL baseline: - **T1 — `__pgt_count` guard** in `assert_tpch_invariant`: detects diff --git a/deny.toml b/deny.toml new file mode 100644 index 00000000..d2984f0e --- /dev/null +++ b/deny.toml @@ -0,0 +1,75 @@ +[graph] +all-features = true + +[licenses] +# Allow the standard permissive and weak-copyleft open-source licenses used +# across the dependency tree. Unicode-3.0 is required by the icu_* crates; +# BSL-1.0 is used by Boost-derived crates pulled in by pgrx internals. +version = 2 +allow = [ + "Apache-2.0", + "MIT", + "BSD-2-Clause", + "BSD-3-Clause", + "ISC", + "Unlicense", + "Zlib", + "BSL-1.0", + "Unicode-3.0", +] + +[advisories] +version = 2 +yanked = "warn" +ignore = [ + # paste is used internally by pgrx macros; we have no upstream control over + # this dep. Not a vulnerability, only a maintenance status notice. + "RUSTSEC-2024-0436", + # rustls-pemfile v2 is pulled in by testcontainers/bollard (dev-only path). + # Maintenance moved to rustls-pki-types; will be resolved when bollard updates. + "RUSTSEC-2025-0134", + # serde_cbor is pulled in transitively (dev path). Not a security vulnerability, + # only unmaintained. Use of ciborium is the upstream recommendation but we + # have no direct control over this dep. + "RUSTSEC-2021-0127", +] + +[bans] +multiple-versions = "warn" +wildcards = "deny" +highlight = "all" +# These duplicate versions come from pgrx pulling one version and +# testcontainers/bollard pulling another. We have no direct control over these +# version skews; they are dev-only or cross-compilation stubs. +skip = [ + # thiserror: pgrx uses v1, our code and testcontainers use v2 + { name = "thiserror", version = "=1.0.69" }, + { name = "thiserror-impl", version = "=1.0.69" }, + # getrandom: split between pgrx (0.3) and newer crates (0.4) + { name = "getrandom", version = "=0.3.4" }, + # hashbrown: multiple major versions pulled by indexmap and others + { name = "hashbrown", version = "=0.15.5" }, + # wasi: 0.11 from older crates, 0.14 from newer wasmtime-based stack + { name = "wasi", version = "=0.11.1+wasi-snapshot-preview1" }, + # windows-* crates: version skew between ring (0.52) and socket2/hyper-util + # (0.53+) pulled by bollard/testcontainers + { name = "windows-core", version = "=0.57.0" }, + { name = "windows-implement", version = "=0.57.0" }, + { name = "windows-interface", version = "=0.57.0" }, + { name = "windows-result", version = "=0.1.2" }, + { name = "windows-sys", version = "=0.60.2" }, + { name = "windows-targets", version = "=0.52.6" }, + { name = "windows_aarch64_gnullvm", version = "=0.52.6" }, + { name = "windows_aarch64_msvc", version = "=0.52.6" }, + { name = "windows_i686_gnu", version = "=0.52.6" }, + { name = "windows_i686_gnullvm", version = "=0.52.6" }, + { name = "windows_i686_msvc", version = "=0.52.6" }, + { name = "windows_x86_64_gnu", version = "=0.52.6" }, + { name = "windows_x86_64_gnullvm", version = "=0.52.6" }, + { name = "windows_x86_64_msvc", version = "=0.52.6" }, +] + +[sources] +unknown-registry = "deny" +unknown-git = "deny" +allow-registry = ["https://github.com/rust-lang/crates.io-index"] diff --git a/plans/testing/PLAN_SAST.md b/plans/testing/PLAN_SAST.md new file mode 100644 index 00000000..9c107e86 --- /dev/null +++ b/plans/testing/PLAN_SAST.md @@ -0,0 +1,653 @@ +# PLAN: Static Application Security Testing (SAST) + +**Status:** In progress +**Date:** 2026-03-10 +**Branch:** `codeql-workflow` +**Scope:** Establish a practical SAST program for the `pg_trickle` PostgreSQL +extension that covers Rust application code, PostgreSQL extension attack +surfaces, dependency risk, and extension-specific SQL/security-context hazards. + +--- + +## Table of Contents + +1. [Security Newbie Checklist](#security-newbie-checklist) +2. [Motivation](#motivation) +3. [Threat Model and Security Surfaces](#threat-model-and-security-surfaces) +4. [Current State](#current-state) +5. [Goals and Non-Goals](#goals-and-non-goals) +6. [Proposed SAST Stack](#proposed-sast-stack) +7. [Phase Plan](#phase-plan) +8. [Semgrep Rules Roadmap](#semgrep-rules-roadmap) +9. [Unsafe / FFI Review Strategy](#unsafe--ffi-review-strategy) +10. [CI Integration Strategy](#ci-integration-strategy) +11. [Finding Triage and Policy](#finding-triage-and-policy) +12. [Validation Plan](#validation-plan) +13. [Success Criteria](#success-criteria) +14. [Implementation Checklist](#implementation-checklist) +15. [Open Questions](#open-questions) + +--- + +## Security Newbie Checklist + +> **Not a security expert? Start here.** +> This section explains what this PR does and what you need to know as a +> reviewer or contributor, without requiring a deep background in static +> analysis or PostgreSQL security. + +### What is SAST and why are we adding it? + +**SAST** (Static Application Security Testing) means running automated tools +over source code to spot likely security problems before the code ever runs. +Think of it as a spell-checker, but for security patterns. + +`pg_trickle` is a PostgreSQL extension that runs inside the Postgres server +process with elevated privileges. A bug here can affect the database itself, +not just some isolated application. That means the bar for security hygiene is +higher than a typical Rust CLI or web service. + +The existing tooling (clippy, cargo audit, unit / E2E tests) is great for +correctness. SAST adds a complementary layer that looks specifically for +security-relevant patterns: unsafe pointer use, dynamic SQL that could be +abused, privilege escalation helpers, and risky dependency sources. + +### What files does this PR actually add? + +| File | Plain-English purpose | +|------|-----------------------| +| `.github/workflows/codeql.yml` | Runs GitHub's CodeQL scanner on Rust code. Findings appear in the repo's Security tab. | +| `.github/workflows/dependency-policy.yml` | Runs `cargo deny` to block known-bad or off-registry dependencies. | +| `deny.toml` | Configuration file that tells `cargo deny` what is and isn't allowed. | +| `.github/workflows/semgrep.yml` | Runs Semgrep to detect pg_trickle-specific patterns (see below). | +| `.semgrep/pg_trickle.yml` | Custom Semgrep rules written for this codebase. Currently advisory only. | + +### Will any of these new checks block my PR? + +**Right now:** CodeQL and `cargo deny` are blocking. Semgrep is advisory only +(it uploads findings but will not fail a PR). + +| Check | Breaks the build? | What it catches | +|-------|-------------------|-----------------| +| CodeQL | Yes | Rust dataflow / security bug classes | +| `cargo deny` | Yes | Banned, unlicensed, or sketchy dependencies | +| `cargo audit` (existing) | Yes | Known CVE advisories | +| Semgrep | **No** (advisory) | Extension-specific patterns like dynamic SQL | + +> **Note:** Semgrep warnings are meant to prompt a review conversation, not +> to require an immediate code change. If you see a Semgrep annotation on your +> PR, read the message, decide if it applies, and comment on your decision. +> You do not need to suppress it unless the pattern is a confirmed false +> positive. + +### What does "advisory" mean in practice? + +Advisory means the tool will report findings but will not make CI red. You +(or a reviewer) can choose to ignore them, leave a comment, or address them — +but the PR will not be blocked. The plan is to move from advisory to blocking +only after rules have been tuned and false-positive rates are acceptable. + +### What should I do if CodeQL or cargo-deny flags something? + +1. **Read the message.** It usually says what pattern was detected and why it + can be risky. +2. **Check if it is a real issue.** Is dynamic SQL actually taking unchecked + user input? Is a crate actually disallowed or from an untrusted source? +3. **Fix it if it is real.** Common fixes: use a quoting helper, replace + `format!()` with bound parameters, or swap a dependency. +4. **Suppress with a justification if it is a false positive.** For CodeQL, + use a `// lgtm` or `// codeql` inline suppression. For `cargo deny`, add + an `allow` entry to `deny.toml`. Always include a comment explaining why + the pattern is safe here. +5. **Never add a blanket ignore.** Suppressions should be as narrow as + possible — scoped to the specific line or crate, not the whole file. + +### The two things most likely to confuse reviewers + +1. **Dynamic SPI SQL hits in Semgrep.** + The extension legitimately builds SQL strings to interact with PostgreSQL + internals. Not all of these are bugs. The Semgrep rules are designed to + surface them for review, not to declare them wrong. Ask yourself: "Is the + interpolated value validated or quoted before it reaches the database?" + If yes, leave a comment and move on. + +2. **`unsafe` blocks.** + We use `unsafe` to cross the Rust / PostgreSQL FFI boundary. Every unsafe + block should have a `// SAFETY:` comment explaining the invariant being + upheld. If you add or touch an unsafe block and the comment is missing, + add it. This is not optional. + +### Quick review checklist for this PR + +- [ ] The new workflow files trigger at sensible times (PR, push to main, + weekly schedule) and do not run on every trivial commit. +- [ ] `deny.toml` does not accidentally ban anything already in `Cargo.toml`. +- [ ] Semgrep rules have clear, human-readable `message` fields. +- [ ] All new `unsafe` blocks have a `// SAFETY:` comment. +- [ ] Nothing in the Semgrep or CodeQL advisory output represents an obvious + real vulnerability that should be fixed before merging. + +--- + +## Motivation + +`pg_trickle` is not a typical Rust CLI or web service. It is a PostgreSQL +extension loaded directly into the database server process, which changes the +security profile materially: + +- Memory safety bugs can affect the PostgreSQL backend itself. +- Unsafe FFI boundaries matter more because they run in-process with Postgres. +- Dynamic SQL built via SPI can become an injection or privilege-context risk. +- Security mistakes around `SECURITY DEFINER`, `search_path`, RLS, and GUCs are + extension-specific and will not be caught by generic Rust tooling. +- Dependency and supply-chain issues matter because the extension ships as a + privileged shared object. + +The repo already has strong correctness testing and standard linting, but SAST +coverage is still partial. The goal of this plan is to add security-focused +static analysis without creating a noisy, low-signal CI burden. + +--- + +## Threat Model and Security Surfaces + +This plan assumes the main in-scope risks described in [SECURITY.md](../../SECURITY.md) +remain the priority: + +1. SQL injection or object-name injection via `pgtrickle.*` functions. +2. Privilege escalation through `SECURITY DEFINER`, `SET ROLE`, `row_security`, + or unpinned `search_path`. +3. Memory safety bugs in Rust code that crosses into PostgreSQL FFI. +4. Low-privilege denial-of-service via expensive refreshes, unbounded work, or + unsafe background-worker behavior. +5. Information disclosure through change buffers, monitoring views, or + unexpected visibility semantics. + +### Primary code surfaces + +| Surface | Why it matters | Representative files | +|--------|----------------|----------------------| +| Dynamic SPI SQL | Injection, quoting, object-name construction, role context | `src/api.rs`, `src/refresh.rs`, `src/cdc.rs`, `src/monitor.rs`, `src/hooks.rs` | +| Unsafe / FFI | In-process memory safety, pointer lifetime, Postgres ABI | `src/lib.rs`, `src/shmem.rs`, `src/api.rs`, `src/dvm/parser.rs` | +| Background workers | Long-lived privileged code, scheduling, retry loops | `src/scheduler.rs`, `src/wal_decoder.rs`, `src/shmem.rs` | +| Trigger / function SQL | `SECURITY DEFINER`, `search_path`, runtime SQL generation | `src/api.rs`, `src/ivm.rs`, `sql/*.sql` | +| Dependencies | RustSec CVEs, banned sources, duplicate major versions | `Cargo.toml`, `Cargo.lock` | + +### PostgreSQL-specific security surfaces + +These are important enough that generic Rust SAST alone is insufficient: + +- `Spi::run(&format!(...))` and `Spi::get_one(&format!(...))` +- `quote_identifier()` and other quoting helpers +- `NOTIFY` payload construction +- `ALTER TABLE`, `CREATE FUNCTION`, trigger generation, and `DEALLOCATE` +- `SET LOCAL row_security = off` +- `SECURITY DEFINER` and `SET search_path` +- background-worker bootstrap and shared-memory access + +--- + +## Current State + +### What already exists + +| Control | Status | Notes | +|--------|--------|------| +| `clippy` | Present | Fast lint gate in `lint.yml` and `build.yml` | +| `cargo audit` | Present | Weekly + PR dependency advisory workflow | +| Security reporting policy | Present | `SECURITY.md` documents private disclosure path | +| Extensive E2E correctness tests | Present | Good base, but not SAST | + +### What is missing or incomplete + +| Gap | Why it matters | +|-----|----------------| +| CodeQL for Rust | No higher-level code scanning for Rust dataflow and security patterns | +| Dependency policy enforcement | `cargo audit` does not cover banned sources or dependency hygiene well | +| Repo-specific SPI SQL rules | Generic scanners will miss extension-specific SQL construction risks | +| Unsafe-code inventory | No explicit unsafe budget or automated visibility into unsafe growth | +| Security-context rule checks | No automated review hooks for `SECURITY DEFINER`, `search_path`, RLS toggles | +| Finding policy | No documented path from advisory findings to blocking CI | + +### Initial implementation on this branch + +The following baseline has already been added on `codeql-workflow`: + +| File | Purpose | +|------|---------| +| `.github/workflows/codeql.yml` | CodeQL analysis for Rust | +| `.github/workflows/dependency-policy.yml` | `cargo deny` checks | +| `deny.toml` | Dependency-policy configuration | +| `.github/workflows/semgrep.yml` | Semgrep SARIF upload workflow | +| `.semgrep/pg_trickle.yml` | Initial repo-specific Semgrep rules | + +This plan covers both that baseline and the next iterations needed to make it a +useful long-term SAST program. + +--- + +## Goals and Non-Goals + +### Goals + +1. Catch obvious dependency and supply-chain issues automatically. +2. Make unsafe/FFI growth visible and reviewable. +3. Detect extension-specific SQL/security-context hazards early in PRs. +4. Keep the initial rollout low-noise enough that engineers will not disable it. +5. Evolve from advisory findings to blocking policies only after triage and rule tuning. + +### Non-Goals + +1. Replace runtime security testing or E2E role-based tests. +2. Prove absence of memory safety issues. +3. Fully model PostgreSQL execution semantics statically. +4. Block all dynamic SPI SQL. Dynamic SQL is legitimate in an extension; the + objective is reviewability and safe construction, not blanket prohibition. + +--- + +## Proposed SAST Stack + +### Layer 1: CodeQL (Rust) + +**Tool:** GitHub CodeQL +**Purpose:** General Rust static analysis with control/dataflow analysis. +**Why needed:** Catches classes of issues that clippy and grep-based tooling do +not, while integrating naturally with GitHub Security. + +**Scope:** + +- Rust source in `src/**` +- build configured via the repo's pgrx setup action +- SARIF findings uploaded to GitHub Security tab + +**Expected value:** + +- unsafe API misuse patterns +- suspicious string/data flow +- common Rust bug classes with security impact + +### Layer 2: cargo-deny + +**Tool:** `cargo deny` +**Purpose:** Dependency policy enforcement beyond plain CVE scanning. +**Why needed:** `cargo audit` is useful but narrow; `cargo deny` adds source, +duplicate-version, and registry hygiene checks. + +**Checks to enforce initially:** + +- advisories +- banned dependency patterns +- allowed registries / sources + +**Checks to keep advisory initially:** + +- multiple versions (`warn`) +- yanked crates (`warn`) + +### Layer 3: Semgrep (extension-specific rules) + +**Tool:** Semgrep +**Purpose:** Repo-specific pattern checks that generic Rust scanners will not +model well. +**Why needed:** The main security risk in this codebase is not generic “Rust +security”; it is the combination of SPI SQL, object-name quoting, PostgreSQL +privilege semantics, and generated SQL. + +**Initial rule families:** + +1. Dynamic SQL passed to `Spi::run`. +2. Dynamic SQL passed to `Spi::get_*` helpers. +3. `SECURITY DEFINER` occurrences in Rust-generated SQL or `sql/*.sql`. + +**Initial operating mode:** advisory only (SARIF upload, no CI failure). + +### Layer 4: Unsafe / FFI tracking + +**Planned tools:** + +- `cargo geiger` or equivalent unsafe inventory tooling +- stricter Rust/clippy lints around unsafe blocks where supported + +**Purpose:** + +- quantify unsafe usage growth +- make new unsafe entry points visible in PR review +- ensure every unsafe block remains documented and justified + +### Layer 5: SQL / privilege-context review rules + +This is the most important medium-term layer. The repo should gain static +checks that flag: + +- `SECURITY DEFINER` without explicit `SET search_path` +- `SET LOCAL row_security = off` +- `SET ROLE` / `RESET ROLE` +- generated trigger functions +- dynamic `ALTER TABLE`, `DROP TABLE`, `TRUNCATE`, and `DEALLOCATE` SQL + +These patterns are not automatically wrong, but they are security-sensitive and +deserve intentional review. + +--- + +## Phase Plan + +### Phase 0 — Baseline rollout (current branch) + +**Status:** Implemented +**Goal:** Introduce low-friction SAST foundations with minimal CI disruption. + +| Item | Status | Notes | +|------|--------|------| +| CodeQL workflow | Done | Manual Rust build with pgrx toolchain | +| `cargo deny` workflow + config | Done | Advisory/supply-chain policy checks | +| Semgrep workflow + starter rules | Done | Advisory-only SARIF upload | + +**Exit criteria:** + +- workflows validate cleanly +- repo still passes `just lint` +- branch can be reviewed and merged independently + +### Phase 1 — Triage and noise reduction + +**Priority:** P0 +**Estimated effort:** 2–4 hours after first CI run. + +**Tasks:** + +1. Run the new workflows once on this branch. +2. Export or review initial findings from CodeQL and Semgrep. +3. Classify findings into: + - true positives + - acceptable dynamic SQL requiring documentation + - noisy patterns that need rule refinement +4. Tune Semgrep rules to reduce obvious false positives. +5. Add inline suppression guidance where exceptions are legitimate. + +**Deliverables:** + +- first triage pass documented in PR comments or follow-up issue +- reduced-noise Semgrep ruleset + +### Phase 2 — Extension-specific rule expansion + +**Priority:** P0 +**Estimated effort:** 4–8 hours. + +**Tasks:** + +1. Add Semgrep rules for `SECURITY DEFINER` plus `search_path` pairing. +2. Add rules for `SET LOCAL row_security = off`. +3. Add rules for `SET ROLE` / `RESET ROLE`. +4. Add rules for `Spi::run(&format!(...))` where interpolated values are not + wrapped in local quoting helpers. +5. Add rules for unsafe blocks without a nearby `SAFETY:` comment. + +**Deliverables:** + +- broader Semgrep ruleset +- documented suppression policy for legitimate dynamic SQL + +### Phase 3 — Unsafe growth controls + +**Priority:** P1 +**Estimated effort:** 2–4 hours. + +**Tasks:** + +1. Add `cargo geiger` or an equivalent unsafe inventory step. +2. Record the baseline unsafe count. +3. Fail CI if unsafe count increases unexpectedly, or at minimum surface it in + the PR as a diff signal. +4. Review whether `undocumented_unsafe_blocks` can be enforced through clippy + or another linter in this toolchain. + +**Deliverables:** + +- unsafe inventory workflow/report +- explicit policy on unsafe growth + +### Phase 4 — Move from advisory to blocking + +**Priority:** P1 +**Estimated effort:** 2–3 hours after rules stabilize. + +**Tasks:** + +1. Keep CodeQL and cargo-deny blocking. +2. Change only high-confidence Semgrep rules to blocking. +3. Leave broader “review-required” rules as informational if they remain noisy. +4. Document which classes are blockers and why. + +**Deliverables:** + +- clear blocking policy in CI +- reduced chance of engineers papering over the toolchain + +### Phase 5 — Link static findings to runtime security tests + +**Priority:** P2 +**Estimated effort:** 4–8 hours. + +**Tasks:** + +1. Add E2E tests for security-sensitive paths highlighted by SAST: + - RLS behavior + - SECURITY DEFINER trigger behavior + - low-privilege access to monitoring views / change buffers + - SQL injection attempts through API inputs +2. Map static rules to runtime coverage so a suppressed SAST finding still has a + compensating test where appropriate. + +**Deliverables:** + +- security-focused E2E coverage plan or implementation + +--- + +## Semgrep Rules Roadmap + +### Current rules + +| Rule ID | Purpose | Mode | +|--------|---------|------| +| `rust.spi.run.dynamic-format` | Flag dynamic SQL passed to `Spi::run` | Advisory | +| `rust.spi.query.dynamic-format` | Flag dynamic SQL passed to `Spi::get_*` | Advisory | +| `sql.security-definer.present` | Surface `SECURITY DEFINER` for review | Advisory | + +### Planned next rules + +| Category | Candidate rule | Value | +|----------|----------------|------| +| SQL quoting | Detect string interpolation into SPI SQL without quoting helper | High | +| Privilege context | `SECURITY DEFINER` without `SET search_path` | High | +| Role changes | `SET ROLE`, `RESET ROLE`, `row_security = off` | High | +| Unsafe docs | `unsafe` without nearby `SAFETY:` comment | Medium | +| Panics in SQL path | `unwrap()` / `expect()` / `panic!()` in non-test SQL-reachable code | Medium | +| GUC-sensitive SQL | direct `SET LOCAL work_mem`, `SET LOCAL row_security` review hooks | Medium | + +### False-positive strategy + +Do not attempt to make the first Semgrep rules perfectly precise. Instead: + +1. Start broad. +2. Triage actual hits. +3. Narrow only where noise is unacceptable. +4. Keep “review-required” rules informational instead of blocking. + +This is especially important because a PostgreSQL extension often needs dynamic +object names; the real question is whether they are safely quoted, not whether +dynamic SQL exists at all. + +--- + +## Unsafe / FFI Review Strategy + +Unsafe code in this repo is legitimate but high-impact. The policy should be: + +1. Every unsafe block has a `SAFETY:` comment. +2. Unsafe growth is visible in PRs. +3. New unsafe blocks are reviewed as security-relevant changes, not merely + implementation details. + +### Review checklist for unsafe additions + +- What Postgres invariant does this rely on? +- Is the pointer lifetime guaranteed by the surrounding callback/API? +- Can the same logic be expressed with a safe wrapper from `pgrx`? +- Does this code run in a background worker or user-triggered SQL path? +- What happens if Postgres returns null, stale, or unexpected node types? + +### Candidate automation + +- `cargo geiger` report in CI +- Semgrep rule for `unsafe {` without nearby `SAFETY:` +- code-review requirement for files containing new unsafe blocks + +--- + +## CI Integration Strategy + +The SAST system should respect the repo's existing fast/slow pyramid. + +### Recommended posture + +| Workflow | Trigger | Blocking? | Rationale | +|----------|---------|-----------|-----------| +| CodeQL | PR, push to main, weekly | Yes | High-value, low-maintenance | +| cargo-deny | PR, dependency changes, weekly | Yes | Good supply-chain hygiene | +| cargo-audit | Keep existing | Yes | Advisory DB coverage already in place | +| Semgrep | PR, push, weekly | No initially | Needs tuning first | + +### Why Semgrep starts advisory + +The repo has many legitimate dynamic SPI SQL callsites. If Semgrep is blocking +immediately, engineers will either disable it or add broad suppressions. An +advisory phase preserves signal while the rules are tuned. + +--- + +## Finding Triage and Policy + +### Severity mapping + +| Finding type | Target policy | +|-------------|---------------| +| Known vulnerable dependency | Block merge | +| Unknown crate source / banned source | Block merge | +| High-confidence CodeQL issue | Block merge unless justified | +| `SECURITY DEFINER` without safe context | Block once rule is tuned | +| Generic dynamic SPI SQL | Advisory unless unsafe interpolation is proven | +| Unsafe block increase | Advisory first, then block if policy is accepted | + +### Triage rules + +1. Suppressions must be narrow and justified. +2. A suppression should explain why the pattern is safe in PostgreSQL terms, + not only why the tool is noisy. +3. When a noisy pattern repeats, refine the rule rather than scattering ignores. + +--- + +## Validation Plan + +### Static validation + +After changes to the SAST config itself: + +```bash +just fmt +just lint +``` + +### Workflow validation + +Manual dispatch after merge or on the PR branch: + +1. Run `CodeQL` and confirm SARIF upload succeeds. +2. Run `Dependency policy` and confirm `cargo deny` resolves the graph cleanly. +3. Run `Semgrep` and capture the first findings set. + +### Triage validation + +For each Semgrep rule added: + +1. Confirm it catches at least one real representative pattern. +2. Confirm it does not flood obviously safe code paths with unusable noise. +3. Confirm the remediation message is specific enough for reviewers. + +--- + +## Success Criteria + +This plan is successful when all of the following are true: + +1. Dependency advisories and bad sources are enforced automatically. +2. CodeQL findings appear in GitHub Security for Rust changes. +3. Security-sensitive SPI SQL and privilege-context patterns are surfaced in PRs. +4. Unsafe growth is visible and reviewable. +5. Engineers trust the signal enough to keep the checks enabled. + +### Phase completion criteria + +| Phase | Done when | +|------|-----------| +| Phase 0 | Workflows merged and repo lint remains clean | +| Phase 1 | First findings triaged and Semgrep noise reduced | +| Phase 2 | High-value extension-specific rules added | +| Phase 3 | Unsafe inventory visible in CI | +| Phase 4 | High-confidence rules become blocking | +| Phase 5 | Static findings mapped to runtime security tests | + +--- + +## Implementation Checklist + +### Done on this branch + +- [x] Add CodeQL workflow for Rust +- [x] Add `cargo deny` policy workflow +- [x] Add `deny.toml` +- [x] Add initial Semgrep workflow +- [x] Add starter Semgrep ruleset +- [x] Validate the repo still passes `just lint` + +### Next implementation tasks + +- [ ] Run all new workflows once and capture findings +- [ ] Triage first Semgrep and CodeQL results +- [ ] Add `SECURITY DEFINER` + `search_path` pairing rule +- [ ] Add `row_security` / `SET ROLE` rules +- [ ] Add unsafe inventory reporting +- [ ] Decide which Semgrep rules should become blocking +- [ ] Add documentation for suppression / false-positive handling + +--- + +## Open Questions + +1. Should Semgrep eventually fail PRs, or remain advisory with only select + high-confidence rules blocking? +2. Do we want unsafe growth to hard-fail CI, or just surface as a review signal? +3. Should `cargo audit` remain separate from `cargo deny`, or be consolidated + once confidence in `cargo deny` is established? +4. Do we want a dedicated security-review checklist for PRs that touch: + - `Spi::run(&format!(...))` + - `unsafe` + - `SECURITY DEFINER` + - `row_security` + - background-worker logic? +5. Should a later phase add CodeQL custom queries for pgrx/PostgreSQL-specific + APIs, or is Semgrep sufficient for that layer? + +--- + +## Recommendation + +Merge Phase 0 now. Then treat the first real workflow run as a tuning exercise, +not a pass/fail judgment on the codebase. The highest-value next move is to +reduce Semgrep noise while expanding checks around `SECURITY DEFINER`, +`search_path`, and `row_security`, because those are the extension-specific +security hazards most likely to evade generic Rust tooling. \ No newline at end of file