diff --git a/.env.example b/.env.example index 2199644..a615219 100644 --- a/.env.example +++ b/.env.example @@ -3,9 +3,20 @@ ENVIRONMENT="development" # deployment environment LOG_LEVEL="INFO" # logging verbosity BYPASS_AUTH="0" # set to "1" to disable auth (development only) CORS_ALLOW_ORIGINS="" # comma-separated list of allowed origins +MCP_AUTH_ISSUER="" # OIDC issuer URL for MCP bearer-token validation +MCP_AUTH_AUDIENCE="" # Expected JWT audience (comma-separated values allowed) +MCP_AUTH_JWKS_URL="" # OIDC JWKS URL used to verify JWT signatures +MCP_AUTH_REQUIRED_SCOPES="tox:read" # Space/comma-separated scopes required for MCP calls +MCP_RESOURCE_URL="http://localhost:8000/mcp" # Canonical protected MCP resource URL +MCP_RATE_LIMIT_REQUESTS_PER_MINUTE="120" # Per-subject/IP tool-call limit; 0 disables local limiter +MCP_RATE_LIMIT_BURST="20" # Token-bucket burst size for MCP tool calls # CTX (Comptox) API CTX_API_BASE_URL="https://comptox.epa.gov/ctx-api" # Default CTX API server +# Request a free CTX API key from ccte_api@epa.gov. +# Official docs: +# - https://www.epa.gov/comptox-tools/computational-toxicology-and-exposure-apis +# - https://www.epa.gov/comptox-tools/computational-toxicology-and-exposure-apis-about CTX_API_KEY="your_ctx_api_key_here" # Required for CTX APIs (do not commit real key) CTX_USE_LEGACY="0" # Set to "1" to use https://api-ccte.epa.gov until 2025-10-01 EPA_COMPTOX_API_KEY="" # Legacy env name also supported (fallback) @@ -16,3 +27,4 @@ CTX_RETRY_BASE="0.5" # Base delay (seconds) fo EPACOMP_MCP_HEARTBEAT_TIMEOUT_SECONDS="120" # Minimum heartbeat timeout negotiated with clients EPACOMP_MCP_HANDSHAKE_TIMEOUT_SECONDS="30" # Minimum handshake timeout negotiated with clients EPACOMP_MCP_METRICS_ENABLED="1" # Expose /metrics endpoint +MCP_METRICS_BYPASS_AUTH="0" # Set to "1" only when a trusted gateway protects metrics diff --git a/.gitignore b/.gitignore index 1f21d12..c6b2662 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,10 @@ htmlcov/ artifacts/ dist/ build/ + +# Internal review and generated research artifacts +/AUDIT_MCP_*.md +/ToxMCP_Audit_Reviewed_*/ +/scientific_engine_bundle.txt +/triclosan_*.png +/epa_comptox_api_structure.json diff --git a/AUDIT_MCP_COVERAGE_2026-03-18.md b/AUDIT_MCP_COVERAGE_2026-03-18.md deleted file mode 100644 index ba8bdd4..0000000 --- a/AUDIT_MCP_COVERAGE_2026-03-18.md +++ /dev/null @@ -1,86 +0,0 @@ -# EPA CompTox MCP coverage audit - -Date: `2026-03-18` - -Target: -- `http://127.0.0.1:8002/mcp` - -## Executive summary - -After the HTTP catalog patch, the live MCP now advertises the full catalog over `tools/list`: - -- total tools: `79` -- `nextCursor`: `null` - -Current resource coverage by family: - -| Resource family | Tool count | -| --- | ---: | -| `chemical` | 10 | -| `bioactivity` | 14 | -| `exposure` | 32 | -| `hazard` | 18 | -| `chemical_list` | 2 | -| `metadata` | 3 | -| `cheminformatics` | 0 | - -## What is covered - -The current MCP catalog covers the major CTX dashboard data families represented in this repository: - -- chemical discovery and detail lookup -- bioactivity assays, assay chemicals, AED, and AOP lookups -- exposure datasets including `HTTK`, `CPDat`, `SEEM`, `MMDB`, functional use, and CCD -- hazard datasets including `ToxValDB`, `ToxRefDB`, cancer, genetox, `ADME/IVIVE`, `IRIS`, `PPRTV`, and `HAWC` -- public chemical lists -- metadata and applicability-domain assets - -Representative live-discovery checks after the patch: - -- `search_hazard`: present -- `get_hazard_adme_ivive`: present -- 
`get_hazard_toxref`: present -- `get_bioactivity_aed`: present -- `search_httk`: present - -## What is not covered or not yet surfaced - -### 1. Predictive services are not part of the live MCP catalog - -The repository contains predictive service code (`GenRA`, `OPERA`, `TEST` wrappers), but these are not currently advertised as MCP tools in the live `79`-tool catalog. - -Interpretation: -- CTX dashboard-style data access is broadly covered. -- Predictive micro-services exist in the codebase, but they are not yet exposed through the same MCP discovery surface. - -### 2. `cheminformatics` currently contributes zero tools - -The `cheminformatics` resource is initialized, but its current tool count is `0`. - -Interpretation: -- This is not blocking dashboard data access. -- It is an obvious expansion point if cheminformatics operations are expected to be part of the MCP surface. - -## Answer to “do we cover the entire dashboard?” - -For the core CTX data tiers used by this server, coverage is strong: - -- chemical: yes -- bioactivity: yes -- exposure: yes -- hazard: yes -- metadata/list assets: yes - -Two qualifiers remain: - -1. “Entire dashboard” is broader than the audited priority families and broader than the CTX API surface used in this repo. -2. Predictive services and cheminformatics are not fully surfaced as MCP tools in the same way as the core CTX data families. - -## Bottom line - -If the goal is comprehensive MCP coverage of the main CTX dashboard data families, the server is now in good shape and the full catalog is discoverable over HTTP. - -If the goal is literal “everything in the repo” or “everything a user may associate with the dashboard,” the remaining visible gaps are: - -1. predictive services are not exposed as MCP tools -2. cheminformatics contributes no live tools diff --git a/AUDIT_MCP_ENDPOINTS_2026-03-18.md b/AUDIT_MCP_ENDPOINTS_2026-03-18.md deleted file mode 100644 index 1f27e0d..0000000 --- a/AUDIT_MCP_ENDPOINTS_2026-03-18.md +++ /dev/null @@ -1,171 +0,0 @@ -# EPA CompTox MCP audit - -Date: `2026-03-18` - -Target server: -- `http://127.0.0.1:8002/mcp` - -Audit scope: -- MCP discovery via `tools/list` -- Live tool execution for the priority data families: - - `AED` - - `HTTK` - - `ADME/IVIVE` -- Upstream API reachability using `scripts/check_endpoints.py` - -## Executive summary - -The live server on `8002` is functional for the priority data families. `AED`, `HTTK`, and `ADME/IVIVE` all returned real data for `DTXSID7020182` (Bisphenol A). - -This audit initially surfaced two issues: - -1. HTTP `tools/list` only returned the first `50` tools, which hid part of the catalog. -2. The chemical smoke checker used a stale probe URL and produced a false negative. - -Both issues are now patched. - -Post-fix state: - -- HTTP `tools/list` returns the full `79`-tool catalog -- `get_hazard_adme_ivive` is discoverable via `tools/list` -- `scripts/check_endpoints.py --json` passes for chemical, hazard, exposure, and bioactivity when project env is loaded - -## Discovery audit - -Live `tools/list` now returns `79` tools with `nextCursor: null`. - -Priority tool discovery status: - -| Tool | In `tools/list` | Callable | Returns data | -| --- | --- | --- | --- | -| `get_bioactivity_aed` | Yes | Yes | Yes | -| `search_httk` | Yes | Yes | Yes | -| `get_exposure_httk` | Yes | Yes | Yes | -| `get_hazard_adme_ivive` | Yes | Yes | Yes | - -## Live MCP execution audit - -Test substance: -- `DTXSID7020182` (`Bisphenol A`) - -### 1. 
AED - -Tool: -- `get_bioactivity_aed` - -Observed result: -- HTTP metadata status: `200` -- Data type: `list` -- Record count: `662` -- Sample fields include: - - `dtxsid` - - `aeid` - - `aedVal` - - `aedType` - - `httkModel` - - `httkVersion` - - `aedValUnit` - -Conclusion: -- Functional -- Data-bearing -- Suitable for real audit and downstream analysis - -### 2. HTTK - -Tools: -- `search_httk` -- `get_exposure_httk` - -Observed result for both: -- HTTP metadata status: `200` -- Data type: `list` -- Record count: `18` -- Sample fields include: - - `dtxsid` - - `parameter` - - `measured` - - `predicted` - - `model` - - `species` - - `percentile` - -Sample parameter/model: -- `Css` -- `PBTK` - -Conclusion: -- Both HTTK tools are functional -- Both return real HTTK rows -- The two outputs are materially equivalent for this test substance - -### 3. ADME/IVIVE - -Tool: -- `get_hazard_adme_ivive` - -Observed result: -- HTTP metadata status: `200` -- Data type: `list` -- Record count: `18` -- Sample fields include: - - `dtxsid` - - `description` - - `measured` - - `predicted` - - `unit` - - `model` - - `species` - - `percentile` - -Sample parameter: -- `Clint` - -Conclusion: -- Functional -- Data-bearing -- Discoverable through the MCP catalog after the transport patch - -## Upstream dependency audit - -Command path: -- `scripts/check_endpoints.py --json` - -When run with project env loaded, the checker returns: - -| Upstream endpoint | Status | Result | -| --- | --- | --- | -| `CTX Chemical API` | `200` | OK | -| `CTX Hazard API` | `200` | OK | -| `CTX Exposure API` | `200` | OK | -| `CTX Bioactivity API` | `200` | OK | - -Interpretation: -- Chemical, hazard, exposure, and bioactivity upstreams are reachable and healthy enough for the tested MCP calls. -- The checker now probes the chemical tier with `chemical/detail/search/by-dtxsid/DTXSID7020182`, which matches the live CTX path family used by the server. - -## Remaining follow-up - -### Finding 1: endpoint matrix documentation still points to `v1` roots - -Severity: -- Medium - -Why it matters: -- `docs/contracts/endpoint-matrix.md` documents `ctx-api/v1` base roots. -- Direct probe tests against those base roots returned `404`, while the currently functioning CTX probe paths use the non-`v1` endpoint family. - -Evidence: -- `docs/contracts/endpoint-matrix.md` lists `https://comptox.epa.gov/ctx-api/v1/chemical` and analogous `v1` roots. -- Direct probes against those base roots returned `404`. -- The patched smoke checker and the live MCP succeed against non-`v1` CTX endpoint paths. - -## Bottom line - -For the priority areas requested in this audit: - -- `AED`: pass -- `HTTK`: pass -- `ADME/IVIVE`: pass - -The server retrieves real data for all three target families and now advertises the full catalog correctly over HTTP. The one remaining issue is documentation drift in `docs/contracts/endpoint-matrix.md`. diff --git a/AUDIT_MCP_FAMILY_LIVE_COVERAGE_2026-03-18.md b/AUDIT_MCP_FAMILY_LIVE_COVERAGE_2026-03-18.md deleted file mode 100644 index 64c7f1b..0000000 --- a/AUDIT_MCP_FAMILY_LIVE_COVERAGE_2026-03-18.md +++ /dev/null @@ -1,53 +0,0 @@ -# MCP Family Live Coverage Audit (2026-03-18) - -## Scope - -- Server audited: `http://127.0.0.1:8002/mcp` -- Discovery source: live MCP HTTP `tools/list` response -- Goal: verify family-level runtime coverage for the exposed CompTox dashboard domains, with explicit proof for `AED`, `HTTK`, and `ADME/IVIVE`. 
- -## Discovery summary - -- Total advertised tools: `79` -- `bioactivity`: `14` tools -- `chemical`: `10` tools -- `chemical_list`: `2` tools -- `exposure`: `32` tools -- `hazard`: `18` tools -- `metadata`: `3` tools - -## Representative live runtime checks - -| Family | Representative tool | Input | `structuredContent.data` | Size | Result | -| --- | --- | --- | --- | ---: | --- | -| `chemical` | `get_chemical_details` | `{"identifier":"DTXSID7020182","id_type":"dtxsid","subset":"default"}` | `dict` | `74` | **PASS** | -| `bioactivity` | `get_bioactivity_aed` | `{"dtxsid":"DTXSID7020182"}` | `list` | `662` | **PASS** | -| `exposure` | `get_exposure_httk` | `{"dtxsid":"DTXSID7020182"}` | `list` | `18` | **PASS** | -| `hazard` | `get_hazard_adme_ivive` | `{"dtxsid":"DTXSID7020182"}` | `list` | `18` | **PASS** | -| `chemical_list` | `get_public_list_names` | `{}` | `list` | `8` | **PASS** | -| `metadata` | `metadata_list_applicability_domain` | `{"limit":10}` | `dict` | `3` | **PASS** | - -## Dashboard coverage mapping - -| Dashboard area | MCP family | Runtime coverage | Notes | -| --- | --- | --- | --- | -| Chemical identity/detail | `chemical` | Covered | `get_chemical_details` returned a populated structured object and now also exposes `structuredContent.data`. | -| AED / bioactivity | `bioactivity` | Covered | `get_bioactivity_aed` returned `662` rows for `DTXSID7020182`. | -| HTTK / exposure | `exposure` | Covered | `get_exposure_httk` returned `18` rows for `DTXSID7020182`. | -| ADME / IVIVE / hazard | `hazard` | Covered | `get_hazard_adme_ivive` returned `18` rows for `DTXSID7020182`. | -| Chemical lists | `chemical_list` | Covered | `get_public_list_names` now returns `8` public list names; `get_full_list("CCL")` remained live throughout. | -| Metadata / reference registries | `metadata` | Covered | `metadata_list_applicability_domain` returned `3` applicability-domain records and now also exposes `structuredContent.data`. | -| Cheminformatics | not exposed | Not covered | No live MCP tools are currently advertised for this area. | - -## Findings - -- The priority scientific paths requested for the audit are live and returning data: `AED`, `HTTK`, and `ADME/IVIVE`. -- Family-level dashboard coverage is now complete for all currently exposed MCP families: `chemical`, `bioactivity`, `exposure`, `hazard`, `chemical_list`, and `metadata` all have successful live runtime proof. -- `chemical_list` discovery now works through the shared `ctxpy` client, so non-MCP callers and MCP callers use the same fallback behavior when the upstream enumeration endpoint returns `404`. -- Client parsing is now normalized around `structuredContent.data` for both success and error responses, while preserving existing domain-specific top-level keys for backward compatibility. -- No `cheminformatics` tools are currently exposed through MCP, so that dashboard area remains outside current interface coverage. - -## Conclusion - -The current MCP server is functionally usable across all exposed CompTox dashboard families relevant to this project. The remaining interface gap is not a runtime failure but a product-scope gap: `cheminformatics` is still not exported as live MCP tools. 
- diff --git a/AUDIT_MCP_PATCH_VERIFICATION_2026-03-18.md b/AUDIT_MCP_PATCH_VERIFICATION_2026-03-18.md deleted file mode 100644 index fc03fa0..0000000 --- a/AUDIT_MCP_PATCH_VERIFICATION_2026-03-18.md +++ /dev/null @@ -1,48 +0,0 @@ -# MCP Patch Verification (2026-03-18) - -## Scope - -- Server: `http://127.0.0.1:8002/mcp` -- Patch set: - - restore `chemical_list.get_public_list_names` - - normalize `structuredContent.data` across success and error responses - -## Live verification results - -### 1. `get_public_list_names` recovery - -- Result: **PASS** -- Runtime behavior: returns a non-error response with `structuredContent.data` -- Returned count: `8` -- Sample values: `CCL`, `CCL1`, `CPDAT`, `CPDATv2`, `CTD` -- Implementation note: upstream CTX list-enumeration endpoint currently returns `404`, so the MCP now falls back to a maintained catalog of verified public list names while `get_full_list(list_name)` continues to use the live CTX API. - -### 2. Dict-shaped success responses now expose `structuredContent.data` - -- `get_chemical_details(DTXSID7020182)` - - Result: **PASS** - - `structuredContent.data`: present - - Payload type: `dict` -- `metadata_list_applicability_domain(limit=10)` - - Result: **PASS** - - `structuredContent.data`: present - - Backward-compatible top-level keys preserved: `applicabilityDomains`, `nextCursor`, `metadata` - -### 3. Error responses now expose `structuredContent.data` - -- Probe: `get_chemical_details(DTXSID_NOT_REAL, id_type="dtxsid")` -- Result: **PASS** -- Error semantics preserved: `isError=true` -- Normalization confirmed: `structuredContent.data = null` - -## Outcome - -The MCP now has a consistent client-facing parsing contract: -- Success responses always expose `structuredContent.data` -- Error responses expose `structuredContent.data = null` -- Existing top-level domain-specific keys remain available for backward compatibility - -## Files changed - -- `/Volumes/Storage/topotox_space_relief_20260220/mcp_epacomp_tox/src/epacomp_tox/resources/chemical_list.py` -- `/Volumes/Storage/topotox_space_relief_20260220/mcp_epacomp_tox/src/epacomp_tox/server.py` diff --git a/MANIFEST.in b/MANIFEST.in index fe89000..ae7b4a1 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,3 @@ include README.md LICENSE recursive-include metadata *.json *.md +recursive-include src/epacomp_tox/data *.json *.md diff --git a/README.md b/README.md index a4ac471..c3232cf 100644 --- a/README.md +++ b/README.md @@ -194,7 +194,7 @@ pip install -e . # 2) configure cp .env.example .env -# set CTX_API_KEY in .env +# request a free CTX API key from ccte_api@epa.gov, then set CTX_API_KEY in .env # 3) run uvicorn epacomp_tox.transport.websocket:app --host 0.0.0.0 --port 8000 --reload @@ -218,6 +218,37 @@ uvicorn epacomp_tox.transport.websocket:app --reload > **Important:** The server needs a valid EPA CompTox API key. Set `CTX_API_KEY` (preferred) or `EPA_COMPTOX_API_KEY` in `.env` before starting the transport. +### Get an EPA CompTox API key + +EPA's current CTX API documentation says API keys are free and must be requested from the API support team at `ccte_api@epa.gov`. 
+ +- CTX APIs overview: [epa.gov/comptox-tools/computational-toxicology-and-exposure-apis](https://www.epa.gov/comptox-tools/computational-toxicology-and-exposure-apis) +- CTX API authentication/about page: [epa.gov/comptox-tools/computational-toxicology-and-exposure-apis-about](https://www.epa.gov/comptox-tools/computational-toxicology-and-exposure-apis-about) + +Suggested request email: + +```text +To: ccte_api@epa.gov +Subject: Request for EPA CompTox CTX API key + +Hello, + +I would like to request an API key for the EPA Computational Toxicology and Exposure APIs (CTX APIs). + +Name: +Organization: +Intended use: + +Thank you. +``` + +Once EPA sends your key: + +```bash +cp .env.example .env +# then paste the key into CTX_API_KEY in .env +``` + With the server running, MCP clients can connect to `http://localhost:8000/mcp` (HTTP) or `ws://localhost:8000/mcp/ws` (WebSocket). Once the server is running: @@ -264,7 +295,7 @@ Settings are resolved via [`pydantic-settings`](https://docs.pydantic.dev/latest | Variable | Required | Default | Description | | --- | --- | --- | --- | -| `CTX_API_KEY` | ✅ | – | CompTox API key used for all downstream requests. Fallbacks: `EPA_COMPTOX_API_KEY`, `ctx_x_api_key`. | +| `CTX_API_KEY` | ✅ | – | CompTox API key used for all downstream requests. EPA currently issues keys via `ccte_api@epa.gov`. Fallbacks: `EPA_COMPTOX_API_KEY`, `ctx_x_api_key`. | | `CTX_API_BASE_URL` | Optional | `https://comptox.epa.gov/ctx-api` | Base URL for CompTox API. | | `CTX_USE_LEGACY` | Optional | `0` | Set to `1` to use the legacy `https://api-ccte.epa.gov` endpoint. | | `CTX_RETRY_ATTEMPTS` | Optional | `3` | Number of retry attempts for transient errors. | @@ -272,10 +303,18 @@ Settings are resolved via [`pydantic-settings`](https://docs.pydantic.dev/latest | `ENVIRONMENT` | Optional | `development` | Controls defaults like permissive CORS. | | `LOG_LEVEL` | Optional | `INFO` | Application log level. | | `BYPASS_AUTH` | Optional | `0` | Set to `1` to disable auth (development only). | +| `MCP_AUTH_ISSUER` | Production | – | Expected OIDC issuer for MCP bearer JWTs. | +| `MCP_AUTH_AUDIENCE` | Production | – | Expected JWT audience for the canonical MCP resource. | +| `MCP_AUTH_JWKS_URL` | Production | – | JWKS URL used to verify bearer-token signatures. | +| `MCP_AUTH_REQUIRED_SCOPES` | Optional | – | Space/comma-separated scopes required for MCP tool calls. | +| `MCP_RESOURCE_URL` | Optional | `http://localhost:8000/mcp` | Canonical protected resource URL advertised in OAuth metadata and challenges. | +| `MCP_RATE_LIMIT_REQUESTS_PER_MINUTE` | Optional | `120` | In-memory per-subject/IP tool-call limit; set `0` to disable local limiting. | +| `MCP_RATE_LIMIT_BURST` | Optional | `20` | Token-bucket burst size for local tool-call limiting. | | `CORS_ALLOW_ORIGINS` | Optional | – | Comma-separated origins for HTTP transport. Defaults to `*` in development. | | `EPACOMP_MCP_HEARTBEAT_TIMEOUT_SECONDS` | Optional | `120` | Minimum heartbeat timeout negotiated with WebSocket clients. | | `EPACOMP_MCP_HANDSHAKE_TIMEOUT_SECONDS` | Optional | `30` | Minimum handshake timeout negotiated with WebSocket clients. | | `EPACOMP_MCP_METRICS_ENABLED` | Optional | `1` | Toggle `/metrics` endpoint exposure. | +| `MCP_METRICS_BYPASS_AUTH` | Optional | `0` | Allow unauthenticated metrics only when a trusted gateway already protects the endpoint. | See [`docs/deployment.md`](docs/deployment.md) for production hardening tips and expanded configuration. 
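+
+### Example: production bearer-auth configuration
+
+A minimal sketch of the auth variables above wired together. Only the variable names come from this repository; the issuer, audience, and JWKS values are placeholders for whatever identity provider you run:
+
+```bash
+# .env (production) — all URLs below are illustrative placeholders
+MCP_AUTH_ISSUER="https://idp.example.org/realms/toxicology"
+MCP_AUTH_AUDIENCE="https://mcp.example.org/mcp"
+MCP_AUTH_JWKS_URL="https://idp.example.org/realms/toxicology/protocol/openid-connect/certs"
+MCP_AUTH_REQUIRED_SCOPES="tox:read"
+MCP_RESOURCE_URL="https://mcp.example.org/mcp"
+```
+
+With these set and `BYPASS_AUTH=0`, unauthorized MCP requests receive OAuth protected-resource challenges instead of data.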
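+
+### How the local rate-limit settings interact
+
+`MCP_RATE_LIMIT_REQUESTS_PER_MINUTE` and `MCP_RATE_LIMIT_BURST` describe a token bucket. The sketch below illustrates the intended semantics only; it is not the server's implementation, and the class name is hypothetical:
+
+```python
+import time
+
+
+class TokenBucket:
+    """Illustrative token bucket: refill rate derives from
+    MCP_RATE_LIMIT_REQUESTS_PER_MINUTE, capacity from MCP_RATE_LIMIT_BURST."""
+
+    def __init__(self, requests_per_minute: int = 120, burst: int = 20) -> None:
+        self.rate = requests_per_minute / 60.0  # tokens refilled per second
+        self.capacity = float(burst)            # largest instantaneous burst
+        self.tokens = float(burst)
+        self.last = time.monotonic()
+
+    def allow(self) -> bool:
+        now = time.monotonic()
+        self.tokens = min(self.capacity, self.tokens + (now - self.last) * self.rate)
+        self.last = now
+        if self.tokens >= 1.0:
+            self.tokens -= 1.0
+            return True   # admit the tool call
+        return False      # reject: caller should surface a rate-limit error
+```
+
+Setting `MCP_RATE_LIMIT_REQUESTS_PER_MINUTE=0` disables this local limiter; because it is per-process, distributed deployments still need a shared limit at the gateway.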
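+
+### A common reading of the CTX retry settings
+
+`CTX_RETRY_ATTEMPTS` bounds how many times transient CTX errors are retried, and `CTX_RETRY_BASE` seeds the delay between attempts. The helper below shows one conventional interpretation (exponential backoff with full jitter); it is a hedged illustration, not a transcription of this repository's client code:
+
+```python
+import random
+
+
+def retry_delay(attempt: int, base: float = 0.5) -> float:
+    """Delay in seconds before retry number `attempt` (0-based).
+
+    Exponential growth spreads load after an upstream outage; full jitter
+    keeps concurrent clients from retrying in lockstep."""
+    return random.uniform(0.0, base * (2 ** attempt))
+```
+
+With the defaults (`3` attempts, `0.5` second base), the worst-case added latency is 0.5 + 1 + 2 = 3.5 seconds before the error is surfaced to the caller.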
@@ -367,7 +406,8 @@ A scheduled GitHub Action (`.github/workflows/endpoint-check.yml`) runs `python - Run via Gunicorn: `gunicorn epacomp_tox.transport.websocket:app -c deploy/gunicorn_conf.py` - Container image: see [`deploy/Dockerfile`](deploy/Dockerfile) for a hardened, non-root runtime. - Probes: `/healthz` (liveness) and `/readyz` (performs CTX connectivity check). Non-200 responses should trigger restarts. -- Metrics: `/metrics` exposes Prometheus gauges derived from `MCPServer.get_transport_metrics()`. Sample scrape/OTEL configs live in `deploy/prometheus_scrape.yaml` and `deploy/otel_collector_metrics.yaml`. +- Auth: production deployments should configure `MCP_AUTH_ISSUER`, `MCP_AUTH_AUDIENCE`, and `MCP_AUTH_JWKS_URL`; unauthorized MCP requests receive OAuth protected-resource challenges. +- Metrics: `/metrics` exposes Prometheus gauges derived from `MCPServer.get_transport_metrics()` when `EPACOMP_MCP_METRICS_ENABLED=1`; it uses the same bearer auth unless `MCP_METRICS_BYPASS_AUTH=1` is explicitly set. Sample scrape/OTEL configs live in `deploy/prometheus_scrape.yaml` and `deploy/otel_collector_metrics.yaml`. - Additional rollout guidance (TLS, ingress, scaling) lives in [`docs/deployment.md`](docs/deployment.md). --- @@ -390,14 +430,15 @@ Every successful tool invocation returns structured payloads designed for agents - `content`: human-readable JSON wrapped as text for chat surfaces. - `structuredContent.data`: machine-readable results (lists, dicts, or arrays) for programmatic chaining. -- `structuredContent.metadata`: when available, includes rate-limit information, validation metadata, and session metadata. +- `structuredContent.metadata`: when available, includes rate-limit information, validation metadata, and scrubbed session metadata. Bearer tokens and raw client authentication payloads are never echoed. - Default registered tools are retrieval and federation oriented; experimental predictive/orchestrator modules in this repository are not part of the canonical public surface yet. --- ## Security checklist -- Disable `BYPASS_AUTH` and front the MCP server with OAuth/OIDC once deployed beyond local development. +- Disable `BYPASS_AUTH` and configure OAuth/OIDC bearer validation before deploying beyond local development. +- Enforce shared rate limits at the gateway for distributed deployments; the built-in limiter is process-local defense in depth. - Restrict `CORS_ALLOW_ORIGINS` to approved hosts when exposing the HTTP transport. - Rotate `CTX_API_KEY` regularly and store secrets outside the repository (e.g. cloud secret manager or OS keychain). - Monitor `/metrics` for negotiated capability changes and unexpected spikes in `tools/call` failures. @@ -435,6 +476,7 @@ Every successful tool invocation returns structured payloads designed for agents - `tests/test_mcp_conformance_suite.py` covers handshake, catalog discovery, and streaming behaviours. - `tests/test_tool_contracts.py` enforces output schema declarations for the registered resources. +- `black --check src tests` and `isort --check-only src tests` are the canonical repository hygiene checks. - `scripts/smoke_ctx.sh` runs integration smoke tests against the live CTX API. - `scripts/mcp_http_smoke.sh` performs a quick JSON-RPC handshake and tool listing against the HTTP transport. - `scripts/mcp_interop_smoke.py` validates the public interop tool path end-to-end over the HTTP transport. 
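+
+### Example: authenticated JSON-RPC call over HTTP
+
+A sketch of the kind of request `scripts/mcp_http_smoke.sh` exercises, extended with a bearer token. `$MCP_TOKEN` is a placeholder, and exact header and response framing can vary with transport negotiation:
+
+```bash
+curl -s http://localhost:8000/mcp \
+  -H "Authorization: Bearer $MCP_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}'
+```
+
+Without a valid token (and with `BYPASS_AUTH=0`), the same request receives an OAuth protected-resource challenge rather than the tool catalog.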
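+
+### Example: consuming `structuredContent.data`
+
+Given the payload contract above (successful calls always expose `structuredContent.data`; error responses set `isError` and null out `data`), a client-side helper might look like the following. The function name and error type are illustrative, not part of this server's API:
+
+```python
+from typing import Any
+
+
+def extract_data(result: dict[str, Any]) -> Any:
+    """Return the machine-readable payload from a tools/call result.
+
+    Assumes the normalized contract: structuredContent.data is present on
+    success and null on errors, with isError marking failures."""
+    if result.get("isError"):
+        raise RuntimeError(f"tool call failed: {result.get('content')}")
+    return (result.get("structuredContent") or {}).get("data")
+```
+
+Because domain-specific top-level keys are preserved for backward compatibility, existing clients keep working while new clients can standardize on `structuredContent.data`.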
diff --git a/ToxMCP_Audit_Reviewed_v2/AUDIT_EVIDENCE_FRAMEWORK.md b/ToxMCP_Audit_Reviewed_v2/AUDIT_EVIDENCE_FRAMEWORK.md deleted file mode 100644 index 48d34d1..0000000 --- a/ToxMCP_Audit_Reviewed_v2/AUDIT_EVIDENCE_FRAMEWORK.md +++ /dev/null @@ -1,105 +0,0 @@ -# ToxMCP Audit Evidence Framework - -**Added in reviewed copy:** 2026-04-15 -**Purpose:** Make the package easier to defend internally and safer to reuse externally. - ---- - -## Why this document exists - -The original audit pack was strong on systems thinking, but it mixed together three different claim types: - -1. **Directly observed code/schema facts** -2. **Architecture-level inferences** -3. **Scenario-based exploit or misuse narratives** - -Those are all useful, but they should not be presented with the same certainty. This framework standardizes how the reviewed copy uses evidence, confidence, and severity language. - ---- - -## Evidence taxonomy - -| Label | Meaning | Typical example | How to read it | -|---|---|---|---| -| **Observed** | Directly quoted or paraphrased from code, schema, configuration, or documentation contained in the audited material | A function uses `str.format()` on a query template; a schema omits a required provenance field | Strongest class of claim in this package | -| **Observed + inferred** | A direct observation supports a broader architecture conclusion | Independent per-repo correlation IDs imply no end-to-end distributed trace | Usually strong, but still one step removed from a direct test | -| **Scenario** | A misuse or exploit path that depends on stated preconditions | A prompt-injection payload alters downstream reasoning *if* untrusted identifiers are interpolated into model prompts without isolation | Useful for threat modeling, not proof that exploitation was demonstrated | -| **Standards note** | A statement about a regulatory or protocol expectation from a public standard or guidance | Signature/record linking expectations under 21 CFR Part 11 | Read with deployment and intended-use context in mind | - ---- - -## Confidence scale - -| Confidence | Meaning | -|---|---| -| **High** | The claim is strongly grounded in the supplied material or an official standard, and only limited interpretation is required | -| **Medium** | The claim is plausible and supported, but exploitability, operational impact, or scope depends on assumptions that have not yet been validated | -| **Low** | The claim is directionally useful for red-teaming, but needs reproduction or source-repo verification before being presented as a hard finding | - ---- - -## Severity language rules used in the reviewed copy - -### Critical -Use only when the package shows a gap that is both material and near-core to the intended operating model, for example: -- integrity of scientific outputs -- inability to reconstruct or sign regulated records -- unbounded execution that can predictably destabilize the service -- unsafe interpolation at a trust boundary - -### High -Use when the gap is significant, but one or more of these remains true: -- exploitability depends on preconditions -- compensating procedural controls may exist -- impact is serious but not necessarily suite-blocking - -### Medium -Use when the gap is real but better framed as a design weakness, future migration cost, or a finding that still needs validation. 
- ---- - -## Claim phrasing rules - -The reviewed copy avoids the following unless directly demonstrated and scoped: - -- "FDA rejection" -- "submission rejection" -- "certain" -- "production-ready code" -- destructive exploit claims such as graph deletion unless the endpoint is known to permit updates - -Instead, the package prefers wording such as: - -- "high risk of non-conformance for regulated use" -- "likely unacceptable for submission without compensating controls" -- "observed unsafe interpolation pattern" -- "reference implementation / implementation pattern" - ---- - -## Validation states - -Each major finding should eventually be paired with one or more of these: - -| Validation state | Meaning | -|---|---| -| **Reproduced** | A proof of concept or deterministic reproduction exists | -| **Source-verified** | The claim was re-checked against the live repository, not just this audit bundle | -| **Fix-verified** | The proposed remediation was tested and shown to change behavior as intended | -| **Still open** | Needs follow-up before external use | - -This reviewed copy improves wording and internal consistency, but it does **not** claim that all findings were reproduced or re-verified against the live repositories. - ---- - -## Minimal standard before external use - -Before using any finding externally, the package should include: - -1. exact repository or commit reference -2. reproduction or test preconditions -3. expected behavior vs observed behavior -4. exploitability caveats -5. fix validation criteria - -Until then, this package is best treated as a **carefully edited internal audit and remediation planning pack**. diff --git a/ToxMCP_Audit_Reviewed_v2/DELIVERY_SUMMARY.md b/ToxMCP_Audit_Reviewed_v2/DELIVERY_SUMMARY.md deleted file mode 100644 index ecf75d6..0000000 --- a/ToxMCP_Audit_Reviewed_v2/DELIVERY_SUMMARY.md +++ /dev/null @@ -1,150 +0,0 @@ -# ToxMCP Audit - Delivery Summary (Reviewed Copy) - -**Package date:** 2026-04-15 -**Status:** Reviewed for internal consistency, evidentiary discipline, and delivery readiness - ---- - -## What this package is - -This is a **reviewed internal audit pack** for the ToxMCP ecosystem covering: - -- `comptox-mcp` -- `oqt-mcp` -- `aop-mcp` -- `pbpk-mcp` - -It is strong as: -- a red-team architecture review -- a remediation planning pack -- a leadership briefing artifact - -It is **not yet** the same thing as: -- a third-party assurance report -- a submission-ready validation package -- a fully reproduced penetration or compliance test report - ---- - -## What changed in the reviewed copy - -The original package had strong insights but needed a more defensible presentation. 
This reviewed copy: - -- distinguishes **observed facts** from **architecture inferences** and **scenario narratives** -- removes or softens overly absolute phrasing -- normalizes dates and terminology -- marks code snippets as **reference implementations** -- updates future-proofing language to current public MCP context -- adds a validation backlog for findings that need live-repo confirmation - -See: -- `AUDIT_EVIDENCE_FRAMEWORK.md` -- `REVISION_LOG.md` -- `VALIDATION_BACKLOG.md` -- `PUBLIC_REFERENCE_NOTES.md` - ---- - -## What is included - -### Package-level docs -| File | Purpose | -|---|---| -| `README.md` | Entry point | -| `DELIVERY_SUMMARY.md` | This document | -| `INDEX.md` | Navigation by audience | -| `QUICK_REFERENCE.md` | Fast triage view | -| `TOXMCP_MASTER_AUDIT_REPORT.md` | Revised cross-suite synthesis | -| `AUDIT_EVIDENCE_FRAMEWORK.md` | Evidence/confidence/severity rules | -| `REVISION_LOG.md` | What changed in this reviewed copy | -| `VALIDATION_BACKLOG.md` | Follow-up tasks before external use | -| `PUBLIC_REFERENCE_NOTES.md` | Public protocol and regulatory context consulted during review | - -### Specialist reports -- `toxmcp_regulatory_audit_report.md` -- `toxmcp_adversarial_audit_report.md` -- `toxmcp_contract_audit_report.md` -- `toxmcp_security_audit_report.md` -- `ToxMCP_Performance_Resilience_Audit_Report.md` -- `toxmcp_observability_audit_report.md` -- `cognitive_ergonomics_audit_report.md` -- `toxmcp_future_proofing_audit_report.md` - -### Repository-specific packages -- `comptox-mcp-audit/` -- `oqt-mcp-audit/` -- `aop-mcp-audit/` -- `pbpk-mcp-audit/` - -Each repository package includes: -- `README.md` — reviewed summary of findings and sequencing -- `REMEDIATION_CODE.md` — implementation-oriented reference code, not drop-in patches - -### Shared reference code -- `toxmcp_remediation_snippets.py` - ---- - -## Most important package-level conclusions - -### 1. The strongest issues are architectural, not local -The pack is at its best when it identifies cross-cutting gaps such as: -- provenance and time-machine reconstruction -- cross-suite orchestration and contradiction handling -- mandatory scientific review checkpoints -- uncertainty propagation -- distributed tracing and replayability - -### 2. Some original language was too absolute -The reviewed copy deliberately replaces phrases like: -- "FDA rejection" -- "submission rejection" -- "certain" -- "production-ready" - -with wording that better matches the level of evidence actually shown. - -### 3. The remediation code should be read as design guidance -Several code blocks are valuable patterns, but they still require: -- repository-specific adaptation -- test coverage -- dependency and runtime checks -- review by domain owners - ---- - -## Recommended reading order - -### Leadership / program owner -1. `TOXMCP_MASTER_AUDIT_REPORT.md` -2. `QUICK_REFERENCE.md` -3. `VALIDATION_BACKLOG.md` - -### Engineering leads -1. `INDEX.md` -2. repository `README.md` files -3. relevant specialist report(s) -4. relevant `REMEDIATION_CODE.md` - -### Security / quality / regulatory reviewers -1. `AUDIT_EVIDENCE_FRAMEWORK.md` -2. `toxmcp_security_audit_report.md` or `toxmcp_regulatory_audit_report.md` -3. 
`VALIDATION_BACKLOG.md` - ---- - -## Package posture after review - -| Use case | Fit | -|---|---| -| Internal planning and prioritization | **Strong** | -| Engineering remediation sequencing | **Strong** | -| Leadership briefing | **Strong** | -| External diligence without further validation | **Limited** | -| Formal compliance or security attestation | **Not yet** | - ---- - -## Immediate next step - -Use this reviewed copy to align on priorities, then execute the validation tasks in `VALIDATION_BACKLOG.md` against the live repositories before sending the package outside the team. diff --git a/ToxMCP_Audit_Reviewed_v2/INDEX.md b/ToxMCP_Audit_Reviewed_v2/INDEX.md deleted file mode 100644 index cce04cc..0000000 --- a/ToxMCP_Audit_Reviewed_v2/INDEX.md +++ /dev/null @@ -1,146 +0,0 @@ -# ToxMCP Comprehensive Audit - Master Index (Reviewed Copy) - -**Audit package date:** 2026-04-15 -**Repositories in scope:** `comptox-mcp`, `oqt-mcp`, `aop-mcp`, `pbpk-mcp` - ---- - -## Read this first - -Before reusing any finding outside the immediate engineering team, read: - -1. `AUDIT_EVIDENCE_FRAMEWORK.md` -2. `TOXMCP_MASTER_AUDIT_REPORT.md` -3. `VALIDATION_BACKLOG.md` -4. `PUBLIC_REFERENCE_NOTES.md` - -These four documents define: -- what the package actually claims -- how strong the evidence is -- what still needs validation - ---- - -## Navigation by audience - -### Leadership / program management -- `DELIVERY_SUMMARY.md` -- `QUICK_REFERENCE.md` -- `TOXMCP_MASTER_AUDIT_REPORT.md` - -### Engineering leads -- repository-specific `README.md` files -- `QUICK_REFERENCE.md` -- `VALIDATION_BACKLOG.md` - -### Security / platform engineering -- `toxmcp_security_audit_report.md` -- `ToxMCP_Performance_Resilience_Audit_Report.md` -- `toxmcp_observability_audit_report.md` -- `aop-mcp-audit/REMEDIATION_CODE.md` -- `pbpk-mcp-audit/REMEDIATION_CODE.md` - -### Regulatory / quality / scientific governance -- `toxmcp_regulatory_audit_report.md` -- `cognitive_ergonomics_audit_report.md` -- `toxmcp_adversarial_audit_report.md` -- `oqt-mcp-audit/README.md` -- `comptox-mcp-audit/README.md` - -### Architecture / integration owners -- `toxmcp_contract_audit_report.md` -- `toxmcp_future_proofing_audit_report.md` -- `TOXMCP_MASTER_AUDIT_REPORT.md` - ---- - -## Package structure - -```text -README.md -DELIVERY_SUMMARY.md -INDEX.md -QUICK_REFERENCE.md -TOXMCP_MASTER_AUDIT_REPORT.md -AUDIT_EVIDENCE_FRAMEWORK.md -REVISION_LOG.md -VALIDATION_BACKLOG.md - -Specialist reports/ - toxmcp_regulatory_audit_report.md - toxmcp_adversarial_audit_report.md - toxmcp_contract_audit_report.md - toxmcp_security_audit_report.md - ToxMCP_Performance_Resilience_Audit_Report.md - toxmcp_observability_audit_report.md - cognitive_ergonomics_audit_report.md - toxmcp_future_proofing_audit_report.md - -Repository packages/ - comptox-mcp-audit/ - oqt-mcp-audit/ - aop-mcp-audit/ - pbpk-mcp-audit/ - -Shared reference code/ - toxmcp_remediation_snippets.py -``` - ---- - -## Fastest route to decisions - -### Question: “What are the top cross-suite issues?” -Read: -- `TOXMCP_MASTER_AUDIT_REPORT.md` -- `QUICK_REFERENCE.md` - -### Question: “What should each repo team do next?” -Read: -- repo `README.md` -- repo `REMEDIATION_CODE.md` -- `VALIDATION_BACKLOG.md` - -### Question: “How much of this is directly observed vs inferred?” -Read: -- `AUDIT_EVIDENCE_FRAMEWORK.md` -- relevant specialist report summary section - -### Question: “Can we circulate this externally?” -Read: -- `DELIVERY_SUMMARY.md` -- `REVISION_LOG.md` -- `VALIDATION_BACKLOG.md` - ---- - -## 
Highest-priority repository docs - -| Repository | Start here | Then read | -|---|---|---| -| `comptox-mcp` | `comptox-mcp-audit/README.md` | `toxmcp_regulatory_audit_report.md`, `comptox-mcp-audit/REMEDIATION_CODE.md` | -| `oqt-mcp` | `oqt-mcp-audit/README.md` | `cognitive_ergonomics_audit_report.md`, `oqt-mcp-audit/REMEDIATION_CODE.md` | -| `aop-mcp` | `aop-mcp-audit/README.md` | `toxmcp_security_audit_report.md`, `aop-mcp-audit/REMEDIATION_CODE.md` | -| `pbpk-mcp` | `pbpk-mcp-audit/README.md` | `ToxMCP_Performance_Resilience_Audit_Report.md`, `pbpk-mcp-audit/REMEDIATION_CODE.md` | - ---- - -## Legend used in reviewed summaries - -| Label | Meaning | -|---|---| -| **Observed** | Directly supported by the audit material itself | -| **Observed + inferred** | A direct observation supports a broader architectural conclusion | -| **Scenario** | Threat or misuse path with stated preconditions | -| **High / Medium / Low confidence** | How strongly the package supports the claim | - ---- - -## Package-level caution - -The repository packages and specialist reports are useful and actionable, but several findings still need: -- live-repo verification -- proof-of-concept reproduction -- fix verification tests - -Treat this package as a strong internal audit and planning artifact, not a substitute for formal external assurance. diff --git a/ToxMCP_Audit_Reviewed_v2/MANIFEST.md b/ToxMCP_Audit_Reviewed_v2/MANIFEST.md deleted file mode 100644 index b825328..0000000 --- a/ToxMCP_Audit_Reviewed_v2/MANIFEST.md +++ /dev/null @@ -1,6 +0,0 @@ -# ToxMCP Reviewed Package Manifest - -This manifest lists file hashes for the reviewed copy so package contents can be checked after transfer. - -- Hash algorithm: `SHA-256` -- File-level digests: see `MANIFEST_SHA256.txt` diff --git a/ToxMCP_Audit_Reviewed_v2/MANIFEST_SHA256.txt b/ToxMCP_Audit_Reviewed_v2/MANIFEST_SHA256.txt deleted file mode 100644 index 438b574..0000000 --- a/ToxMCP_Audit_Reviewed_v2/MANIFEST_SHA256.txt +++ /dev/null @@ -1,28 +0,0 @@ -6725c8e43032b103ab3de0606705f244650937c7baead2eb3fad766763572a04 AUDIT_EVIDENCE_FRAMEWORK.md -09d987ce9552549256b349a08ab20258313c4194c9e53ca02171519ad96d9571 DELIVERY_SUMMARY.md -902676cc99336c83e0a412b0975cfa06950f4da79ef75b450dc93660866864bb INDEX.md -20e3c23f7ae37f20efed5d34e7d5bb35a5b7d02c2105c6b70e7a150ba1f4a5f3 MANIFEST.md -99d1b6a1292c57c5ee574d9a521a7bf15a63504bb4e50322eef395a20caaa035 MANIFEST_SHA256.txt -17fd6599ffe4e15178ef246bd37465e79af9cfc125a1698a435ea05497ff2d46 PUBLIC_REFERENCE_NOTES.md -66f0130512889699849411cdc91792f9dbdb41ab22c03b8f5f7b9dd288d18ac4 QUICK_REFERENCE.md -8fdc3f642a70a9bdef298e8b5402bc027145e2d0f548df645e6bed450805ad0e README.md -364514826910925086be43ba1e39928902520f8c90773337295e0e9dfecffe29 REVISION_LOG.md -02198f635d7c0141642e3ab8437b4620da47ded9415c74b30c8f6b7aefb59c87 TOXMCP_MASTER_AUDIT_REPORT.md -4961d40e58e06842c0b693aac6c8833eae5b363867ffc11270c15ff521b0afb9 ToxMCP_Performance_Resilience_Audit_Report.md -369ae386a5495fd7de172a49852b792a7a68b1e9db8a99889bfe57ac1cebdeb1 VALIDATION_BACKLOG.md -b9bef0619ae1e308f84b2eacb4553a85cd079ee9fdb4e7ac9684eaa47021b6b4 aop-mcp-audit/README.md -0989473fc6088c61afa9c81bbbfa32a4cd85cfdccb5597753675a3fb3d3deaaa aop-mcp-audit/REMEDIATION_CODE.md -c08ec262058f8c89d9bc7ae6df4f1535b59d3b1bdd5e72822dfc583b88c291ab cognitive_ergonomics_audit_report.md -5395278129f2822421cba26b27c41596d009fa38cea3c7d2e7d74441d48e07c0 comptox-mcp-audit/README.md -d53c1a299213a87aeb1e10f081ba1f96ad424c19da31573a249df36eb782151c comptox-mcp-audit/REMEDIATION_CODE.md 
-1124485197ac0672ba0205c57b175e20f6ebb1a1a6978baeb11636671198d49e oqt-mcp-audit/README.md -d95bd6fee607f7335b8b6d4ef22b94aacbc24f0f31cad038e095fd9129392f9e oqt-mcp-audit/REMEDIATION_CODE.md -79d612915067d8b3e4cd0da600a8f62cd3a1c24a0932c2506942ec9356c3b0e0 pbpk-mcp-audit/README.md -f53fae41a1baadc7709e78ec099e5de981e2d917994fe6c0c942b0029b0b1894 pbpk-mcp-audit/REMEDIATION_CODE.md -b1ff5364fd919d4f31137728127051f52e8074c7794fff4739bd7baa76eb9afd toxmcp_adversarial_audit_report.md -d61a2e3dc29bbc3c93121022621ebfa9eb0e7a31b4c6cb4e80ba96d9a71d6f8f toxmcp_contract_audit_report.md -ab50e4b3f26cb55a2363fe857360ad51031c7bc0a14cfd53d46be4e259e9b1d2 toxmcp_future_proofing_audit_report.md -d707433a13ddc6b66051bd58788779328fd18984c796b08318b1202449e94f9f toxmcp_observability_audit_report.md -f3857a6e674c91a0ff9da95e9088fb9c7d45c06c75bb43021d09779585579601 toxmcp_regulatory_audit_report.md -e4c1e7fbf7ea7f73aff21f7ebd3aeb38146f8cb176e96486cf859271846478b6 toxmcp_remediation_snippets.py -bf4effb38cff4e07960406190aa6217a68d2c7f2478c27b6a47d78c3e6fa98d7 toxmcp_security_audit_report.md diff --git a/ToxMCP_Audit_Reviewed_v2/PUBLIC_REFERENCE_NOTES.md b/ToxMCP_Audit_Reviewed_v2/PUBLIC_REFERENCE_NOTES.md deleted file mode 100644 index a19ea62..0000000 --- a/ToxMCP_Audit_Reviewed_v2/PUBLIC_REFERENCE_NOTES.md +++ /dev/null @@ -1,23 +0,0 @@ -# Public Reference Notes Used During Review - -This package review updated a small number of time-sensitive or standards-sensitive framing decisions. -The following public materials were consulted at review time: - -## MCP / protocol context -- Model Context Protocol specification (latest public release noted during review) -- MCP specification changelog describing the shift from HTTP+SSE to Streamable HTTP -- MCP 2026 roadmap notes on transport evolution, session handling, and server cards - -## Regulatory context -- 21 CFR Part 11 (electronic records and electronic signatures) -- FDA guidance on Part 11 scope/application and electronic systems/electronic records/electronic signatures -- EudraLex Annex 11 public materials -- OECD GLP data integrity and computerized systems guidance materials - -## How these were used -These references were used only to: -- update obviously stale protocol/future-proofing framing -- keep regulatory wording appropriately cautious -- avoid making claims that outran the public standards context - -This file is not a full compliance mapping and should not be treated as one. diff --git a/ToxMCP_Audit_Reviewed_v2/QUICK_REFERENCE.md b/ToxMCP_Audit_Reviewed_v2/QUICK_REFERENCE.md deleted file mode 100644 index ab899db..0000000 --- a/ToxMCP_Audit_Reviewed_v2/QUICK_REFERENCE.md +++ /dev/null @@ -1,95 +0,0 @@ -# ToxMCP Audit Quick Reference (Reviewed Copy) - -**Purpose:** Fast triage for engineering and leadership -**How to read this page:** It prioritizes what to fix first, not what to claim most loudly. 
- ---- - -## Top cross-suite items - -| Rank | Finding | Primary repos | Severity | Evidence basis | Confidence | First action | -|---|---|---|---|---|---|---| -| 1 | Historical reconstruction and provenance gaps | All | **Critical** | Observed + inferred | High | Define a single provenance envelope and capture code/data/runtime versions at workflow start | -| 2 | No mandatory scientific review checkpoints in high-risk flows | `oqt-mcp`, cross-suite | **Critical** | Observed + inferred | High | Add explicit pause-and-approve checkpoints before predictive and reporting steps | -| 3 | Unsafe interpolation / trust-boundary handling | `aop-mcp`, `oqt-mcp` | **Critical / High** | Observed + scenario | High / Medium | Remove structural query interpolation; isolate untrusted identifiers from prompts | -| 4 | Resource-control and resilience gaps | `pbpk-mcp`, `aop-mcp` | **High** | Observed | High | Add quotas, circuit breaker behavior, and load-test-derived defaults | -| 5 | Auditability and traceability gaps | All | **High** | Observed + inferred | High | Propagate a single trace ID and emit replayable provenance records | -| 6 | Cross-suite orchestration responsibility is documented but not implemented | All | **High** | Observed + inferred | Medium-High | Define orchestration ownership, evidence deduplication, and contradiction handling | - ---- - -## Repo-by-repo first moves - -### `comptox-mcp` -1. Capture upstream provenance in a way the provider actually supports -2. Replace audit-log fallback behavior with a tamper-evident trail design -3. Add retry jitter/backoff and document supported MCP transport/version strategy - -### `oqt-mcp` -1. Enforce applicability-domain gates, not just narrative AD summaries -2. Add mandatory human review checkpoints and stronger PDF provenance defaults -3. Treat chemical identifiers as untrusted text when crossing LLM or agent boundaries - -### `aop-mcp` -1. Remove arbitrary query-shape interpolation; use allow-listed query plans and safe binding -2. Add resilience controls for SPARQL upstream failure -3. Tighten draft-signature and checksum-chain semantics - -### `pbpk-mcp` -1. Enforce parameter bounds and log parameter sweeps -2. Add population and memory quotas with tested defaults -3. 
Improve reproducibility metadata and deterministic event hashing - ---- - -## What changed in the reviewed copy - -- absolute phrases were softened to match evidence -- remediation code is now framed as **reference code** -- future-proofing language was updated to current MCP public context -- validation gaps were moved into an explicit backlog - -See: -- `AUDIT_EVIDENCE_FRAMEWORK.md` -- `REVISION_LOG.md` -- `VALIDATION_BACKLOG.md` - ---- - -## Items to validate before external circulation - -| Finding | Why validation is needed | -|---|---| -| SPARQL injection | Need to confirm actual runtime-controlled fields and endpoint permissions | -| Prompt injection via identifiers | Need a real prompt-boundary trace, not only a scenario | -| Regulated-use compliance gaps | Need intended-use and procedural-control context | -| Upstream version pinning | Need to verify what external providers actually expose | -| Population/OOM thresholds | Need measurements on representative infrastructure | - ---- - -## Recommended sequence - -### Week 0: package hygiene -- adopt the reviewed copy -- assign owners -- turn critical findings into tracked work items -- agree on validation criteria - -### Week 1-2: hard controls -- OQT AD gating and review checkpoints -- AOP query safety and circuit breaking -- PBPK parameter/resource controls -- CompTox provenance capture and audit trail hardening - -### Week 3-4: shared architecture -- provenance envelope -- distributed tracing -- orchestration/evidence broker -- fix validation tests - ---- - -## One-line posture - -**Strong internal audit and planning pack; not yet a reproduced external assurance package.** diff --git a/ToxMCP_Audit_Reviewed_v2/README.md b/ToxMCP_Audit_Reviewed_v2/README.md deleted file mode 100644 index b5d29ab..0000000 --- a/ToxMCP_Audit_Reviewed_v2/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# ToxMCP Audit Package - Reviewed Copy - -Start here: - -1. `DELIVERY_SUMMARY.md` — what this package is and how to use it -2. `INDEX.md` — navigation by audience -3. `TOXMCP_MASTER_AUDIT_REPORT.md` — revised cross-suite synthesis -4. `AUDIT_EVIDENCE_FRAMEWORK.md` — how evidence, confidence, and severity are used -5. `VALIDATION_BACKLOG.md` — what still needs reproduction or source verification -6. `PUBLIC_REFERENCE_NOTES.md` — public standards/protocol context consulted during review - -This reviewed copy is designed to be safer to circulate internally than the original draft. It is still best treated as an internal audit and remediation planning package until live-repository validation is complete. diff --git a/ToxMCP_Audit_Reviewed_v2/REVISION_LOG.md b/ToxMCP_Audit_Reviewed_v2/REVISION_LOG.md deleted file mode 100644 index 01df148..0000000 --- a/ToxMCP_Audit_Reviewed_v2/REVISION_LOG.md +++ /dev/null @@ -1,95 +0,0 @@ -# ToxMCP Audit Package - Revision Log - -**Reviewed copy date:** 2026-04-15 - ---- - -## What changed in this reviewed copy - -This revision keeps the core findings but tightens the package in five ways: - -1. **Evidentiary discipline** - - Added an explicit evidence framework - - Separated observed facts from architecture inferences and scenario narratives - - Softened absolute language where reproduction was not shown - -2. **Internal consistency** - - Normalized date mismatches - - Corrected tone and severity inconsistencies - - Aligned top-level summaries with more defensible wording - -3. 
**Remediation quality** - - Reframed code samples as **reference implementations** - - Corrected several mitigations that were too generic or potentially misleading - - Upgraded the shared Python remediation file so it is clearer about placeholder boundaries - -4. **Future-proofing accuracy** - - Updated MCP-related language to reflect the current public specification and roadmap context - - Reframed speculative schedule statements as migration-risk statements - -5. **Delivery readiness** - - Added a validation backlog - - Added reviewed summaries for repository-specific packages - - Added package-level notes about intended use and limitations - ---- - -## Files rewritten or substantially revised - -### Top-level package docs -- `DELIVERY_SUMMARY.md` -- `INDEX.md` -- `QUICK_REFERENCE.md` -- `TOXMCP_MASTER_AUDIT_REPORT.md` - -### New governance/QA docs -- `AUDIT_EVIDENCE_FRAMEWORK.md` -- `REVISION_LOG.md` -- `VALIDATION_BACKLOG.md` - -### Specialist reports substantially revised -- `toxmcp_regulatory_audit_report.md` -- `toxmcp_security_audit_report.md` -- `toxmcp_future_proofing_audit_report.md` - -### Repository summaries substantially revised -- `comptox-mcp-audit/README.md` -- `oqt-mcp-audit/README.md` -- `aop-mcp-audit/README.md` -- `pbpk-mcp-audit/README.md` - -### Shared code revised -- `toxmcp_remediation_snippets.py` - ---- - -## Files lightly edited - -The following documents were retained but annotated or normalized: -- `ToxMCP_Performance_Resilience_Audit_Report.md` -- `cognitive_ergonomics_audit_report.md` -- `toxmcp_adversarial_audit_report.md` -- `toxmcp_contract_audit_report.md` -- `toxmcp_observability_audit_report.md` -- all `REMEDIATION_CODE.md` files - -Typical light edits: -- reviewed-copy note inserted -- date normalization -- wording updates for over-absolute claims -- short caveat added for reference code - ---- - -## What this reviewed copy still does not claim - -- It does **not** claim that all line references were revalidated against the live repositories -- It does **not** claim that all attack chains were reproduced -- It does **not** claim that remediation code is merge-ready without repo-specific adaptation and tests -- It does **not** upgrade the package into a formal third-party audit - ---- - -## Recommended next step - -Use this reviewed copy as the planning and stakeholder-facing basis, then execute the `VALIDATION_BACKLOG.md` items against the live repositories before external circulation. 
diff --git a/ToxMCP_Audit_Reviewed_v2/TOXMCP_MASTER_AUDIT_REPORT.md b/ToxMCP_Audit_Reviewed_v2/TOXMCP_MASTER_AUDIT_REPORT.md deleted file mode 100644 index 23b8a1f..0000000 --- a/ToxMCP_Audit_Reviewed_v2/TOXMCP_MASTER_AUDIT_REPORT.md +++ /dev/null @@ -1,272 +0,0 @@ -# ToxMCP Ecosystem - Comprehensive Adversarial Audit Report (Reviewed Copy) - -**Review date:** 2026-04-15 -**Scope:** `comptox-mcp`, `oqt-mcp`, `aop-mcp`, `pbpk-mcp` -**Intended use:** Internal planning, engineering prioritization, stakeholder briefing - ---- - -## Review status - -This reviewed copy preserves the original package’s core concerns while tightening: -- evidentiary language -- severity calibration -- remediation phrasing -- package-level consistency - -It should be read together with: -- `AUDIT_EVIDENCE_FRAMEWORK.md` -- `REVISION_LOG.md` -- `VALIDATION_BACKLOG.md` - ---- - -## Executive judgment - -The ToxMCP audit bundle is **strong as an internal red-team and architecture review**, especially where it identifies cross-cutting risks around provenance, uncertainty, orchestration, and scientist-facing misuse. - -The reviewed copy does **not** treat the package as a finished external audit. A number of findings remain best framed as: -- observed implementation gaps, -- architecture inferences, -- or scenario-based exploit narratives that still require reproduction. - -### Bottom-line rating - -| Use case | Assessment | -|---|---| -| Internal remediation planning | **Strong** | -| Cross-team prioritization | **Strong** | -| Leadership briefing | **Strong** | -| External diligence without further validation | **Limited** | -| Formal assurance / submission support | **Not yet** | - ---- - -## Why the package is still valuable - -The most important insight in the original work was correct: the main failure modes are not only classic software bugs. They are also: - -- missing time-machine reconstruction -- confidence without calibration -- outputs that look authoritative without enough provenance -- cross-tool contradictions that no component is responsible for resolving -- uncertainty that grows across the workflow but is never represented explicitly - -Those are real and important system-level risks for a toxicology workflow stack. 
- ---- - -## Evidence and confidence summary - -| ID | Finding | Severity | Evidence basis | Confidence | Reviewed wording | -|---|---|---|---|---|---| -| M-01 | Historical reconstruction and provenance gaps | **Critical** | Observed + inferred | High | High risk of being unable to reconstruct past outputs in a defensible way | -| M-02 | Missing or weak human review checkpoints in high-risk flows | **Critical** | Observed + inferred | High | High risk of false confidence and unreviewed downstream reporting | -| M-03 | Unsafe trust-boundary handling in query/prompt paths | **Critical / High** | Observed + scenario | High / Medium | Unsafe interpolation patterns are present; exploitability depends on actual runtime data flow | -| M-04 | No shared cross-suite orchestration / contradiction handling layer | **High** | Observed + inferred | Medium-High | Responsibility is documented but not implemented in the audited material | -| M-05 | Resource-control and resilience gaps | **High** | Observed | High | Service instability or degraded scientific throughput is plausible under stress | -| M-06 | Auditability, replay, and observability gaps | **High** | Observed + inferred | High | Debugging and post-hoc verification are materially harder than they should be | -| M-07 | Schema / protocol / ontology evolution risk | **High** | Observed + standards note | Medium-High | Migration cost is likely to be high without shared abstraction and versioning discipline | - ---- - -## System-level findings - -### M-01: Provenance and reconstruction are not yet first-class - -Across the package, the strongest repeated concern is not merely "missing logs." It is the absence of a single, defensible record of: - -- code version -- runtime environment -- input identity resolution -- upstream data/version context -- model/tool version -- human review state -- final signed or approved output - -This matters because toxicology workflows often need more than replay. They need a **reconstructable explanation of what happened, when, with which inputs, under which software and data conditions**. - -**Why this remains Critical:** -The reviewed copy still considers this a critical suite-level gap because it affects integrity, auditability, and the ability to defend historical outputs. - -**What changed in the wording:** -The original package sometimes implied automatic regulatory failure. The reviewed copy instead states that this gap creates a **high risk of non-conformance and defensibility failure for regulated use**, subject to intended use and any external procedural controls. - ---- - -### M-02: Human review is not reliably embedded where it matters most - -The package is persuasive when it shows how scientist-facing automation can move from: -chemical identification → predictive tooling → PDF/report artifact -without clearly enforced review checkpoints. - -The issue is not that automation exists. The issue is that the package shows too many places where the workflow can appear "finished" before: -- chemical identity is confirmed -- applicability-domain boundaries are accepted -- contradictory evidence is surfaced -- confidence language is reviewed by a human - -**Why this remains Critical:** -Because the scientific and regulatory risk is not just wrong output; it is **wrong output wrapped in a professional-looking artifact**. 
- ---- - -### M-03: Trust-boundary handling needs to be reworked, not just patched - -Two areas matter here: - -#### SPARQL/query safety -The audited material shows string-template interpolation for queries. That is a real code smell. -However, the reviewed copy avoids overstating destructive impact unless runtime permissions and update semantics are known. The safest defensible claim is: - -> unsafe interpolation is observed; query broadening, unauthorized data exposure, or result manipulation are plausible; destructive effects depend on endpoint permissions and whether update-capable operations are reachable. - -#### Prompt/instruction boundary safety -The package also reasonably flags that chemical identifiers and similar text fields may cross into LLM- or agent-facing contexts. -But the reviewed copy treats full prompt injection as **scenario-dependent** until the exact prompt boundary is demonstrated. - -**Practical implication:** -These should still be treated as near-term remediation items, because the mitigation cost is lower than the cost of being wrong later: -- bind literals safely -- allow-list structural query choices -- isolate untrusted text from system instructions -- prefer structured tool arguments over interpolated natural language - ---- - -### M-04: Cross-suite orchestration responsibility is missing - -This remains one of the most original and useful findings in the bundle. - -The issue is not merely that there is no single "orchestrator service" file. It is that the audited material repeatedly implies a higher layer is responsible for: - -- evidence deduplication -- contradiction detection -- cross-module narrative coherence -- schema translation and version negotiation -- final dossier assembly - -Yet that responsibility is not concretely implemented in the package. - -**Why this matters:** -Without an explicit owner for cross-tool reasoning, each repo can be locally correct while the suite-level story is inconsistent. - ---- - -### M-05: Resilience gaps are likely to surface under real load - -The package identifies several plausible service-stability issues: -- no clear circuit-breaker behavior for SPARQL-like upstream failure paths -- insufficient quotas for large PBPK workloads -- retry logic that may amplify load -- limited replay/diff tooling for diagnosing divergent results - -The reviewed copy retains these as **High** rather than inflating every one to Critical, because actual severity depends on deployment size, workload mix, and whether external infrastructure already enforces limits. - ---- - -### M-06: Observability and replayability are under-designed - -The original observability audit made a strong point: the suite is difficult to debug as a system, not just as four independent repos. - -The most important issues are: -- no single trace across tools -- insufficient replay artifacts -- limited diffability of outputs -- incomplete privacy/sensitivity handling in logs - -This is more than an operational inconvenience. It slows incident response, scientific debugging, and compliance evidence gathering. - ---- - -### M-07: Future-proofing risk is real, but should be framed as migration resilience - -The original package correctly identified fragmentation around: -- transport handling -- schema versioning -- ontology evolution -- provider coupling - -The reviewed copy updates the framing: these are not just "future features missing." They are **migration resilience risks**. -That is the more durable claim. 
- ---- - -## Cross-cutting bridge components still worth building - -The original master report recommended architectural bridge components. That remains the right direction. - -### 1. Provenance and evidence ledger -A suite-wide component that records: -- input identity resolution -- upstream retrieval metadata -- code/runtime snapshot -- tool outputs and hashes -- review checkpoints -- final artifact lineage - -### 2. Orchestration and evidence-broker layer -A single place to handle: -- schema mediation -- contradiction detection -- evidence deduplication -- confidence/uncertainty aggregation -- final narrative assembly rules - -### 3. Policy and safe-execution layer -A shared layer for: -- authorization and review policies -- prompt/query trust-boundary handling -- rate limits and quotas -- audit and trace propagation -- secure offline/controlled execution modes where required - ---- - -## Priority remediation plan - -### Wave 0 - package hygiene and governance -- adopt this reviewed copy as the working baseline -- assign repo owners for each critical item -- agree on validation criteria before external use -- stop describing snippets as production-ready code - -### Wave 1 - hard controls -- OQT: applicability-domain gating, review checkpoints, safer report defaults -- AOP: query safety redesign and resilience controls -- PBPK: bounds, quotas, and reproducibility metadata -- CompTox: provenance capture and tamper-evident audit design - -### Wave 2 - shared architecture -- provenance envelope -- trace propagation -- orchestration/evidence broker -- schema/version registry decisions - -### Wave 3 - external defensibility -- live-repo revalidation -- proof-of-concept or deterministic reasoning notes for each critical item -- fix verification tests -- commit or permalink references - ---- - -## What should not be claimed yet - -Until the validation backlog is complete, avoid saying that the package has already demonstrated: -- formal exploit reproduction for all security findings -- conclusive regulatory rejection outcomes -- production-ready remediation patches -- complete live-repo verification - ---- - -## Final assessment - -The original package had the right instincts and several genuinely strong insights. -The reviewed copy makes it safer and more useful by separating: -- what is directly observed, -- what is inferred, -- and what remains a scenario that should be validated. - -**Bottom line:** this is a strong internal audit and remediation planning bundle, and now a better one. It is still one validation step away from being an externally defensible assurance artifact. diff --git a/ToxMCP_Audit_Reviewed_v2/ToxMCP_Performance_Resilience_Audit_Report.md b/ToxMCP_Audit_Reviewed_v2/ToxMCP_Performance_Resilience_Audit_Report.md deleted file mode 100644 index 77e045e..0000000 --- a/ToxMCP_Audit_Reviewed_v2/ToxMCP_Performance_Resilience_Audit_Report.md +++ /dev/null @@ -1,474 +0,0 @@ -# ToxMCP Suite - Performance & Resilience Audit Report - -**Audit Date:** 2026-04-15 -**Auditor:** Performance & Resilience Engineer -**Scope:** comptox-mcp, oqt-mcp, aop-mcp, pbpk-mcp repositories - ---- - -> **Reviewed copy (2026-04-15):** This document was retained from the original package but lightly edited for consistency. -> Unless explicitly stated otherwise, code blocks are **reference implementations**, not validated patches, and scenario-based exploit narratives should not be read as reproduced proofs. 
- - - -## Executive Summary - -This audit identifies critical scaling cliffs and fault modes across the ToxMCP ecosystem. While the suite demonstrates good architectural patterns for job persistence and retry logic, significant gaps exist in **circuit breaker implementation**, **memory protection for large simulations**, and **input validation for chemical complexity**. - -**Overall Risk Rating: 🔴 HIGH** - ---- - -## 1. SPARQL Timeout Cascades (AOP-MCP) 🔴 Critical - -### Finding AOP-001: No Circuit Breaker Logic - -**File:** `aop-mcp/src/adapters/sparql_client.py` (lines 37-231) - -**Issue:** The SPARQL client implements failover across endpoints but **lacks circuit breaker pattern**: - -```python -# Current implementation - NO circuit breaker -async def _dispatch(self, query: str, *, timeout: float | None = None) -> dict[str, Any]: - last_error: Exception | None = None - for endpoint in self._endpoints: - attempts = self._max_retries + 1 - for attempt in range(attempts): - try: - response = await self._client.post(...) - except Exception as exc: - # Simply logs and retries - no circuit breaker - logger.warning("SPARQL request to %s failed...", endpoint.url, ...) - last_error = exc - continue -``` - -**Fault Mode:** When AOP-Wiki is down: -- System **FAILS CLOSED** - raises `SparqlUpstreamError` after all endpoints exhausted -- No graceful degradation to cached/empty results -- Each request waits full timeout (default 10s) x retries (default 2) x endpoints -- **Cascading latency** under load - -**Missing Protection:** -| Feature | Status | Risk | -|---------|--------|------| -| Circuit Breaker | Absent | 🔴 Critical | -| Exponential Backoff | Absent | 🟠 High | -| Jitter | Absent | 🟠 High | -| Half-Open State | Absent | 🔴 Critical | -| Cache-First on Failure | Absent | 🟠 High | - -**Thresholds:** -- Default timeout: **10 seconds** -- Default retries: **2 per endpoint** -- No maximum query complexity limits - -**Recommendation:** Implement circuit breaker with: -- Failure threshold: 5 errors in 60 seconds -- Open state duration: 30 seconds -- Half-open probe: 1 request -- Fallback to cache or empty results with warning - ---- - -## 2. Memory Exhaustion Patterns (PBPK-MCP) 🔴 Critical - -### Finding PBPK-001: No Population Size Limits - -**File:** `pbpk-mcp/src/mcp_bridge/services/job_service.py` (1392 lines) - -**Issue:** Population simulations can generate massive datasets with **no input validation**: - -```python -# From JobRecord dataclass - no population size limits -@dataclass -class JobRecord: - job_id: str - simulation_id: str - job_type: str # Can be "population_simulation" - # ... 
no max_population_size field -``` - -**Configuration (`.env.example`):** -```bash -JOB_TIMEOUT_SECONDS=300 # 5 minutes -JOB_MAX_RETRIES=0 -JOB_WORKER_THREADS=2 -# NO population size limit defined -``` - -**OOM Risk Assessment:** - -| Population Size | Memory Estimate | Timeout Risk | -|-----------------|-----------------|--------------| -| 100 patients | ~50 MB | Low | -| 1,000 patients | ~500 MB | Medium | -| 10,000 patients | ~5 GB | 🔴 High - Likely OOM | -| 100,000 patients | ~50 GB | 🔴 Critical - Likely OOM on many worker sizes | - -**Streaming Status:** NO streaming/chunking logic found for population results - -**File:** `pbpk-mcp/src/mcp_bridge/storage/population_store.py` (not examined but referenced) - -**Missing Protection:** -- No `max_population_size` parameter -- No memory quota enforcement -- No result pagination/streaming -- SQLite storage loads full results into memory - -**Recommendation:** -1. Add `MAX_POPULATION_SIZE=5000` environment variable -2. Implement result streaming with chunk handles -3. Add memory quota check before simulation start - ---- - -### Finding PBPK-002: Insufficient Job Timeout - -**Current:** `JOB_TIMEOUT_SECONDS=300` (5 minutes) - -**Risk:** Population simulations with 1000+ patients can exceed 5 minutes, causing: -- Job marked as `TIMEOUT` status -- Orphaned simulation processes in R/ospsuite -- Partial results lost - -**Recommendation:** -- Increase default to 1800s (30 minutes) for population jobs -- Implement job-type specific timeouts - ---- - -## 3. API Rate Limit Handling (CompTox-MCP) 🟠 High - -### Finding CTX-001: Basic Retry Without Jitter - -**File:** `comptox-mcp/src/epacomp_tox/settings.py` (lines 37-139) - -**Current Implementation:** -```python -class ContextSettings: - retry_attempts: int # Default: 3 - retry_base: float # Default: 0.5 seconds -``` - -**Configuration:** -```bash -CTX_RETRY_ATTEMPTS=3 -CTX_RETRY_BASE=0.5 -``` - -**Retry Pattern:** -- Attempt 1: Immediate -- Attempt 2: 0.5s delay -- Attempt 3: 0.5s delay (NOT exponential!) - -**Missing Protection:** -| Feature | Status | Risk | -|---------|--------|------| -| Exponential Backoff | Partial (fixed base) | 🟠 High | -| Jitter | Absent | 🔴 Critical | -| Rate Limit Headers | Not checked | 🟠 High | -| Quota Budgets | Absent | 🟠 High | -| 429 Retry-After | Not honored | 🔴 Critical | - -**Fault Mode:** Under EPA CompTox rate limiting: -- Multiple concurrent requests will retry simultaneously -- **Thundering herd** amplifies rate limit violations -- No `Retry-After` header parsing -- Risk of temporary API ban - -**Recommendation:** -```python -# Implement proper exponential backoff with jitter -delay = retry_base * (2 ** attempt) + random.uniform(0, 1) -``` - ---- - -## 4. 
Long-Running Job Orphans (PBPK-MCP) 🟡 Medium - -### Finding PBPK-003: SQLite Persistence with Limitations - -**File:** `pbpk-mcp/src/mcp_bridge/services/job_service.py` (lines 127-400) - -**Positive Finding:** Jobs are persisted to SQLite: -```python -class JobRegistry: - def __init__(self, db_path: str = "var/jobs/registry.json"): - self._conn = sqlite3.connect(str(self._prepare_path(db_path))) - # Creates tables: job_records, simulation_results -``` - -**Survival Scenario:** -| Scenario | Job Survival | Notes | -|----------|--------------|-------| -| API server restart | Yes | SQLite persists to disk | -| Worker crash | Partial | Job status may be "RUNNING" but actually dead | -| Full system restart | Yes | Jobs recover from SQLite | -| Celery backend crash | Depends | Redis/memory backend loses queue | - -**Orphan Risk:** -- Job status can remain `RUNNING` indefinitely if worker dies -- No heartbeat/health check from workers to verify liveness -- Cleanup only based on `retention_seconds` (default unknown) - -**Recommendation:** -1. Implement worker heartbeat (every 30s) -2. Mark jobs as `FAILED` if no heartbeat for 2x timeout -3. Add orphan detection job (runs every 5 minutes) - ---- - -## 5. Maximum Safe Chemical Complexity 🟠 High - -### Finding SUITE-001: No Complexity Limits - -**Cross-Repository Analysis:** - -| Component | Validation | Limit | -|-----------|------------|-------| -| AOP-MCP SPARQL queries | None | N/A | -| CompTox-MCP chemical search | Basic | None | -| PBPK-MCP population sims | None | N/A | -| OQT-MCP workflows | Timeout only | 300s | - -**Missing Validations:** -- **Molecular complexity:** No atom count limit -- **Pathway depth:** No AOP chain length limit -- **Query result size:** No LIMIT enforcement on SPARQL -- **Simulation granularity:** No time-step minimum - -**Risk Scenarios:** -1. **SPARQL query** with unlimited `?chemical aops:hasMIE` traversal → timeout/OOM -2. **Population simulation** with 100,000 virtual patients → OOM -3. **AOP network** query with 50+ key events → response size explosion - -**Recommendation:** Implement tiered limits: -```python -MAX_ATOMS = 500 # For PBPK modeling -MAX_AOP_CHAIN_DEPTH = 10 -MAX_SPARQL_RESULTS = 10000 -MAX_POPULATION_SIZE = 5000 -``` - ---- - -## 6. Cross-Component Vulnerability Matrix - -| Threat | CompTox | AOP | PBPK | OQT | Severity | -|--------|---------|-----|------|-----|----------| -| Timeout Cascade | 🟡 | 🔴 | 🟡 | 🟠 | 🔴 Critical | -| Memory Exhaustion | 🟢 | 🟢 | 🔴 | 🟢 | 🔴 Critical | -| Rate Limit Ban | 🟠 | 🟢 | 🟢 | 🟢 | 🟠 High | -| Job Orphans | 🟢 | 🟢 | 🟡 | 🟢 | 🟡 Medium | -| Complexity Bomb | 🟠 | 🔴 | 🔴 | 🟠 | 🔴 Critical | - ---- - -## 7. Specific File References - -### Critical Files Examined: - -1. **AOP-MCP:** - - `src/adapters/sparql_client.py` (231 lines) - No circuit breaker - - `src/adapters/aop_wiki.py` - SPARQL endpoint consumer - -2. **CompTox-MCP:** - - `src/epacomp_tox/settings.py` (139 lines) - Retry config - - `src/epacomp_tox/client.py` (102 lines) - Basic client - -3. **PBPK-MCP:** - - `src/mcp_bridge/services/job_service.py` (1392 lines) - Job persistence - - `src/mcp_bridge/config.py` (543 lines) - Configuration - - `.env.example` (67 lines) - Environment defaults - -4. **OQT-MCP:** - - `TIMEOUT_FIX_SUMMARY.md` - Timeout hardening documentation - ---- - -## 8. 
Concrete Thresholds & Resource Limits - -### Current Limits: - -| Parameter | Default | Maximum | Unit | -|-----------|---------|---------|------| -| SPARQL timeout | 10 | Configurable | seconds | -| SPARQL retries | 2 | Configurable | attempts | -| Job timeout | 300 | Configurable | seconds | -| Job retries | 0 | Configurable | attempts | -| API retry attempts | 3 | Configurable | attempts | -| API retry base | 0.5 | Configurable | seconds | -| Adapter timeout | 30 | Configurable | seconds | - -### Missing Limits (Critical Gaps): - -| Parameter | Recommended | Priority | -|-----------|-------------|----------| -| Max population size | 5000 | 🔴 Critical | -| Max SPARQL results | 10000 | 🔴 Critical | -| Max AOP chain depth | 10 | 🟠 High | -| Max molecule atoms | 500 | 🟠 High | -| Circuit breaker threshold | 5 errors/60s | 🔴 Critical | -| Memory quota per job | 2 GB | 🔴 Critical | - ---- - -## 9. Recommendations Summary - -### Immediate Actions (Critical): - -1. **PBPK-MCP:** Add `MAX_POPULATION_SIZE` limit (default 5000) -2. **AOP-MCP:** Implement circuit breaker for SPARQL endpoints -3. **CompTox-MCP:** Add jitter and exponential backoff to retries -4. **PBPK-MCP:** Implement memory quota check before simulations - -### Short-term (High Priority): - -5. **PBPK-MCP:** Add worker heartbeat to prevent orphan jobs -6. **AOP-MCP:** Add `MAX_SPARQL_RESULTS` limit -7. **CompTox-MCP:** Parse and honor `Retry-After` headers -8. **PBPK-MCP:** Implement result streaming for population sims - -### Long-term (Medium Priority): - -9. **All:** Add complexity scoring for chemical inputs -10. **All:** Implement distributed rate limiter -11. **All:** Add Prometheus alerts for resource exhaustion - ---- - -## Appendix: Evidence Snapshots - -### SPARQL Client (No Circuit Breaker): -```python -# From aop-mcp/src/adapters/sparql_client.py -class SparqlClient: - def __init__(self, ..., max_retries: int = 2, timeout: float = 10.0): - self._max_retries = max(0, max_retries) - self._timeout = timeout -``` - -### Job Persistence (SQLite): -```python -# From pbpk-mcp/src/mcp_bridge/services/job_service.py -class JobRegistry: - def __init__(self, db_path: str = "var/jobs/registry.json"): - self._conn = sqlite3.connect(str(self._prepare_path(db_path))) -``` - -### Retry Configuration (No Jitter): -```python -# From comptox-mcp/src/epacomp_tox/settings.py -ctx_retry_attempts: int = Field(default=3, alias="CTX_RETRY_ATTEMPTS") -ctx_retry_base: float = Field(default=0.5, alias="CTX_RETRY_BASE") -``` - ---- - -## Detailed Findings by Repository - -### AOP-MCP (aop-mcp) - -**Version:** v0.8.1 -**Primary Risk:** SPARQL timeout cascades - -**Key Files:** -- `src/adapters/sparql_client.py` - Async HTTPX client with failover -- `src/adapters/aop_wiki.py` - AOP-Wiki SPARQL consumer -- `src/adapters/aop_db.py` - AOP-DB integration - -**Findings:** -1. SPARQL client has configurable timeout (default 10s) and retries (default 2) -2. No circuit breaker - sequential endpoint failover only -3. Cache support exists but no cache-first on failure mode -4. 
Metrics recording available but not used for health checks - -**Maximum Safe Load:** -- Query complexity: Unlimited (no validation) -- Result size: Unlimited (no LIMIT enforcement) -- Concurrent queries: Limited by HTTPX connection pool (default 100) - ---- - -### CompTox-MCP (comptox-mcp) - -**Version:** v0.2.2 -**Primary Risk:** Rate limit handling - -**Key Files:** -- `src/epacomp_tox/settings.py` - Configuration with retry settings -- `src/epacomp_tox/client.py` - MCP client wrapper - -**Findings:** -1. Retry configuration: 3 attempts with 0.5s base delay -2. No exponential backoff - fixed delay between retries -3. No jitter - thundering herd risk -4. No rate limit header parsing (429, Retry-After) - -**Maximum Safe Load:** -- Requests per minute: Unknown (EPA CompTox limit not documented) -- Concurrent requests: Limited by client configuration -- No quota budget per tool call - ---- - -### PBPK-MCP (pbpk-mcp) - -**Version:** v0.4.3 -**Primary Risk:** Memory exhaustion - -**Key Files:** -- `src/mcp_bridge/services/job_service.py` - Job orchestration (1392 lines) -- `src/mcp_bridge/config.py` - Application configuration -- `src/mcp_bridge/storage/population_store.py` - Result storage - -**Findings:** -1. SQLite-based job persistence survives restarts -2. No population size validation -3. Job timeout: 300s (5 minutes) - insufficient for large populations -4. Worker threads: 2 (configurable) -5. No memory quota enforcement - -**Maximum Safe Load:** -- Population size: ~1000 patients (before timeout/OOM risk) -- Simulation duration: 5 minutes max (default timeout) -- Memory per job: Unlimited (no quota) - ---- - -### OQT-MCP (oqt-mcp) - -**Version:** v0.3.0 -**Primary Risk:** Timeout on heavy operations - -**Key Files:** -- `TIMEOUT_FIX_SUMMARY.md` - Timeout hardening history -- `src/` - QSAR workflow implementation - -**Findings:** -1. Timeout increased from 120s to 300s for heavy operations -2. Better error handling for 404 responses -3. MCP content type standardization applied - -**Maximum Safe Load:** -- Workflow timeout: 300 seconds -- Heavy operations: Metabolism, reports, batch processing - ---- - -## Risk Severity Legend - -| Badge | Severity | Description | -|-------|----------|-------------| -| 🔴 | Critical | System failure, data loss, or security breach likely | -| 🟠 | High | Performance degradation or availability issues likely | -| 🟡 | Medium | Limited impact, workarounds available | -| 🟢 | Low | Minor issues, easily mitigated | - ---- - -**End of Audit Report** - -*Report generated by Performance & Resilience Engineer* -*ToxMCP Ecosystem Analysis - April 2026* diff --git a/ToxMCP_Audit_Reviewed_v2/VALIDATION_BACKLOG.md b/ToxMCP_Audit_Reviewed_v2/VALIDATION_BACKLOG.md deleted file mode 100644 index 9a40fcd..0000000 --- a/ToxMCP_Audit_Reviewed_v2/VALIDATION_BACKLOG.md +++ /dev/null @@ -1,48 +0,0 @@ -# ToxMCP Audit Validation Backlog - -**Purpose:** Convert the reviewed audit pack into a more externally defensible package. 
- ---- - -## Priority 0 - validation required before external sharing - -| ID | Finding | What to validate | Output needed | -|---|---|---|---| -| V0-1 | SPARQL unsafe interpolation | Confirm whether structural query fragments, `ORDER BY`, `LIMIT`, or graph patterns can be influenced by untrusted input at runtime | Minimal PoC, affected code path, safe-vs-unsafe query examples | -| V0-2 | Prompt / instruction injection via chemical identifiers | Trace whether untrusted identifiers are interpolated into model prompts or agent instructions without structured isolation | Prompt boundary diagram, example payload, before/after mitigation test | -| V0-3 | Part 11 / Annex 11 readiness gap | Confirm intended regulated use, signature requirements, and whether procedural controls already exist outside the repos | Control mapping, gap matrix, intended-use memo | -| V0-4 | Upstream provenance/version capture | Verify what the external providers actually expose for versioning, snapshots, and response metadata | Provider capability matrix, proposed internal pinning strategy | -| V0-5 | Population/OOM thresholds | Run controlled load tests on representative worker sizes | Memory/latency curves, safe defaults, enforced limits | - ---- - -## Priority 1 - should be reproduced soon - -| ID | Finding | What to validate | Output needed | -|---|---|---|---| -| V1-1 | Audit chain integrity | Recompute hashes from stored content and confirm mismatch behavior | Unit/integration tests | -| V1-2 | Deterministic hashing for PBPK events | Cross-platform serialization check for floats, NaN, infinity, and ordering | Regression test matrix | -| V1-3 | Distributed tracing gap | Run a multi-tool workflow and confirm whether a single trace can be reconstructed | Trace propagation test | -| V1-4 | Scientific review checkpoints | Confirm that high-risk workflow states can be paused, reviewed, and resumed cleanly | UX flow and test cases | -| V1-5 | Container/runtime hardening risk | Validate actual attack surface for file parsing, package installation, and runtime privileges | Threat model plus runtime config review | - ---- - -## Priority 2 - packaging and governance - -| ID | Task | Why it matters | -|---|---|---| -| V2-1 | Replace inherited line references with live-repo permalinks or commit hashes | External readers can verify claims | -| V2-2 | Add fix verification criteria to each critical item | Prevents “remediation theater” | -| V2-3 | Create a machine-readable finding register | Easier tracking across repos | -| V2-4 | Add sign-off owners and due dates | Turns the pack into an execution tool | - ---- - -## Suggested working rule - -Do not present a finding as externally validated until it has: -1. a code or config location in the live repository -2. stated preconditions -3. a reproduction or reasoning note -4. 
a test for the proposed fix diff --git a/ToxMCP_Audit_Reviewed_v2/aop-mcp-audit/README.md b/ToxMCP_Audit_Reviewed_v2/aop-mcp-audit/README.md deleted file mode 100644 index c210d55..0000000 --- a/ToxMCP_Audit_Reviewed_v2/aop-mcp-audit/README.md +++ /dev/null @@ -1,161 +0,0 @@ -# AOP-MCP Audit Package (Reviewed Copy) - -**Repository:** `aop-mcp` -**Package version cited in original audit:** `v0.8.1` -**Review date:** 2026-04-15 -**Overall posture:** **High-to-critical for trust-boundary safety, draft integrity, and ontology evolution** - ---- - -## How to read this reviewed copy - -The original package correctly identified `aop-mcp` as an integration-heavy surface where: -- query safety -- upstream resilience -- draft/signature integrity -- ontology/schema drift - -all matter at once. - -This reviewed copy retains those concerns, but is stricter about exploit claims: -- **Observed** unsafe interpolation patterns are treated as hard findings -- destructive outcomes such as graph deletion are treated as **scenario-dependent** unless endpoint permissions are known - ---- - -## Finding register - -| ID | Finding | Severity | Evidence basis | Confidence | Reviewed interpretation | -|---|---|---|---|---|---| -| AOP-01 | Unsafe query templating / interpolation | **Critical** | Observed + scenario | High / Medium | A trust-boundary issue is present; exact exploit impact depends on runtime-controlled fields and endpoint permissions | -| AOP-02 | Upstream query failure handling lacks mature resilience controls | **High** | Observed | High | Failure cascades and latency amplification are plausible | -| AOP-03 | Draft metadata and signature semantics are not strong enough for high-assurance review flows | **Critical** | Observed | High | Review and approval lineage is weaker than it should be | -| AOP-04 | Checksum-chain verification needs stronger content binding and write/read validation | **Critical** | Observed | High | Draft history is not yet as tamper-evident as intended | -| AOP-05 | Ontology/version drift can break cross-suite meaning over time | **High** | Observed + inferred | Medium-High | Migration and comparability risk is real | - ---- - -## Detailed findings - -### AOP-01: Query templating should be redesigned around allow-listed query plans -**Severity:** **Critical** -**Evidence basis:** Observed + scenario -**Confidence:** High for unsafe interpolation, Medium for worst-case exploit impact - -The package shows template rendering through Python string formatting. That is a legitimate trust-boundary concern. - -### Reviewed wording -The safest defensible statement is: - -> query construction includes unsafe interpolation patterns; query broadening, result manipulation, or unintended data exposure are plausible if structural fragments can be influenced by untrusted input. - -Avoid assuming destructive update operations unless the endpoint is confirmed to allow them. - -### Better mitigation pattern -Do **not** treat arbitrary query fragments as bindable parameters. - -Use: -- fixed query templates selected from an allow-list -- safe binding only for literals/URIs -- allow-listed sort and limit options -- separate read-only query builders from any update-capable code path - ---- - -### AOP-02: Upstream resilience controls are underdeveloped -**Severity:** **High** -**Evidence basis:** Observed -**Confidence:** High - -The original package’s concern about circuit breaking, backoff, and graceful degradation remains sound. 
-If the AOP upstream is unavailable or slow, repeated retries can amplify latency and user confusion. - -### Recommended control -- bounded retries with jitter -- circuit-breaker/open-state behavior -- explicit error surface to callers -- cache or partial-result policy where scientifically acceptable -- telemetry for endpoint health and fallback path usage - ---- - -### AOP-03: Draft approval semantics are not yet strong enough -**Severity:** **Critical** -**Evidence basis:** Observed -**Confidence:** High - -The original package was right to highlight that draft metadata and authorship fields do not, by themselves, constitute strong review or approval lineage. - -### Recommended control -- strong actor identity linkage -- signature meaning (`authored`, `reviewed`, `approved`, `rejected`) -- UTC timestamping -- content-hash binding -- verified chain between successive draft versions - -### Reviewed wording -Use: **high risk of non-conformance for regulated or high-assurance review workflows** -Avoid: categorical claims of inevitable regulatory outcome. - ---- - -### AOP-04: Checksum verification should prove content integrity, not only compare stored values -**Severity:** **Critical** -**Evidence basis:** Observed -**Confidence:** High - -A checksum field is helpful only when: -- the checksum is mandatory -- the algorithm is defined -- the content used to compute it is canonicalized -- the chain is verified on read -- mutations cannot silently sever lineage - -This remains a strong and useful finding from the original pack. - ---- - -### AOP-05: Ontology and schema drift need an explicit migration strategy -**Severity:** **High** -**Evidence basis:** Observed + inferred -**Confidence:** Medium-High - -`aop-mcp` sits near an evolving ontology surface. That means long-lived interoperability requires more than normalization at read time. - -### Recommended control -- record ontology/version provenance in artifacts -- maintain deprecation and remapping tables -- define migration tests for cross-suite schemas -- avoid burying semantic version assumptions inside tool logic - ---- - -## Recommended sequence - -### Immediate -- redesign unsafe query construction -- add resilience controls around SPARQL/upstream failure -- strengthen draft metadata and checksum semantics - -### Next -- formalize ontology/version provenance -- add migration tests and compatibility policy -- align traceability with suite-wide provenance model - ---- - -## Validation backlog specific to this repo - -- confirm which query components can be influenced by untrusted input at runtime -- confirm endpoint permissions and whether any update semantics are reachable -- test checksum recomputation from draft content -- verify how ontology version changes propagate into downstream consumers - ---- - -## Related documents - -- `toxmcp_security_audit_report.md` -- `toxmcp_contract_audit_report.md` -- `toxmcp_regulatory_audit_report.md` -- `aop-mcp-audit/REMEDIATION_CODE.md` diff --git a/ToxMCP_Audit_Reviewed_v2/aop-mcp-audit/REMEDIATION_CODE.md b/ToxMCP_Audit_Reviewed_v2/aop-mcp-audit/REMEDIATION_CODE.md deleted file mode 100644 index d62b090..0000000 --- a/ToxMCP_Audit_Reviewed_v2/aop-mcp-audit/REMEDIATION_CODE.md +++ /dev/null @@ -1,710 +0,0 @@ -# AOP-MCP: Detailed Remediation Code - -> **Reviewed copy note:** Treat these snippets as reference patterns. Do **not** pass arbitrary structural query fragments from untrusted input; use allow-listed query plans and bind only literals/URIs. - - -## 1. 
Parameterized SPARQL Queries (Injection Prevention)
-
-**Reviewed caution:** Bind values safely, but keep query *structure* fixed. `ORDER BY`, `LIMIT`, graph patterns, and predicate choices should come from allow-lists, not directly from user input.
-
-**File:** `src/adapters/sparql_client.py`
-
-```python
-import re
-from typing import Any, Dict, List, Mapping, Tuple
-
-import httpx
-from rdflib import Literal, URIRef
-
-class SafeSparqlClient:
-    """SPARQL client with parameterized query support."""
-
-    def __init__(self, endpoints: List[SparqlEndpoint]):
-        self._endpoints = endpoints
-        self._client = httpx.AsyncClient()
-        self._template_cache: Dict[str, str] = {}
-
-    def _get_template(self, name: str) -> str:
-        """Look up a registered query template; unknown names fail loudly."""
-        try:
-            return self._template_cache[name]
-        except KeyError as exc:
-            raise KeyError(f"Unknown query template: {name}") from exc
-
-    def render(self, name: str, parameters: Mapping[str, Any] | None = None) -> Tuple[str, Dict]:
-        """
-        Render SPARQL template with safe parameter binding.
-
-        Returns:
-            Tuple of (query_string, bindings_dict)
-        """
-        template = self._get_template(name)
-        params = parameters or {}
-
-        # Extract parameter placeholders from template
-        placeholders = self._extract_placeholders(template)
-
-        # Validate all parameters are provided
-        missing = placeholders - set(params.keys())
-        if missing:
-            raise ValueError(f"Missing parameters for template {name}: {missing}")
-
-        # Convert parameters to RDFLib types
-        bindings = {}
-        for key, value in params.items():
-            bindings[key] = self._convert_to_rdf_type(value)
-
-        # Replace placeholders in template with variable references
-        query_string = self._substitute_placeholders(template, placeholders)
-
-        return query_string, bindings
-
-    def _extract_placeholders(self, template: str) -> set:
-        """Extract {placeholder} patterns from template."""
-        pattern = r'\{(\w+)\}'
-        return set(re.findall(pattern, template))
-
-    def _convert_to_rdf_type(self, value: Any) -> Any:
-        """Convert Python value to appropriate RDFLib type."""
-        if isinstance(value, str):
-            # Check if it's a URI
-            if value.startswith('http://') or value.startswith('https://'):
-                return URIRef(value)
-            # Otherwise treat as literal
-            return Literal(value)
-        elif isinstance(value, bool):
-            # bool must be checked before int/float because bool subclasses int
-            return Literal(value)
-        elif isinstance(value, (int, float)):
-            return Literal(value)
-        else:
-            return Literal(str(value))
-
-    def _substitute_placeholders(self, template: str, placeholders: set) -> str:
-        """Replace {placeholder} with ?placeholder for SPARQL variable binding."""
-        result = template
-        for placeholder in placeholders:
-            result = result.replace(f'{{{placeholder}}}', f'?{placeholder}')
-        return result
-
-    async def query(self, name: str, parameters: Mapping[str, Any] | None = None) -> dict:
-        """Execute parameterized SPARQL query."""
-        query_string, bindings = self.render(name, parameters)
-
-        # rdflib's prepareQuery targets local graphs, not remote endpoints,
-        # so the rendered string is dispatched directly with typed bindings
-        return await self._execute_with_bindings(query_string, bindings)
-
-    async def _execute_with_bindings(self, query_string: str, bindings: Dict) -> dict:
-        """Execute rendered query with typed parameter bindings."""
-        # Substitute typed values into the rendered query; the RDFLib types
-        # constrain what each binding can contain before it reaches query text
-        bound_query = query_string
-
-        for var_name, value in bindings.items():
-            # Replace ?var with bound value
-            if isinstance(value, URIRef):
-                bound_query = bound_query.replace(f'?{var_name}', f'<{value}>')
-            elif isinstance(value, Literal):
-                # Properly escape literal values
-                escaped = str(value).replace('\\', '\\\\').replace('"', '\\"')
-                
bound_query = bound_query.replace(f'?{var_name}', f'"{escaped}"') - - return await self._dispatch(bound_query) - - -# Template example (search_aops.sparql) -SAFE_SEARCH_AOPS_TEMPLATE = """ -SELECT DISTINCT ?aop ?title ?shortName -WHERE {{ - ?aop a aopo:AdverseOutcomePathway ; - dc:title ?title . - - # Safe parameter binding with ?variable syntax - {search_bindings} - - FILTER ({search_filter}) -}} -ORDER BY {order_by} -LIMIT {limit} -""" - -# Usage example -async def search_aops_safe(chemical_name: str): - client = SafeSparqlClient(endpoints) - - # Parameters are only safe if structural query parts are fixed or allow-listed; do not treat arbitrary graph fragments as bindable user input - result = await client.query("search_aops", { - "search_bindings": "?aop aopo:hasMIE ?mie . ?mie dc:title ?chemicalName .", - "search_filter": "CONTAINS(LCASE(?chemicalName), LCASE(?chemicalNameParam))", - "order_by": "?title", - "limit": "100", - "chemicalNameParam": chemical_name # This is safely bound as Literal - }) - - return result -``` - ---- - -> **Reviewed copy (2026-04-15):** This document was retained from the original package but lightly edited for consistency. -> Unless explicitly stated otherwise, code blocks are **reference implementations**, not validated patches, and scenario-based exploit narratives should not be read as reproduced proofs. - - - -## 2. Circuit Breaker for SPARQL Endpoints - -**File:** `src/adapters/sparql_client.py` - -```python -import asyncio -import random -from enum import Enum -from dataclasses import dataclass -from typing import Optional -import time - -class CircuitState(Enum): - CLOSED = "closed" # Normal operation - OPEN = "open" # Failing, reject requests - HALF_OPEN = "half_open" # Testing if recovered - -@dataclass -class CircuitBreakerConfig: - failure_threshold: int = 5 - recovery_timeout: float = 30.0 - half_open_max_calls: int = 1 - success_threshold: int = 2 - -class SparqlCircuitBreaker: - """Circuit breaker for SPARQL endpoint protection.""" - - def __init__(self, config: CircuitBreakerConfig = None): - self.config = config or CircuitBreakerConfig() - self.state = CircuitState.CLOSED - self.failure_count = 0 - self.success_count = 0 - self.last_failure_time: Optional[float] = None - self.half_open_calls = 0 - self._lock = asyncio.Lock() - - async def call(self, func, *args, **kwargs): - """Execute function with circuit breaker protection.""" - async with self._lock: - if self.state == CircuitState.OPEN: - if self._should_attempt_reset(): - self.state = CircuitState.HALF_OPEN - self.half_open_calls = 0 - else: - raise CircuitBreakerOpen("SPARQL endpoint circuit breaker is OPEN") - - if self.state == CircuitState.HALF_OPEN: - if self.half_open_calls >= self.config.half_open_max_calls: - raise CircuitBreakerOpen("Circuit breaker half-open limit reached") - self.half_open_calls += 1 - - # Execute the call - try: - result = await func(*args, **kwargs) - await self._on_success() - return result - except Exception as e: - await self._on_failure() - raise - - def _should_attempt_reset(self) -> bool: - """Check if enough time has passed to try reset.""" - if self.last_failure_time is None: - return True - elapsed = time.time() - self.last_failure_time - return elapsed >= self.config.recovery_timeout - - async def _on_success(self): - """Handle successful call.""" - async with self._lock: - if self.state == CircuitState.HALF_OPEN: - self.success_count += 1 - if self.success_count >= self.config.success_threshold: - self.state = CircuitState.CLOSED - 
self.failure_count = 0 - self.success_count = 0 - else: - self.failure_count = max(0, self.failure_count - 1) - - async def _on_failure(self): - """Handle failed call.""" - async with self._lock: - self.failure_count += 1 - self.last_failure_time = time.time() - - if self.state == CircuitState.HALF_OPEN: - self.state = CircuitState.OPEN - elif self.failure_count >= self.config.failure_threshold: - self.state = CircuitState.OPEN - -class CircuitBreakerOpen(Exception): - """Exception raised when circuit breaker is open.""" - pass - - -# Integration with SPARQL client -class ResilientSparqlClient(SafeSparqlClient): - """SPARQL client with circuit breaker and retry logic.""" - - def __init__(self, endpoints: List[SparqlEndpoint]): - super().__init__(endpoints) - self.circuit_breakers = { - endpoint.url: SparqlCircuitBreaker() - for endpoint in endpoints - } - - async def _dispatch( - self, - query: str, - *, - timeout: float | None = None, - max_retries: int = 3 - ) -> dict[str, Any]: - """Dispatch with circuit breaker and exponential backoff.""" - last_error: Exception | None = None - - for endpoint in self._endpoints: - circuit_breaker = self.circuit_breakers[endpoint.url] - - for attempt in range(max_retries): - try: - # Use circuit breaker - result = await circuit_breaker.call( - self._execute_single, - endpoint, - query, - timeout - ) - return result - - except CircuitBreakerOpen: - # Skip to next endpoint - break - except Exception as exc: - last_error = exc - - # Exponential backoff with jitter - if attempt < max_retries - 1: - delay = (2 ** attempt) + random.uniform(0, 1) - await asyncio.sleep(delay) - - raise SparqlUpstreamError(f"All endpoints failed: {last_error}") - - -# Fallback mechanism -class SparqlClientWithFallback(ResilientSparqlClient): - """SPARQL client with fallback to cache on failure.""" - - def __init__(self, endpoints: List[SparqlEndpoint], cache: Cache): - super().__init__(endpoints) - self.cache = cache - - async def query_with_fallback( - self, - name: str, - parameters: Mapping[str, Any] | None = None, - use_cache_on_failure: bool = True - ) -> dict: - """Query with fallback to cache on failure.""" - cache_key = f"{name}:{hash(str(parameters))}" - - try: - # Try live query - result = await self.query(name, parameters) - - # Cache successful result - await self.cache.set(cache_key, result, ttl=3600) - - return result - - except SparqlUpstreamError as e: - if not use_cache_on_failure: - raise - - # Try cache fallback - cached = await self.cache.get(cache_key) - if cached: - return { - "results": cached, - "source": "cache", - "warning": "Results from cache due to upstream failure" - } - - # Return empty result with warning - return { - "results": [], - "source": "fallback", - "warning": f"Upstream failure: {e}. No cached data available." - } -``` - ---- - -## 3. 
Electronic Signatures (21 CFR Part 11)
-
-**File:** `src/services/draft_store/signing.py`
-
-```python
-import base64
-import hashlib
-from datetime import datetime, timezone
-from typing import List, Literal
-
-from cryptography import x509
-from cryptography.exceptions import InvalidSignature
-from cryptography.hazmat.backends import default_backend
-from cryptography.hazmat.primitives import hashes, serialization
-from cryptography.hazmat.primitives.asymmetric import padding
-from pydantic import BaseModel
-
-class ElectronicSignature(BaseModel):
-    """Electronic signature per 21 CFR Part 11."""
-
-    signer_user_id: str
-    signature_meaning: Literal["authored", "reviewed", "approved"]
-    timestamp_utc: str
-    content_hash: str  # SHA-256 of signed content
-    signature_value: str  # Base64-encoded signature
-    cert_chain: List[str]  # PEM-encoded certificates
-
-    def verify(self, content: bytes, trusted_certs: List[str]) -> bool:
-        """Verify signature against content."""
-        # Verify content hash
-        computed_hash = hashlib.sha256(content).hexdigest()
-        if computed_hash != self.content_hash:
-            return False
-
-        # Verify signature
-        try:
-            public_key = self._extract_public_key()
-            signature_bytes = base64.b64decode(self.signature_value)
-
-            public_key.verify(
-                signature_bytes,
-                self.content_hash.encode(),
-                padding.PSS(
-                    mgf=padding.MGF1(hashes.SHA256()),
-                    salt_length=padding.PSS.MAX_LENGTH
-                ),
-                hashes.SHA256()
-            )
-            return True
-        except InvalidSignature:
-            return False
-
-    def _extract_public_key(self):
-        """Extract public key from certificate chain."""
-        if not self.cert_chain:
-            raise ValueError("No certificate chain provided")
-
-        cert_pem = self.cert_chain[0]
-        # Certificate loading lives in cryptography.x509, not in serialization
-        cert = x509.load_pem_x509_certificate(cert_pem.encode())
-        return cert.public_key()
-
-class SignatureService:
-    """Service for creating and verifying electronic signatures."""
-
-    def __init__(self, private_key_path: str, cert_path: str):
-        self.private_key = self._load_private_key(private_key_path)
-        self.certificate = self._load_certificate(cert_path)
-
-    def sign_content(
-        self,
-        content: bytes,
-        signer_user_id: str,
-        meaning: Literal["authored", "reviewed", "approved"]
-    ) -> ElectronicSignature:
-        """Sign content electronically."""
-        # Compute content hash
-        content_hash = hashlib.sha256(content).hexdigest()
-
-        # Create signature
-        signature = self.private_key.sign(
-            content_hash.encode(),
-            padding.PSS(
-                mgf=padding.MGF1(hashes.SHA256()),
-                salt_length=padding.PSS.MAX_LENGTH
-            ),
-            hashes.SHA256()
-        )
-
-        return ElectronicSignature(
-            signer_user_id=signer_user_id,
-            signature_meaning=meaning,
-            timestamp_utc=datetime.now(timezone.utc).isoformat(),
-            content_hash=content_hash,
-            signature_value=base64.b64encode(signature).decode(),
-            cert_chain=[self.certificate]
-        )
-
-    def _load_private_key(self, path: str):
-        """Load private key from file."""
-        with open(path, "rb") as f:
-            return serialization.load_pem_private_key(
-                f.read(),
-                password=None,
-                backend=default_backend()
-            )
-
-    def _load_certificate(self, path: str) -> str:
-        """Load certificate from file."""
-        with open(path, "r") as f:
-            return f.read()
-
-
-# Integration with draft store
-from dataclasses import dataclass, field
-
-@dataclass
-class VersionMetadata:
-    """Version metadata with electronic signatures."""
-
-    author: str
-    signatures: List[ElectronicSignature] = field(default_factory=list)
-    checksum: str = ""  # REQUIRED
-    previous_checksum: str = ""  # REQUIRED
-    created_at: str = field(default_factory=lambda: 
datetime.now(timezone.utc).isoformat())
-
-    def add_signature(self, signature: ElectronicSignature):
-        """Add electronic signature."""
-        self.signatures.append(signature)
-
-    def verify_signatures(self, content: bytes, trusted_certs: List[str]) -> bool:
-        """Verify all signatures."""
-        if not self.signatures:
-            return False
-
-        for sig in self.signatures:
-            if not sig.verify(content, trusted_certs):
-                return False
-
-        return True
-
-class SignedDraftStore:
-    """Draft store with electronic signature support."""
-
-    def __init__(self, signature_service: SignatureService):
-        self.signature_service = signature_service
-
-    async def sign_draft(
-        self,
-        draft_id: str,
-        user_id: str,
-        meaning: Literal["authored", "reviewed", "approved"],
-        content: bytes
-    ):
-        """Sign a draft electronically."""
-        signature = self.signature_service.sign_content(
-            content=content,
-            signer_user_id=user_id,
-            meaning=meaning
-        )
-
-        draft = await self.get_draft(draft_id)
-        draft.metadata.add_signature(signature)
-
-        await self.save_draft(draft)
-
-    async def verify_draft(self, draft_id: str, trusted_certs: List[str]) -> bool:
-        """Verify all signatures on a draft."""
-        draft = await self.get_draft(draft_id)
-        content = await self.get_draft_content(draft_id)
-
-        return draft.metadata.verify_signatures(content, trusted_certs)
-```
-
----
-
-## 4. Ontology Migration Framework
-
-**File:** `src/semantic/migration.py`
-
-```python
-from typing import Any, Callable, Dict, List, Optional
-
-from pydantic import BaseModel
-
-class OntologyVersion(BaseModel):
-    """Ontology version identifier."""
-    name: str
-    version: str  # Semantic version
-
-class MigrationRule(BaseModel):
-    """Single migration rule."""
-    source_version: str
-    target_version: str
-    transformer: Callable[[Any], Any]
-    description: str
-
-class UnsupportedMigration(Exception):
-    """Raised when no migration path exists between two ontology versions."""
-
-class OntologyMigrator:
-    """Migrate data between ontology versions."""
-
-    def __init__(self):
-        self.migrations: Dict[str, List[MigrationRule]] = {}
-        self.term_mappings: Dict[str, Dict[str, str]] = {}
-
-    def register_migration(
-        self,
-        source: str,
-        target: str,
-        transformer: Callable[[Any], Any],
-        description: str = ""
-    ):
-        """Register a migration rule."""
-        key = f"{source}->{target}"
-        if key not in self.migrations:
-            self.migrations[key] = []
-
-        self.migrations[key].append(MigrationRule(
-            source_version=source,
-            target_version=target,
-            transformer=transformer,
-            description=description
-        ))
-
-    def register_term_mapping(self, version: str, mappings: Dict[str, str]):
-        """Register term mappings for a version transition."""
-        self.term_mappings[version] = mappings
-
-    def migrate(self, data: Any, from_version: str, to_version: str) -> Any:
-        """Migrate data from one version to another."""
-        if from_version == to_version:
-            return data
-
-        # Find migration path
-        path = self._find_migration_path(from_version, to_version)
-        if not path:
-            raise UnsupportedMigration(
-                f"No migration path from {from_version} to {to_version}"
-            )
-
-        # Apply migrations in sequence
-        result = data
-        for step in path:
-            result = self._apply_migration(result, step)
-
-        return result
-
-    def _find_migration_path(self, from_version: str, to_version: str) -> Optional[List[str]]:
-        """Find shortest migration path using BFS."""
-        # Simplified BFS - production would use proper graph algorithm
-        visited = {from_version}
-        queue = [(from_version, [])]
-
-        while queue:
-            current, path = queue.pop(0)
-
-            if current == to_version:
-                return path
-
-            # Find all possible next versions
-            for key in self.migrations:
-                if key.startswith(f"{current}->"):
-                    
next_version = key.split("->")[1] - if next_version not in visited: - visited.add(next_version) - queue.append((next_version, path + [key])) - - return None - - def _apply_migration(self, data: Any, migration_key: str) -> Any: - """Apply a single migration step.""" - rules = self.migrations.get(migration_key, []) - - for rule in rules: - data = rule.transformer(data) - - # Apply term mappings - version = migration_key.split("->")[1] - if version in self.term_mappings: - data = self._apply_term_mappings(data, self.term_mappings[version]) - - return data - - def _apply_term_mappings(self, data: Any, mappings: Dict[str, str]) -> Any: - """Apply term mappings to data.""" - if isinstance(data, dict): - return { - mappings.get(k, k): self._apply_term_mappings(v, mappings) - for k, v in data.items() - } - elif isinstance(data, list): - return [self._apply_term_mappings(item, mappings) for item in data] - elif isinstance(data, str): - return mappings.get(data, data) - return data - - -# Predefined migrations -migrator = OntologyMigrator() - -# AOP ontology v1 to v2 migration -migrator.register_term_mapping("aop-ontology-v2", { - "AOP:123": "AOP:123v2", - "KE:456": "KE:456v2", - "KER:789": "KER:789v2", -}) - -def migrate_aop_structure_v1_to_v2(data: dict) -> dict: - """Migrate AOP structure from v1 to v2.""" - if "key_events" in data: - # v2 uses 'key_event_relationships' instead of 'key_events' - data["key_event_relationships"] = data.pop("key_events") - - if "molecular_initiating_event" in data: - # v2 nests MIE under 'events' - data["events"] = { - "molecular_initiating_event": data.pop("molecular_initiating_event") - } - - return data - -migrator.register_migration( - source="aop-ontology-v1", - target="aop-ontology-v2", - transformer=migrate_aop_structure_v1_to_v2, - description="Migrate AOP structure to v2 format" -) - - -# Usage in CURIE service -class MigratingCurieService: - """CURIE service with migration support.""" - - def __init__(self, migrator: OntologyMigrator): - self.migrator = migrator - self.current_version = "aop-ontology-v2" - - def normalize(self, value: str, target_version: str = None) -> str: - """Normalize CURIE with optional version migration.""" - # Extract version from CURIE if present - curie_version = self._extract_version(value) - - if curie_version and curie_version != (target_version or self.current_version): - # Need to migrate - data = {"curie": value} - migrated = self.migrator.migrate( - data, - from_version=curie_version, - to_version=target_version or self.current_version - ) - return migrated["curie"] - - return value - - def _extract_version(self, curie: str) -> Optional[str]: - """Extract version from CURIE if present.""" - # Example: AOP:123v2 -> aop-ontology-v2 - if "v" in curie: - parts = curie.split(":") - if len(parts) == 2: - id_part = parts[1] - if "v" in id_part: - version = id_part.split("v")[-1] - return f"aop-ontology-v{version}" - return None -``` - ---- - -*These remediation code snippets address the critical issues identified in the AOP-MCP audit.* diff --git a/ToxMCP_Audit_Reviewed_v2/cognitive_ergonomics_audit_report.md b/ToxMCP_Audit_Reviewed_v2/cognitive_ergonomics_audit_report.md deleted file mode 100644 index ae11fbf..0000000 --- a/ToxMCP_Audit_Reviewed_v2/cognitive_ergonomics_audit_report.md +++ /dev/null @@ -1,413 +0,0 @@ -# ToxMCP Suite - Cognitive Ergonomics Audit Report - -**Auditor:** Cognitive Ergonomics Designer -**Date:** April 2026 -**Scope:** comptox-mcp, oqt-mcp, aop-mcp, pbpk-mcp - ---- - -> **Reviewed copy (2026-04-15):** 
This document was retained from the original package but lightly edited for consistency. -> Unless explicitly stated otherwise, code blocks are **reference implementations**, not validated patches, and scenario-based exploit narratives should not be read as reproduced proofs. - - - -## Executive Summary - -This audit identifies **critical cognitive ergonomics failures** in the ToxMCP ecosystem that could lead scientists to erroneous conclusions. The suite enables rapid "audit-ready" PDF generation without adequate safeguards, creating a dangerous illusion of scientific rigor. - -### Key Finding: The "Foot-Gun" Pattern - -The ToxMCP suite provides powerful automation for toxicology workflows but lacks critical guardrails that prevent non-programmer scientists from: -1. Accepting ambiguous chemical identifications -2. Trusting unverified PDF outputs as "audit-ready" -3. Proceeding without human verification of critical assumptions -4. Conflating units across different measurement contexts - ---- - -## 🔴 CRITICAL FINDINGS - -### CR-001: No Mandatory Scientific Review Mode for Autonomous Chains - -**Severity:** 🔴 CRITICAL -**File:** `oqt-mcp/src/tools/implementations/workflow_runner.py` (lines 60-98) -**Cross-ref:** CR-002, CR-003 - -**Issue:** The workflow runner can fully automate a safety assessment from chemical search to PDF generation without requiring human verification of critical assumptions at any point. - -```python -# From workflow_runner.py - WorkflowParams class -class WorkflowParams(BaseModel): - identifier: str = Field(..., description="Chemical identifier") - search_type: str = Field("auto", description="How to interpret the identifier") - # ... no mandatory review checkpoint parameter - qsar_mode: str = Field("recommended", description="QSAR execution preset") -``` - -**How Scientists Could Be Misled:** -- A non-programmer scientist could run `run_workflow` with a chemical name -- The system could resolve to the wrong chemical (e.g., wrong isomer) -- QSAR predictions would run on the wrong substance -- A PDF would be generated with "audit-ready" claims -- The scientist would have no indication that verification was needed - -**Missing Safeguard:** There is no `require_human_review: true` parameter that forces a pause for verification before proceeding to predictive modeling. - ---- - -### CR-002: PDF Generator Lacks Provenance Tables by Default - -**Severity:** 🔴 CRITICAL -**File:** `oqt-mcp/src/utils/pdf_generator.py` (lines 1-104) -**Cross-ref:** CR-001, HG-001 - -**Issue:** The PDF generator creates "audit-ready" reports without mandatory provenance tables showing data sources, versions, and confidence levels. - -```python -# From pdf_generator.py - _build_content function -lines = [ - "O-QT MCP Workflow Report", - "", - f"Generated: {datetime.utcnow().isoformat(timespec='seconds')}Z", - "", -] -lines.append("Summary") -# ... NO provenance table included by default -``` - -**How Scientists Could Be Misled:** -- PDF appears professional and complete -- No visible indication of which QSAR models were used -- No version information for the OECD QSAR Toolbox -- No confidence intervals or applicability domain warnings visible -- Scientist presents PDF to regulators as "audit-ready" evidence - -**Missing Safeguard:** No `include_provenance_table: true` default parameter. 
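-
-A minimal sketch of the missing default, assuming a hypothetical helper next to `_build_content` (the field names are illustrative, not the module's actual API):
-
-```python
-from datetime import datetime, timezone
-from typing import Dict, List
-
-
-def build_provenance_lines(sources: List[Dict[str, str]]) -> List[str]:
-    """Render a provenance section; missing metadata is surfaced, never silently omitted."""
-    lines = ["", "Provenance", ""]
-    lines.append(f"Section generated: {datetime.now(timezone.utc).isoformat(timespec='seconds')}")
-    for source in sources:
-        name = source.get("name", "UNKNOWN SOURCE")
-        version = source.get("version", "VERSION NOT RECORDED")
-        retrieved = source.get("retrieved_at", "RETRIEVAL TIME NOT RECORDED")
-        lines.append(f"- {name} | version: {version} | retrieved: {retrieved}")
-    return lines
-```
-
-Rendering an absent version or retrieval timestamp as loud placeholder text, rather than dropping the row, keeps the "audit-ready" claim honest.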
- ---- - -### CR-003: Confirmation Bias Amplification Through Rapid PDF Generation - -**Severity:** 🔴 CRITICAL -**File:** `oqt-mcp/src/tools/implementations/workflow_runner.py` (lines 330-392) -**Cross-ref:** CR-001, MD-001 - -**Issue:** The system generates PDFs quickly without any "red team" analysis that would surface contradictory evidence or alternative hypotheses. - -```python -# From workflow_runner.py - artifact generation -artifacts = { - "json": _build_artifact_entry(...), - "markdown": _build_artifact_entry(...), - "pdf": _build_artifact_entry(...), # Always generates PDF -} -``` - -**How Scientists Could Be Misled:** -- First result is presented as "the" result -- No automatic generation of alternative interpretations -- No highlighting of data gaps or conflicting evidence -- PDF format creates false sense of finality -- Scientist stops investigating after seeing first "positive" result - -**Missing Safeguard:** No `generate_alternative_hypotheses: true` option or `include_contradictory_evidence: true` parameter. - ---- - -## 🟠 HIGH SEVERITY FINDINGS - -### HG-001: Chemical Search Defaults to "auto" Without Warning - -**Severity:** 🟠 HIGH -**File:** `oqt-mcp/src/tools/implementations/o_qt_qsar_tools.py` (lines 61-67) -**Cross-ref:** CR-001 - -**Issue:** The `search_chemicals` tool defaults to `search_type: "auto"` which may silently match the wrong chemical. - -```python -class ChemicalSearchParams(BaseModel): - query: str = Field(..., description="The search term") - search_type: str = Field( - "auto", # DEFAULT DANGER: Auto-detection can be wrong - description="Type of search (e.g., 'auto', 'name', 'cas', 'smiles')." - ) -``` - -**How Scientists Could Be Misled:** -- Scientist searches for "benzene" with default "auto" mode -- System might interpret as SMILES instead of name -- Returns wrong chemical or no results -- Scientist concludes chemical not in database -- Or worse: proceeds with incorrect chemical identification - -**Concrete Example:** -```python -# User searches for CAS "50-00-0" (formaldehyde) -# search_type="auto" might interpret as SMILES "50-00-0" -# Returns no results or wrong chemical -search_chemicals(query="50-00-0", search_type="auto") # DANGEROUS -``` - -**Missing Safeguard:** No warning when "auto" detection is uncertain; no explicit confirmation of chemical identity before proceeding. - ---- - -### HG-002: AOP Version Not Captured in get_aop Output - -**Severity:** 🟠 HIGH -**File:** `aop-mcp/src/server/tools/aop.py` (lines 52-70) -**Cross-ref:** MD-001 - -**Issue:** The `get_aop` tool fetches current AOP-Wiki data without capturing the specific version or timestamp, making reproducibility impossible. 
- -```python -class GetAopInput(BaseModel): - aop_id: str # No version parameter - -async def get_aop(params: GetAopInput) -> dict[str, Any]: - wiki_adapter = get_aop_wiki_adapter() - db_adapter = get_aop_db_adapter() - core_record, assessment_record, stressor_records = await asyncio.gather( - wiki_adapter.get_aop(params.aop_id), # No version specified - wiki_adapter.get_aop_assessment(params.aop_id), - db_adapter.list_stressor_chemicals_for_aop(params.aop_id), - ) -``` - -**How Scientists Could Be Misled:** -- Scientist runs assessment in January, AOP has 3 key events -- AOP is updated in March with new key event -- Scientist re-runs same query in April -- Results are different but no warning is given -- Scientist doesn't realize conclusions have changed -- Regulatory submission contains inconsistent assessments - -**Missing Safeguard:** No `version` parameter; no `retrieved_at` timestamp in output; no warning when AOP has been modified since last retrieval. - ---- - -### HG-003: Unit Fields Present But Not Validated - -**Severity:** 🟠 HIGH -**File:** `pbpk-mcp/src/mcp_bridge/routes/simulation.py` (lines 200-218) -**Cross-ref:** MD-002 - -**Issue:** Unit fields exist in the schema but there's no validation to prevent unit confusion errors. - -```python -class SetParameterValueRequest(GetParameterValueRequest): - value: float - unit: Optional[str] = None # Present but not validated - update_mode: Optional[str] = Field(default="absolute", alias="updateMode") - comment: Optional[str] = None - confirm: Optional[bool] = None -``` - -**How Scientists Could Be Misled:** -- Scientist sets liver volume to "1.5" with unit "L" (liters) -- System expects "mL" (milliliters) -- Simulation runs with 1000x wrong volume -- PK parameters are calculated incorrectly -- No error is raised; results appear valid - -**Missing Safeguard:** No unit validation against expected units; no conversion warnings; no dimensional analysis. - ---- - -### HG-004: Confirmation System Can Be Bypassed - -**Severity:** 🟠 HIGH -**File:** `pbpk-mcp/src/mcp_bridge/security/confirmation.py` (lines 1-38) -**Cross-ref:** CR-001 - -**Issue:** The confirmation system for critical operations relies on a simple header check that can be easily bypassed by automated agents. - -```python -_TRUE_VALUES = {"true", "1", "yes", "y", "confirmed"} - -def is_confirmed(request: Request) -> bool: - header_value = request.headers.get(CONFIRMATION_HEADER) - if not header_value: - return False - return header_value.split(",")[0].strip().lower() in _TRUE_VALUES -``` - -**How Scientists Could Be Misled:** -- Agent chain includes `confirm: true` in all requests -- Critical operations proceed without actual human review -- Scientist believes system has "guardrails" -- In reality, guardrails are cosmetic only - -**Missing Safeguard:** No out-of-band confirmation (e.g., email, separate UI); no rate limiting on confirmations; no audit of who confirmed. - ---- - -## 🟡 MEDIUM SEVERITY FINDINGS - -### MD-001: Temporal Confusion in AOP Assessment - -**Severity:** 🟡 MEDIUM -**File:** `aop-mcp/src/server/tools/aop.py` (lines 152-291) -**Cross-ref:** HG-002 - -**Issue:** The `assess_aop_confidence` tool aggregates evidence without tracking when each piece of evidence was added or modified. - -```python -async def assess_aop_confidence(params: AssessAopConfidenceInput) -> dict[str, Any]: - # ... 
fetches current data - confidence_dimensions = _build_confidence_dimensions(aop, key_event_details, ker_details) - # No temporal metadata about when evidence was added -``` - -**How Scientists Could Be Misled:** -- Assessment shows "strong" empirical support -- Scientist doesn't realize evidence was added last week -- Previous assessment from 3 months ago showed "moderate" -- No way to track when confidence changed or why - -**Missing Safeguard:** No `evidence_timestamp` field; no `assessment_version` tracking. - ---- - -### MD-002: Unit Ambiguity in PK Parameter Output - -**Severity:** 🟡 MEDIUM -**File:** `pbpk-mcp/src/mcp_bridge/routes/simulation.py` (lines 314-326) -**Cross-ref:** HG-003 - -**Issue:** PK parameter units are returned as strings without standardized formatting, risking misinterpretation. - -```python -class PkMetricModel(CamelModel): - parameter: str - unit: Optional[str] = None # Free text, not validated - cmax: Optional[float] = Field(default=None, alias="cmax") - tmax: Optional[float] = Field(default=None, alias="tmax") - auc: Optional[float] = Field(default=None, alias="auc") -``` - -**How Scientists Could Be Misled:** -- AUC returned as "10" with unit "mg/L*h" -- Scientist interprets as "10 mg/(L*h)" when it's "(10 mg/L)*h" -- Dosing calculations are off by orders of magnitude - -**Missing Safeguard:** No standardized unit format (e.g., UCUM); no unit validation; no dimensional analysis. - ---- - -### MD-003: Fallback Search Mode Silently Changes Results - -**Severity:** 🟡 MEDIUM -**File:** `comptox-mcp/src/epacomp_tox/resources/chemical.py` (lines 450-547) -**Cross-ref:** HG-001 - -**Issue:** The `resolve_chemical_identifier` tool uses fallback search modes without requiring explicit user acknowledgment. - -```python -def resolve_chemical_identifier( - self, - *, - identifier: str, - identifier_type: Optional[str] = None, - allow_fallback: bool = False, # Must be explicitly set to True - max_candidates: int = 5, -) -> Dict[str, Any]: -``` - -**How Scientists Could Be Misled:** -- Scientist sets `allow_fallback=True` to handle edge cases -- Exact match fails, fallback to "contains" returns multiple candidates -- System returns "ambiguous" status but scientist's script ignores it -- First candidate is used without verification -- Wrong chemical proceeds through workflow - -**Missing Safeguard:** No mandatory pause when fallback is used; no requirement to explicitly select from candidates. - ---- - -### MD-004: QSAR Mode "recommended" Is Opaque - -**Severity:** 🟡 MEDIUM -**File:** `oqt-mcp/src/tools/implementations/workflow_runner.py` (lines 75-78) -**Cross-ref:** CR-001 - -**Issue:** The default `qsar_mode: "recommended"` doesn't explain which models are selected or why. - -```python -qsar_mode: str = Field( - "recommended", # What does "recommended" mean? - description="QSAR execution preset (`recommended`, `all`, or `none`).", -) -``` - -**How Scientists Could Be Misled:** -- Scientist uses default "recommended" mode -- Doesn't realize only 3 of 15 available models were run -- Reports "QSAR analysis complete" when it was partial -- Regulator assumes comprehensive analysis was performed - -**Missing Safeguard:** No transparency about which models are in "recommended" set; no warning when models are excluded. 
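
One inexpensive mitigation is to expand the preset into an explicit, reportable selection before any model runs. The sketch below is illustrative only: `MODEL_PRESETS`, `resolve_qsar_models`, and the model IDs are assumptions for this review, not the actual oqt-mcp implementation.

```python
from typing import Dict, List, Optional

# Hypothetical preset registry; model IDs are illustrative, not the real oqt-mcp catalog.
MODEL_PRESETS: Dict[str, Optional[List[str]]] = {
    "recommended": ["TEST_4.2", "OPERA_2.9", "VEGA_CONSENSUS"],
    "all": None,  # None means "every registered model"
    "none": [],
}

def resolve_qsar_models(qsar_mode: str, registered_models: List[str]) -> Dict[str, object]:
    """Expand a preset so partial coverage is always visible in the output."""
    if qsar_mode not in MODEL_PRESETS:
        raise ValueError(f"Unknown qsar_mode: {qsar_mode!r}")
    preset = MODEL_PRESETS[qsar_mode]
    selected = list(registered_models) if preset is None else [
        m for m in preset if m in registered_models
    ]
    excluded = [m for m in registered_models if m not in selected]
    return {
        "qsar_mode": qsar_mode,
        "models_selected": selected,
        "models_excluded": excluded,  # surfaced so "recommended" is never silently partial
        "coverage": f"{len(selected)}/{len(registered_models)} registered models",
    }
```

Attaching `models_selected`, `models_excluded`, and `coverage` to the workflow record (and to the final PDF) would make a partial run self-documenting rather than silently incomplete.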
- ---- - -## CROSS-REFERENCE MATRIX - -| Finding | CR-001 | CR-002 | CR-003 | HG-001 | HG-002 | HG-003 | HG-004 | MD-001 | MD-002 | MD-003 | MD-004 | -|---------|--------|--------|--------|--------|--------|--------|--------|--------|--------|--------|--------| -| CR-001 | - | X | X | X | | | X | | | | X | -| CR-002 | X | - | X | | | | | | | | | -| CR-003 | X | X | - | | | | | X | | | | -| HG-001 | X | | | - | | | | | | X | | -| HG-002 | | | | | - | | | X | | | | -| HG-003 | | | | | | - | | | X | | | -| HG-004 | X | | | | | | - | | | | | -| MD-001 | | | X | | X | | | - | | | | -| MD-002 | | | | | | X | | | - | | | -| MD-003 | | | | X | | | | | | - | | -| MD-004 | X | | | | | | | | | | - | - ---- - -## RECOMMENDATIONS - -### Immediate Actions Required - -1. **Implement Mandatory Scientific Review Mode** - - Add `require_human_review: true` parameter to all workflow tools - - Require explicit acknowledgment before proceeding to predictive modeling - - Log reviewer identity and timestamp - -2. **Add Provenance Tables to All PDFs** - - Include data sources, versions, retrieval timestamps - - List all models used with confidence intervals - - Show applicability domain warnings prominently - -3. **Implement Red Team Analysis** - - Generate alternative hypotheses automatically - - Surface contradictory evidence - - Include confidence intervals and uncertainty quantification - -4. **Add Version Tracking to AOP Tools** - - Include `retrieved_at` timestamp in all outputs - - Warn when AOP has been modified since last retrieval - - Support explicit version selection - -5. **Implement Unit Validation** - - Use standardized unit formats (UCUM) - - Validate units against expected dimensions - - Require explicit unit confirmation for critical parameters - ---- - -## CONCLUSION - -The ToxMCP suite provides powerful automation capabilities but currently prioritizes convenience over scientific rigor. The lack of mandatory verification steps, combined with rapid PDF generation, creates a dangerous "foot-gun" pattern where well-intentioned scientists can unknowingly produce erroneous assessments. - -**The most critical issue is the absence of a mandatory scientific review mode.** An autonomous agent can currently execute a complete safety assessment workflow—from ambiguous chemical search to "audit-ready" PDF—without any human verification of critical assumptions. - -Without these safeguards, the ToxMCP suite risks becoming a tool for generating convincing-looking but potentially erroneous toxicology assessments. - ---- - -*Report generated by Cognitive Ergonomics Designer* -*For the ToxMCP Ecosystem Orchestrator* diff --git a/ToxMCP_Audit_Reviewed_v2/comptox-mcp-audit/README.md b/ToxMCP_Audit_Reviewed_v2/comptox-mcp-audit/README.md deleted file mode 100644 index 4c3007e..0000000 --- a/ToxMCP_Audit_Reviewed_v2/comptox-mcp-audit/README.md +++ /dev/null @@ -1,173 +0,0 @@ -# CompTox-MCP Audit Package (Reviewed Copy) - -**Repository:** `comptox-mcp` -**Package version cited in original audit:** `v0.2.2` -**Review date:** 2026-04-15 -**Overall posture:** **High risk for defensibility and provenance**, more than for classic appsec - ---- - -## How to read this reviewed copy - -This summary is designed to be safer to circulate internally than the original draft. - -- **Observed** means the claim is grounded in the supplied audit material. -- **Observed + inferred** means the material supports a broader architecture conclusion. 
-- **Scenario** means the issue is threat-model relevant but still needs runtime validation. - -This is **not** a live-repo re-audit. Line references were inherited from the supplied package. - ---- - -## Finding register - -| ID | Finding | Severity | Evidence basis | Confidence | Reviewed interpretation | -|---|---|---|---|---|---| -| CTX-01 | Upstream provenance / version capture is not first-class | **Critical** | Observed + inferred | Medium-High | Historical outputs may be hard to defend if provider versions or snapshots are not recorded | -| CTX-02 | Audit trail can fall back to ordinary logging semantics | **Critical** | Observed | High | Tamper evidence and reconstruction are weaker than they should be | -| CTX-03 | Retry strategy lacks mature backoff/jitter guidance | **High** | Observed | Medium | Could amplify upstream instability under load | -| CTX-04 | Transport and protocol handling is locally implemented | **High** | Observed | Medium-High | Migration cost and consistency risk increase as MCP evolves | -| CTX-05 | Upstream data integrity relies heavily on external providers | **High** | Scenario | Medium | Provenance and consistency controls should not depend on unsupported supplier features | - ---- - -## Why this repo matters in the suite - -`comptox-mcp` is a provenance-sensitive edge of the ToxMCP system because it often sits near: -- upstream evidence retrieval -- identity resolution and hazard context -- hand-off into downstream reasoning - -That means small omissions here can cascade into larger suite-level defensibility gaps later. - ---- - -## Detailed findings - -### CTX-01: Upstream provenance / version capture is incomplete -**Severity:** **Critical** -**Evidence basis:** Observed + inferred -**Confidence:** Medium-High - -The original audit correctly flagged that the package does not clearly show a robust mechanism to record: -- upstream provider version or release identifier -- data snapshot or retrieval timestamp -- request parameters used -- response hash or cache key -- how that metadata is persisted into downstream workflow records - -### Reviewed wording -The strongest defensible claim is **not** that every upstream supports strict version pinning. -It is that the current package does not show a reliable suite-level way to **capture and replay upstream provenance**. - -### Recommended control -Use the strongest control the provider actually supports: -1. if the provider exposes a version/snapshot selector, record and enforce it -2. if not, capture request URL, query params, retrieval time, response hash, and cache identity -3. persist that metadata into the workflow/provenance envelope -4. prefer an internal retrieval proxy if deterministic replay is a requirement - -> Do **not** assume that custom headers like `X-API-Version` or `X-Data-Snapshot` are supported unless the upstream provider documents them. - ---- - -### CTX-02: Audit trail design is weaker than required for defensibility -**Severity:** **Critical** -**Evidence basis:** Observed -**Confidence:** High - -The original audit’s concern about fallback-to-logging behavior remains strong. If audit events can devolve into ordinary logs without: -- chain validation -- content-addressed records -- user/session context -- immutable or append-controlled storage semantics - -then the resulting trail is unlikely to support strong post-hoc reconstruction. 
- -### Reviewed wording -Use: **high risk of non-conformance for regulated or high-assurance use** -Avoid: automatic claims of guaranteed regulatory rejection. - -### Recommended control -- define a canonical audit-event envelope -- include prior hash / content hash -- bind event to actor, session, tool, input identity, and upstream provenance -- verify the chain on read, not only on write - ---- - -### CTX-03: Retry behavior can worsen upstream instability -**Severity:** **High** -**Evidence basis:** Observed -**Confidence:** Medium - -This is a classic operational risk rather than a unique toxicology issue. Without jitter, bounded retries, and explicit failure-mode policy, a stressed upstream can trigger synchronized retries and unpredictable latency. - -### Recommended control -- exponential backoff with jitter -- hard retry caps -- surface upstream instability in provenance and alerts -- decide explicitly whether failures should be cached, retried later, or returned as partial results - ---- - -### CTX-04: Transport/protocol logic is fragmented -**Severity:** **High** -**Evidence basis:** Observed -**Confidence:** Medium-High - -The original package was directionally correct: local protocol handling increases long-term migration and consistency cost. - -### Recommended control -- centralize transport/version handling in a shared package or shared adapter layer -- keep server logic separate from transport concerns -- make capability/version negotiation testable at the boundary - ---- - -### CTX-05: Upstream integrity should not rely on unsupported supplier-side signing -**Severity:** **High** -**Evidence basis:** Scenario -**Confidence:** Medium - -The original audit’s concern about supplier dependence is valid, but the reviewed copy tightens the mitigation guidance. - -### Better control pattern -Prefer this order of controls: -1. TLS and authenticated transport where available -2. request/response provenance capture -3. cached response hashing -4. consistency checks across time or across sources for high-value conclusions -5. provider-side signatures **only if the provider actually supports them** - ---- - -## Recommended sequence - -### Immediate -- define the provenance fields that downstream repos must receive from `comptox-mcp` -- harden audit-event structure -- add retry jitter/backoff - -### Next -- align transport/version handling with the suite -- define provider capability matrix for versioning/snapshots -- add fix verification tests for audit chain and provenance persistence - ---- - -## Validation backlog specific to this repo - -- verify what upstream services actually expose for version or snapshot control -- confirm where provenance fields are persisted and consumed downstream -- test audit chain recomputation from stored content -- load-test retry behavior against realistic upstream failures - ---- - -## Related documents - -- `TOXMCP_MASTER_AUDIT_REPORT.md` -- `toxmcp_regulatory_audit_report.md` -- `toxmcp_future_proofing_audit_report.md` -- `comptox-mcp-audit/REMEDIATION_CODE.md` diff --git a/ToxMCP_Audit_Reviewed_v2/comptox-mcp-audit/REMEDIATION_CODE.md b/ToxMCP_Audit_Reviewed_v2/comptox-mcp-audit/REMEDIATION_CODE.md deleted file mode 100644 index 01c6457..0000000 --- a/ToxMCP_Audit_Reviewed_v2/comptox-mcp-audit/REMEDIATION_CODE.md +++ /dev/null @@ -1,649 +0,0 @@ -# CompTox-MCP: Detailed Remediation Code - -> **Reviewed copy note:** Treat these snippets as reference patterns. 
Do not assume upstream providers support custom version headers or response signing unless those features are documented by the provider.


## 1. Version Pinning for Upstream APIs

**Reviewed caution:** If an upstream provider does not expose explicit version or snapshot selectors, capture request/response provenance internally instead of inventing unsupported protocol features.

**File:** `src/epacomp_tox/client.py`

```python
from datetime import datetime
from typing import Dict, Optional
from pydantic import BaseModel
import httpx
import hashlib

class APIVersionConfig(BaseModel):
    """Configuration for API version pinning."""
    api_version: str  # e.g., "2024-01-15"
    data_snapshot_id: str  # e.g., "ds_2024_q1_v3"
    require_version_header: bool = True

class VersionedCompToxClient:
    """CompTox API client with version pinning."""

    def __init__(
        self,
        base_url: str = "https://comptox.epa.gov/ctx-api",
        version_config: Optional[APIVersionConfig] = None
    ):
        self.base_url = base_url
        self.version_config = version_config or APIVersionConfig(
            api_version="2024-01-15",
            data_snapshot_id="latest"
        )
        self.client = httpx.AsyncClient()
        self.response_cache: Dict[str, dict] = {}

    def _get_version_headers(self) -> Dict[str, str]:
        """Get version pinning headers."""
        headers = {}
        if self.version_config.require_version_header:
            headers["X-API-Version"] = self.version_config.api_version
            headers["X-Data-Snapshot"] = self.version_config.data_snapshot_id
        return headers

    async def get_chemical_detail(
        self,
        dtxsid: str,
        use_cache: bool = True
    ) -> dict:
        """Get chemical details with version pinning."""
        cache_key = f"{dtxsid}:{self.version_config.api_version}:{self.version_config.data_snapshot_id}"

        if use_cache and cache_key in self.response_cache:
            return self.response_cache[cache_key]

        url = f"{self.base_url}/chemical/detail/{dtxsid}"
        headers = self._get_version_headers()

        response = await self.client.get(url, headers=headers)
        response.raise_for_status()

        data = response.json()

        # Add version metadata
        data["_api_metadata"] = {
            "api_version": self.version_config.api_version,
            "data_snapshot_id": self.version_config.data_snapshot_id,
            "retrieved_at": datetime.utcnow().isoformat(),
            "response_hash": hashlib.sha256(response.content).hexdigest()[:16]
        }

        if use_cache:
            self.response_cache[cache_key] = data

        return data

    async def get_qsar_predictions(
        self,
        dtxsid: str,
        model_id: str
    ) -> dict:
        """Get QSAR predictions with model version tracking."""
        url = f"{self.base_url}/qsar/predictions/{dtxsid}"
        headers = self._get_version_headers()
        headers["X-QSAR-Model-ID"] = model_id

        response = await self.client.get(url, headers=headers)
        response.raise_for_status()

        data = response.json()

        # Add model version metadata
        data["_model_metadata"] = {
            "model_id": model_id,
            "model_version": response.headers.get("X-QSAR-Model-Version", "unknown"),
            "api_version": self.version_config.api_version,
            "retrieved_at": datetime.utcnow().isoformat()
        }

        return data


# Integration with workflow
class VersionedWorkflow:
    """Workflow with complete version tracking."""

    def __init__(self, client: VersionedCompToxClient):
        self.client = client

    async def run_assessment(self, dtxsid: str) -> dict:
        """Run chemical assessment with full version tracking."""
        # Get chemical details
        chemical = await self.client.get_chemical_detail(dtxsid)

        # Get QSAR predictions
        predictions = await self.client.get_qsar_predictions(
            dtxsid,
            model_id="TEST_4.2"
        )

        # Compile evidence with version metadata
        evidence = {
            "chemical": chemical,
            "predictions": predictions,
            "assessment_metadata": {
                "comptox_api_version": chemical["_api_metadata"]["api_version"],
                "data_snapshot_id": chemical["_api_metadata"]["data_snapshot_id"],
                "qsar_model_version": predictions["_model_metadata"]["model_version"],
                "assessment_timestamp": datetime.utcnow().isoformat()
            }
        }

        return evidence
```

---

> **Reviewed copy (2026-04-15):** This document was retained from the original package but lightly edited for consistency.
> Unless explicitly stated otherwise, code blocks are **reference implementations**, not validated patches, and scenario-based exploit narratives should not be read as reproduced proofs.


## 2. Cryptographic Audit Chain

**File:** `src/epacomp_tox/audit.py`

```python
import hashlib
import json
import base64
import os
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Callable

from pydantic import BaseModel
from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import padding, rsa
from cryptography.hazmat.backends import default_backend

class AuditEvent(BaseModel):
    """Single audit event with cryptographic verification."""
    event_type: str
    timestamp: str
    user_id: str
    session_id: str
    action: str
    resource: str
    details: Dict
    content_hash: str
    previous_hash: str
    signature: Optional[str] = None

class CryptographicAuditChain:
    """Tamper-evident audit chain."""

    def __init__(self, private_key_path: Optional[str] = None):
        self.previous_hash = "0" * 64
        self.events: List[AuditEvent] = []
        self.sinks: List[Callable[[AuditEvent], None]] = []

        # Load or generate signing key
        if private_key_path and os.path.exists(private_key_path):
            self.private_key = self._load_private_key(private_key_path)
        else:
            self.private_key = self._generate_key()
            if private_key_path:
                self._save_private_key(private_key_path)

    def emit(self, event_data: Dict, user_id: str, session_id: str) -> AuditEvent:
        """Emit audit event with cryptographic chaining."""
        # Compute content hash
        content = json.dumps(event_data, sort_keys=True)
        content_hash = hashlib.sha256(content.encode()).hexdigest()

        # Create event
        event = AuditEvent(
            event_type=event_data.get("type", "unknown"),
            timestamp=datetime.utcnow().isoformat(),
            user_id=user_id,
            session_id=session_id,
            action=event_data.get("action", "unknown"),
            resource=event_data.get("resource", "unknown"),
            details=event_data,
            content_hash=content_hash,
            previous_hash=self.previous_hash
        )

        # Sign event
        event.signature = self._sign_event(event)

        # Update chain
        self.previous_hash = content_hash
        self.events.append(event)

        # Emit to sinks
        for sink in self.sinks:
            sink(event)

        return event

    def _sign_event(self, event: AuditEvent) -> str:
        """Cryptographically sign event."""
        payload = f"{event.content_hash}:{event.previous_hash}:{event.timestamp}"
        signature = self.private_key.sign(
            payload.encode(),
            padding.PSS(
                mgf=padding.MGF1(hashes.SHA256()),
                salt_length=padding.PSS.MAX_LENGTH
            ),
            hashes.SHA256()
        )
        return base64.b64encode(signature).decode()

    def verify_chain(self) -> bool:
        """Verify integrity of entire audit chain."""
        previous_hash = "0" * 64

        for event in self.events:
            # Verify previous hash linkage
            if event.previous_hash != previous_hash:
                return False

            # Verify content hash
            content = json.dumps(event.details,
sort_keys=True)
            computed_hash = hashlib.sha256(content.encode()).hexdigest()
            if computed_hash != event.content_hash:
                return False

            # Verify signature
            if not self._verify_signature(event):
                return False

            previous_hash = event.content_hash

        return True

    def _verify_signature(self, event: AuditEvent) -> bool:
        """Verify event signature."""
        try:
            payload = f"{event.content_hash}:{event.previous_hash}:{event.timestamp}"
            signature = base64.b64decode(event.signature)

            self.private_key.public_key().verify(
                signature,
                payload.encode(),
                padding.PSS(
                    mgf=padding.MGF1(hashes.SHA256()),
                    salt_length=padding.PSS.MAX_LENGTH
                ),
                hashes.SHA256()
            )
            return True
        except Exception:
            return False

    def add_sink(self, sink: Callable[[AuditEvent], None]):
        """Add audit sink (e.g., file, database, external service)."""
        self.sinks.append(sink)

    def _generate_key(self):
        """Generate RSA key pair."""
        return rsa.generate_private_key(
            public_exponent=65537,
            key_size=2048,
            backend=default_backend()
        )

    def _load_private_key(self, path: str):
        """Load private key from file."""
        with open(path, "rb") as f:
            return serialization.load_pem_private_key(
                f.read(),
                password=None,
                backend=default_backend()
            )

    def _save_private_key(self, path: str):
        """Save private key to file."""
        pem = self.private_key.private_bytes(
            encoding=serialization.Encoding.PEM,
            format=serialization.PrivateFormat.PKCS8,
            encryption_algorithm=serialization.NoEncryption()
        )
        with open(path, "wb") as f:
            f.write(pem)


# File-based audit sink with WORM properties
class WORMAuditSink:
    """Write-Once-Read-Many audit log sink."""

    def __init__(self, log_dir: str):
        self.log_dir = Path(log_dir)
        self.log_dir.mkdir(parents=True, exist_ok=True)

        # Mark the log directory append-only (Unix)
        self._set_append_only()

    def __call__(self, event: AuditEvent):
        """Write event to WORM log."""
        date_str = datetime.utcnow().strftime("%Y-%m-%d")
        log_file = self.log_dir / f"audit_{date_str}.jsonl"

        # Append-only mode
        with open(log_file, "a") as f:
            f.write(json.dumps(event.dict(), default=str) + "\n")
            f.flush()
            os.fsync(f.fileno())  # Ensure write to disk

        # Mark the file append-only as well; the immutable flag (chattr +i) would
        # block later appends to the same day's log, so +a is used for files too.
        self._set_file_append_only(log_file)

    def _set_append_only(self):
        """Set append-only flag on log directory."""
        try:
            # Linux: chattr +a (append-only)
            import subprocess
            subprocess.run(["chattr", "+a", str(self.log_dir)], check=False)
        except Exception:
            pass  # Not supported on all systems

    def _set_file_append_only(self, file_path: Path):
        """Set append-only flag on log file."""
        try:
            import subprocess
            subprocess.run(["chattr", "+a", str(file_path)], check=False)
        except Exception:
            pass


# Usage
audit_chain = CryptographicAuditChain(private_key_path="/secure/audit_key.pem")
audit_chain.add_sink(WORMAuditSink("/var/log/comptox-mcp/audit"))

# In API endpoint
async def chemical_search_endpoint(request: Request):
    user = authenticate(request)

    audit_chain.emit(
        event_data={
            "type": "chemical_search",
            "action": "search",
            "resource": "chemical",
            "query": request.query_params.get("q"),
            "results_count": len(results)
        },
        user_id=user.id,
        session_id=request.session_id
    )
```

---

## 3. 
Retry with Exponential Backoff and Jitter - -**File:** `src/epacomp_tox/client.py` - -```python -import random -import asyncio -from typing import TypeVar, Callable -import httpx - -T = TypeVar('T') - -class RetryConfig: - """Configuration for retry behavior.""" - max_retries: int = 3 - base_delay: float = 0.5 - max_delay: float = 60.0 - exponential_base: float = 2.0 - jitter: bool = True - retryable_status_codes: set = {429, 500, 502, 503, 504} - -async def retry_with_backoff( - func: Callable[[], T], - config: RetryConfig = None, - is_retryable: Callable[[Exception], bool] = None -) -> T: - """ - Execute function with exponential backoff and jitter. - - Args: - func: Async function to execute - config: Retry configuration - is_retryable: Function to determine if exception is retryable - - Returns: - Result of func() - - Raises: - Last exception if all retries exhausted - """ - config = config or RetryConfig() - is_retryable = is_retryable or (lambda e: True) - - last_exception = None - - for attempt in range(config.max_retries + 1): - try: - return await func() - except Exception as e: - last_exception = e - - # Check if we should retry - if attempt >= config.max_retries: - raise - - if not is_retryable(e): - raise - - # Calculate delay - delay = config.base_delay * (config.exponential_base ** attempt) - delay = min(delay, config.max_delay) - - # Add jitter - if config.jitter: - delay = delay * (0.5 + random.random()) - - await asyncio.sleep(delay) - - raise last_exception - - -# HTTP-specific retry -async def http_request_with_retry( - client: httpx.AsyncClient, - method: str, - url: str, - **kwargs -) -> httpx.Response: - """Make HTTP request with retry logic.""" - config = RetryConfig() - - def is_retryable_error(e: Exception) -> bool: - """Determine if error is retryable.""" - if isinstance(e, httpx.HTTPStatusError): - return e.response.status_code in config.retryable_status_codes - if isinstance(e, (httpx.ConnectError, httpx.TimeoutException)): - return True - return False - - async def make_request(): - response = await client.request(method, url, **kwargs) - response.raise_for_status() - return response - - return await retry_with_backoff( - make_request, - config=config, - is_retryable=is_retryable_error - ) - - -# Rate limit handling -async def handle_rate_limit(response: httpx.Response) -> float: - """ - Extract retry delay from rate limit response. - - Returns: - Delay in seconds - """ - if response.status_code != 429: - return 0 - - # Check Retry-After header - retry_after = response.headers.get("Retry-After") - if retry_after: - try: - return float(retry_after) - except ValueError: - # Could be HTTP date, parse it - pass - - # Check X-RateLimit-Reset header - reset_timestamp = response.headers.get("X-RateLimit-Reset") - if reset_timestamp: - try: - reset_time = datetime.fromtimestamp(int(reset_timestamp)) - delay = (reset_time - datetime.utcnow()).total_seconds() - return max(delay, 1) - except (ValueError, OSError): - pass - - # Default backoff - return 60.0 -``` - ---- - -## 4. 
Distributed Tracing

**File:** `src/epacomp_tox/middleware.py`

```python
from contextvars import ContextVar
from typing import Optional, Dict
import uuid
import httpx

# Context variables for trace and span IDs
trace_id_var: ContextVar[Optional[str]] = ContextVar('trace_id', default=None)
span_id_var: ContextVar[Optional[str]] = ContextVar('span_id', default=None)

class TraceContext:
    """W3C Trace Context propagation."""

    TRACEPARENT_HEADER = "traceparent"
    TRACESTATE_HEADER = "tracestate"

    def __init__(self, trace_id: Optional[str] = None, span_id: Optional[str] = None):
        self.trace_id = trace_id or self._generate_trace_id()
        self.span_id = span_id or self._generate_span_id()
        self.parent_span_id = None

    @classmethod
    def from_headers(cls, headers: Dict[str, str]) -> "TraceContext":
        """Parse trace context from HTTP headers."""
        traceparent = headers.get(cls.TRACEPARENT_HEADER)
        if traceparent:
            # Parse W3C traceparent format: 00-{trace_id}-{span_id}-{flags}
            parts = traceparent.split("-")
            if len(parts) == 4:
                return cls(trace_id=parts[1], span_id=parts[2])

        return cls()  # Generate new context

    def to_headers(self) -> Dict[str, str]:
        """Convert to HTTP headers."""
        traceparent = f"00-{self.trace_id}-{self.span_id}-01"
        return {
            self.TRACEPARENT_HEADER: traceparent
        }

    def create_child_span(self) -> "TraceContext":
        """Create child span context."""
        child = TraceContext(trace_id=self.trace_id)
        child.parent_span_id = self.span_id
        return child

    def _generate_trace_id(self) -> str:
        """Generate a 16-byte (32 hex character) trace ID, per the W3C spec."""
        # A single UUID4 is exactly 32 hex characters; anything longer yields
        # an invalid traceparent that compliant collectors will reject.
        return uuid.uuid4().hex

    def _generate_span_id(self) -> str:
        """Generate an 8-byte (16 hex character) span ID."""
        return uuid.uuid4().hex[:16]


# FastAPI middleware
from fastapi import Request, Response
from starlette.middleware.base import BaseHTTPMiddleware

class TracingMiddleware(BaseHTTPMiddleware):
    """Middleware to handle distributed tracing."""

    async def dispatch(self, request: Request, call_next):
        # Extract trace context from incoming request
        trace_context = TraceContext.from_headers(dict(request.headers))

        # Set context variables
        trace_id_var.set(trace_context.trace_id)
        span_id_var.set(trace_context.span_id)

        # Add trace context to request state
        request.state.trace_context = trace_context

        # Process request
        response = await call_next(request)

        # Add trace context to response headers
        for key, value in trace_context.to_headers().items():
            response.headers[key] = value

        return response


# Traced HTTP client
class TracedHTTPClient:
    """HTTP client that propagates trace context."""

    def __init__(self, base_url: str):
        self.base_url = base_url
        self.client = httpx.AsyncClient()

    async def request(
        self,
        method: str,
        path: str,
        **kwargs
    ) -> httpx.Response:
        """Make request with trace context propagation."""
        # Get current trace context
        trace_id = trace_id_var.get()
        span_id = span_id_var.get()

        if trace_id and span_id:
            trace_context = TraceContext(trace_id, span_id)
            child_context = trace_context.create_child_span()

            # Add trace headers
            headers = kwargs.get("headers", {})
            headers.update(child_context.to_headers())
            kwargs["headers"] = headers

        url = f"{self.base_url}{path}"
        return await self.client.request(method, url, **kwargs)


# Usage in service calls
async def call_oqt_service(chemical_id: str) -> dict:
    """Call O-QT service with trace propagation."""
    client = TracedHTTPClient("http://oqt-mcp:8000")

    response = await client.request(
        "POST",
        "/mcp",
json={ - "tool": "run_qsar_prediction", - "params": {"chemical_id": chemical_id} - } - ) - - return response.json() -``` - ---- - -*These remediation code snippets address the critical issues identified in the CompTox-MCP audit.* diff --git a/ToxMCP_Audit_Reviewed_v2/oqt-mcp-audit/README.md b/ToxMCP_Audit_Reviewed_v2/oqt-mcp-audit/README.md deleted file mode 100644 index 7fecf65..0000000 --- a/ToxMCP_Audit_Reviewed_v2/oqt-mcp-audit/README.md +++ /dev/null @@ -1,180 +0,0 @@ -# OQT-MCP Audit Package (Reviewed Copy) - -**Repository:** `oqt-mcp` -**Package version cited in original audit:** `v0.3.0` -**Review date:** 2026-04-15 -**Overall posture:** **Critical for scientific review governance and output framing** - ---- - -## How to read this reviewed copy - -The strongest findings in `oqt-mcp` are not generic appsec findings. They are about: -- scientific review workflow design -- applicability-domain enforcement -- how confidence and provenance are communicated to users -- whether untrusted identifiers cross into agent or LLM contexts safely - -This reviewed copy keeps those concerns, but distinguishes between: -- **Observed** implementation gaps -- **Observed + inferred** user-risk conclusions -- **Scenario** exploit narratives that still need prompt-boundary validation - ---- - -## Finding register - -| ID | Finding | Severity | Evidence basis | Confidence | Reviewed interpretation | -|---|---|---|---|---|---| -| OQT-01 | Applicability-domain checks are too easy to treat as narrative metadata | **Critical** | Observed + inferred | High | Out-of-domain predictions may be surfaced without hard workflow friction | -| OQT-02 | High-risk flows do not appear to require human review by default | **Critical** | Observed + inferred | High | Wrong identity or weak evidence can propagate into polished outputs | -| OQT-03 | PDF/report defaults do not foreground provenance and uncertainty strongly enough | **Critical** | Observed + inferred | High | Artifacts may look more final than they are | -| OQT-04 | Untrusted identifiers may cross into prompt/agent contexts without enough isolation | **High** | Observed + scenario | Medium | Needs runtime prompt-boundary validation, but deserves near-term mitigation | -| OQT-05 | Logs may capture sensitive identifiers too directly | **High** | Observed | High | Privacy/confidentiality controls need strengthening | -| OQT-06 | Workflow permissions and escalation paths deserve review | **Medium / High** | Observed + inferred | Medium | Important, but needs live-repo validation before stronger claims | - ---- - -## Why this repo is central - -`oqt-mcp` is where scientific judgment can become visually convincing very quickly. -That makes it the most important place to embed: -- review checkpoints -- explicit uncertainty language -- provenance defaults -- safe handling of user-supplied identifiers - ---- - -## Detailed findings - -### OQT-01: Applicability-domain logic should gate decisions, not merely decorate them -**Severity:** **Critical** -**Evidence basis:** Observed + inferred -**Confidence:** High - -The original package persuasively showed that AD information exists, but can still be treated as a side note rather than a decision gate. - -### Why this matters -A user can be shown: -- a prediction value -- a confidence-ish narrative -- a professional artifact - -without a strong enough system-level interruption when the chemical is poorly represented by the model domain. 
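
One way to make that interruption concrete is an explicit, machine-enforced decision object, as the recommended control below suggests. A minimal sketch follows; `ADStatus`, `ADDecision`, and all field names are assumptions for illustration, not the current oqt-mcp schema:

```python
from enum import Enum
from typing import Optional
from pydantic import BaseModel

class ADStatus(str, Enum):
    INSIDE_DOMAIN = "inside_domain"
    OUTSIDE_DOMAIN = "outside_domain"
    UNKNOWN = "unknown"

class ADDecision(BaseModel):
    """Hypothetical AD gate object; downstream steps consume this, not a narrative."""
    status: ADStatus
    rationale: str
    acknowledged_by: Optional[str] = None  # must be set before reporting when not inside domain

def gate_downstream_reporting(decision: ADDecision) -> None:
    """Hard stop: outside-domain or unknown AD blocks reporting until acknowledged."""
    if decision.status is not ADStatus.INSIDE_DOMAIN and decision.acknowledged_by is None:
        raise PermissionError(
            f"AD status '{decision.status.value}' requires manual acknowledgement "
            "before any downstream report is generated"
        )
```

The point of the sketch is that an out-of-domain result becomes an exception a workflow must handle, not a sentence a reader may skim past.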
- -### Recommended control -- introduce an explicit AD decision object -- separate `inside_domain`, `outside_domain`, and `unknown` -- require acknowledgement or manual approval before downstream reporting when outside or unknown -- carry AD status into every artifact header and summary - ---- - -### OQT-02: Human review checkpoints should be first-class -**Severity:** **Critical** -**Evidence basis:** Observed + inferred -**Confidence:** High - -The package’s original finding remains strong: a workflow that can proceed from search to output artifact with minimal user intervention is a governance risk in scientific settings. - -### Minimum checkpoints worth enforcing -1. identity resolution / substance confirmation -2. applicability-domain assessment -3. final narrative/report approval - -### Reviewed wording -The issue is not "automation is bad." -The issue is that **automation without explicit review-state transitions can create false confidence**. - ---- - -### OQT-03: Output defaults over-signal finality -**Severity:** **Critical** -**Evidence basis:** Observed + inferred -**Confidence:** High - -The original audit’s criticism of “audit-ready” style outputs remains valid. Even when technically true that a PDF was generated, the user experience can imply: -- completeness -- validated provenance -- reviewed interpretation -- stable confidence - -before those conditions are satisfied. - -### Recommended control -Make the artifact itself carry its uncertainty: -- provenance table in the first page or header section -- model/tool versions -- AD status and warnings -- explicit human-review state -- draft / reviewed / approved marker -- unresolved evidence gaps section - ---- - -### OQT-04: Treat chemical identifiers as untrusted text at LLM boundaries -**Severity:** **High** -**Evidence basis:** Observed + scenario -**Confidence:** Medium - -The original package may have overstated exploit certainty, but it identified the right boundary. -If chemical names, aliases, notes, or free-text identifiers are interpolated into prompts or agent instructions without structure, instruction confusion becomes plausible. - -### Better mitigation than simple keyword blocking -- normalize Unicode -- remove control characters for LLM-facing contexts, including newlines unless explicitly needed -- pass identifiers as structured data, not concatenated prose -- visually and logically separate system instructions from user-supplied fields -- add regression tests with adversarial identifiers - -### Important nuance -This should be treated as **high priority** even before full exploitation is demonstrated, because the cost of safer prompt construction is modest. - ---- - -### OQT-05: Logging needs a stronger privacy model -**Severity:** **High** -**Evidence basis:** Observed -**Confidence:** High - -The package’s privacy concern remains well supported. If identifiers, SMILES, or other sensitive fields are logged directly, confidentiality can be compromised even when the core workflow is correct. 
- -### Recommended control -- classify fields by sensitivity -- hash or tokenize where operationally acceptable -- separate immutable audit records from developer/debug logs -- define retention and access boundaries - ---- - -## Recommended sequence - -### Immediate -- AD gating with explicit workflow consequences -- mandatory review checkpoints -- stronger artifact provenance and review state labeling -- prompt-boundary hardening for untrusted identifiers - -### Next -- privacy-aware logging -- clearer permission model review -- validation tests covering wrong-identity and out-of-domain paths - ---- - -## Validation backlog specific to this repo - -- confirm prompt/agent boundary for all identifier-bearing fields -- test AD gating with representative in-domain / out-of-domain / ambiguous compounds -- validate PDF/report UX with scientist users -- review permission and escalation paths in the live repository - ---- - -## Related documents - -- `cognitive_ergonomics_audit_report.md` -- `toxmcp_adversarial_audit_report.md` -- `toxmcp_security_audit_report.md` -- `oqt-mcp-audit/REMEDIATION_CODE.md` diff --git a/ToxMCP_Audit_Reviewed_v2/oqt-mcp-audit/REMEDIATION_CODE.md b/ToxMCP_Audit_Reviewed_v2/oqt-mcp-audit/REMEDIATION_CODE.md deleted file mode 100644 index 4fb3fa0..0000000 --- a/ToxMCP_Audit_Reviewed_v2/oqt-mcp-audit/REMEDIATION_CODE.md +++ /dev/null @@ -1,1061 +0,0 @@ -# OQT-MCP: Detailed Remediation Code - -> **Reviewed copy note:** Treat these snippets as reference patterns. For LLM-facing contexts, prefer removing control characters **and** newlines from untrusted identifiers unless a well-tested structured representation is used. - - -## 1. Applicability Domain Index (ADI) Calculation - -**File:** `src/tools/implementations/o_qt_qsar_tools.py` - -```python -from pydantic import BaseModel -from typing import List, Dict, Tuple -import numpy as np -from rdkit import Chem -from rdkit.Chem import Descriptors, AllChem - -class ApplicabilityDomainResult(BaseModel): - """Quantitative applicability domain assessment.""" - adi_score: float # 0-1, higher is better - is_within_domain: bool # Hard gate - chemical_class_alerts: List[str] - training_set_overlap: float # Tanimoto similarity to nearest neighbor - domain_boundaries: Dict[str, Tuple[float, float]] - descriptor_values: Dict[str, float] - warnings: List[str] - -class ADICalculator: - """Calculate Applicability Domain Index for QSAR predictions.""" - - def __init__(self, model_id: str): - self.model_id = model_id - self.training_set = self.load_training_set(model_id) - self.domain_boundaries = self.calculate_domain_boundaries() - - def calculate_adi(self, smiles: str) -> ApplicabilityDomainResult: - """Calculate comprehensive ADI for a chemical.""" - mol = Chem.MolFromSmiles(smiles) - if not mol: - return ApplicabilityDomainResult( - adi_score=0.0, - is_within_domain=False, - chemical_class_alerts=["Invalid SMILES"], - training_set_overlap=0.0, - domain_boundaries={}, - descriptor_values={}, - warnings=["Cannot parse chemical structure"] - ) - - # 1. Calculate molecular descriptors - descriptors = self.calculate_descriptors(mol) - - # 2. Check domain boundaries - boundary_violations = self.check_boundaries(descriptors) - - # 3. Calculate training set similarity - similarity = self.calculate_training_set_similarity(mol) - - # 4. Check chemical class alerts - alerts = self.check_chemical_class_alerts(mol) - - # 5. Calculate overall ADI - adi_score = self.compute_adi_score( - descriptors, boundary_violations, similarity, alerts - ) - - # 6. 
Determine if within domain (hard gate) - is_within_domain = ( - adi_score >= 0.7 and # Minimum ADI threshold - similarity >= 0.5 and # Must have some training set similarity - len(boundary_violations) <= 2 # Limited boundary violations - ) - - return ApplicabilityDomainResult( - adi_score=adi_score, - is_within_domain=is_within_domain, - chemical_class_alerts=alerts, - training_set_overlap=similarity, - domain_boundaries=self.domain_boundaries, - descriptor_values=descriptors, - warnings=self.generate_warnings(boundary_violations, alerts) - ) - - def calculate_descriptors(self, mol: Chem.Mol) -> Dict[str, float]: - """Calculate key molecular descriptors.""" - return { - "molecular_weight": Descriptors.MolWt(mol), - "logp": Descriptors.MolLogP(mol), - "hbd": Descriptors.NumHDonors(mol), - "hba": Descriptors.NumHAcceptors(mol), - "tpsa": Descriptors.TPSA(mol), - "rotatable_bonds": Descriptors.NumRotatableBonds(mol), - "aromatic_rings": Descriptors.NumAromaticRings(mol), - "heavy_atoms": mol.GetNumHeavyAtoms(), - } - - def calculate_domain_boundaries(self) -> Dict[str, Tuple[float, float]]: - """Calculate domain boundaries from training set.""" - if not self.training_set: - return {} - - boundaries = {} - for descriptor in ["molecular_weight", "logp", "hbd", "hba", "tpsa"]: - values = [chem["descriptors"][descriptor] for chem in self.training_set] - q1, q3 = np.percentile(values, [25, 75]) - iqr = q3 - q1 - # Use IQR method with 1.5x expansion - boundaries[descriptor] = (q1 - 1.5 * iqr, q3 + 1.5 * iqr) - - return boundaries - - def check_boundaries(self, descriptors: Dict[str, float]) -> List[str]: - """Check if descriptors are within domain boundaries.""" - violations = [] - for desc, value in descriptors.items(): - if desc in self.domain_boundaries: - min_val, max_val = self.domain_boundaries[desc] - if not (min_val <= value <= max_val): - violations.append( - f"{desc}: {value:.2f} outside [{min_val:.2f}, {max_val:.2f}]" - ) - return violations - - def calculate_training_set_similarity(self, mol: Chem.Mol) -> float: - """Calculate Tanimoto similarity to nearest neighbor in training set.""" - if not self.training_set: - return 0.0 - - fp = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048) - max_similarity = 0.0 - - for train_chem in self.training_set: - train_fp = train_chem["fingerprint"] - similarity = DataStructs.TanimotoSimilarity(fp, train_fp) - max_similarity = max(max_similarity, similarity) - - return max_similarity - - def check_chemical_class_alerts(self, mol: Chem.Mol) -> List[str]: - """Check for chemical class-specific alerts.""" - alerts = [] - - # Check for reactive groups - if self.has_reactive_group(mol): - alerts.append("Reactive functional group detected") - - # Check for known problematic scaffolds - if self.has_problematic_scaffold(mol): - alerts.append("Known problematic scaffold") - - # Check for model-specific alerts - alerts.extend(self.model_specific_alerts(mol)) - - return alerts - - def compute_adi_score( - self, - descriptors: Dict[str, float], - boundary_violations: List[str], - similarity: float, - alerts: List[str] - ) -> float: - """Compute overall ADI score (0-1).""" - # Base score from similarity - score = similarity * 0.4 - - # Penalty for boundary violations - violation_penalty = len(boundary_violations) * 0.1 - score -= violation_penalty - - # Penalty for alerts - alert_penalty = len(alerts) * 0.15 - score -= alert_penalty - - # Bonus for being well within boundaries - if len(boundary_violations) == 0: - score += 0.2 - - return max(0.0, min(1.0, 
score)) - - def generate_warnings( - self, - boundary_violations: List[str], - alerts: List[str] - ) -> List[str]: - """Generate human-readable warnings.""" - warnings = [] - - if boundary_violations: - warnings.append(f"Descriptor boundary violations: {len(boundary_violations)}") - warnings.extend(boundary_violations[:3]) # Show first 3 - - if alerts: - warnings.append(f"Chemical class alerts: {len(alerts)}") - warnings.extend(alerts) - - return warnings - - -# Integration with run_qsar_prediction -async def run_qsar_prediction(smiles: str, model_id: str) -> dict: - """Run QSAR prediction with ADI enforcement.""" - # Calculate ADI - adi_calculator = ADICalculator(model_id) - ad_result = adi_calculator.calculate_adi(smiles) - - # Hard gate: reject if outside domain - if not ad_result.is_within_domain: - return { - "prediction": None, - "status": "REJECTED", - "reason": "Outside applicability domain", - "ad_result": ad_result.dict(), - "requires_human_review": True, - "recommendation": "Consider read-across or experimental testing" - } - - # Fetch prediction from QSAR Toolbox - prediction = await fetch_prediction_from_toolbox(smiles, model_id) - - # Combine with ADI - return { - "prediction": prediction, - "ad_result": ad_result.dict(), - "confidence": ad_result.adi_score * prediction.get("confidence", 0.5), - "status": "SUCCESS", - "requires_human_review": ad_result.adi_score < 0.8 # Review if borderline - } -``` - ---- - -> **Reviewed copy (2026-04-15):** This document was retained from the original package but lightly edited for consistency. -> Unless explicitly stated otherwise, code blocks are **reference implementations**, not validated patches, and scenario-based exploit narratives should not be read as reproduced proofs. - - - -## 2. Chemical Name Sanitization (Prompt Injection Prevention) - -**File:** `src/schemas/workflow_record.py` - -```python -import re -import unicodedata -from typing import Optional - -class ChemicalNameSanitizer: - """Sanitize chemical names to prevent prompt injection.""" - - # Blocked patterns that could be used for prompt injection - BLOCKED_PATTERNS = [ - r'ignore\s+(previous\s+)?instructions', - r'override\s+(all\s+)?(safety|guidelines|constraints)', - r'debug\s+mode', - r'system\s+(test|prompt|instruction)', - r'you\s+are\s+now', - r'new\s+instruction', - r'forget\s+(previous|everything)', - r'disregard\s+(all|previous)', - r'act\s+as\s+(if|though)', - r'pretend\s+to\s+be', - r'roleplay\s+as', - ] - - # Maximum allowed length - MAX_LENGTH = 1000 - - @classmethod - def sanitize(cls, name: str, context: str = "general") -> str: - """ - Sanitize chemical name to prevent prompt injection. 
- - Args: - name: Raw chemical name input - context: Context where name will be used ("general", "llm_prompt", "search") - - Returns: - Sanitized chemical name - - Raises: - ValueError: If potentially malicious input detected - """ - if not name: - return name - - # Check length - if len(name) > cls.MAX_LENGTH: - raise ValueError(f"Chemical name exceeds maximum length of {cls.MAX_LENGTH}") - - # Normalize Unicode - normalized = unicodedata.normalize('NFKC', name) - - # Remove zero-width and control characters - sanitized = cls._remove_control_chars(normalized) - - # Check for blocked patterns - cls._check_blocked_patterns(sanitized) - - # Context-specific sanitization - if context == "llm_prompt": - sanitized = sanitized.replace('\n', ' ').replace('\r', ' ') - sanitized = cls._sanitize_for_llm(sanitized) - - return sanitized.strip() - - @classmethod - def _remove_control_chars(cls, text: str) -> str: - """Remove control and zero-width characters.""" - # Remove zero-width characters - zero_width = [ - '\u200B', # Zero Width Space - '\u200C', # Zero Width Non-Joiner - '\u200D', # Zero Width Joiner - '\uFEFF', # Zero Width No-Break Space - '\u2060', # Word Joiner - '\u180E', # Mongolian Vowel Separator - ] - - for zw in zero_width: - text = text.replace(zw, '') - - # Remove control characters; if the value is destined for an LLM context, prefer removing newlines too - cleaned = [] - for char in text: - cat = unicodedata.category(char) - if cat.startswith('C') and char not in '\n\t': - continue - cleaned.append(char) - - return ''.join(cleaned) - - @classmethod - def _check_blocked_patterns(cls, text: str): - """Check for blocked instruction patterns.""" - text_lower = text.lower() - - for pattern in cls.BLOCKED_PATTERNS: - if re.search(pattern, text_lower, re.IGNORECASE): - raise ValueError( - f"Potentially malicious chemical name detected. " - f"Pattern matched: {pattern}" - ) - - @classmethod - def _sanitize_for_llm(cls, text: str) -> str: - """Additional sanitization for LLM prompts.""" - # Escape special characters that could be interpreted as formatting - text = text.replace('`', '') # Remove backticks - text = text.replace('$', '') # Remove dollar signs (LaTeX) - - # Limit consecutive newlines - text = re.sub(r'\n{3,}', '\n\n', text) - - return text - - @classmethod - def validate_smiles(cls, smiles: str) -> bool: - """Validate SMILES string format.""" - from rdkit import Chem - - try: - mol = Chem.MolFromSmiles(smiles) - return mol is not None - except: - return False - - -# Usage in workflow processing -from pydantic import validator - -class WorkflowInput(BaseModel): - chemical_name: str - - @validator('chemical_name') - def sanitize_chemical_name(cls, v): - return ChemicalNameSanitizer.sanitize(v, context="llm_prompt") - - -class ChemicalSearchParams(BaseModel): - query: str - search_type: str = "name" # Changed from "auto" to safer default - - @validator('query') - def sanitize_query(cls, v): - return ChemicalNameSanitizer.sanitize(v, context="search") -``` - ---- - -## 3. 
PII Scrubbing for Logs - -**File:** `src/tools/registry.py` - -```python -import hashlib -import json -import re -from typing import Any, Dict, List, Optional - -class PrivacyScrubber: - """Scrub PII/PSI (Proprietary Substance Information) from logs.""" - - # Sensitive field patterns - SENSITIVE_PATTERNS = [ - r'(?i)smiles?', # Case-insensitive match for "smiles" or "SMILES" - r'(?i)inchi(key)?', - r'(?i)cas(_number)?', - r'(?i)chemical_name', - r'(?i)preferred_name', - r'(?i)iupac_name', - r'(?i)structure', - r'(?i)molecule', - r'(?i)compound', - r'(?i)substance', - r'(?i)formula', - ] - - # SMILES detection pattern (simplified) - SMILES_PATTERN = re.compile(r'^[A-Za-z0-9@+\-\[\]\\\(\)=#$:.]+$') - - # CAS number pattern - CAS_PATTERN = re.compile(r'^\d{1,7}\-\d{2}\-\d$') - - def __init__(self, salt: Optional[str] = None): - """ - Initialize scrubber with optional salt for hashing. - - Args: - salt: Salt for hashing (should be consistent across services) - """ - self.salt = salt or "toxmcp_default_salt" - - def scrub(self, data: Any, path: str = "") -> Any: - """ - Recursively scrub sensitive data. - - Args: - data: Data to scrub - path: Current path in nested structure (for debugging) - - Returns: - Scrubbed data with sensitive fields hashed - """ - if isinstance(data, dict): - return self._scrub_dict(data, path) - elif isinstance(data, list): - return [self.scrub(item, f"{path}[]") for item in data] - elif isinstance(data, str): - return self._scrub_string(data, path) - else: - return data - - def _scrub_dict(self, data: Dict, path: str) -> Dict: - """Scrub dictionary values.""" - scrubbed = {} - for key, value in data.items(): - current_path = f"{path}.{key}" if path else key - - if self._is_sensitive_key(key): - # Hash the value - scrubbed[key] = self._hash_value(value) - else: - # Recursively scrub - scrubbed[key] = self.scrub(value, current_path) - - return scrubbed - - def _scrub_string(self, value: str, path: str) -> str: - """Scrub string value, detecting embedded sensitive data.""" - # Check if entire string is a SMILES - if self._is_smiles(value): - return self._hash_value(value) - - # Check if entire string is a CAS number - if self._is_cas_number(value): - return self._hash_value(value) - - # Check for embedded SMILES in text (more complex) - # This is a simplified check - production would need more sophisticated detection - words = value.split() - scrubbed_words = [] - for word in words: - if self._is_smiles(word) or self._is_cas_number(word): - scrubbed_words.append(self._hash_value(word)) - else: - scrubbed_words.append(word) - - return ' '.join(scrubbed_words) - - def _is_sensitive_key(self, key: str) -> bool: - """Check if key name indicates sensitive data.""" - key_lower = key.lower() - return any(re.match(pattern, key_lower) for pattern in self.SENSITIVE_PATTERNS) - - def _is_smiles(self, value: str) -> bool: - """Check if value looks like a SMILES string.""" - # Basic heuristic: contains typical SMILES characters and minimum length - if len(value) < 3: - return False - - # Check for SMILES-specific characters - smiles_chars = set('CNO[]()=@+-#$.1234567890') - value_chars = set(value.upper()) - - # If most characters are SMILES-specific, likely a SMILES - if len(value_chars - smiles_chars) <= 2: - return True - - return False - - def _is_cas_number(self, value: str) -> bool: - """Check if value is a CAS registry number.""" - return bool(self.CAS_PATTERN.match(value)) - - def _hash_value(self, value: Any) -> str: - """Hash a value for logging.""" - if value is None: - 
return None - - value_str = str(value) - - # Create deterministic hash with salt - hash_input = f"{self.salt}:{value_str}" - hash_value = hashlib.sha256(hash_input.encode()).hexdigest()[:16] - - return f"[HASH:{hash_value}]" - - def create_correlation_id(self, identifier: str) -> str: - """ - Create a correlation ID that can link events without revealing the identifier. - - This allows debugging across services without exposing sensitive data. - """ - return self._hash_value(identifier) - - -# Integration with audit logging -class AuditLogger: - """Audit logger with built-in PII scrubbing.""" - - def __init__(self, scrubber: Optional[PrivacyScrubber] = None): - self.scrubber = scrubber or PrivacyScrubber() - - def log_tool_execution( - self, - tool_name: str, - params: Dict[str, Any], - result: Any, - user_id: str, - correlation_id: str - ): - """Log tool execution with PII scrubbing.""" - scrubbed_params = self.scrubber.scrub(params) - scrubbed_result = self.scrubber.scrub(result) - - event = { - "type": "tool_execution", - "tool": tool_name, - "params": scrubbed_params, - "result_summary": self._summarize_result(scrubbed_result), - "user_id": user_id, - "correlation_id": correlation_id, - "timestamp": datetime.utcnow().isoformat(), - } - - self._emit(event) - - def _summarize_result(self, result: Any) -> Dict: - """Create a summary of result without sensitive data.""" - if isinstance(result, dict): - return { - "status": result.get("status"), - "has_prediction": "prediction" in result, - "has_warnings": "warnings" in result and len(result["warnings"]) > 0, - } - return {"type": type(result).__name__} - - -# Usage example -scrubber = PrivacyScrubber(salt="oqt_mcp_production_salt") -audit_logger = AuditLogger(scrubber) - -# In tool execution -async def run_qsar_prediction(smiles: str, model_id: str) -> dict: - correlation_id = scrubber.create_correlation_id(smiles) - - try: - result = await fetch_prediction(smiles, model_id) - - audit_logger.log_tool_execution( - tool_name="run_qsar_prediction", - params={"smiles": smiles, "model_id": model_id}, - result=result, - user_id=current_user.id, - correlation_id=correlation_id - ) - - return result - except Exception as e: - audit_logger.log_tool_execution( - tool_name="run_qsar_prediction", - params={"smiles": smiles, "model_id": model_id}, - result={"error": str(e)}, - user_id=current_user.id, - correlation_id=correlation_id - ) - raise -``` - ---- - -## 4. 
Mandatory Scientific Review Mode - -**File:** `src/tools/implementations/workflow_runner.py` - -```python -from enum import Enum -from typing import Optional, List, Dict, Any -from pydantic import BaseModel -import asyncio - -class ReviewStatus(str, Enum): - PENDING = "pending" - APPROVED = "approved" - REJECTED = "rejected" - EXPIRED = "expired" - -class ReviewCheckpoint(BaseModel): - """A checkpoint requiring human review.""" - checkpoint_id: str - step: str # e.g., "chemical_id", "ad_assessment", "final_report" - status: ReviewStatus - data: Dict[str, Any] # Data to review - reviewer_id: Optional[str] = None - reviewed_at: Optional[str] = None - comments: Optional[str] = None - expires_at: Optional[str] = None - -class ReviewOrchestrator: - """Orchestrate mandatory human review checkpoints.""" - - REVIEW_TIMEOUT = 3600 # 1 hour timeout for review - - def __init__(self): - self.pending_reviews: Dict[str, ReviewCheckpoint] = {} - self.review_callbacks: Dict[str, asyncio.Event] = {} - - async def create_checkpoint( - self, - workflow_id: str, - step: str, - data: Dict[str, Any], - require_approval: bool = True - ) -> ReviewCheckpoint: - """Create a review checkpoint and wait for human approval.""" - checkpoint_id = f"{workflow_id}_{step}_{uuid.uuid4().hex[:8]}" - - checkpoint = ReviewCheckpoint( - checkpoint_id=checkpoint_id, - step=step, - status=ReviewStatus.PENDING, - data=data, - expires_at=(datetime.utcnow() + timedelta(seconds=self.REVIEW_TIMEOUT)).isoformat() - ) - - self.pending_reviews[checkpoint_id] = checkpoint - self.review_callbacks[checkpoint_id] = asyncio.Event() - - # Notify reviewers (e.g., via email, Slack, or UI) - await self.notify_reviewers(checkpoint) - - if require_approval: - # Wait for review with timeout - try: - await asyncio.wait_for( - self.review_callbacks[checkpoint_id].wait(), - timeout=self.REVIEW_TIMEOUT - ) - except asyncio.TimeoutError: - checkpoint.status = ReviewStatus.EXPIRED - raise ReviewTimeout(f"Review checkpoint {checkpoint_id} timed out") - - return self.pending_reviews[checkpoint_id] - - async def submit_review( - self, - checkpoint_id: str, - reviewer_id: str, - decision: ReviewStatus, - comments: Optional[str] = None - ): - """Submit a review decision.""" - if checkpoint_id not in self.pending_reviews: - raise ValueError(f"Unknown checkpoint: {checkpoint_id}") - - checkpoint = self.pending_reviews[checkpoint_id] - - if checkpoint.status != ReviewStatus.PENDING: - raise ValueError(f"Checkpoint already reviewed: {checkpoint.status}") - - checkpoint.status = decision - checkpoint.reviewer_id = reviewer_id - checkpoint.reviewed_at = datetime.utcnow().isoformat() - checkpoint.comments = comments - - # Signal completion - self.review_callbacks[checkpoint_id].set() - - # Audit log - await self.log_review(checkpoint) - - async def notify_reviewers(self, checkpoint: ReviewCheckpoint): - """Notify available reviewers.""" - # Implementation depends on notification system - # Could be: email, Slack, WebSocket, etc. 
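        # Deliver these notifications over a genuinely out-of-band channel: the
        # audit elsewhere in this package shows that header-style "confirmed"
        # flags can be set by any automated agent, so the review loop must not
        # be satisfiable from within the request itself.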
- notification = { - "type": "review_required", - "checkpoint_id": checkpoint.checkpoint_id, - "step": checkpoint.step, - "workflow_id": checkpoint.checkpoint_id.split("_")[0], - "data_summary": self.summarize_for_notification(checkpoint.data), - "review_url": f"/review/{checkpoint.checkpoint_id}" - } - - await send_notification(notification) - - def summarize_for_notification(self, data: Dict) -> str: - """Create human-readable summary for notification.""" - if "chemical_name" in data: - return f"Chemical: {data['chemical_name']}" - elif "prediction" in data: - return f"Prediction: {data['prediction']}" - return "Review required" - - -# Integration with workflow runner -class WorkflowRunner: - def __init__(self): - self.review_orchestrator = ReviewOrchestrator() - - async def run_workflow(self, params: WorkflowParams) -> WorkflowResult: - """Run workflow with mandatory review checkpoints.""" - workflow_id = str(uuid.uuid4()) - - # Step 1: Chemical identification - chemical = await self.identify_chemical(params.identifier) - - if params.require_human_review: - checkpoint = await self.review_orchestrator.create_checkpoint( - workflow_id=workflow_id, - step="chemical_id", - data={ - "input_identifier": params.identifier, - "resolved_chemical": chemical.dict(), - "search_type_used": params.search_type - }, - require_approval=True - ) - - if checkpoint.status == ReviewStatus.REJECTED: - return WorkflowResult( - status="REJECTED", - reason=f"Chemical identification rejected: {checkpoint.comments}", - checkpoint=checkpoint - ) - - # Step 2: QSAR predictions - predictions = await self.run_qsar_predictions(chemical, params.qsar_mode) - - # Check for AD warnings - ad_warnings = [p for p in predictions if not p.ad_result.is_within_domain] - - if params.require_human_review and ad_warnings: - checkpoint = await self.review_orchestrator.create_checkpoint( - workflow_id=workflow_id, - step="ad_assessment", - data={ - "chemical": chemical.dict(), - "ad_warnings": [w.dict() for w in ad_warnings], - "predictions": [p.dict() for p in predictions] - }, - require_approval=True - ) - - if checkpoint.status == ReviewStatus.REJECTED: - return WorkflowResult( - status="REJECTED", - reason=f"AD assessment rejected: {checkpoint.comments}", - checkpoint=checkpoint - ) - - # Step 3: Generate report - report = await self.generate_report(chemical, predictions) - - # Final review before PDF generation - if params.require_human_review: - checkpoint = await self.review_orchestrator.create_checkpoint( - workflow_id=workflow_id, - step="final_report", - data={ - "report_preview": report.summary(), - "chemical": chemical.dict(), - "predictions_count": len(predictions), - "warnings_count": len(ad_warnings) - }, - require_approval=True - ) - - if checkpoint.status == ReviewStatus.REJECTED: - return WorkflowResult( - status="REJECTED", - reason=f"Final report rejected: {checkpoint.comments}", - checkpoint=checkpoint - ) - - # Generate final PDF - pdf = await self.generate_pdf(report) - - return WorkflowResult( - status="SUCCESS", - workflow_id=workflow_id, - chemical=chemical, - predictions=predictions, - report=report, - pdf=pdf, - review_checkpoints=self.review_orchestrator.get_workflow_reviews(workflow_id) - ) -``` - ---- - -## 5. 
Provenance Tables for PDF Generation
-
-**File:** `src/utils/pdf_generator.py`
-
-```python
-from datetime import datetime
-from typing import Dict, List, Any
-
-class ProvenanceTableGenerator:
-    """Generate provenance tables for PDF reports."""
-
-    def generate_provenance_section(self, workflow_record: Dict) -> str:
-        """Generate complete provenance section for PDF."""
-        sections = [
-            self._generate_header(),
-            self._generate_data_sources_table(workflow_record),
-            self._generate_models_table(workflow_record),
-            self._generate_applicability_domain_section(workflow_record),
-            self._generate_signatures_table(workflow_record),
-            self._generate_audit_trail(workflow_record),
-        ]
-
-        return "\n\n".join(sections)
-
-    def _generate_header(self) -> str:
-        """Generate section header."""
-        return """
-## Provenance and Data Quality Information
-
-This section provides complete traceability for the hazard assessment
-contained in this report, including data sources, model versions, and
-applicability domain status.
-
-"""
-
-    def _generate_data_sources_table(self, workflow_record: Dict) -> str:
-        """Generate data sources table."""
-        provenance = workflow_record.get("provenance", {})
-
-        table = """
-### Data Sources and Versions
-
-| Component | Version | Timestamp | Source |
-|-----------|---------|-----------|--------|
-"""
-
-        # O-QT MCP version
-        table += f"| O-QT MCP | {provenance.get('oqt_version', 'N/A')} | {provenance.get('generated_at', 'N/A')} | Internal |\n"
-
-        # QSAR Toolbox version
-        table += f"| QSAR Toolbox | {provenance.get('toolbox_version', 'N/A')} | {provenance.get('toolbox_timestamp', 'N/A')} | OECD |\n"
-
-        # Data snapshot
-        table += f"| Data Snapshot | {provenance.get('data_snapshot_id', 'N/A')} | {provenance.get('snapshot_date', 'N/A')} | EPA/OECD |\n"
-
-        # API versions
-        for api_name, api_info in provenance.get('upstream_apis', {}).items():
-            table += f"| {api_name} | {api_info.get('version', 'N/A')} | {api_info.get('called_at', 'N/A')} | External |\n"
-
-        return table
-
-    def _generate_models_table(self, workflow_record: Dict) -> str:
-        """Generate QSAR models table."""
-        predictions = workflow_record.get('predictions', [])
-
-        table = """
-### QSAR Models Used
-
-| Model | Version | Prediction | Confidence | AD Status |
-|-------|---------|------------|------------|-----------|
-"""
-
-        for pred in predictions:
-            model = pred.get('model', {})
-            ad_result = pred.get('ad_result', {})
-
-            model_name = model.get('name', 'Unknown')
-            model_version = model.get('version', 'N/A')
-            prediction = pred.get('prediction', 'N/A')
-            confidence = pred.get('confidence', 'N/A')
-            ad_status = "✓ In Domain" if ad_result.get('is_within_domain') else "✗ Outside Domain"
-
-            # Format specifiers cannot embed conditionals, so format the
-            # confidence value before interpolating it into the row.
-            confidence_str = f"{confidence:.2f}" if isinstance(confidence, float) else str(confidence)
-
-            table += f"| {model_name} | {model_version} | {prediction} | {confidence_str} | {ad_status} |\n"
-
-        return table
-
-    def _generate_applicability_domain_section(self, workflow_record: Dict) -> str:
-        """Generate applicability domain warnings section."""
-        predictions = workflow_record.get('predictions', [])
-
-        # Collect all AD warnings
-        all_warnings = []
-        for pred in predictions:
-            ad_result = pred.get('ad_result', {})
-            if not ad_result.get('is_within_domain'):
-                warnings = ad_result.get('warnings', [])
-                all_warnings.extend(warnings)
-
-        if not all_warnings:
-            return """
-### Applicability Domain Assessment
-
-✓ All predictions are within the applicability domain of their respective models.
- -""" - - section = """ -### ⚠️ Applicability Domain Warnings - -**WARNING:** The following predictions were made outside the strict applicability domain -of the QSAR models. These predictions should be treated with caution and may require -additional experimental validation. - -**Warnings:** - -""" - for warning in set(all_warnings): # Deduplicate - section += f"- {warning}\n" - - section += """ -**Recommendations:** -1. Consider read-across from structurally similar compounds with experimental data -2. Conduct in vitro testing for critical endpoints -3. Consult with a QSAR expert before using these predictions for regulatory decisions - -""" - return section - - def _generate_signatures_table(self, workflow_record: Dict) -> str: - """Generate electronic signatures table.""" - signatures = workflow_record.get('signatures', []) - - if not signatures: - return """ -### Electronic Signatures - -*No electronic signatures have been applied to this report.* - -""" - - table = """ -### Electronic Signatures - -| Role | Signer | Date | Meaning | Verification | -|------|--------|------|---------|--------------| -""" - - for sig in signatures: - role = sig.get('role', 'Unknown') - signer = sig.get('signer_user_id', 'Unknown') - date = sig.get('timestamp', 'N/A') - meaning = sig.get('meaning', 'N/A') - verified = "✓ Verified" if sig.get('verified') else "✗ Failed" - - table += f"| {role} | {signer} | {date} | {meaning} | {verified} |\n" - - return table - - def _generate_audit_trail(self, workflow_record: Dict) -> str: - """Generate audit trail section.""" - audit_events = workflow_record.get('audit_trail', []) - - if not audit_events: - return """ -### Audit Trail - -*No audit events recorded.* - -""" - - section = """ -### Audit Trail - -| Timestamp | Event | User | Details | -|-----------|-------|------|---------| -""" - - for event in audit_events[-10:]: # Show last 10 events - timestamp = event.get('timestamp', 'N/A') - event_type = event.get('type', 'Unknown') - user = event.get('user_id', 'System') - details = event.get('details', '') - - section += f"| {timestamp} | {event_type} | {user} | {details} |\n" - - if len(audit_events) > 10: - section += f"\n*... and {len(audit_events) - 10} more events*\n" - - return section - - -# Integration with PDF generator -class PDFGenerator: - def __init__(self): - self.provenance_generator = ProvenanceTableGenerator() - - async def generate_pdf(self, workflow_record: Dict) -> bytes: - """Generate PDF with complete provenance.""" - # Generate main content - content = self.generate_main_content(workflow_record) - - # Generate provenance section - provenance = self.provenance_generator.generate_provenance_section(workflow_record) - - # Combine - full_content = f""" -{content} - ---- - -{provenance} - ---- - -## Disclaimer - -This report was generated automatically using the O-QT MCP system. -The predictions contained herein are based on QSAR models and should -be reviewed by a qualified toxicologist before use in regulatory submissions. 
- -Report ID: {workflow_record.get('workflow_id', 'N/A')} -Generated: {datetime.utcnow().isoformat()}Z - """ - - # Convert to PDF (using existing PDF library) - return await self.render_to_pdf(full_content) -``` - ---- - -*These remediation code snippets address the critical issues identified in the OQT-MCP audit.* diff --git a/ToxMCP_Audit_Reviewed_v2/pbpk-mcp-audit/README.md b/ToxMCP_Audit_Reviewed_v2/pbpk-mcp-audit/README.md deleted file mode 100644 index 3fea41b..0000000 --- a/ToxMCP_Audit_Reviewed_v2/pbpk-mcp-audit/README.md +++ /dev/null @@ -1,164 +0,0 @@ -# PBPK-MCP Audit Package (Reviewed Copy) - -**Repository:** `pbpk-mcp` -**Package version cited in original audit:** `v0.4.3` -**Review date:** 2026-04-15 -**Overall posture:** **High risk for scientific guardrails and runtime stability** - ---- - -## How to read this reviewed copy - -The original package’s best PBPK findings were about: -- scientifically meaningful parameter control -- resource limits for large simulations -- reproducibility metadata -- runtime isolation and operational hardening - -This reviewed copy keeps those findings, but reduces overstatement where exploitability or infrastructure specifics were not validated. - ---- - -## Finding register - -| ID | Finding | Severity | Evidence basis | Confidence | Reviewed interpretation | -|---|---|---|---|---|---| -| PBPK-01 | Parameter changes need stronger physiological bounds and sweep governance | **Critical** | Observed + inferred | High | Unreviewed parameter exploration can bias conclusions | -| PBPK-02 | Population-size and memory controls are insufficiently explicit | **Critical** | Observed | High | Large jobs can plausibly destabilize workers without enforced limits | -| PBPK-03 | Reproducibility metadata and deterministic hashing need improvement | **High** | Observed | High | Historical comparability and event integrity are weaker than intended | -| PBPK-04 | Container/runtime hardening needs a clearer threat model and stronger controls | **High** | Observed + scenario | Medium | Important, but severity depends on actual deployment/runtime permissions | -| PBPK-05 | Queueing and availability protections deserve explicit load-test validation | **High** | Observed + inferred | Medium-High | Failure under stress is plausible and should be measured | - ---- - -## Detailed findings - -### PBPK-01: Parameter editing needs governance, not only validation -**Severity:** **Critical** -**Evidence basis:** Observed + inferred -**Confidence:** High - -The original package correctly identified a domain-specific risk that many generic software audits would miss: -a parameter-editing API can become a vehicle for selective tuning until a preferred outcome appears. - -### Why this matters -Even if each individual parameter change is syntactically valid, the workflow still needs: -- physiological plausibility bounds -- actor/reason capture -- sweep detection -- explicit review requirements when repeated tuning occurs - -### Recommended control -- bounds database curated with domain-owner review -- change audit trail with before/after values and rationale -- heuristic or rule-based sweep detection -- stronger review requirements when model outputs change materially after repeated edits - ---- - -### PBPK-02: Resource controls should be measured and enforced -**Severity:** **Critical** -**Evidence basis:** Observed -**Confidence:** High - -The package’s central concern is sound: large population simulations can exhaust memory or queue capacity without explicit control points. 
- -### Reviewed wording -The exact OOM threshold depends on: -- model complexity -- output retention strategy -- worker memory size -- parallelism settings - -So the reviewed copy avoids wording like "certain OOM" unless infrastructure measurements support it. - -### Recommended control -- hard upper bounds on population size -- memory/CPU quotas -- per-job estimates before execution -- streaming or chunked result handling where feasible -- defaults based on benchmarked infrastructure, not only estimates - ---- - -### PBPK-03: Reproducibility needs a fuller provenance envelope -**Severity:** **High** -**Evidence basis:** Observed -**Confidence:** High - -The original package was right to emphasize that reproducibility is not just about input values. It also depends on: -- model version -- runtime environment -- floating-point serialization -- seeds and stochastic settings -- artifact generation behavior - -### Recommended control -- canonical serialization for hashed events -- explicit handling for float edge cases -- runtime snapshot capture -- clear distinction between scientific result hash and audit-event hash - ---- - -### PBPK-04: Runtime isolation should be threat-model driven -**Severity:** **High** -**Evidence basis:** Observed + scenario -**Confidence:** Medium - -Container hardening and file/runtime isolation matter here, especially if the system ingests untrusted files or executes complex scientific tooling. -The original package likely overstated certainty for some escape scenarios, but it was directionally right to treat runtime hardening as important. - -### Recommended control -- confirm actual trust boundaries for uploaded files and model assets -- run with least privilege -- document seccomp/AppArmor/SELinux or equivalent controls where used -- separate build-time privilege needs from runtime privilege needs - ---- - -### PBPK-05: Availability and queue behavior need explicit validation -**Severity:** **High** -**Evidence basis:** Observed + inferred -**Confidence:** Medium-High - -The original package’s DoS and queue-flooding concerns are plausible. The right next step is not stronger rhetoric; it is measurement. 
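-
-As a concrete sketch of that measurement (illustrative only: `submit_population_job`
-and `wait_for_completion` are hypothetical client helpers, and job counts should come
-from benchmarked infrastructure, not from this example):
-
-```python
-import asyncio
-import time
-
-async def probe_queue_latency(n_jobs: int = 20, population_size: int = 1000) -> list:
-    """Submit a burst of jobs and record submit-to-completion latency."""
-    async def one_job() -> float:
-        started = time.monotonic()
-        job_id = await submit_population_job(population_size=population_size)  # hypothetical
-        await wait_for_completion(job_id)  # hypothetical
-        return time.monotonic() - started
-
-    latencies = sorted(await asyncio.gather(*(one_job() for _ in range(n_jobs))))
-    print(f"p50={latencies[len(latencies) // 2]:.1f}s max={latencies[-1]:.1f}s")
-    return latencies
-```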
- -### Recommended control -- representative load tests -- queue depth and age limits -- cancellation/timeout policy -- clear partial-failure behavior -- telemetry for memory, queue delay, retries, and worker saturation - ---- - -## Recommended sequence - -### Immediate -- parameter bounds and sweep governance -- population and memory limits -- deterministic hashing improvements - -### Next -- runtime hardening review -- load tests and quota tuning -- provenance envelope alignment with the rest of the suite - ---- - -## Validation backlog specific to this repo - -- benchmark population-size vs memory/latency on representative workers -- validate deterministic hashing across platforms and Python versions -- review runtime/file ingestion threat model -- confirm how repeated parameter changes are surfaced to reviewers - ---- - -## Related documents - -- `ToxMCP_Performance_Resilience_Audit_Report.md` -- `toxmcp_regulatory_audit_report.md` -- `toxmcp_adversarial_audit_report.md` -- `pbpk-mcp-audit/REMEDIATION_CODE.md` diff --git a/ToxMCP_Audit_Reviewed_v2/pbpk-mcp-audit/REMEDIATION_CODE.md b/ToxMCP_Audit_Reviewed_v2/pbpk-mcp-audit/REMEDIATION_CODE.md deleted file mode 100644 index b819d40..0000000 --- a/ToxMCP_Audit_Reviewed_v2/pbpk-mcp-audit/REMEDIATION_CODE.md +++ /dev/null @@ -1,902 +0,0 @@ -# PBPK-MCP: Detailed Remediation Code - -> **Reviewed copy note:** Treat these snippets as reference patterns. Physiological bounds, workload limits, and runtime hardening values should be validated against representative models and infrastructure. - - -## 1. Parameter Bounds Validation (Physiological Plausibility) - -**File:** `src/mcp/tools/parameter_bounds.py` - -```python -from typing import Dict, Tuple, Optional -from pydantic import BaseModel, validator -from enum import Enum -import numpy as np - -class ParameterCategory(str, Enum): - """Categories of PBPK parameters.""" - PHYSICOCHEMICAL = "physicochemical" - ANATOMICAL = "anatomical" - PHYSIOLOGICAL = "physiological" - ENZYME_KINETICS = "enzyme_kinetics" - -class ParameterBounds(BaseModel): - """Bounds for a single parameter.""" - min_value: float - max_value: float - default_value: float - unit: str - category: ParameterCategory - description: str - references: list = [] # Literature references - - def validate_value(self, value: float) -> bool: - """Check if value is within bounds.""" - return self.min_value <= value <= self.max_value - -class PBPKParameterDatabase: - """Database of physiologically plausible parameter bounds.""" - - # Organ volumes (L) - based on literature - ORGAN_VOLUMES = { - "Liver": ParameterBounds( - min_value=0.5, - max_value=3.0, - default_value=1.5, - unit="L", - category=ParameterCategory.ANATOMICAL, - description="Liver volume", - references=["ICRP 89", "PK-Sim defaults"] - ), - "Kidney": ParameterBounds( - min_value=0.2, - max_value=0.6, - default_value=0.31, - unit="L", - category=ParameterCategory.ANATOMICAL, - description="Kidney volume (both kidneys)", - references=["ICRP 89"] - ), - "Brain": ParameterBounds( - min_value=1.0, - max_value=1.8, - default_value=1.4, - unit="L", - category=ParameterCategory.ANATOMICAL, - description="Brain volume", - references=["ICRP 89"] - ), - "Muscle": ParameterBounds( - min_value=15.0, - max_value=35.0, - default_value=24.0, - unit="L", - category=ParameterCategory.ANATOMICAL, - description="Muscle volume", - references=["ICRP 89"] - ), - "Adipose": ParameterBounds( - min_value=5.0, - max_value=30.0, - default_value=15.0, - unit="L", - category=ParameterCategory.ANATOMICAL, - 
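-            # NOTE: deliberately wide range - adipose volume varies strongly with
-            # body composition, so these reference bounds are coarser than most.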
description="Adipose tissue volume", - references=["ICRP 89"] - ), - } - - # Blood flows (L/min) - must sum to cardiac output - BLOOD_FLOWS = { - "Liver": ParameterBounds( - min_value=0.5, - max_value=2.0, - default_value=1.0, - unit="L/min", - category=ParameterCategory.PHYSIOLOGICAL, - description="Hepatic blood flow", - references=["Davies 1993"] - ), - "Kidney": ParameterBounds( - min_value=0.5, - max_value=1.5, - default_value=1.0, - unit="L/min", - category=ParameterCategory.PHYSIOLOGICAL, - description="Renal blood flow", - references=["Davies 1993"] - ), - "Brain": ParameterBounds( - min_value=0.3, - max_value=1.0, - default_value=0.7, - unit="L/min", - category=ParameterCategory.PHYSIOLOGICAL, - description="Cerebral blood flow", - references=["Davies 1993"] - ), - } - - # Clearance parameters - CLEARANCE = { - "Liver|Clearance": ParameterBounds( - min_value=0.0, - max_value=100.0, # Cannot exceed hepatic blood flow - default_value=1.0, - unit="L/h", - category=ParameterCategory.ENZYME_KINETICS, - description="Hepatic clearance", - references=["Rowland 1973"] - ), - "Kidney|Clearance": ParameterBounds( - min_value=0.0, - max_value=50.0, # Cannot exceed renal blood flow - default_value=1.0, - unit="L/h", - category=ParameterCategory.ENZYME_KINETICS, - description="Renal clearance", - references=["Rowland 1973"] - ), - } - - # Physicochemical properties - PHYSICOCHEMICAL = { - "Lipophilicity": ParameterBounds( - min_value=-5.0, - max_value=10.0, - default_value=1.0, - unit="logP", - category=ParameterCategory.PHYSICOCHEMICAL, - description="Octanol-water partition coefficient", - references=["Leo 1971"] - ), - "MolecularWeight": ParameterBounds( - min_value=50.0, - max_value=1000.0, - default_value=300.0, - unit="g/mol", - category=ParameterCategory.PHYSICOCHEMICAL, - description="Molecular weight", - references=[] - ), - "FractionUnbound": ParameterBounds( - min_value=0.0, - max_value=1.0, - default_value=0.1, - unit="dimensionless", - category=ParameterCategory.PHYSICOCHEMICAL, - description="Fraction unbound in plasma", - references=[] - ), - } - - @classmethod - def get_bounds(cls, parameter_path: str) -> Optional[ParameterBounds]: - """Get bounds for a parameter by path.""" - # Search in all categories - for category in [cls.ORGAN_VOLUMES, cls.BLOOD_FLOWS, cls.CLEARANCE, cls.PHYSICOCHEMICAL]: - if parameter_path in category: - return category[parameter_path] - - # Try partial matching - for category in [cls.ORGAN_VOLUMES, cls.BLOOD_FLOWS, cls.CLEARANCE, cls.PHYSICOCHEMICAL]: - for key, bounds in category.items(): - if key in parameter_path or parameter_path in key: - return bounds - - return None - - @classmethod - def validate_parameter(cls, parameter_path: str, value: float) -> tuple: - """ - Validate parameter value against bounds. 
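-
-        Note: unknown parameter paths fail open - they return (True, None, ...)
-        rather than rejecting the value, so absent bounds are not a safety gate.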
-
-        Returns:
-            (is_valid, bounds, message)
-        """
-        bounds = cls.get_bounds(parameter_path)
-
-        if bounds is None:
-            return (True, None, f"No bounds defined for {parameter_path}")
-
-        if not bounds.validate_value(value):
-            return (
-                False,
-                bounds,
-                f"Value {value} for {parameter_path} outside plausible range "
-                f"[{bounds.min_value}, {bounds.max_value}] {bounds.unit}"
-            )
-
-        return (True, bounds, "Valid")
-
-    @classmethod
-    def get_all_parameters(cls) -> Dict[str, ParameterBounds]:
-        """Get all defined parameters."""
-        all_params = {}
-        for category in [cls.ORGAN_VOLUMES, cls.BLOOD_FLOWS, cls.CLEARANCE, cls.PHYSICOCHEMICAL]:
-            all_params.update(category)
-        return all_params
-
-
-# NOTE: the imports below are used by the validators and the audit trail; in
-# the real module they belong at the top of the file with the other imports.
-import logging
-from datetime import datetime
-
-logger = logging.getLogger(__name__)
-
-
-# Integration with set_parameter_value
-class ValidatedSetParameterValueRequest(BaseModel):
-    """Parameter value request with validation."""
-
-    simulation_id: str
-    parameter_path: str
-    value: float
-    unit: Optional[str] = None
-    update_mode: Optional[str] = "absolute"
-    comment: Optional[str] = None
-
-    @validator('value')
-    def validate_physiological_bounds(cls, v, values):
-        """Validate against physiological bounds."""
-        if 'parameter_path' not in values:
-            return v
-
-        parameter_path = values['parameter_path']
-        is_valid, bounds, message = PBPKParameterDatabase.validate_parameter(
-            parameter_path, v
-        )
-
-        if not is_valid:
-            raise ValueError(message)
-
-        return v
-
-    @validator('parameter_path')
-    def validate_parameter_exists(cls, v):
-        """Warn if parameter not in database."""
-        bounds = PBPKParameterDatabase.get_bounds(v)
-        if bounds is None:
-            # Log warning but allow (might be custom parameter)
-            logger.warning(f"Parameter {v} not in database - no bounds validation")
-        return v
-
-
-# Parameter change audit trail
-class ParameterChangeAudit:
-    """Audit trail for parameter changes."""
-
-    def __init__(self):
-        self.changes: list = []
-
-    def log_change(
-        self,
-        simulation_id: str,
-        parameter_path: str,
-        old_value: float,
-        new_value: float,
-        user_id: str,
-        reason: str = None
-    ):
-        """Log a parameter change."""
-        change = {
-            "timestamp": datetime.utcnow().isoformat(),
-            "simulation_id": simulation_id,
-            "parameter_path": parameter_path,
-            "old_value": old_value,
-            "new_value": new_value,
-            # Relative change; abs() on the baseline keeps the magnitude
-            # positive even for negative parameter values.
-            "change_magnitude": abs(new_value - old_value) / abs(old_value) if old_value != 0 else float('inf'),
-            "user_id": user_id,
-            "reason": reason
-        }
-        self.changes.append(change)
-
-    def detect_p_hacking(self, simulation_id: str) -> list:
-        """Detect systematic parameter exploration (p-hacking)."""
-        sim_changes = [c for c in self.changes if c["simulation_id"] == simulation_id]
-
-        alerts = []
-
-        # Group by parameter
-        param_changes = {}
-        for change in sim_changes:
-            param = change["parameter_path"]
-            if param not in param_changes:
-                param_changes[param] = []
-            param_changes[param].append(change)
-
-        # Detect patterns
-        for param, changes in param_changes.items():
-            # Pattern 1: Many small changes to same parameter
-            if len(changes) > 5:
-                alerts.append({
-                    "type": "frequent_changes",
-                    "parameter": param,
-                    "count": len(changes),
-                    "recommendation": "Frequent parameter changes detected - possible optimization bias"
-                })
-
-            # Pattern 2: Oscillating values (searching for target)
-            if len(changes) >= 3:
-                values = [c["new_value"] for c in changes]
-                # Check for oscillation (up-down-up or down-up-down)
-                diffs = [values[i+1] - values[i] for i in range(len(values)-1)]
-                sign_changes = sum(1 for i in range(len(diffs)-1) if diffs[i] * diffs[i+1] < 0)
-
-                if sign_changes >= 2:
-                    alerts.append({
-                        "type": "oscillating_values",
-                        "parameter": param,
-                        "changes": len(changes),
-                        "recommendation": "Oscillating parameter values - possible target-seeking behavior"
-                    })
-
-            # Pattern 3: Large magnitude changes
-            large_changes = [c for c in changes if c["change_magnitude"] > 0.5]
-            if len(large_changes) > 2:
-                alerts.append({
-                    "type": "large_changes",
-                    "parameter": param,
-                    "count": len(large_changes),
-                    "recommendation": "Large parameter changes detected - review physiological plausibility"
-                })
-
-        return alerts
-```
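-
-A usage sketch for the pieces above (identifiers and values are illustrative; in
-a real tool handler the validation result should gate the change before it is applied):
-
-```python
-is_valid, bounds, message = PBPKParameterDatabase.validate_parameter(
-    "Liver|Clearance", 150.0  # above the 100 L/h reference ceiling
-)
-assert not is_valid and "outside plausible range" in message
-
-audit = ParameterChangeAudit()
-audit.log_change(
-    simulation_id="sim-001",           # illustrative IDs
-    parameter_path="Liver|Clearance",
-    old_value=1.0,
-    new_value=0.8,
-    user_id="analyst-42",
-    reason="literature-reported clearance",
-)
-print(audit.detect_p_hacking("sim-001"))  # [] until suspicious patterns accumulate
-```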
"oscillating_values", - "parameter": param, - "changes": len(changes), - "recommendation": "Oscillating parameter values - possible target-seeking behavior" - }) - - # Pattern 3: Large magnitude changes - large_changes = [c for c in changes if c["change_magnitude"] > 0.5] - if len(large_changes) > 2: - alerts.append({ - "type": "large_changes", - "parameter": param, - "count": len(large_changes), - "recommendation": "Large parameter changes detected - review physiological plausibility" - }) - - return alerts -``` - ---- - -> **Reviewed copy (2026-04-15):** This document was retained from the original package but lightly edited for consistency. -> Unless explicitly stated otherwise, code blocks are **reference implementations**, not validated patches, and scenario-based exploit narratives should not be read as reproduced proofs. - - - -## 2. Population Size Limits and Memory Quotas - -**File:** `src/mcp_bridge/services/job_service.py` - -```python -from pydantic import BaseModel, validator -import psutil -import os - -class JobResourceConfig(BaseModel): - """Resource limits for jobs.""" - - max_population_size: int = 5000 - max_memory_per_job_mb: int = 2048 # 2 GB - max_simulation_duration_seconds: int = 1800 # 30 minutes - max_concurrent_jobs_per_user: int = 5 - max_daily_jobs_per_user: int = 100 - - @validator('max_population_size') - def validate_population_size(cls, v): - if v > 10000: - raise ValueError("Population size cannot exceed 10000") - return v - -class ResourceQuotaEnforcer: - """Enforce resource quotas for jobs.""" - - def __init__(self, config: JobResourceConfig = None): - self.config = config or JobResourceConfig() - self.user_job_counts: Dict[str, Dict[str, int]] = {} - - def check_population_size(self, population_size: int) -> tuple: - """ - Check if population size is within quota. - - Returns: - (is_allowed, message) - """ - if population_size > self.config.max_population_size: - return ( - False, - f"Population size {population_size} exceeds maximum {self.config.max_population_size}. " - f"Contact administrator for large population simulations." - ) - - return (True, "Valid") - - def check_memory_quota(self, requested_memory_mb: int) -> tuple: - """ - Check if memory request is within quota. - - Returns: - (is_allowed, message) - """ - if requested_memory_mb > self.config.max_memory_per_job_mb: - return ( - False, - f"Memory request {requested_memory_mb} MB exceeds quota {self.config.max_memory_per_job_mb} MB" - ) - - # Check system memory - available_mb = psutil.virtual_memory().available / (1024 * 1024) - if requested_memory_mb > available_mb * 0.8: - return ( - False, - f"Insufficient system memory. Requested: {requested_memory_mb} MB, " - f"Available: {available_mb:.0f} MB" - ) - - return (True, "Valid") - - def check_user_quotas(self, user_id: str) -> tuple: - """ - Check if user is within daily and concurrent job quotas. - - Returns: - (is_allowed, message) - """ - user_counts = self.user_job_counts.get(user_id, { - "concurrent": 0, - "daily": 0, - "last_reset": datetime.utcnow() - }) - - # Reset daily count if new day - last_reset = user_counts["last_reset"] - if (datetime.utcnow() - last_reset).days >= 1: - user_counts["daily"] = 0 - user_counts["last_reset"] = datetime.utcnow() - - if user_counts["concurrent"] >= self.config.max_concurrent_jobs_per_user: - return ( - False, - f"Concurrent job limit reached ({self.config.max_concurrent_jobs_per_user}). " - f"Wait for existing jobs to complete." 
- ) - - if user_counts["daily"] >= self.config.max_daily_jobs_per_user: - return ( - False, - f"Daily job limit reached ({self.config.max_daily_jobs_per_user}). " - f"Try again tomorrow." - ) - - return (True, "Valid") - - def estimate_memory_requirement(self, population_size: int) -> int: - """ - Estimate memory requirement for population simulation. - - Returns: - Estimated memory in MB - """ - # Base memory for simulation - base_memory = 100 # MB - - # Per-patient memory (empirical estimate) - memory_per_patient = 0.5 # MB - - # Safety factor - safety_factor = 1.5 - - estimated = (base_memory + population_size * memory_per_patient) * safety_factor - - return int(estimated) - - def validate_job_request( - self, - user_id: str, - population_size: int - ) -> tuple: - """ - Validate complete job request against all quotas. - - Returns: - (is_valid, errors) - """ - errors = [] - - # Check population size - allowed, message = self.check_population_size(population_size) - if not allowed: - errors.append(message) - - # Check memory - memory_required = self.estimate_memory_requirement(population_size) - allowed, message = self.check_memory_quota(memory_required) - if not allowed: - errors.append(message) - - # Check user quotas - allowed, message = self.check_user_quotas(user_id) - if not allowed: - errors.append(message) - - return (len(errors) == 0, errors) - - def record_job_start(self, user_id: str, job_id: str): - """Record job start for quota tracking.""" - if user_id not in self.user_job_counts: - self.user_job_counts[user_id] = { - "concurrent": 0, - "daily": 0, - "last_reset": datetime.utcnow() - } - - self.user_job_counts[user_id]["concurrent"] += 1 - self.user_job_counts[user_id]["daily"] += 1 - - def record_job_end(self, user_id: str, job_id: str): - """Record job completion.""" - if user_id in self.user_job_counts: - self.user_job_counts[user_id]["concurrent"] = max( - 0, - self.user_job_counts[user_id]["concurrent"] - 1 - ) - - -# Integration with job submission -class ResourceConstrainedJobService: - """Job service with resource quota enforcement.""" - - def __init__(self): - self.quota_enforcer = ResourceQuotaEnforcer() - - async def submit_population_simulation( - self, - user_id: str, - simulation_id: str, - population_size: int, - **kwargs - ) -> JobRecord: - """Submit population simulation with quota checks.""" - # Validate against quotas - is_valid, errors = self.quota_enforcer.validate_job_request( - user_id, population_size - ) - - if not is_valid: - raise QuotaExceeded(f"Job validation failed: {'; '.join(errors)}") - - # Record job start - self.quota_enforcer.record_job_start(user_id, simulation_id) - - try: - # Create job - job = JobRecord( - job_id=str(uuid.uuid4()), - simulation_id=simulation_id, - job_type="population_simulation", - population_size=population_size, - user_id=user_id, - estimated_memory_mb=self.quota_enforcer.estimate_memory_requirement( - population_size - ), - submitted_at=datetime.utcnow() - ) - - # Submit to queue - await self._submit_to_queue(job) - - return job - - except Exception: - # Rollback quota on failure - self.quota_enforcer.record_job_end(user_id, simulation_id) - raise -``` - ---- - -## 3. 
Container Security Hardening - -**File:** `Dockerfile` (Secure Multi-Stage Build) - -```dockerfile -# ============================================================================= -# PBPK-MCP Secure Dockerfile -# Multi-stage build with security hardening -# ============================================================================= - -# Stage 1: Build environment (not used in final image) -FROM r-base:4.3.0 AS builder - -# Install build dependencies -RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - libxml2-dev \ - libcurl4-openssl-dev \ - libssl-dev \ - && rm -rf /var/lib/apt/lists/* - -# Install R packages -RUN R -e "install.packages('ospsuite', repos='https://...')" \ - && R -e "install.packages('rxode2', repos='https://...')" - -# Stage 2: Runtime environment (minimal, secure) -FROM gcr.io/distroless/cc-debian11:nonroot - -# Copy R installation from builder -COPY --from=builder /usr/lib/R /usr/lib/R -COPY --from=builder /usr/local/lib/R /usr/local/lib/R -COPY --from=builder /usr/share/R /usr/share/R - -# Copy application code -COPY --chown=nonroot:nonroot ./src /app/src -COPY --chown=nonroot:nonroot ./requirements.txt /app/ - -# Set working directory -WORKDIR /app - -# Switch to non-root user -USER nonroot:nonroot - -# Environment variables -ENV R_HOME=/usr/lib/R -ENV R_LIBS_USER=/usr/local/lib/R/site-library -ENV TOXMCP_CONTAINER_DIGEST=${CONTAINER_DIGEST} -ENV TOXMCP_GIT_COMMIT=${GIT_COMMIT} - -# Health check -HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ - CMD ["/app/src/health_check"] - -# Expose port -EXPOSE 8080 - -# Run application -ENTRYPOINT ["Rscript", "/app/src/main.R"] -``` - -**Seccomp Profile:** `pbpk-seccomp.json` - -```json -{ - "defaultAction": "SCMP_ACT_ERRNO", - "architectures": ["SCMP_ARCH_X86_64", "SCMP_ARCH_X86"], - "syscalls": [ - { - "names": [ - "accept", - "accept4", - "bind", - "clone", - "close", - "connect", - "epoll_create1", - "epoll_ctl", - "epoll_pwait", - "exit", - "exit_group", - "fcntl", - "fstat", - "futex", - "getpid", - "getrandom", - "getsockname", - "getsockopt", - "listen", - "mmap", - "mprotect", - "munmap", - "openat", - "read", - "recvfrom", - "recvmsg", - "rt_sigaction", - "rt_sigprocmask", - "rt_sigreturn", - "select", - "sendmsg", - "sendto", - "setitimer", - "setsockopt", - "socket", - "write", - "writev" - ], - "action": "SCMP_ACT_ALLOW" - }, - { - "names": [ - "execve", - "execveat", - "fork", - "vfork", - "ptrace", - "mount", - "umount", - "umount2", - "reboot", - "open_by_handle_at" - ], - "action": "SCMP_ACT_ERRNO" - } - ] -} -``` - -**Docker Compose Secure Configuration:** `docker-compose.secure.yml` - -```yaml -version: '3.8' - -services: - pbpk-mcp: - build: - context: . 
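-      # NOTE: the `args:` below assume matching `ARG CONTAINER_DIGEST` and
-      # `ARG GIT_COMMIT` declarations in the Dockerfile; the Dockerfile shown
-      # above omits them, so add those ARG lines for its ENV values to resolve.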
-      dockerfile: Dockerfile.secure
-      args:
-        CONTAINER_DIGEST: ${CONTAINER_DIGEST}
-        GIT_COMMIT: ${GIT_COMMIT}
-
-    # Security options
-    security_opt:
-      - no-new-privileges:true
-      - seccomp:pbpk-seccomp.json
-      - apparmor:pbpk-profile
-
-    # Capabilities
-    cap_drop:
-      - ALL
-    cap_add:
-      - CHOWN
-      - SETGID
-      - SETUID
-
-    # Read-only root filesystem
-    read_only: true
-    tmpfs:
-      - /tmp:noexec,nosuid,size=100m
-      - /var/tmp:noexec,nosuid,size=100m
-
-    # Resource limits
-    deploy:
-      resources:
-        limits:
-          cpus: '4.0'
-          memory: 8G
-        reservations:
-          cpus: '1.0'
-          memory: 2G
-
-    # Network
-    networks:
-      - toxmcp-internal
-
-    # Environment
-    environment:
-      - TOXMCP_ENVIRONMENT=production
-      - TOXMCP_SECURE_MODE=true
-      - MAX_POPULATION_SIZE=5000
-      - MAX_MEMORY_PER_JOB_MB=2048
-
-    # Health check
-    healthcheck:
-      test: ["CMD", "/app/health_check"]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-      start_period: 40s
-
-networks:
-  toxmcp-internal:
-    internal: true  # No external access
-```
-
----
-
-## 4. Floating-Point Determinism
-
-**File:** `src/mcp_bridge/audit/trail.py`
-
-```python
-import hashlib
-import json
-from decimal import Decimal, ROUND_HALF_UP
-from typing import Any
-
-class CanonicalJsonEncoder(json.JSONEncoder):
-    """
-    JSON encoder with canonical floating-point representation.
-
-    Ensures that the same scientific results produce identical
-    audit hashes across different hardware and Python versions.
-    """
-
-    # Decimal places retained when canonicalizing floats (kept just below the
-    # ~15-17 significant digits of IEEE 754 double precision)
-    FLOAT_PRECISION = 14
-
-    def encode(self, obj: Any) -> str:
-        return json.dumps(
-            self._canonicalize(obj),
-            separators=(",", ":"),
-            sort_keys=True,
-            ensure_ascii=True
-        )
-
-    def _canonicalize(self, obj: Any) -> Any:
-        """Convert object to canonical form."""
-        if isinstance(obj, float):
-            # Handle special values
-            if obj != obj:  # NaN
-                return "NaN"
-            if obj == float('inf'):
-                return "Infinity"
-            if obj == float('-inf'):
-                return "-Infinity"
-
-            # Round to fixed precision (14 decimal places, per FLOAT_PRECISION).
-            # NOTE: very large magnitudes can exceed the default decimal context
-            # and raise InvalidOperation; widen the context or cap exponents
-            # when such values are expected.
-            d = Decimal(obj)
-            quantized = d.quantize(
-                Decimal('0.00000000000000'),
-                rounding=ROUND_HALF_UP
-            )
-            return float(quantized)
-
-        elif isinstance(obj, dict):
-            # Sort keys recursively
-            return {
-                k: self._canonicalize(v)
-                for k, v in sorted(obj.items())
-            }
-
-        elif isinstance(obj, list):
-            return [self._canonicalize(item) for item in obj]
-
-        elif isinstance(obj, str):
-            # Normalize Unicode
-            return obj.encode('utf-8', 'ignore').decode('utf-8')
-
-        elif isinstance(obj, (int, bool, type(None))):
-            return obj
-
-        else:
-            # Convert unknown types to string
-            return str(obj)
-
-
-def compute_deterministic_hash(event: dict) -> str:
-    """
-    Compute deterministic hash for audit event.
-
-    This ensures that identical scientific results produce
-    identical hashes regardless of hardware or Python version.
- """ - # Remove hash field if present - temp = dict(event) - temp.pop("hash", None) - temp.pop("signature", None) - - # Use canonical JSON encoding - encoder = CanonicalJsonEncoder() - payload = encoder.encode(temp) - - # Compute hash - return hashlib.sha256(payload.encode('utf-8')).hexdigest() - - -# Test determinism -def test_hash_determinism(): - """Test that hash is deterministic across calls.""" - event = { - "prediction": 0.1 + 0.2, # 0.30000000000000004 - "confidence": 0.95, - "nested": { - "value": 1.234567890123456789 - } - } - - hash1 = compute_deterministic_hash(event) - hash2 = compute_deterministic_hash(event) - - assert hash1 == hash2, "Hash should be deterministic" - - # Test with equivalent values - event2 = { - "prediction": 0.3, # Mathematically equivalent - "confidence": 0.95, - "nested": { - "value": 1.234567890123456789 - } - } - - hash3 = compute_deterministic_hash(event2) - - # Note: These may differ due to floating-point representation - # but should be consistent within the same Python session - print(f"Hash 1: {hash1}") - print(f"Hash 2: {hash2}") - print(f"Hash 3: {hash3}") -``` - ---- - -*These remediation code snippets address the critical issues identified in the PBPK-MCP audit.* diff --git a/ToxMCP_Audit_Reviewed_v2/toxmcp_adversarial_audit_report.md b/ToxMCP_Audit_Reviewed_v2/toxmcp_adversarial_audit_report.md deleted file mode 100644 index 4efeaeb..0000000 --- a/ToxMCP_Audit_Reviewed_v2/toxmcp_adversarial_audit_report.md +++ /dev/null @@ -1,386 +0,0 @@ -# ToxMCP Scientific Adversary Audit Report - -**Audit Date:** 2026-04-15 -**Auditor:** Scientific Adversary Agent -**Scope:** comptox-mcp, oqt-mcp, aop-mcp, pbpk-mcp -**Mission:** Identify attack surfaces for generating false confidence and misleading toxicological conclusions - ---- - -> **Reviewed copy (2026-04-15):** This document was retained from the original package but lightly edited for consistency. -> Unless explicitly stated otherwise, code blocks are **reference implementations**, not validated patches, and scenario-based exploit narratives should not be read as reproduced proofs. - - - -## Executive Summary - -The ToxMCP ecosystem, while architecturally sophisticated, contains **multiple critical attack surfaces** that an AI agent (or malicious user) could exploit to generate misleading toxicological conclusions with false confidence. The most severe vulnerabilities exist in: - -1. **Applicability Domain (AD) enforcement gaps** in O-QT-MCP QSAR predictions -2. **Confidence interpolation without calibration** across AOP-MCP assessment tools -3. **Parameter manipulation without physiological plausibility guardrails** in PBPK-MCP -4. **Missing epistemic uncertainty propagation** across the tool chain - ---- - -## 🔴 CRITICAL VULNERABILITIES - -### V-001: Missing Applicability Domain Enforcement (O-QT-MCP) - -**Severity:** 🔴 Critical -**Location:** `oqt-mcp/src/tools/implementations/o_qt_qsar_tools.py` -**Attack Surface:** QSAR prediction workflow - -**Description:** -The O-QT-MCP mentions "applicability domain review" in its documentation and schemas, but the actual enforcement is **qualitative and post-hoc**. 
The `build_hazard_applicability_domain()` function (line 61 in hazard_contracts.py) creates a summary but does NOT: - -- Calculate a quantitative Applicability Domain Index (ADI) -- Enforce chemical class boundary checks -- Block predictions for out-of-domain chemicals -- Require explicit user acknowledgment for extrapolation - -**Attack Example:** -```python -# An agent can obtain QSAR predictions for a chemical completely outside -# the training set without receiving a clear UNRELIABLE flag -{ - "tool": "run_qsar_prediction", - "arguments": { - "chem_id": "EXOTIC_CHEM_001", # Novel scaffold not in training data - "model_guid": "skin_sensitization_model" - } -} -# Returns: prediction with "medium" confidence and AD notes buried in metadata -``` - -**False Confidence Generation:** -- The `oqtHazardEvidenceSummary.v1.json` schema includes `applicabilityDomain` as a required field, but it's a **narrative summary**, not a quantitative gate -- An AI agent can chain predictions → ignore AD warnings → present conclusions as reliable - -**Cross-Reference:** V-005 (Confirmation Bias Accumulation) - ---- - -### V-002: Confidence Score False Precision (AOP-MCP) - -**Severity:** 🔴 Critical -**Location:** `aop-mcp/src/tools/semantic/` (confidence assessment) -**Attack Surface:** `assess_aop_confidence` tool - -**Description:** -The `assess_aop_confidence` tool returns heuristic confidence assessments that **appear quantitative but lack calibration**. From the README documentation: - -> "`assess_aop_confidence` is OECD-aligned, not OECD-complete... confidence outputs often remain partial even when the tool is behaving correctly" - -The tool returns confidence dimensions as text ("high", "medium", "low") but these are: -- **Not probabilistic** - no confidence intervals or uncertainty quantification -- **Not calibrated** - "high" confidence doesn't map to a specific accuracy rate -- **Text-mining derived** - based on evidence text presence, not mechanistic validation - -**Attack Example:** -```python -# Agent chains multiple AOP assessments, each with "medium" confidence -# The aggregate appears to support a conclusion, but confidence is not additive -{ - "aop_1_confidence": "medium", # Based on sparse KE evidence - "aop_2_confidence": "medium", # Based on different sparse evidence - "aop_3_confidence": "medium", # Based on yet different sparse evidence -} -# Agent reports: "Multiple AOPs show consistent medium-to-high confidence" -``` - -**False Precision Pattern:** -- The schema allows `confidence_dimensions` to be reported without accompanying `uncertainty_quantification` -- No warning when confidence is inferred from path structure alone (without text evidence) - -**Cross-Reference:** V-005 (Confirmation Bias Accumulation) - ---- - -### V-003: PBPK Parameter P-Hacking (PBPK-MCP) - -**Severity:** 🔴 Critical -**Location:** `pbpk-mcp/src/mcp/tools/set_parameter_value.py` -**Attack Surface:** Parameter editing and sensitivity analysis - -**Description:** -The `set_parameter_value` tool allows direct manipulation of PBPK parameters with **minimal physiological plausibility guardrails**: - -```python -class SetParameterValueRequest(BaseModel): - simulation_id: str - parameter_path: str # No validation against physiological bounds - value: float # No range validation - unit: Optional[str] # Unit conversion but no sanity checks - update_mode: Optional[str] = "absolute" # "relative" mode compounds errors -``` - -**Attack Example - Parameter Inflation:** -```python -# Agent systematically tweaks clearance 
parameters until desired outcome -for clearance_factor in [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]: - set_parameter_value( - parameter_path="Liver|Clearance", - value=baseline * clearance_factor - ) - result = run_simulation() - if result.auc < safety_threshold: - return f"Model shows safe clearance at factor {clearance_factor}" -# No audit trail of parameter exploration; best result reported -``` - -**Missing Guardrails:** -- No physiological bounds checking (e.g., liver blood flow cannot exceed cardiac output) -- No parameter correlation enforcement (changing one parameter should affect correlated ones) -- No "p-hacking detection" for systematic parameter sweeps -- The `run_sensitivity_analysis` tool doesn't flag when results are cherry-picked - -**Cross-Reference:** V-005 (Confirmation Bias Accumulation) - ---- - -## 🟠 HIGH SEVERITY VULNERABILITIES - -### V-004: Read-Across Analogue Bias (O-QT-MCP) - -**Severity:** 🟠 High -**Location:** `oqt-mcp/schemas/oqtReadAcrossSummary.v1.json` -**Attack Surface:** Grouping and read-across justification - -**Description:** -The `build_grouping_justification` tool can suggest read-across from chemicals that are **structurally similar but toxicologically divergent**. The schema requires: - -- `structure_comparison` - structural similarity assessment -- `physicochemical_comparison` - physchem property comparison -- BUT: **No mechanistic justification gate** for Mode of Action (MOA) alignment - -**Attack Example - Analogue Bias:** -```python -# Agent groups chemicals by structural similarity alone -{ - "tool": "build_grouping_justification", - "arguments": { - "identifier": "Target_Chemical", - "analogue_identifiers": ["Analogue_A", "Analogue_B"], - "profiler_guids": ["structural_profiler_only"] # No MOA profiler - } -} -# Returns: grouping justification showing high structural similarity -# Problem: Target has genotoxic MOA, analogues have non-genotoxic MOA -``` - -**Schema Weakness:** -The `oqtReadAcrossSummary.v1.json` schema includes `applicabilityDomain` but it's a **qualitative field** without: -- MOA concordance scoring -- Toxicodynamic similarity metrics -- Mechanistic alert flags - -**Cross-Reference:** V-001 (Missing AD Enforcement) - ---- - -### V-005: Confirmation Bias Accumulation Across Tool Chain - -**Severity:** 🟠 High -**Location:** Cross-suite (comptox → oqt → aop → pbpk) -**Attack Surface:** Multi-tool chaining workflows - -**Description:** -There is **NO meta-assessment tool** that tracks epistemic uncertainty propagation across the tool chain. 
When an AI agent chains: - -``` -search_chemical → profile_chemical → run_qsar → assess_aop → run_pbpk -``` - -Each step can: -- Generate predictions with unquantified uncertainty -- Pass "confidence" forward without uncertainty accumulation -- Filter evidence that doesn't support the emerging conclusion - -**Attack Example - Confirmation Bias Chain:** -```python -# Step 1: Search finds chemical -search_result = search_chemicals("mystery_compound") - -# Step 2: Profiling shows some alerts (but agent focuses on benign ones) -profile = run_profiler(profiler_guid="safe_profiler") - -# Step 3: QSAR prediction with AD warning (agent ignores warning) -qsar = run_qsar_prediction(chem_id, model_guid="safe_model") -# AD note: "Chemical outside training domain" buried in metadata - -# Step 4: AOP assessment finds supportive pathway (ignores contradictory ones) -aop = assess_aop_confidence(aop_id="supportive_aop") - -# Step 5: PBPK with tweaked parameters shows favorable kinetics -set_parameter_value(parameter_path="clearance", value=high_value) -pbpk = run_simulation() - -# Final conclusion: "Multiple lines of evidence support safety" -# Reality: Each step had warnings that were filtered out -``` - -**Missing Safeguard:** -- No `uncertainty_propagation` tool -- No `evidence_contradiction_detection` across modules -- No `confidence_calibration` across the chain - -**Cross-Reference:** All other vulnerabilities - ---- - -## 🟡 MEDIUM SEVERITY VULNERABILITIES - -### V-006: CompTox Evidence Federation Gaps (CompTox-MCP) - -**Severity:** 🟡 Medium -**Location:** `comptox-mcp` (evidence federation) -**Attack Surface:** Multi-source evidence aggregation - -**Description:** -The CompTox-MCP federates evidence from multiple EPA sources but: -- **No source conflict resolution** - when sources disagree, all are presented equally -- **No evidence quality weighting** - high-quality studies not distinguished from preliminary data -- **No temporal decay** - older studies not flagged as potentially superseded - -**Attack Example:** -```python -# Agent can selectively cite evidence from conflicting sources -{ - "bioactivity_assays": [ - {"source": "ToxCast", "result": "inactive", "quality": "high"}, - {"source": "legacy_study", "result": "active", "quality": "low"} - ] -} -# Agent reports: "Study shows activity" (citing only legacy_study) -``` - ---- - -### V-007: Qualitative Uncertainty Masking (All Modules) - -**Severity:** 🟡 Medium -**Location:** Cross-suite schemas -**Attack Surface:** Uncertainty reporting - -**Description:** -All ToxMCP modules use **qualitative uncertainty descriptors** that mask underlying quantitative uncertainty: - -| Module | Uncertainty Field | Values | Problem | -|--------|------------------|--------|---------| -| O-QT | `accepted_uncertainty_level` | "low", "medium", "high" | No probabilistic meaning | -| AOP | `confidence_dimensions` | "high", "medium", "low" | Not calibrated | -| PBPK | `qualificationLevel` | "qualified", "unqualified" | Binary when continuous needed | -| CompTox | `evidence_quality` | "high", "medium", "low" | Subjective | - -**Attack Example:** -```python -# Agent can interpret "medium" uncertainty differently based on desired conclusion -if supporting_conclusion: - interpret("medium") = "acceptable for decision-making" -else: - interpret("medium") = "requires further study" -``` - ---- - -## Attack Surface Summary Matrix - -| Attack Vector | O-QT-MCP | AOP-MCP | PBPK-MCP | CompTox-MCP | Severity | 
-|--------------|----------|---------|----------|-------------|----------| -| False confidence from out-of-domain predictions | ✅ | ❌ | ❌ | ❌ | 🔴 | -| Confidence interpolation without calibration | ❌ | ✅ | ❌ | ❌ | 🔴 | -| Parameter p-hacking | ❌ | ❌ | ✅ | ❌ | 🔴 | -| Read-across analogue bias | ✅ | ❌ | ❌ | ❌ | 🟠 | -| Confirmation bias accumulation | ✅ | ✅ | ✅ | ✅ | 🟠 | -| Evidence selection bias | ✅ | ✅ | ❌ | ✅ | 🟡 | -| Qualitative uncertainty masking | ✅ | ✅ | ✅ | ✅ | 🟡 | - ---- - -## Concrete Attack Scenarios - -### Scenario 1: The "Safe by Design" Deception - -**Goal:** Convince stakeholders a hazardous chemical is safe - -**Attack Chain:** -1. Use O-QT-MCP to run QSAR models, selecting only those with favorable predictions -2. Ignore applicability domain warnings (buried in metadata) -3. Use AOP-MCP to find pathways where the chemical doesn't trigger key events -4. Use PBPK-MCP with inflated clearance parameters to show rapid elimination -5. Present conclusion: "Multiple independent lines of evidence support safety" - -**Vulnerabilities Exploited:** V-001, V-002, V-003, V-005 - ---- - -### Scenario 2: The "Toxic by Association" Smear - -**Goal:** Falsely associate a competitor's chemical with toxicity - -**Attack Chain:** -1. Use O-QT-MCP grouping to find structurally similar analogues with known toxicity -2. Ignore MOA differences (no mechanistic gate) -3. Build read-across dossier showing "consistent toxicity pattern" -4. Use AOP-MCP to construct speculative pathway linking chemical to adverse outcome -5. Present conclusion: "Read-across and AOP analysis indicate significant concern" - -**Vulnerabilities Exploited:** V-004, V-002, V-005 - ---- - -### Scenario 3: The "Confidence Inflation" Report - -**Goal:** Generate a report with inflated confidence metrics - -**Attack Chain:** -1. Run multiple QSAR predictions (O-QT-MCP) - each returns "medium" confidence -2. Run AOP assessments (AOP-MCP) - each returns "medium" confidence -3. Run PBPK simulations (PBPK-MCP) with favorable parameter sets -4. Aggregate results without uncertainty propagation -5. Present conclusion: "Consistent medium-to-high confidence across all assessments" - -**Vulnerabilities Exploited:** V-002, V-003, V-005, V-007 - ---- - -## Recommendations - -### Immediate (Critical) - -1. **Implement quantitative ADI calculation** in O-QT-MCP with hard gates for out-of-domain predictions -2. **Add confidence calibration** to AOP-MCP with explicit uncertainty quantification -3. **Implement physiological plausibility checks** in PBPK-MCP parameter editing -4. **Create uncertainty propagation tool** for cross-suite workflows - -### Short-term (High) - -5. **Add MOA concordance scoring** to O-QT-MCP read-across -6. **Implement evidence contradiction detection** across modules -7. **Add p-hacking detection** for systematic parameter exploration - -### Medium-term (Medium) - -8. **Standardize uncertainty representation** across all modules (probabilistic where possible) -9. **Implement evidence quality weighting** in CompTox-MCP -10. **Add temporal decay flags** for older studies - ---- - -## Conclusion - -The ToxMCP ecosystem, while innovative, contains significant attack surfaces that could be exploited to generate misleading toxicological conclusions. The most critical vulnerabilities are: - -1. **Missing AD enforcement** allowing out-of-domain predictions -2. **False precision** in confidence scores without calibration -3. **Parameter manipulation** without physiological guardrails -4. 
**No uncertainty propagation** across tool chains - -An AI agent with access to these tools could systematically exploit these vulnerabilities to build a case for virtually any predetermined conclusion, while appearing to follow rigorous scientific protocols. - -**The appearance of rigor is the most dangerous vulnerability of all.** - ---- - -*Report generated by Scientific Adversary Agent for ToxMCP Security Audit* diff --git a/ToxMCP_Audit_Reviewed_v2/toxmcp_contract_audit_report.md b/ToxMCP_Audit_Reviewed_v2/toxmcp_contract_audit_report.md deleted file mode 100644 index 5c98c97..0000000 --- a/ToxMCP_Audit_Reviewed_v2/toxmcp_contract_audit_report.md +++ /dev/null @@ -1,399 +0,0 @@ -# ToxMCP Suite: Contract Layer Architecture Audit Report -## Cross-Suite Orchestration Analysis - -**Audit Date:** 2026-04-15 -**Auditor:** Cross-Suite Orchestration Architect -**Scope:** comptox-mcp, oqt-mcp, aop-mcp, pbpk-mcp - ---- - -> **Reviewed copy (2026-04-15):** This document was retained from the original package but lightly edited for consistency. -> Unless explicitly stated otherwise, code blocks are **reference implementations**, not validated patches, and scenario-based exploit narratives should not be read as reproduced proofs. - - - -## Executive Summary - -The ToxMCP suite demonstrates sophisticated modular architecture with clear domain boundaries, but critical gaps exist in the **Contract Layer** that prevent coherent cross-suite workflows. The "Swiss Army Knife" problem is real: each module is sharp individually, but they lack the integration mechanisms to form a *coherent argument*. - -### Key Finding: Orchestrator Responsibility Is Documented but Not Implemented -The documentation repeatedly references a "downstream orchestrator" and "future ToxClaw orchestration layer" but **no such orchestrator exists** in the codebase. This is the single most critical architectural gap. - ---- - -## 1. 
Contract Drift Analysis - -### 🔴 CRITICAL: Evidence Block Structural Incompatibility - -| Module | Evidence Block Structure | Incompatibility | -|--------|-------------------------|-----------------| -| **CompTox-MCP** | `hazardEvidenceSummary.v1.json` - Flat structure with `datasets[]`, `keyFindings[]` | No `evidenceBlocks` wrapper | -| **O-QT-MCP** | `oqtHazardEvidenceSummary.v1.json` - Nested `evidenceBlocks{endpointData, profiling, metabolism, qsar}` | Uses `evidenceBlock` with `status`, `basis`, `keyEvidence[]` | -| **AOP-MCP** | `get_ker.response.schema.json` - `evidence_blocks{biological_plausibility, empirical_support, quantitative_understanding}` | Uses `evidenceBlock` with `text`, `heuristic_call`, `basis` | - -### Specific Contract Drift Examples - -#### 1.1 Field Name Inconsistencies (camelCase vs snake_case) - -``` -CompTox: "chemicalRef", "keyFindings", "sourceDataset" -O-QT: "chemicalIdentity", "endpointSummaries", "evidenceBlocks" -AOP: "overall_applicability", "evidence_blocks", "heuristic_call" -``` - -**File References:** -- `comptox-mcp/schemas/hazardEvidenceSummary.v1.json` (lines 16-45) -- `oqt-mcp/schemas/oqtHazardEvidenceSummary.v1.json` (lines 40-70) -- `aop-mcp/docs/contracts/schemas/read/get_ker.response.schema.json` (lines 125-134) - -#### 1.2 Evidence Block Schema Mismatch - -**O-QT `evidenceBlock` (lines 460-499):** -```json -{ - "summary": "string|null", - "status": "coverageState", - "basis": "string", - "keyEvidence": ["string"], - "references": ["referenceRecord"], - "provenanceRecords": ["provenanceRecord"] -} -``` - -**AOP `evidenceBlock` (lines 160-171):** -```json -{ - "text": "string|null", - "heuristic_call": "string", - "basis": "string", - "references": ["object"], - "provenance": ["provenanceRecord"] -} -``` - -**Transformation Loss:** A CompTox hazard evidence block CANNOT be directly consumed by AOP-MCP draft authoring without field mapping: -- `keyFindings[]` -> `evidence_blocks` requires manual transformation -- `confidence` (0-1 float in CompTox) -> `heuristic_call` (string in AOP) -- No shared `provenanceRecord` structure - -### 🟠 HIGH: Unit Mismatches - -**CompTox hazard evidence:** -- Uses `"unit": "log_mg_kg"` (line 667 in interop.py) -- ToxValDB: `mg/kg`, `uM`, `ppm` (mixed) - -**O-QT QSAR findings:** -- `"unit": "string"` (line 267-268 in oqtHazardEvidenceSummary.v1.json) -- No standardization enforced - -**PBPK context:** -- HTTK: `L/h/kg`, `1/hr` -- ADME/IVIVE: `L/h/kg` - -**Risk:** Downstream orchestrator must handle unit conversion without explicit metadata about unit systems. - -### 🟠 HIGH: Ontology Versioning Conflicts - -**AOP-MCP:** -- Uses AOP-Wiki RDF/SPARQL with OECD AOP-KB -- `assess_aop_confidence.response.schema.json` includes `oecd_alignment` field -- References "OECD-aligned" contracts throughout - -**CompTox-MCP:** -- Uses internal AOP crosswalk (bioactivity_aop mappings) -- `aopLinkageSummary.v1.json` has different `mapping` structure - -**Gap:** No shared ontology registry or version negotiation mechanism exists. - ---- - -## 2. The Orchestrator Gap - -### 🔴 CRITICAL: Missing Meta-Reasoning Layer - -**Finding:** The "downstream orchestrator" is referenced 20+ times across documentation but **DOES NOT EXIST** in the codebase. 
- -**Documentation References:** -- `oqt-mcp/docs/architecture.md` (line 47-56): "A downstream orchestrator sits above O-QT MCP" -- `oqt-mcp/docs/integration_orchestrators.md` (line 57): "Final suite-level evidence synthesis belongs in a downstream orchestrator" -- `comptox-mcp/docs/architecture_overview.md` (line 105): "future ToxClaw orchestration layer" - -**What the Orchestrator Should Do (but doesn't exist):** -1. **Evidence Deduplication:** Prevent double-counting when CompTox and O-QT both report similar hazard findings -2. **Contradiction Detection:** Flag when CompTox says "non-toxic" but AOP suggests "liver injury via different pathway" -3. **Cross-Module Consistency:** Ensure PBPK simulation results align with hazard evidence -4. **Narrative Coherence:** Verify PDF report from O-QT doesn't contradict PBPK results - -### 🟠 HIGH: No Narrative Consistency Checker - -**Example Scenario:** -``` -CompTox-MCP: "No genotoxicity signal detected in ToxCast assays" -AOP-MCP: "AOP 42: Liver steatosis via PPARG activation" -O-QT-MCP: "Profiler alert: potential DNA binding mechanism" -PBPK-MCP: "High hepatic concentration predicted" -``` - -**Question:** Where is the component that detects the tension between "no genotoxicity" and "DNA binding mechanism"? - -**Answer:** Nowhere. Each module operates in isolation. - -### 🟡 MEDIUM: GenRA Orchestrator is Experimental-Only - -**File:** `comptox-mcp/src/epacomp_tox/orchestrator/workflow.py` - -The `GenRAOrchestrator` class exists but: -- Is marked as **experimental** in architecture docs -- Only handles CompTox-internal workflows -- Does NOT integrate with O-QT, AOP, or PBPK modules -- Has no cross-module transaction management - ---- - -## 3. Transaction Boundaries - -### 🔴 CRITICAL: No Cross-Module Rollback Mechanism - -**Scenario Analysis:** - -``` -1. O-QT-MCP successfully generates grouping dossier -2. AOP-MCP fails to retrieve AOP (SPARQL timeout) -3. PBPK-MCP simulation completes -4. CompTox-MCP evidence pack assembly fails (API error) -``` - -**Current Behavior:** -- Each module operates independently -- No distributed transaction coordinator -- Partial results can be returned without context - -**Risk:** System can produce **partial, misleading safety reports** with missing context. - -**Evidence from Code:** -- `oqt-mcp/docs/architecture.md` (line 66): "Async queue and persistence layer remain roadmap work" -- `comptox-mcp/src/epacomp_tox/orchestrator/workflow.py` (lines 91-114): Error handling only within single workflow, no cross-module coordination - -### 🟠 HIGH: PBPK Has Session Registry, Others Don't - -**PBPK-MCP:** -- Has `mcp.session_registry` for simulation handles -- Supports job queue with Redis -- Has rollback via snapshot mechanism - -**Other Modules:** -- No session registry -- No job persistence -- No rollback capability - -**Gap:** Inconsistent state management across suite. - ---- - -## 4. Schema Evolution Strategy - -### 🔴 CRITICAL: No Schema Registry or Version Negotiation - -**Current State:** - -| Schema | Version | Version Detection | -|--------|---------|-------------------| -| `oqtWorkflowRecord.v1.json` | v1 | Hardcoded `const: "v1"` | -| `oqtHazardEvidenceSummary.v1.json` | v1 | Hardcoded `const: "v1"` | -| `hazardEvidenceSummary.v1.json` | v1 | In filename only | -| `aopLinkageSummary.v1.json` | v1 | In filename only | - -**Problems:** -1. **No schema registry** - Consumers cannot discover available versions -2. **No version negotiation** - Cannot request `v1` vs `v2` at runtime -3. 
**Breaking changes undefined** - No migration path documented - -**File References:** -- `oqt-mcp/schemas/oqtWorkflowRecord.v1.json` (lines 26-28): Hardcoded version -- `comptox-mcp/schemas/README.md`: "Portable schema versions are intentionally independent from package patch releases" - -### 🟠 HIGH: Inconsistent Version Declaration Patterns - -**O-QT Pattern (explicit):** -```json -"schemaName": { "const": "oqtWorkflowRecord" }, -"schemaVersion": { "const": "v1" } -``` - -**CompTox Pattern (implicit):** -```json -"$id": "https://epa.gov/comptox/schemas/hazardEvidenceSummary.v1.json" -``` - -**AOP Pattern (none):** -```json -"$schema": "https://json-schema.org/draft/2020-12/schema" -// No version in schema itself -``` - ---- - -## 5. Integration Anti-Patterns Catalog - -### Anti-Pattern 1: "Hope for the Best" Integration -**Evidence:** `comptox-mcp/src/epacomp_tox/orchestrator/workflow.py` (lines 388-411) -```python -try: - evidence_pack = self.interop_resource.assemble_comptox_evidence_pack(...) - aop_summary = self.interop_resource.build_aop_linkage_summary(...) - pbpk_bundle = self.interop_resource.build_pbpk_context_bundle(...) -except Exception as exc: - guardrails.append(...) - return None -``` - -**Problem:** Interop attachments can fail silently; no retry or compensation logic. - -### Anti-Pattern 2: "Every Module for Itself" Provenance -**CompTox Provenance:** -```json -{ - "sourceMcp": "epacomp-tox-mcp", - "generatedAt": "timestamp", - "sources": [...] -} -``` - -**O-QT Provenance:** -```json -{ - "workflowId": "string", - "sourceSystem": "string", - "generatedBy": "string", - "generatedAt": "timestamp" -} -``` - -**AOP Provenance:** -```json -{ - "source": "string", - "field": "string", - "transformation": "string|null", - "confidence": "string|null" -} -``` - -**Problem:** Three different provenance structures; no unified audit trail. - -### Anti-Pattern 3: Ambiguous Orchestrator Ownership -The orchestrator is simultaneously: -- Essential for final synthesis (per docs) -- Non-existent in code -- Referenced as "future ToxClaw layer" - ---- - -## 6. Swiss Army Knife Problem Assessment - -### Can the Tools Form a Coherent Argument? - -| Capability | Status | Gap | -|------------|--------|-----| -| Individual hazard assessment | Working | - | -| Individual AOP discovery | Working | - | -| Individual QSAR prediction | Working | - | -| Individual PBPK simulation | Working | - | -| Cross-module evidence fusion | Missing | No orchestrator | -| Contradiction detection | Missing | No meta-reasoning | -| Narrative consistency | Missing | No validation layer | -| Decision recommendation | Missing | Out of scope per design | - -### The Core Issue - -Each module correctly declares: -- `decisionBoundary.supportedDecisions` -- `decisionBoundary.prohibitedDecisions` -- `decisionOwner` - -But there's **no consumer** of these declarations. The orchestrator that should read these boundaries and make cross-module decisions doesn't exist. - ---- - -## 7. Recommendations - -### Immediate (High Priority) - -1. **Define the Orchestrator Interface** - - Create `toxmcp-orchestrator` repository - - Define contract for cross-module evidence fusion - - Implement contradiction detection engine - -2. **Standardize Evidence Blocks** - - Create `toxmcp-evidence-schema` shared package - - Unify `evidenceBlock` structure across all modules - - Version all schemas with explicit negotiation - -3. 
**Implement Transaction Coordination** - - Add saga pattern for cross-module workflows - - Define compensation actions for each module - - Create unified session registry - -### Medium Term - -4. **Build Meta-Reasoning Layer** - - Implement confidence aggregation across modules - - Create ontology alignment service - - Build narrative consistency validator - -5. **Schema Registry** - - Deploy central schema registry - - Implement version negotiation protocol - - Add schema compatibility testing - ---- - -## Appendix: File Reference Index - -### Schema Files Analyzed -- `comptox-mcp/schemas/hazardEvidenceSummary.v1.json` -- `comptox-mcp/schemas/aopLinkageSummary.v1.json` -- `comptox-mcp/schemas/comptoxEvidencePack.v1.json` -- `oqt-mcp/schemas/oqtHazardEvidenceSummary.v1.json` -- `oqt-mcp/schemas/oqtReadAcrossSummary.v1.json` -- `oqt-mcp/schemas/oqtWorkflowRecord.v1.json` -- `aop-mcp/docs/contracts/schemas/read/get_ker.response.schema.json` -- `aop-mcp/docs/contracts/schemas/read/assess_aop_confidence.response.schema.json` - -### Documentation Files Analyzed -- `oqt-mcp/docs/architecture.md` -- `oqt-mcp/docs/integration_orchestrators.md` -- `oqt-mcp/docs/cross_suite_alignment_2026.md` -- `comptox-mcp/docs/architecture_overview.md` -- `aop-mcp/docs/architecture.md` -- `pbpk-mcp/docs/mcp-bridge/architecture.md` - -### Code Files Analyzed -- `comptox-mcp/src/epacomp_tox/orchestrator/workflow.py` -- `comptox-mcp/src/epacomp_tox/orchestrator/offline.py` -- `comptox-mcp/src/epacomp_tox/resources/interop.py` - ---- - -## Summary of Findings by Severity - -### 🔴 Critical (4) -1. **Missing Orchestrator:** The downstream orchestrator referenced throughout docs does not exist -2. **Evidence Block Incompatibility:** CompTox, O-QT, and AOP use incompatible evidence block structures -3. **No Cross-Module Rollback:** Partial failures can produce misleading safety reports -4. **No Schema Registry:** No version negotiation or discovery mechanism - -### 🟠 High (5) -1. **No Narrative Consistency Checker:** No component validates coherence across module outputs -2. **Unit Mismatches:** Different unit systems without conversion metadata -3. **Inconsistent Version Patterns:** Each module uses different version declaration -4. **Inconsistent State Management:** PBPK has session registry; others don't -5. **Ontology Versioning Conflicts:** No shared ontology registry - -### 🟡 Medium (2) -1. **GenRA Orchestrator is Experimental:** Internal-only, not cross-module -2. **Provenance Structure Divergence:** Three different provenance formats - ---- - -**Audit Complete** diff --git a/ToxMCP_Audit_Reviewed_v2/toxmcp_future_proofing_audit_report.md b/ToxMCP_Audit_Reviewed_v2/toxmcp_future_proofing_audit_report.md deleted file mode 100644 index a0c3eb3..0000000 --- a/ToxMCP_Audit_Reviewed_v2/toxmcp_future_proofing_audit_report.md +++ /dev/null @@ -1,209 +0,0 @@ -# ToxMCP Suite - Future-Proofing & Standards Audit Report (Reviewed Copy) - -**Review date:** 2026-04-15 -**Scope:** `comptox-mcp`, `oqt-mcp`, `aop-mcp`, `pbpk-mcp` -**Focus:** Migration resilience for MCP, schema evolution, ontology drift, and provider coupling - ---- - -## Important update in this reviewed copy - -The original report treated streaming and transport changes as mostly future events. -This reviewed copy updates the framing: - -- Streamable HTTP is already part of the public MCP specification lineage. 
-- The current public MCP roadmap is focused on **evolving transport and session handling for scale**, not on introducing a large set of new official transports. -- The highest-value future-proofing question for ToxMCP is therefore **migration resilience**, not speculative feature timing. - ---- - -## Executive summary - -The original package correctly identified that the suite has several durability risks: - -1. **Transport/protocol logic is fragmented across repos** -2. **Schema/version handling is inconsistent** -3. **Ontology evolution is under-governed** -4. **Provider and model coupling is stronger than ideal** -5. **Binary/large artifact handling is not abstracted cleanly enough** - -These are best understood as **migration-cost multipliers**. -Even if every repo works today, the cost of adapting the suite to protocol, ontology, or provider change may be much higher than it needs to be. - ---- - -## Finding register - -| ID | Finding | Severity | Evidence basis | Confidence | Reviewed interpretation | -|---|---|---|---|---|---| -| FUT-01 | MCP transport handling is too repo-local | **High** | Observed | High | Transport change will likely require repeated work unless abstraction is shared | -| FUT-02 | Capability/version negotiation strategy is underdefined | **High** | Observed + standards note | Medium-High | Compatibility drift is likely as clients and servers evolve | -| FUT-03 | Schema evolution and registry discipline are insufficient | **High** | Observed | High | Cross-suite breakage risk grows as contracts change | -| FUT-04 | Ontology/version drift is under-managed | **High** | Observed + inferred | Medium-High | Historical comparability and interoperability may degrade over time | -| FUT-05 | Provider/model coupling is stronger than ideal | **Medium / High** | Observed | Medium-High | Supplier or API change could have outsized migration cost | -| FUT-06 | Binary/large artifact handling needs a clearer boundary | **Medium / High** | Observed + inferred | Medium | Performance and compatibility cost can rise as outputs get richer | - ---- - -## FUT-01: MCP transport handling is too repo-local -**Severity:** **High** -**Evidence basis:** Observed -**Confidence:** High - -The original report was right that transport logic is spread across repos. -That means even modest protocol evolution can create duplicated upgrade work. - -### Reviewed framing -This is not mainly a prediction about a specific future transport. 
-It is a present-day software architecture issue: -- transport concerns are not centralized enough -- compatibility behavior is harder to test consistently -- protocol changes may require multiple parallel migrations - -### Recommended control -Introduce a shared transport boundary or library that owns: -- protocol version selection -- capability negotiation -- request/response envelope handling -- streaming/session abstractions -- compatibility tests - ---- - -## FUT-02: Capability and version negotiation need explicit policy -**Severity:** **High** -**Evidence basis:** Observed + standards note -**Confidence:** Medium-High - -Hardcoded or uneven protocol-version handling increases: -- brittle client/server pairings -- ambiguous fallback behavior -- upgrade risk across repos - -### Recommended control -- define a single suite-level compatibility policy -- make supported protocol versions discoverable -- test downgrade/upgrade behavior explicitly -- separate “what we support” from “what we prefer” - ---- - -## FUT-03: Schema evolution discipline is insufficient -**Severity:** **High** -**Evidence basis:** Observed -**Confidence:** High - -The original contract-layer and future-proofing work reinforce each other here. -Version numbers appear, but the suite still needs a clearer answer to: -- where schemas are registered -- how new versions are discovered -- how breaking changes are communicated -- how older artifacts remain readable - -### Recommended control -- maintain a schema registry or index -- document compatibility guarantees -- ship transformers or adapters for version transitions -- add contract tests at cross-repo boundaries - ---- - -## FUT-04: Ontology evolution is under-managed -**Severity:** **High** -**Evidence basis:** Observed + inferred -**Confidence:** Medium-High - -This is especially relevant for `aop-mcp`, but it affects the full suite whenever ontology-backed concepts appear in downstream records or reports. - -### Risk pattern -- ontology or taxonomy changes upstream -- local normalization still succeeds syntactically -- semantic meaning or comparability changes silently -- historical artifacts become harder to compare or trust - -### Recommended control -- persist ontology/version provenance -- define remapping/deprecation policy -- test historical artifact interpretation against changed ontology states - ---- - -## FUT-05: Provider and model coupling should be loosened -**Severity:** **Medium / High** -**Evidence basis:** Observed -**Confidence:** Medium-High - -The original package noted provider-specific assumptions in several places. 
-That matters because: -- pricing can change -- APIs can shift -- naming and capabilities evolve -- fallback behavior can be unclear - -### Recommended control -- define internal capability contracts rather than provider names -- keep provider adapters narrow -- record provider/model identity in provenance -- test fallback behavior intentionally, not incidentally - ---- - -## FUT-06: Artifact and binary handling need a cleaner abstraction -**Severity:** **Medium / High** -**Evidence basis:** Observed + inferred -**Confidence:** Medium - -As the suite produces richer artifacts, handling everything as JSON payloads or per-repo conventions can create: -- overhead -- streaming friction -- inconsistent client behavior -- duplicated logic - -### Recommended control -- define a clear artifact abstraction -- separate metadata from large payload transport -- make artifact lineage and content-type handling consistent across repos - ---- - -## What changed from the original report - -### 1. Timing claims were softened -The reviewed copy avoids speculative statements tied to a single quarter unless backed by current public roadmap material. - -### 2. “Streaming gap” became “migration resilience gap” -The stronger and more durable claim is not that one specific feature is missing. -It is that the current suite structure makes protocol change expensive. - -### 3. Standards handling was made less theatrical and more operational -The reviewed copy emphasizes: -- compatibility policy -- shared abstractions -- migration tests -- version provenance - ---- - -## Recommended sequence - -### Immediate -- define shared MCP compatibility policy -- centralize transport/version handling strategy -- define schema ownership and versioning rules - -### Next -- add ontology/version provenance -- reduce provider-specific assumptions -- standardize artifact handling - -### Then -- add compatibility and migration test suites across repos -- document deprecation policy and supported-version windows - ---- - -## Final judgment - -The original package was right to worry about future change, but the best frame is **migration resilience**, not speculative roadmap drama. - -**Bottom line:** ToxMCP will be easier to evolve if transport, schema, ontology, and provider boundaries are made explicit now, while the suite is still small enough to refactor coherently. diff --git a/ToxMCP_Audit_Reviewed_v2/toxmcp_observability_audit_report.md b/ToxMCP_Audit_Reviewed_v2/toxmcp_observability_audit_report.md deleted file mode 100644 index 4ea5013..0000000 --- a/ToxMCP_Audit_Reviewed_v2/toxmcp_observability_audit_report.md +++ /dev/null @@ -1,616 +0,0 @@ -# ToxMCP Observability & Debuggability Audit Report - -**Audit Date:** 2026-04-15 -**Auditor:** Observability & Debuggability Specialist -**Scope:** comptox-mcp, oqt-mcp, aop-mcp, pbpk-mcp -**Severity Legend:** 🔴 Critical | 🟠 High | 🟡 Medium | 🟢 Low - ---- - -> **Reviewed copy (2026-04-15):** This document was retained from the original package but lightly edited for consistency. -> Unless explicitly stated otherwise, code blocks are **reference implementations**, not validated patches, and scenario-based exploit narratives should not be read as reproduced proofs. - - - -## Executive Summary - -This audit reveals **significant observability gaps** across the ToxMCP ecosystem that will make production debugging extremely difficult. The most critical issues are: - -1. **No distributed tracing** - Cross-tool workflows are untraceable -2. 
**Missing feature attribution** - The "Why" gap makes classification results unexplainable -3. **No PII/PSI scrubbing** - Proprietary chemical structures logged in plaintext -4. **No replay capability** - Cannot debug without re-running expensive simulations -5. **No result diff tooling** - Divergent results cannot be analyzed - -**Debuggability Debt Score: 8.5/10 (Critical)** - ---- - -## Finding 1: The 'Why' Gap - Missing Feature Attribution 🔴 CRITICAL - -### Description -When O-QT returns a classification like "Class 1 (narcosis or baseline toxicity)", there is **no explanation of which molecular features triggered this classification**. The response contains only the classification result without feature-level attribution. - -### Evidence - -**File:** `oqt-mcp/src/tools/implementations/o_qt_qsar_tools.py` (lines 311-393) - -```python -async def run_qsar_prediction(smiles: str, model_id: str) -> dict: - """Runs a QSAR prediction.""" - # ... fetch prediction ... - result = { - "chem_id": chem_id, - "model_id": model_id, - "prediction": prediction, # <-- Contains ONLY the result, not WHY - "domain": domain, - "search_hits": hits, - } -``` - -The `prediction` object from the QSAR Toolbox API contains: -- `Value`: The predicted value -- `Unit`: The unit of measurement -- `DomainResult`: In/out of domain status -- **Missing:** Which molecular features contributed to this prediction -- **Missing:** Feature importance scores -- **Missing:** Structural alerts triggered - -### Concrete Example - -**Current Response:** -```json -{ - "prediction": { - "Value": "Class 1", - "DomainResult": "Inside applicability domain" - }, - "model_provenance": { - "title": "Verhaar Scheme for predicting toxicity mode of action" - } -} -``` - -**What Scientists Need:** -```json -{ - "prediction": { - "Value": "Class 1", - "DomainResult": "Inside applicability domain", - "feature_attribution": { - "triggered_rules": [ - { - "rule_id": "VERHAAR_001", - "description": "Non-reactive organic compound with logKow > 2.0", - "confidence": 0.94, - "contributing_fragments": ["C-C", "C-H"], - "molecular_features": { - "logKow": 3.2, - "reactive_groups": [] - } - } - ], - "explanation": "Class 1 assigned due to non-reactive nature and moderate lipophilicity consistent with narcosis mechanism" - } - } -} -``` - -### Impact -- **Regulatory Rejection:** Agencies (EPA, ECHA) require explainable predictions -- **Scientific Distrust:** Users cannot validate or challenge results -- **Debugging Impossibility:** When results are wrong, cannot determine if it's data issue, model issue, or bug - -### Cross-References -- Related to: Finding 4 (Replay Without Re-execution) - Cannot debug what you cannot explain -- Related to: Finding 5 (Result Diff) - Cannot diff without feature-level comparison - -### Recommendation -1. Extend `hazard_contracts.py` to include `feature_attribution` field -2. Parse profiler alerts from Toolbox response to extract triggered rules -3. Add `explain_prediction()` tool that returns human-readable rationale - ---- - -## Finding 2: Cross-Tool Tracing - No Distributed Trace IDs 🔴 CRITICAL - -### Description -When a user runs a workflow that hits CompTox → O-QT → AOP, there is **no distributed trace ID that links all three calls**. Each MCP server generates its own isolated correlation ID, making it impossible to see the full request graph. 
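For orientation, the trace-context continuation recommended at the end of this finding fits in a few lines. This is a hedged sketch of the W3C `traceparent` format, not code from any audited repo; `continue_trace` and the header values are illustrative:

```python
import re
import secrets
from typing import Optional, Tuple

# W3C traceparent: version "00" - 16-byte trace-id - 8-byte parent-id - flags.
TRACEPARENT_RE = re.compile(r"^00-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})$")


def continue_trace(incoming: Optional[str]) -> Tuple[str, str]:
    """Reuse the caller's trace-id when present; otherwise start a new trace."""
    match = TRACEPARENT_RE.match(incoming or "")
    trace_id = match.group(1) if match else secrets.token_hex(16)
    span_id = secrets.token_hex(8)  # this hop becomes the downstream parent
    return trace_id, f"00-{trace_id}-{span_id}-01"


trace_id, traceparent = continue_trace(None)   # workflow root
_, child = continue_trace(traceparent)         # next hop keeps the trace-id
assert child.split("-")[1] == trace_id
```

In practice the OpenTelemetry SDK recommended below handles propagation and span parenting automatically; the sketch only shows the invariant that every hop must reuse the incoming trace-id instead of minting a fresh `uuid4`.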
- -### Evidence - -**File:** `oqt-mcp/src/api/server.py` (lines 95-118) - -```python -@app.middleware("http") -async def audit_log_middleware(request: Request, call_next): - correlation_id = str(uuid.uuid4()) # <-- NEW UUID FOR EVERY REQUEST - request.state.correlation_id = correlation_id - # ... - response.headers["X-Request-ID"] = correlation_id -``` - -**File:** `aop-mcp/src/server/mcp/router.py` (lines 57-118) - -```python -async def mcp_endpoint(request: Request, response: Response): - # No correlation ID extraction from incoming request! - payload = await request.json() - # ... -``` - -**File:** `comptox-mcp/src/epacomp_tox/orchestrator/workflow.py` (lines 68-206) - -```python -def run_workflow(self, ..., workflow_run_id: Optional[str] = None): - run_id = workflow_run_id or str(uuid4()) # <-- Local only, not propagated - # No tracing context propagation to O-QT or AOP -``` - -### The Problem - -``` -User Request - │ - ├──► CompTox-MCP [X-Request-ID: abc-123] - │ └──► Calls O-QT API [X-Request-ID: def-456] ← NEW ID! - │ - ├──► O-QT-MCP [X-Request-ID: ghi-789] ← NEW ID! - │ - └──► AOP-MCP [X-Request-ID: jkl-012] ← NEW ID! - -Result: Cannot correlate the full workflow! -``` - -### What Should Happen (OpenTelemetry/W3C Trace Context) - -``` -User Request [trace-id: abc-123, span-id: xyz] - │ - ├──► CompTox-MCP [trace-id: abc-123, span-id: comp-1] - │ └──► Calls O-QT API [trace-id: abc-123, span-id: oqt-1, parent: comp-1] - │ - ├──► O-QT-MCP [trace-id: abc-123, span-id: oqt-2, parent: xyz] - │ - └──► AOP-MCP [trace-id: abc-123, span-id: aop-1, parent: xyz] - -Result: Full request graph visible in Jaeger/Zipkin! -``` - -### Impact -- **No End-to-End Visibility:** Cannot trace a chemical through the entire analysis pipeline -- **Latency Attribution Impossible:** Cannot determine which tool is causing slowdowns -- **Error Propagation Opaque:** Errors in one tool appear as failures in another - -### Cross-References -- Related to: Finding 5 (Result Diff) - Cannot correlate divergent results across tools - -### Recommendation -1. Implement W3C Trace Context propagation (`traceparent` header) -2. Add OpenTelemetry SDK to all MCP servers -3. Deploy Jaeger/Zipkin for distributed tracing visualization -4. Add span IDs to all log entries - ---- - -## Finding 3: Log Privacy Leakage - No PII/PSI Scrubbing 🔴 CRITICAL - -### Description -Toxicological data can be proprietary (new drug candidates). The logs capture **chemical structures (SMILES) and CAS numbers in plaintext** with no PII/PSI (Proprietary Substance Information) scrubbing filters. - -### Evidence - -**File:** `oqt-mcp/src/tools/implementations/o_qt_qsar_tools.py` (lines 311-315) - -```python -async def run_qsar_prediction(smiles: str, model_id: str) -> dict: - log.info( - f"Running QSAR prediction for SMILES: {smiles[:20]}... using model: {model_id}" - ) # <-- SMILES LOGGED IN PLAINTEXT! -``` - -**File:** `oqt-mcp/src/tools/registry.py` (lines 135-157) - -```python -# CRITICAL: This should be handled by a dedicated, immutable audit service in production -# Ensure PII/Sensitive data in params is sanitized before logging if necessary. -try: - logged_params = json.dumps(params, default=str, indent=2)[:500] # <-- NO SANITIZATION! -except Exception: - logged_params = "Params serialization failed" - -audit.emit( - { - "type": "tool_execution", - "tool": name, - "user_id": user.id, - "status": "success", - "params": logged_params, # <-- CONTAINS SMILES, CAS, CHEMICAL NAMES! 
- } -) -``` - -**File:** `oqt-mcp/src/api/server.py` (lines 95-118) - -```python -async def audit_log_middleware(request: Request, call_next): - # ... - event = { - "type": "http_request", - "correlation_id": correlation_id, - "user_id": user_id, - "method": request.method, - "path": request.url.path, - "status_code": response.status_code, - "duration_ms": round(duration_ms, 3), - # <-- NO SCRUBBING OF REQUEST BODY! - } - audit.emit(event) -``` - -### Concrete Privacy Leak Example - -**Log Entry (Current):** -```json -{ - "timestamp": "2026-04-15T10:30:00Z", - "level": "INFO", - "message": "Running QSAR prediction for SMILES: CC(C)Cc1ccc...", - "params": { - "smiles": "CC(C)Cc1ccc(C(C)C(=O)O)cc1", # <-- IBUPROFEN STRUCTURE! - "chemical_identifier": "15687-27-1", # <-- CAS NUMBER! - "preferred_name": "Ibuprofen" # <-- DRUG NAME! - } -} -``` - -**What It Should Be (Scrubbed):** -```json -{ - "timestamp": "2026-04-15T10:30:00Z", - "level": "INFO", - "message": "Running QSAR prediction for SMILES: [REDACTED]...", - "params": { - "smiles_hash": "sha256:a3f5c8...", # <-- HASH ONLY - "chemical_identifier": "[REDACTED]", - "preferred_name": "[REDACTED]", - "_debug": "PII scrubbed - see secure vault for original" - } -} -``` - -### Impact -- **Regulatory Violation:** GDPR, CCPA, and pharma confidentiality agreements breached -- **IP Theft Risk:** Competitors can extract chemical structures from logs -- **Audit Failure:** Compliance audits will flag this as critical finding - -### Cross-References -- Related to: Finding 1 (Why Gap) - Feature attribution requires chemical data, creating tension with privacy - -### Recommendation -1. Implement `PrivacyScrubber` class with regex patterns for: - - SMILES strings - - CAS numbers - - InChI/InChIKey - - Chemical names (dictionary-based) -2. Hash chemical identifiers for correlation without exposure -3. Store original values in encrypted sidecar for authorized debugging -4. Add `X-Confidentiality-Level` header to control scrubbing per-request - ---- - -## Finding 4: Replay Without Re-execution - No Record Mode 🟠 HIGH - -### Description -There is **no 'record mode' that caches deterministic responses**. Developers cannot replay an exact MCP tool call from last Tuesday without re-running the expensive simulation. - -### Evidence - -**File:** `oqt-mcp/src/qsar/client.py` (lines 55-165) - -```python -async def _request(self, method, path, *, params=None, json=None, ...): - # No caching layer! - # No VCR/recording mechanism! - async def _execute_request(): - # ... makes live HTTP request every time ... -``` - -**File:** `aop-mcp/src/instrumentation/cache.py` (lines 1-47) - -```python -class InMemoryCache(Cache): - """Simple cache abstraction with in-memory implementation.""" - # Only used for SPARQL query caching, not for: - # - Tool call recording - # - Response replay - # - Deterministic debugging -``` - -**File:** `comptox-mcp/src/epacomp_tox/orchestrator/workflow.py` (lines 350-378) - -```python -def _persist_bundle(self, bundle, ...): - # Saves bundle AFTER execution - # No recording of intermediate steps - # No ability to replay from checkpoint -``` - -### The Problem - -**Scenario:** A scientist reports: "Last Tuesday, O-QT said this chemical was Class 2, but today it says Class 1. Why?" - -**Current Debugging Process:** -1. Re-run the same query → May get different result (data drift?) -2. Check logs → No feature attribution (Finding 1) -3. Check cross-tool trace → No trace ID (Finding 2) -4. 
**Result:** Cannot determine cause of divergence - -**What Should Exist:** -```python -# Record mode for deterministic replay -@record_replay(cache_dir=".vcr_cassettes") -async def run_qsar_prediction(smiles: str, model_id: str) -> dict: - # First call: Records to .vcr_cassettes/qsar_abc123.yaml - # Subsequent calls: Replays from cassette (no API call!) - ... -``` - -### Impact -- **Debugging Cost:** Each debug session requires expensive re-execution -- **Non-Determinism:** Cannot distinguish between data drift and bugs -- **Regression Testing:** Cannot verify fixes without live APIs - -### Cross-References -- Related to: Finding 1 (Why Gap) - Replay without explanation is insufficient -- Related to: Finding 5 (Result Diff) - Replay enables diff comparison - -### Recommendation -1. Integrate VCR.py for HTTP recording/replay -2. Add `TOXMCP_RECORD_MODE` environment variable -3. Store cassettes with versioning for regression testing -4. Add `replay_from_cassette()` helper for debugging - ---- - -## Finding 5: Result Diff Tool - No Divergence Analysis 🟠 HIGH - -### Description -When two scientists get different results for the same query, there is **no 'result diff' tool** to determine if it's data drift, model drift, hardware floating-point differences, or a bug. - -### Evidence - -**Search Results:** No `diff`, `compare`, `divergence`, or `regression` tools found in any repository. - -**File:** `comptox-mcp/src/epacomp_tox/orchestrator/audit.py` (lines 1-99) - -```python -class AuditBundleStore: - """Durable storage for orchestrator audit bundles.""" - - def save(self, bundle, *, attachments=None): - # Saves bundles with checksums - # No comparison/diff functionality! - - def load_bundle(self, run_id: str) -> Dict[str, any]: - # Loads single bundle - # No cross-run comparison! -``` - -**File:** `pbpk-mcp/docs/mcp-bridge/audit-trail.md` (lines 94-98) - -```markdown -## Verification Tools -- `audit verify --from 2025-10-16` – Streams events, recomputes hash chain -- `audit replay --job job-uuid` – Reconstructs timeline for a specific job -# <-- NO `audit diff` TOOL! -``` - -### What Should Exist - -```python -class ResultDiffer: - """Compare two workflow results to identify divergence.""" - - def diff(self, run_id_a: str, run_id_b: str) -> DivergenceReport: - return { - "divergence_type": "MODEL_DRIFT", # or DATA_DRIFT, BUG, HARDWARE_FP - "confidence": 0.94, - "differences": [ - { - "path": "predictive.results[0].prediction.Value", - "old": "Class 2", - "new": "Class 1", - "explanation": "Model version changed from 2.1 to 2.2" - } - ], - "root_cause": { - "type": "model_update", - "details": "Verhaar scheme updated 2025-01-10" - } - } -``` - -### Impact -- **Scientific Disagreements:** Cannot resolve "I got different results" issues -- **Regression Detection:** Cannot detect when updates break existing analyses -- **Data Quality:** Cannot identify upstream data changes - -### Cross-References -- Related to: Finding 1 (Why Gap) - Diff requires feature-level comparison -- Related to: Finding 4 (Replay) - Diff requires ability to replay old results - -### Recommendation -1. Create `toxmcp-diff` CLI tool -2. Implement semantic diff for chemical predictions -3. Add divergence classification (data vs model vs bug) -4. Integrate with audit bundle storage - ---- - -## Finding 6: Missing Structured Health/Metrics Endpoints 🟡 MEDIUM - -### Description -Only O-QT has a basic health endpoint. No comprehensive metrics for monitoring tool success rates, latency percentiles, or error rates. 
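For reference, the metrics named in the Recommendation below can be sketched with `prometheus_client`. Only the metric names come from this report; the port, label values, and variable names are illustrative:

```python
from prometheus_client import Counter, Gauge, Histogram, start_http_server

# Exposed as `toxmcp_tool_invocations_total` (the client appends `_total`).
TOOL_CALLS = Counter(
    "toxmcp_tool_invocations", "MCP tool invocations", ["tool", "status"]
)
TOOL_DURATION = Histogram(
    "toxmcp_tool_duration_seconds", "MCP tool call latency", ["tool"]
)
DEPENDENCY_UP = Gauge(
    "toxmcp_external_api_health", "1 if the dependency is healthy, else 0", ["dependency"]
)

start_http_server(9090)  # serves /metrics on :9090 (illustrative port)

with TOOL_DURATION.labels(tool="run_qsar_prediction").time():
    TOOL_CALLS.labels(tool="run_qsar_prediction", status="success").inc()
    DEPENDENCY_UP.labels(dependency="qsar_toolbox").set(1)
```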
- -### Evidence - -**File:** `oqt-mcp/src/api/server.py` (lines 135-142) - -```python -@app.get("/health") -async def health_check(): - return { - "status": "healthy", - "environment": settings.app.ENVIRONMENT, - "auth_bypassed": settings.security.BYPASS_AUTH, - "qsar_api_url": settings.qsar.QSAR_TOOLBOX_API_URL, - } -``` - -**Missing:** -- Tool success/error rates -- Latency histograms -- Queue depth (for async jobs) -- External dependency health (QSAR Toolbox, CompTox API) - -### Recommendation -1. Add Prometheus metrics endpoint (`/metrics`) -2. Export key metrics: - - `toxmcp_tool_invocations_total` (counter with tool, status labels) - - `toxmcp_tool_duration_seconds` (histogram) - - `toxmcp_external_api_health` (gauge) - ---- - -## Finding 7: Inconsistent Audit Event Schemas 🟡 MEDIUM - -### Description -Each MCP server uses a different audit event schema, making centralized analysis impossible. - -### Evidence - -**O-QT:** `oqt-mcp/src/tools/registry.py` -```python -audit.emit({ - "type": "tool_execution", - "tool": name, - "user_id": user.id, - "status": "success", - "params": logged_params, -}) -``` - -**AOP-MCP:** `aop-mcp/src/instrumentation/audit.py` -```python -# Only verifies draft checksums, no event emission! -def verify_audit_chain(draft: Draft) -> bool: - ... -``` - -**CompTox:** `comptox-mcp/src/epacomp_tox/orchestrator/audit.py` -```python -# File-based bundle storage, no structured events -class AuditBundleStore: - def save(self, bundle, *, attachments=None): - ... -``` - -### Recommendation -1. Define unified `ToxMcpAuditEvent` schema -2. Include: timestamp, trace_id, tool_name, user_id, duration, status, checksums -3. Implement in shared library across all MCP servers - ---- - -## Finding 8: No Floating-Point Determinism Controls 🟡 MEDIUM - -### Description -No controls for ensuring floating-point determinism across different hardware/platforms. - -### Impact -- Results may differ between Intel vs AMD, or CPU vs GPU -- Cannot reproduce results on different deployments - -### Recommendation -1. Document FP precision requirements -2. Add `deterministic_mode` flag for critical calculations -3. Use fixed-precision libraries where appropriate - ---- - -## Summary Table - -| Finding | Severity | Component | Effort to Fix | -|---------|----------|-----------|---------------| -| 1. Why Gap | 🔴 Critical | O-QT | 2-3 weeks | -| 2. Cross-Tool Tracing | 🔴 Critical | All | 1-2 weeks | -| 3. Log Privacy | 🔴 Critical | All | 1 week | -| 4. Replay Mode | 🟠 High | All | 2 weeks | -| 5. Result Diff | 🟠 High | All | 2-3 weeks | -| 6. Health/Metrics | 🟡 Medium | All | 3-5 days | -| 7. Audit Schema | 🟡 Medium | All | 1 week | -| 8. FP Determinism | 🟡 Medium | CompTox | 1 week | - ---- - -## Debuggability Debt Quantification - -| Category | Debt Score | Justification | -|----------|------------|---------------| -| Explainability | 9/10 | No feature attribution anywhere | -| Traceability | 8/10 | No distributed tracing, isolated correlation IDs | -| Privacy | 9/10 | Plaintext chemical structures in logs | -| Reproducibility | 8/10 | No record/replay, cannot debug without re-execution | -| Comparability | 8/10 | No diff tools for divergence analysis | -| **Overall** | **8.5/10** | **Critical debuggability debt** | - ---- - -## Priority Recommendations - -### Immediate (Week 1-2) -1. **Implement PII/PSI scrubbing** - Critical regulatory/compliance risk -2. **Add distributed trace context propagation** - Enable end-to-end visibility - -### Short-term (Week 3-4) -3. 
**Add feature attribution to O-QT responses** - Enable explainability -4. **Implement VCR recording/replay** - Enable deterministic debugging - -### Medium-term (Month 2) -5. **Build result diff tool** - Enable divergence analysis -6. **Unify audit event schemas** - Enable centralized monitoring - ---- - -## Appendix: File References - -### O-QT MCP -- `oqt-mcp/src/tools/implementations/o_qt_qsar_tools.py` - Main QSAR tools -- `oqt-mcp/src/tools/hazard_contracts.py` - Response contract builders -- `oqt-mcp/src/tools/provenance.py` - Provenance tracking -- `oqt-mcp/src/tools/registry.py` - Tool execution & audit logging -- `oqt-mcp/src/api/server.py` - HTTP server & middleware -- `oqt-mcp/src/qsar/client.py` - QSAR Toolbox API client -- `oqt-mcp/src/utils/audit.py` - Audit event emission -- `oqt-mcp/src/utils/logging.py` - Structured logging setup - -### AOP MCP -- `aop-mcp/src/server/mcp/router.py` - MCP request routing -- `aop-mcp/src/instrumentation/audit.py` - Draft audit chain verification -- `aop-mcp/src/instrumentation/cache.py` - In-memory caching -- `aop-mcp/src/instrumentation/metrics.py` - Basic metrics recording -- `aop-mcp/src/instrumentation/logging.py` - Structured logging - -### CompTox MCP -- `comptox-mcp/src/epacomp_tox/orchestrator/workflow.py` - Workflow orchestration -- `comptox-mcp/src/epacomp_tox/orchestrator/audit.py` - Audit bundle storage -- `comptox-mcp/src/epacomp_tox/orchestrator/utils.py` - Metadata sanitization - -### PBPK MCP -- `pbpk-mcp/docs/mcp-bridge/audit-trail.md` - Audit trail design document -- `pbpk-mcp/docs/mcp-bridge/monitoring.md` - Monitoring design document - ---- - -*End of Report* diff --git a/ToxMCP_Audit_Reviewed_v2/toxmcp_regulatory_audit_report.md b/ToxMCP_Audit_Reviewed_v2/toxmcp_regulatory_audit_report.md deleted file mode 100644 index b5438b0..0000000 --- a/ToxMCP_Audit_Reviewed_v2/toxmcp_regulatory_audit_report.md +++ /dev/null @@ -1,230 +0,0 @@ -# ToxMCP Suite - Regulatory Survivability Audit Report (Reviewed Copy) - -**Review date:** 2026-04-15 -**Scope:** `comptox-mcp`, `oqt-mcp`, `aop-mcp`, `pbpk-mcp` -**Frameworks considered in the original package:** OECD GLP / data integrity expectations, 21 CFR Part 11, Annex 11, related regulated-use controls - ---- - -## Read this report carefully - -This reviewed copy preserves the original regulatory concerns but narrows the claim style. - -It uses the following rule: - -> The package can strongly identify **compliance-relevant design gaps**. -> It cannot, from the audit material alone, guarantee a specific regulator’s decision in a specific submission context. - -So this report prefers phrasing such as: -- **high risk of non-conformance** -- **likely unacceptable without compensating controls** -- **insufficient for defensible reconstruction** - -and avoids categorical claims such as: -- automatic FDA rejection -- automatic submission rejection - ---- - -## Executive summary - -The ToxMCP suite shows meaningful awareness of provenance and auditability, but the package still identifies several gaps that would matter for regulated or high-assurance use: - -1. **Historical reconstruction is incomplete** - The package does not show a suite-wide, fully reconstructable provenance envelope. - -2. **Audit trail semantics are not yet strong enough** - Several components rely on mechanisms that look audit-like but are not clearly tamper-evident end to end. - -3. 
**Electronic review/signature semantics are under-specified** - Draft authorship, review, and approval lineage are not yet represented robustly enough for stronger assurance contexts. - -4. **Determinism and version capture remain uneven** - Reproducibility depends on environment, data/version, ordering, and serialization choices that are not consistently captured. - -### Overall judgment -**Regulatory survivability is currently limited by provenance, reconstruction, and review-state design more than by any single missing field.** - ---- - -## Finding register - -| ID | Finding | Severity | Evidence basis | Confidence | Reviewed interpretation | -|---|---|---|---|---|---| -| REG-01 | Historical reconstruction / “time-machine” capability is incomplete | **Critical** | Observed + inferred | High | Hard to defend what happened, with what versions and conditions, after the fact | -| REG-02 | Audit trails are present but not uniformly tamper-evident | **Critical** | Observed | High | Audit-looking records are not yet equivalent to stronger integrity controls | -| REG-03 | Review/signature semantics are too weak for higher-assurance use | **Critical** | Observed | High | Identity, meaning, and content binding need strengthening | -| REG-04 | Determinism and canonicalization are uneven | **High** | Observed | High | Reproducibility can drift across runtime/environment changes | -| REG-05 | Upstream provenance capture is incomplete | **Critical** | Observed + inferred | Medium-High | External data dependence is not consistently reconstructable | -| REG-06 | Cross-suite provenance contracts are not unified | **High** | Observed + inferred | Medium-High | Even good local controls can fail if downstream artifacts do not preserve them | - ---- - -## REG-01: Historical reconstruction is incomplete -**Severity:** **Critical** -**Evidence basis:** Observed + inferred -**Confidence:** High - -The package’s “time-machine” concern remains one of its strongest findings. - -### Why this matters -For defensible historical reconstruction, the system needs a record of: -- code version / commit -- package and environment state -- upstream data/version context -- input identity resolution -- model/tool versions -- human review/approval status -- final artifact lineage - -The audited material shows fragments of this, but not a single suite-wide mechanism that makes reconstruction routine. - -### Reviewed wording -This is best framed as: -- **high risk of non-conformance for regulated or high-assurance use** -- **insufficient historical defensibility without compensating controls** - ---- - -## REG-02: Audit records are not yet uniformly tamper-evident -**Severity:** **Critical** -**Evidence basis:** Observed -**Confidence:** High - -The original package persuasively identified places where audit events or draft metadata can exist without: -- strong content binding -- mandatory previous-hash linkage -- verification on read -- clearly immutable storage semantics - -### Why this matters -An audit record is much more useful than a plain log line, but it is not equivalent to a verified integrity chain unless: -- the canonicalized content is defined, -- the chain is mandatory, -- and verification is part of normal operation. 
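A minimal sketch of that distinction, using only the standard library (the reviewed remediation snippets later in this package develop the same idea in more detail): an audit record only becomes tamper-evident when canonical bytes are hashed into a mandatory chain that is re-verified on read.

```python
import hashlib
import json
from typing import Dict, List


def canonical(body: Dict) -> bytes:
    # Canonicalization must be defined, or "equal" events hash differently.
    return json.dumps(body, sort_keys=True, separators=(",", ":")).encode("utf-8")


def append_event(chain: List[Dict], payload: Dict) -> None:
    prev = chain[-1]["hash"] if chain else "0" * 64
    body = {"payload": payload, "prev": prev}  # chaining is mandatory, not optional
    chain.append({**body, "hash": hashlib.sha256(canonical(body)).hexdigest()})


def verify_chain(chain: List[Dict]) -> bool:
    # Verification on read: recompute every link, not just inspect the tail.
    prev = "0" * 64
    for event in chain:
        body = {"payload": event["payload"], "prev": event["prev"]}
        if event["prev"] != prev or event["hash"] != hashlib.sha256(canonical(body)).hexdigest():
            return False
        prev = event["hash"]
    return True
```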
- ---- - -## REG-03: Electronic review/signature semantics are underdeveloped -**Severity:** **Critical** -**Evidence basis:** Observed -**Confidence:** High - -The package correctly highlighted missing or weak semantics around: -- reviewer identity -- signature meaning -- timestamp discipline -- signature-to-content linkage -- role or approval state - -### Reviewed wording -This is a **strong compliance gap finding**. -It is not, on its own, proof of a specific regulator outcome without intended-use and procedure context. - -### Practical implication -If the system is meant to support high-assurance draft approval or regulated record workflows, signature and approval state need to be explicit, verified, and preserved in lineage. - ---- - -## REG-04: Determinism and canonicalization need more discipline -**Severity:** **High** -**Evidence basis:** Observed -**Confidence:** High - -The package’s best examples here include: -- floating-point serialization for hashed records -- ordering assumptions in query results -- lack of explicit random-seed or environment recording - -### Why this matters -Two scientifically “same” runs can become operationally non-identical if: -- ordering differs, -- float serialization differs, -- environment changes are not captured, -- or a downstream artifact is regenerated under slightly different conditions. - ---- - -## REG-05: Upstream provenance capture remains too weak -**Severity:** **Critical** -**Evidence basis:** Observed + inferred -**Confidence:** Medium-High - -The package is strong in pointing out that upstream data dependence must be represented, not assumed. - -### Important refinement in the reviewed copy -The correct requirement is **not** “invent version headers.” -The requirement is to capture the strongest provenance and replay information the upstream actually makes available, and to supplement it internally where needed. - -That may include: -- provider release/version identifiers -- snapshot identifiers -- response hashes -- retrieval timestamps -- request parameters -- internal cache keys or mirror snapshots - ---- - -## REG-06: Cross-suite provenance contracts need to be unified -**Severity:** **High** -**Evidence basis:** Observed + inferred -**Confidence:** Medium-High - -Local compliance-minded controls are less useful if downstream repos cannot reliably preserve: -- provenance fields -- review state -- uncertainty state -- version metadata -- signed-artifact lineage - -This is where the regulatory and contract-layer audits reinforce each other. - ---- - -## Recommended control architecture - -### 1. Suite-wide provenance envelope -A single record model carried across repos, including: -- input identity -- upstream retrieval data -- code/runtime snapshot -- tool outputs and hashes -- review and approval state -- artifact lineage - -### 2. Verified audit chain -Separate from ordinary developer logging: -- canonical event schema -- mandatory chaining -- content recomputation -- immutable or append-controlled storage semantics -- automated verification tests - -### 3. 
Explicit review/signature model -For higher-assurance flows: -- actor identity -- role -- meaning -- time -- content linkage -- revocation or supersession model - ---- - -## What to validate next - -- intended regulated-use context for each repo and output type -- what external procedural controls already exist -- how draft approval is meant to work in practice -- which provenance fields survive cross-repo handoffs -- whether deterministic hashing and ordering assumptions hold across environments - ---- - -## Final judgment - -The original package was right to focus on provenance, reconstruction, and review-state design. -Those remain the most important regulatory-survivability concerns in the suite. - -**Bottom line:** the package strongly supports the claim that ToxMCP still needs a more robust integrity and provenance model before it can be treated as ready for regulated or similarly high-assurance use. diff --git a/ToxMCP_Audit_Reviewed_v2/toxmcp_remediation_snippets.py b/ToxMCP_Audit_Reviewed_v2/toxmcp_remediation_snippets.py deleted file mode 100644 index 2425f76..0000000 --- a/ToxMCP_Audit_Reviewed_v2/toxmcp_remediation_snippets.py +++ /dev/null @@ -1,463 +0,0 @@ -""" -ToxMCP Reviewed Remediation Snippets -=================================== - -This module contains implementation-oriented reference code derived from the -reviewed audit package. It is intentionally written as reference code rather -than a drop-in patch set. - -Important: -- These patterns still require repository-specific adaptation. -- Provider/version controls must use features the upstream actually supports. -- Signature verification is exposed via an injected verifier callback. -""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from datetime import datetime, timezone -from decimal import Decimal -from pathlib import Path -from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Protocol -import hashlib -import json -import os -import platform -import subprocess -import unicodedata - - -# ============================================================================= -# Shared helpers -# ============================================================================= - -def utc_now() -> datetime: - """Return a timezone-aware UTC timestamp.""" - return datetime.now(timezone.utc) - - -def iso_utc(dt: datetime) -> str: - """Serialize datetime consistently in UTC.""" - if dt.tzinfo is None: - dt = dt.replace(tzinfo=timezone.utc) - return dt.astimezone(timezone.utc).isoformat().replace("+00:00", "Z") - - -def sha256_hex(data: bytes) -> str: - return hashlib.sha256(data).hexdigest() - - -def normalize_json_value(value: Any, *, fp_precision: int = 17) -> Any: - """ - Normalize a value for deterministic JSON hashing. - - Notes: - - Floats are normalized conservatively. - - NaN/Infinity are represented as strings because JSON itself does not - define canonical encodings for these values. 
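    Example (illustrative):
        >>> normalize_json_value({"b": float("nan"), "a": 1.0})
        {'a': 1.0, 'b': 'NaN'}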
- """ - if isinstance(value, float): - if value != value: # NaN - return "NaN" - if value == float("inf"): - return "Infinity" - if value == float("-inf"): - return "-Infinity" - if value == 0.0: - return 0.0 - return round(value, fp_precision) - if isinstance(value, Decimal): - return format(value, "f") - if isinstance(value, datetime): - return iso_utc(value) - if isinstance(value, Mapping): - return {str(k): normalize_json_value(v, fp_precision=fp_precision) for k, v in sorted(value.items(), key=lambda item: str(item[0]))} - if isinstance(value, (list, tuple)): - return [normalize_json_value(v, fp_precision=fp_precision) for v in value] - return value - - -def canonical_json_bytes(value: Any) -> bytes: - normalized = normalize_json_value(value) - return json.dumps( - normalized, - sort_keys=True, - ensure_ascii=True, - separators=(",", ":"), - ).encode("utf-8") - - -# ============================================================================= -# Audit trail reference model -# ============================================================================= - -class AuditStorage(Protocol): - def append(self, event: "RegulatoryAuditEvent") -> None: - ... - - def read_all(self) -> Iterable["RegulatoryAuditEvent"]: - ... - - -class InMemoryAuditStorage: - """Simple storage backend for examples and tests.""" - - def __init__(self) -> None: - self._events: List[RegulatoryAuditEvent] = [] - - def append(self, event: "RegulatoryAuditEvent") -> None: - self._events.append(event) - - def read_all(self) -> Iterable["RegulatoryAuditEvent"]: - return list(self._events) - - -@dataclass(frozen=True) -class RegulatoryAuditEvent: - """Reference audit-event envelope for higher-assurance workflows.""" - - event_id: str - event_type: str - timestamp_utc: datetime - user_id: str - session_id: str - payload: Dict[str, Any] - previous_hash: str - content_hash: str - service_version: str - git_commit: str - upstream: Dict[str, Any] = field(default_factory=dict) - signature: Optional[str] = None - - @staticmethod - def build( - *, - event_id: str, - event_type: str, - user_id: str, - session_id: str, - payload: Dict[str, Any], - previous_hash: str, - service_version: str, - git_commit: str, - upstream: Optional[Dict[str, Any]] = None, - timestamp_utc: Optional[datetime] = None, - signature: Optional[str] = None, - ) -> "RegulatoryAuditEvent": - ts = timestamp_utc or utc_now() - canonical = { - "event_id": event_id, - "event_type": event_type, - "timestamp_utc": iso_utc(ts), - "user_id": user_id, - "session_id": session_id, - "payload": payload, - "previous_hash": previous_hash, - "service_version": service_version, - "git_commit": git_commit, - "upstream": upstream or {}, - } - content_hash = sha256_hex(canonical_json_bytes(canonical)) - return RegulatoryAuditEvent( - event_id=event_id, - event_type=event_type, - timestamp_utc=ts, - user_id=user_id, - session_id=session_id, - payload=payload, - previous_hash=previous_hash, - content_hash=content_hash, - service_version=service_version, - git_commit=git_commit, - upstream=upstream or {}, - signature=signature, - ) - - -class AuditChainBrokenError(Exception): - """Raised when the expected audit chain has been broken.""" - - -class RegulatoryAuditTrail: - """ - Append-only audit trail reference implementation. - - This example uses an in-memory backend by default. In production, replace - with an append-controlled storage implementation and add retention/access - controls appropriate for the deployment context. 
- """ - - def __init__(self, storage: Optional[AuditStorage] = None) -> None: - self._storage = storage or InMemoryAuditStorage() - self._tail_hash = "0" * 64 - - @property - def tail_hash(self) -> str: - return self._tail_hash - - def record(self, event: RegulatoryAuditEvent) -> str: - if event.previous_hash != self._tail_hash: - raise AuditChainBrokenError( - f"Expected previous_hash={self._tail_hash}, got {event.previous_hash}" - ) - if self._compute_hash(event) != event.content_hash: - raise AuditChainBrokenError("Event content hash does not match canonical content") - self._storage.append(event) - self._tail_hash = event.content_hash - return self._tail_hash - - def verify_chain(self) -> bool: - expected = "0" * 64 - for event in self._storage.read_all(): - if event.previous_hash != expected: - return False - if self._compute_hash(event) != event.content_hash: - return False - expected = event.content_hash - return True - - @staticmethod - def _compute_hash(event: RegulatoryAuditEvent) -> str: - canonical = { - "event_id": event.event_id, - "event_type": event.event_type, - "timestamp_utc": iso_utc(event.timestamp_utc), - "user_id": event.user_id, - "session_id": event.session_id, - "payload": event.payload, - "previous_hash": event.previous_hash, - "service_version": event.service_version, - "git_commit": event.git_commit, - "upstream": event.upstream, - } - return sha256_hex(canonical_json_bytes(canonical)) - - -# ============================================================================= -# Electronic review/signature reference model -# ============================================================================= - -SignatureVerifier = Callable[[bytes, "ElectronicSignature"], bool] - - -@dataclass(frozen=True) -class ElectronicSignature: - """ - Reference structure for review or approval events. - - This intentionally leaves cryptographic verification pluggable because the - concrete mechanism depends on deployment policy and available infrastructure. - """ - - signer_user_id: str - signer_full_name: str - signature_meaning: str # e.g. authored / reviewed / approved / rejected - signature_timestamp_utc: datetime - content_hash: str - signature_value: bytes - algorithm: str = "ecdsa-sha256" - certificate_chain_pem: List[str] = field(default_factory=list) - - def verify(self, content: bytes, verifier: SignatureVerifier) -> bool: - if sha256_hex(content) != self.content_hash: - return False - return verifier(content, self) - - -# ============================================================================= -# Upstream provenance capture -# ============================================================================= - -@dataclass(frozen=True) -class UpstreamRecord: - """ - Captures the strongest provenance information available for an upstream call. - - Note: - - Only populate provider_version or snapshot_id if the upstream actually - exposes such concepts. - - If not, internal response hashing and cache identity become more important. 
- """ - - provider_name: str - request_url: str - request_params: Dict[str, Any] = field(default_factory=dict) - retrieved_at_utc: datetime = field(default_factory=utc_now) - provider_version: Optional[str] = None - snapshot_id: Optional[str] = None - response_hash: Optional[str] = None - cache_key: Optional[str] = None - - def as_dict(self) -> Dict[str, Any]: - return { - "provider_name": self.provider_name, - "request_url": self.request_url, - "request_params": self.request_params, - "retrieved_at_utc": iso_utc(self.retrieved_at_utc), - "provider_version": self.provider_version, - "snapshot_id": self.snapshot_id, - "response_hash": self.response_hash, - "cache_key": self.cache_key, - } - - -# ============================================================================= -# Reproducibility and environment capture -# ============================================================================= - -@dataclass(frozen=True) -class ExecutionEnvironment: - container_image_digest: str - container_image_tag: str - git_commit_hash: str - git_tag: Optional[str] - git_dirty: bool - poetry_lock_hash: Optional[str] - python_version: str - os_name: str - os_version: str - cpu_architecture: str - random_seed: Optional[int] = None - floating_point_mode: str = "strict" - upstream_records: Dict[str, Dict[str, Any]] = field(default_factory=dict) - - def as_dict(self) -> Dict[str, Any]: - return { - "container": { - "image_digest": self.container_image_digest, - "image_tag": self.container_image_tag, - }, - "code": { - "git_commit": self.git_commit_hash, - "git_tag": self.git_tag, - "git_dirty": self.git_dirty, - "poetry_lock_hash": self.poetry_lock_hash, - }, - "runtime": { - "python": self.python_version, - "os": f"{self.os_name} {self.os_version}", - "cpu": self.cpu_architecture, - "random_seed": self.random_seed, - "floating_point_mode": self.floating_point_mode, - }, - "upstream": self.upstream_records, - } - - -def _run_git_command(args: List[str]) -> Optional[str]: - try: - result = subprocess.run(args, capture_output=True, text=True, check=True) - return result.stdout.strip() or None - except Exception: - return None - - -def _file_hash_if_exists(path: str) -> Optional[str]: - p = Path(path) - if not p.exists() or not p.is_file(): - return None - return sha256_hex(p.read_bytes()) - - -def capture_execution_environment( - *, - upstream_records: Optional[Mapping[str, UpstreamRecord]] = None, - random_seed: Optional[int] = None, - floating_point_mode: str = "strict", -) -> ExecutionEnvironment: - git_commit = _run_git_command(["git", "rev-parse", "HEAD"]) or "unknown" - git_tag = _run_git_command(["git", "describe", "--tags", "--exact-match"]) - git_status = _run_git_command(["git", "status", "--porcelain"]) - git_dirty = bool(git_status) - - upstream = { - name: record.as_dict() - for name, record in (upstream_records or {}).items() - } - - return ExecutionEnvironment( - container_image_digest=os.getenv("TOXMCP_IMAGE_DIGEST", "unknown"), - container_image_tag=os.getenv("TOXMCP_IMAGE_TAG", "unknown"), - git_commit_hash=git_commit, - git_tag=git_tag, - git_dirty=git_dirty, - poetry_lock_hash=_file_hash_if_exists("poetry.lock"), - python_version=platform.python_version(), - os_name=platform.system(), - os_version=platform.release(), - cpu_architecture=platform.machine(), - random_seed=random_seed, - floating_point_mode=floating_point_mode, - upstream_records=upstream, - ) - - -# ============================================================================= -# Untrusted text handling for model-facing contexts 
-# ============================================================================= - -def sanitize_untrusted_identifier(text: str, *, allow_newlines: bool = False, max_length: int = 256) -> str: - """ - Normalize and sanitize a free-text identifier before passing it into an - LLM- or agent-facing context. - - This is a helper, not a complete prompt-injection defense. The primary - defense should still be prompt structure and isolation of untrusted fields. - """ - normalized = unicodedata.normalize("NFKC", text) - if len(normalized) > max_length: - raise ValueError(f"Identifier exceeds maximum length {max_length}") - - cleaned_chars: List[str] = [] - for char in normalized: - category = unicodedata.category(char) - if category.startswith("C"): - if allow_newlines and char in "\n\r": - cleaned_chars.append("\n") - # drop all other control characters - continue - cleaned_chars.append(char) - - cleaned = "".join(cleaned_chars) - if not allow_newlines: - cleaned = cleaned.replace("\n", " ").replace("\r", " ") - return " ".join(cleaned.split()).strip() - - -# ============================================================================= -# Example usage -# ============================================================================= - -if __name__ == "__main__": - audit_trail = RegulatoryAuditTrail() - - env = capture_execution_environment( - upstream_records={ - "comptox": UpstreamRecord( - provider_name="comptox", - request_url="https://example.invalid/chemical/detail/DTXSID123", - request_params={"id": "DTXSID123"}, - provider_version=None, - snapshot_id=None, - response_hash="placeholder-response-hash", - cache_key="comptox:DTXSID123:v1", - ) - }, - random_seed=1234, - ) - - event = RegulatoryAuditEvent.build( - event_id="evt-001", - event_type="workflow_started", - user_id="user-123", - session_id="sess-001", - payload={"chemical_name": sanitize_untrusted_identifier("Benzene")}, - previous_hash=audit_trail.tail_hash, - service_version="toxmcp-suite reviewed-reference", - git_commit=env.git_commit_hash, - upstream={"comptox": env.upstream_records.get("comptox", {})}, - ) - - audit_trail.record(event) - print(json.dumps(env.as_dict(), indent=2)) - print(f"audit_chain_ok={audit_trail.verify_chain()}") diff --git a/ToxMCP_Audit_Reviewed_v2/toxmcp_security_audit_report.md b/ToxMCP_Audit_Reviewed_v2/toxmcp_security_audit_report.md deleted file mode 100644 index 748e777..0000000 --- a/ToxMCP_Audit_Reviewed_v2/toxmcp_security_audit_report.md +++ /dev/null @@ -1,215 +0,0 @@ -# ToxMCP Suite - Adversarial Security Audit Report (Reviewed Copy) - -**Review date:** 2026-04-15 -**Scope:** `comptox-mcp`, `oqt-mcp`, `aop-mcp`, `pbpk-mcp` -**Purpose:** Identify trust-boundary, availability, and integrity risks relevant to toxicology workflows - ---- - -## Read this report carefully - -The original security report had strong instincts but sometimes overstated exploit certainty. -This reviewed copy keeps the high-value findings while making the following distinction explicit: - -- **Observed:** insecure pattern directly visible in the audited material -- **Scenario:** plausible exploit or misuse path that depends on runtime preconditions -- **Operational consequence:** what the issue could mean in production if those preconditions hold - -This report is therefore more conservative in wording, not weaker in substance. - ---- - -## Executive summary - -The most important security issues in the package are: - -1. 
**Unsafe interpolation at trust boundaries** - Especially around query/template construction and any path where untrusted identifiers may influence model-facing text. - -2. **Weak provenance/integrity controls around upstream dependence** - The suite relies on external sources and intermediate transformations that are not always strongly verifiable afterward. - -3. **Insufficient resilience and rate/quotas for expensive operations** - Availability and integrity interact here: unstable systems are harder to trust and easier to misuse. - -### Security posture after review -- **Critical findings remain:** yes -- **But some original exploit narratives are better read as scenarios:** also yes - ---- - -## Finding register - -| ID | Finding | Severity | Evidence basis | Confidence | Reviewed interpretation | -|---|---|---|---|---|---| -| SEC-01 | Unsafe query interpolation in `aop-mcp` | **Critical** | Observed + scenario | High / Medium | The pattern is real; exact exploit effect depends on which query parts are attacker-influenced and what the endpoint allows | -| SEC-02 | Untrusted identifier handling across prompt/agent boundaries | **High** | Observed + scenario | Medium | Important to mitigate now, even though full exploit demonstration still needs runtime tracing | -| SEC-03 | Upstream integrity/provenance controls are uneven | **High** | Observed + inferred | Medium | External dependence needs stronger internal verification and capture | -| SEC-04 | Authorization / workflow escalation surfaces deserve targeted review | **Medium / High** | Observed + inferred | Medium | Needs live-repo validation before stronger claims | -| SEC-05 | Resource exhaustion and denial-of-service paths are plausible | **High** | Observed | High | Large simulations, retries, or missing quotas can destabilize the system | -| SEC-06 | Offline / controlled-execution posture is underdefined | **High** | Observed + inferred | Medium | Important for high-assurance deployments and incident containment | - ---- - -## SEC-01: Unsafe query interpolation in `aop-mcp` -**Severity:** **Critical** -**Evidence basis:** Observed + scenario -**Confidence:** High for the unsafe pattern; Medium for worst-case impact - -The original package showed string-based templating for query generation. That is a valid high-priority security finding. - -### What is directly supported -- query templates are rendered through string formatting -- this is unsafe if structural query fragments or control fields are influenced by untrusted input - -### What should be stated more carefully -The reviewed copy avoids assuming destructive update outcomes such as graph deletion unless the runtime path and endpoint permissions are known. - -### Better statement -> Unsafe interpolation is present. Depending on runtime data flow, this could permit query broadening, result manipulation, data exposure, or other unintended graph access. Destructive effects depend on whether update-capable operations are reachable. - -### Correct mitigation pattern -Do not try to “sanitize everything” with regexes alone. 
- -Instead: -- use fixed, allow-listed query plans -- bind only literals/URIs -- keep `ORDER BY`, `LIMIT`, graph patterns, and predicates on allow-lists -- separate read-only query paths from any privileged/update path - ---- - -## SEC-02: Untrusted identifiers may influence model-facing text -**Severity:** **High** -**Evidence basis:** Observed + scenario -**Confidence:** Medium - -The original report was directionally right to worry about prompt or instruction confusion from chemical names and related fields. -The reviewed copy treats the full jailbreak claim as scenario-dependent until the exact prompt boundary is demonstrated. - -### Why it still matters now -Because mitigation is relatively cheap and scientifically sensible: -- normalize Unicode -- strip control characters for LLM-facing contexts -- avoid passing free text directly into system or tool instructions -- carry identifiers as structured data -- regression-test with adversarial names and notes - -### Important correction -Simple keyword blocking is not enough. -The primary control should be **prompt structure and boundary isolation**, not only blacklists. - ---- - -## SEC-03: Upstream integrity controls are uneven -**Severity:** **High** -**Evidence basis:** Observed + inferred -**Confidence:** Medium - -The original report identified a real issue: results derived from upstream APIs or knowledge sources can be difficult to verify later if provenance is weak. - -### Reviewed refinement -The right mitigation is not to assume that all providers support response signing. -A better hierarchy of controls is: -1. authenticated transport where available -2. source/provenance capture -3. request/response hashing -4. internal caching or mirroring for replay -5. cross-source consistency checks for high-value conclusions -6. provider-side signing **if actually supported** - ---- - -## SEC-04: Authorization and workflow escalation need targeted validation -**Severity:** **Medium / High** -**Evidence basis:** Observed + inferred -**Confidence:** Medium - -The original report’s concern about permission boundaries remains useful, but this is an area where live-repo validation matters. -Configuration alone is rarely enough to prove exploitability. - -### What to verify -- how permissions are enforced at runtime -- whether tool composition can bypass intended gates -- which roles can launch expensive, destructive, or approval-relevant flows -- whether audit records capture denied and elevated actions - ---- - -## SEC-05: DoS and exhaustion paths are plausible -**Severity:** **High** -**Evidence basis:** Observed -**Confidence:** High - -The package identifies multiple cost-amplifying patterns: -- large PBPK workloads -- retry behavior on failing upstreams -- insufficient quotas or admission control -- incomplete cancellation/timeout semantics - -These are not “mere performance issues.” -In an analytical system, prolonged instability becomes a security and integrity problem because it encourages retries, bypasses, stale-data usage, and partial-result acceptance. - ---- - -## SEC-06: Controlled/offline execution posture should be made explicit -**Severity:** **High** -**Evidence basis:** Observed + inferred -**Confidence:** Medium - -The original report usefully raised the question of “secure mode” or constrained execution, but the reviewed copy frames it more practically: - -- Which repos can operate without live external dependencies? -- Which assets must be mirrored or pre-approved? 
-- What logging, auth, and approval rules change in controlled mode? -- What is the incident-response posture if a supplier or upstream becomes untrusted? - -This is important for regulated, confidential, or degraded-network settings. - ---- - -## Attack-chain view - -The original report’s attack chains were helpful conceptually. The reviewed copy keeps the model but phrases them as **scenario compositions**, not proof. - -### Example composite scenario -1. untrusted identifier or query input crosses a weak boundary -2. upstream retrieval/provenance is weak -3. review checkpoints are missing or optional -4. a polished artifact is produced -5. the resulting conclusion appears more trustworthy than its evidence warrants - -This is the core systemic security theme of the suite: **false confidence plus weak verification**. - ---- - -## Immediate actions - -1. **Fix trust-boundary handling** - - query allow-lists - - structured prompt inputs - - control-character stripping for model-facing fields - -2. **Improve provenance and integrity capture** - - response hashes - - retrieval metadata - - clear actor/review state - -3. **Add quotas and resilience controls** - - population/job limits - - bounded retries - - circuit breakers - - cancellation semantics - -4. **Validate authorization pathways** - - runtime permission tests - - escalation-path review - - denial auditability - ---- - -## Final judgment - -The original package correctly identified that ToxMCP’s biggest security risks are not only perimeter vulnerabilities. They are failures at **trust boundaries, provenance boundaries, and review boundaries**. - -**Bottom line:** the reviewed copy supports several strong security findings, especially around query safety, prompt-boundary hygiene, upstream integrity capture, and exhaustion control. Some exploit narratives remain scenario-based and should be validated against the live repositories before external use. diff --git a/docs/deployment.md b/docs/deployment.md index 25b0dd5..4c2cee7 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -7,6 +7,7 @@ This guide explains how to run the EPA CompTox MCP transport service in producti - Python 3.11 (or newer) for bare-metal deployments. - Valid CompTox credentials exposed via `CTX_API_KEY` (preferred) or `EPA_COMPTOX_API_KEY`. - Network access to the CompTox CTX API endpoint configured with `CTX_API_BASE_URL` (defaults to `https://comptox.epa.gov/ctx-api`). +- Production MCP bearer-token validation configured with `MCP_AUTH_ISSUER`, `MCP_AUTH_AUDIENCE`, `MCP_AUTH_JWKS_URL`, and an externally reachable `MCP_RESOURCE_URL`. ## Running with Gunicorn + Uvicorn Workers @@ -30,6 +31,13 @@ Key environment overrides: | `EPACOMP_MCP_GRACEFUL_TIMEOUT` | Graceful shutdown window | `30` | | `EPACOMP_MCP_KEEPALIVE` | HTTP keepalive (seconds) | `5` | | `EPACOMP_MCP_LOG_LEVEL` | Gunicorn log level | `info` | +| `MCP_AUTH_ISSUER` | Expected OIDC issuer for MCP JWTs | unset | +| `MCP_AUTH_AUDIENCE` | Expected MCP JWT audience | unset | +| `MCP_AUTH_JWKS_URL` | JWKS URL for JWT signature verification | unset | +| `MCP_AUTH_REQUIRED_SCOPES` | Required bearer scopes for MCP calls | unset | +| `MCP_RESOURCE_URL` | Canonical protected MCP resource URL | `http://localhost:8000/mcp` | +| `MCP_RATE_LIMIT_REQUESTS_PER_MINUTE` | Process-local per-subject/IP tool-call limit | `120` | +| `MCP_RATE_LIMIT_BURST` | Process-local token-bucket burst size | `20` | All workers use the `uvicorn.workers.UvicornWorker` class, so the WebSocket transport runs on ASGI-native workers. 
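The `MCP_AUTH_*` settings in the table above map directly onto PyJWT's JWKS support, which is why `PyJWT[crypto]` enters the dependency set in this change. Below is a minimal sketch of bearer-token validation wired to those variables; the helper name `validate_mcp_token`, the RS256-only algorithm list, and the space-delimited `scope` claim are illustrative assumptions rather than the server's actual implementation.

```python
import os

import jwt  # provided by the PyJWT[crypto] dependency added in pyproject.toml
from jwt import PyJWKClient

# Hypothetical helper mirroring the MCP_AUTH_* settings documented above.
# Requires MCP_AUTH_JWKS_URL, MCP_AUTH_ISSUER, and MCP_AUTH_AUDIENCE to be set.
_jwks_client = PyJWKClient(os.environ["MCP_AUTH_JWKS_URL"])


def validate_mcp_token(token: str) -> dict:
    """Verify signature, issuer, audience, and required scopes; return claims."""
    signing_key = _jwks_client.get_signing_key_from_jwt(token)
    claims = jwt.decode(
        token,
        signing_key.key,
        algorithms=["RS256"],  # assumption: the issuer signs with RS256
        issuer=os.environ["MCP_AUTH_ISSUER"],
        # MCP_AUTH_AUDIENCE allows comma-separated values; PyJWT accepts a list.
        audience=[a.strip() for a in os.environ["MCP_AUTH_AUDIENCE"].split(",") if a.strip()],
    )
    # Scope check: assume the conventional space-delimited "scope" claim.
    required = set(os.environ.get("MCP_AUTH_REQUIRED_SCOPES", "").replace(",", " ").split())
    granted = set(str(claims.get("scope", "")).split())
    if not required <= granted:
        raise jwt.InvalidTokenError("token is missing required scopes")
    return claims
```

Treat the server's built-in validator as authoritative; this sketch only shows how the issuer, audience, JWKS, and scope settings fit together.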
@@ -69,7 +77,8 @@ The image exposes port `8000` and ships with `/app/gunicorn_conf.py` plus `/app/ - `GET /healthz`: liveness signal, returns immediate 200 when the process is responsive. - `GET /readyz`: readiness probe. Performs a strict authenticated CTX probe via `MCPServer.check_health(probe_mode="readiness")` against stable upstream API routes. Bare reachability to `/ctx-api/health` is not enough. The endpoint returns HTTP 503 when CTX credentials are missing, rejected, or when no authenticated probe succeeds. If a prior successful probe exists it will be returned with `status: degraded`. -- `GET /metrics`: Prometheus-compatible transport metrics derived from `MCPServer.get_transport_metrics()`. Gauges report session counts (`status=active|closed`) and negotiated capability adoption (`capability=tools.streams`, `scope=all|active`, `state=enabled|disabled`). Integrate the scrape endpoint with your platform’s monitoring stack—see `deploy/prometheus_scrape.yaml` for a vanilla Prometheus job and `deploy/otel_collector_metrics.yaml` for an OpenTelemetry Collector pipeline. +- `GET /metrics`: Prometheus-compatible transport metrics derived from `MCPServer.get_transport_metrics()` when `EPACOMP_MCP_METRICS_ENABLED=1`. Gauges report session counts (`status=active|closed`) and negotiated capability adoption (`capability=tools.streams`, `scope=all|active`, `state=enabled|disabled`). The endpoint requires the configured bearer auth unless `MCP_METRICS_BYPASS_AUTH=1` is explicitly set for deployments where a trusted gateway already protects the scrape path. Integrate the scrape endpoint with your platform’s monitoring stack—see `deploy/prometheus_scrape.yaml` for a vanilla Prometheus job and `deploy/otel_collector_metrics.yaml` for an OpenTelemetry Collector pipeline. +- `GET /.well-known/oauth-protected-resource`: OAuth Protected Resource Metadata for MCP clients. Unauthorized `/mcp` HTTP and `/mcp/ws` WebSocket requests return `WWW-Authenticate` challenges that point clients at this metadata. Configure Kubernetes probes (example): @@ -110,6 +119,8 @@ docker run --rm -p 8443:8443 \ ``` 3. Use a network policy or firewall rule to restrict incoming traffic to trusted agent subnets and Platform load balancers. +4. Keep `BYPASS_AUTH=0` in production. Startup fails when production auth is enabled but issuer, audience, or JWKS settings are incomplete. +5. Use the built-in process-local rate limiter as defense in depth, and enforce shared limits at the ingress/gateway for horizontally scaled deployments. ## Logging and Observability diff --git a/docs/genra_workflow.md b/docs/genra_workflow.md index 46a689d..f564b3d 100644 --- a/docs/genra_workflow.md +++ b/docs/genra_workflow.md @@ -187,7 +187,7 @@ sequenceDiagram } ``` -- Bundles align with `docs/mcp_ctx_audit.md` by including request IDs, rate-limit headers, and reproducible payload copies when `includeRawResponses=true`. +- Bundles include request IDs, rate-limit headers, and reproducible payload copies when `includeRawResponses=true` so downstream review remains traceable. - Storage layout enables downstream systems to fetch artefacts by `workflowRunId`. Each bundle carries a SHA256 checksum for integrity. 
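Because each bundle carries a SHA256 checksum, downstream consumers can re-verify an artefact before acting on it. A minimal sketch, assuming the `bundle.json`/`metadata.json` layout and the `bundlePath`/`bundleChecksum` metadata keys used by the audit bundle store elsewhere in this change:

```python
import hashlib
import json
from pathlib import Path


def verify_bundle(base_dir: Path, workflow_run_id: str) -> bool:
    """Recompute the stored bundle's SHA256 and compare it to the recorded checksum."""
    run_dir = base_dir / workflow_run_id
    metadata = json.loads((run_dir / "metadata.json").read_text(encoding="utf-8"))
    # bundlePath is assumed to be recorded relative to the store's base directory.
    payload = (base_dir / metadata["bundlePath"]).read_bytes()
    return hashlib.sha256(payload).hexdigest() == metadata["bundleChecksum"]
```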
## Failure Modes & Recovery Paths diff --git a/docs/integration_guides/mcp_integration.md b/docs/integration_guides/mcp_integration.md index 2529e18..4989681 100644 --- a/docs/integration_guides/mcp_integration.md +++ b/docs/integration_guides/mcp_integration.md @@ -5,7 +5,7 @@ The EPA CompTox MCP server exposes JSON-RPC over HTTP (`/mcp`) and WebSocket (`/ > **Prerequisites** > > 1. Deploy the MCP server (local or remote) and expose the `/mcp` endpoint. -> 2. Set `CTX_API_KEY` (preferred) or `EPA_COMPTOX_API_KEY` so the server can reach the EPA CompTox API. +> 2. Set `CTX_API_KEY` (preferred) or `EPA_COMPTOX_API_KEY` so the server can reach the EPA CompTox API. EPA currently distributes free CTX API keys via `ccte_api@epa.gov`; see the [CTX APIs overview](https://www.epa.gov/comptox-tools/computational-toxicology-and-exposure-apis). > 3. If you front the MCP server with an auth layer, obtain the access token required by your MCP client. > > Replace `http://localhost:8000/mcp` with your deployment URL when following the snippets. @@ -105,4 +105,3 @@ For additional automation examples, consult: - [`tests/test_http_transport.py`](../../tests/test_http_transport.py) for pure HTTP flows. - [`tests/test_websocket_transport.py`](../../tests/test_websocket_transport.py) for WebSocket streaming and cancellation cases. - [`scripts/mcp_ws_client.py`](../../scripts/mcp_ws_client.py) for a minimal WebSocket client you can adapt. - diff --git a/docs/mcp_ctx_audit.md b/docs/mcp_ctx_audit.md deleted file mode 100644 index f70a403..0000000 --- a/docs/mcp_ctx_audit.md +++ /dev/null @@ -1,54 +0,0 @@ -Overview -- MCP uses `ctx-python` (`ctxpy`) client classes to access CTX APIs. No raw HTTP is issued in this codebase. -- Resources map to ctxpy domains and methods; auth uses `x-api-key` header. -- Base URL was not explicitly configured before; now set via env for ctxpy. - -Authentication -- Header: `x-api-key` -- Env resolution in server: prefers `CTX_API_KEY`, then `EPA_COMPTOX_API_KEY`, then `ctx_x_api_key`. -- Also sets `os.environ['ctx_x_api_key']` for ctxpy compatibility. - -Base URL -- New default base: `https://comptox.epa.gov/ctx-api` -- Legacy toggle: `CTX_USE_LEGACY=1` switches to `https://api-ccte.epa.gov` -- Env exposed for ctxpy: `ctx_api_host` set from `CTX_API_BASE_URL` or legacy toggle; `ctx_api_accept=application/json`. 
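The resolution order described above is mechanical enough to sketch. The function name `configure_ctxpy_env` is hypothetical; only the environment variable names and their precedence come from these notes.

```python
import os

DEFAULT_HOST = "https://comptox.epa.gov/ctx-api"
LEGACY_HOST = "https://api-ccte.epa.gov"


def configure_ctxpy_env() -> None:
    """Resolve the API key in documented preference order and expose ctxpy vars."""
    api_key = (
        os.getenv("CTX_API_KEY")
        or os.getenv("EPA_COMPTOX_API_KEY")
        or os.getenv("ctx_x_api_key")
    )
    if api_key:
        os.environ["ctx_x_api_key"] = api_key  # name ctxpy reads for x-api-key auth
    # Precedence assumed: the legacy toggle wins over an explicit base URL.
    host = (
        LEGACY_HOST
        if os.getenv("CTX_USE_LEGACY") == "1"
        else os.getenv("CTX_API_BASE_URL", DEFAULT_HOST)
    )
    os.environ["ctx_api_host"] = host
    os.environ["ctx_api_accept"] = "application/json"
```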
- -Resource → Underlying ctxpy calls -- chemical (src/epacomp_tox/resources/chemical.py:1): - - `search_chemical`/`batch_search_chemical` → `/chemical/search/*` (batch sends newline-delimited identifiers) - - `get_chemical_details`/`batch_get_chemical_details` → `/chemical/detail/search/*` with optional projection query param - - `search_msready` → `/chemical/msready/search/(by-dtxcid|by-formula|by-mass)` -- hazard (src/epacomp_tox/resources/hazard.py:1): - - `search_hazard` → `ctx.Hazard.search` shim selecting `/hazard/{dataset}` routes (toxval, skin-eye, cancer, genetox, adme-ivive, toxref, iris, pprtv, hawc) - - `batch_search_hazard` → Reuses `ctx.Hazard.batch_search` to iterate the selector for each DTXSID - - `get_hazard_toxval` / `batch_get_hazard_toxval` → `/hazard/toxval/search/by-dtxsid/{id}` (single + newline-delimited batch) - - `get_hazard_skin_eye` / `batch_get_hazard_skin_eye` → `/hazard/skin-eye/search/by-dtxsid/{id}` - - `get_hazard_cancer_summary` / `batch_get_hazard_cancer_summary` → `/hazard/cancer-summary/search/by-dtxsid/{id}` - - `get_hazard_genetox_summary` / `batch_get_hazard_genetox_summary` → `/hazard/genetox/summary/search/by-dtxsid/{id}` - - `get_hazard_genetox_details` / `batch_get_hazard_genetox_details` → `/hazard/genetox/details/search/by-dtxsid/{id}` - - `get_hazard_adme_ivive` → `/hazard/adme-ivive/search/by-dtxsid/{id}` - - `get_hazard_pprtv` → `/hazard/pprtv/search/by-dtxsid/{id}` - - `get_hazard_iris` → `/hazard/iris/search/by-dtxsid/{id}` - - `get_hazard_hawc` → `/hazard/hawc/search/by-dtxsid/{id}` - - `get_hazard_toxref` / `batch_get_hazard_toxref` → `/hazard/toxref/{dataset}/search/{lookup}/{value}` + `/hazard/toxref/search/by-dtxsid/` -- exposure (src/epacomp_tox/resources/exposure.py:1): - - `search_cpdat` → `/exposure/{functional-use|product-data|list-presence}/search/by-dtxsid/{id}` - - `search_httk` → `GET /exposure/httk/search/by-dtxsid/{id}` - - `get_cpdat_vocabulary` → `/exposure/{functional-use|product-data|list-presence}/(category|puc|tags)` - - `search_qsurs` → `GET /exposure/functional-use/probability/search/by-dtxsid/{id}` - - `search_exposures` → `/exposure/{mmdb|seem}/...` endpoints based on selector -- chemical_list (src/epacomp_tox/resources/chemical_list.py:1): - - `get_public_list_names` → `GET /chemical/list/` - - `get_full_list` → `GET /chemical/list/chemicals/search/by-listname/{list}` -- cheminformatics (src/epacomp_tox/resources/cheminformatics.py:1): - - `search_toxprints` → `ctx.search_toxprints(chemical)` (returns DataFrame; code converts to dict) - -Notes -- Method signatures and available calls extracted into `epa_comptox_api_structure.json:1` (generated via `extract_api_structure.py:1`). -- Lightweight shim in `src/ctxpy/__init__.py` wraps GET/POST/batch, respects `ctx_api_host`, enforces batch chunking, and surfaces structured `CtxApiError` data (request id, rate limits, retry-after). -- `_with_retry` now provides exponential backoff with jitter, retries only on retryable statuses, and exposes `get_last_metadata()` for downstream telemetry. -- Cheminformatics/ToxPrint endpoints remain unavailable on comptox.epa.gov/ctx-api; shim raises migration warning. - -Gaps/Actions -- Confirm maximum batch payload accepted by comptox host (shim currently assumes 200 identifiers per chunk). -- Add smoke tests exercising 1–2 endpoints per domain using `CTX_API_KEY`. 
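The smoke-test action can be made concrete with a few parametrized probes, one representative route per domain taken from the mapping above. A sketch using pytest and the standard library; the module layout, the `CTX_SMOKE_DTXSID` override, and the Bisphenol A DTXSID default are illustrative choices.

```python
import json
import os
import urllib.request

import pytest

HOST = os.getenv("CTX_API_BASE_URL", "https://comptox.epa.gov/ctx-api")
API_KEY = os.getenv("CTX_API_KEY")
# DTXSID7020182 is Bisphenol A; override with any chemical known to have data.
DTXSID = os.getenv("CTX_SMOKE_DTXSID", "DTXSID7020182")

# One representative GET per domain, using paths from the mapping above.
SMOKE_ROUTES = [
    "/chemical/list/",
    f"/hazard/toxval/search/by-dtxsid/{DTXSID}",
    f"/exposure/httk/search/by-dtxsid/{DTXSID}",
]


@pytest.mark.skipif(not API_KEY, reason="CTX_API_KEY not configured")
@pytest.mark.parametrize("route", SMOKE_ROUTES)
def test_ctx_domain_smoke(route: str) -> None:
    request = urllib.request.Request(HOST + route, headers={"x-api-key": API_KEY})
    with urllib.request.urlopen(request, timeout=30) as response:
        assert response.status == 200
        json.loads(response.read())  # payload should be valid JSON
```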
diff --git a/docs/mcp_transport.md b/docs/mcp_transport.md index e570a25..6d50d49 100644 --- a/docs/mcp_transport.md +++ b/docs/mcp_transport.md @@ -10,7 +10,7 @@ This document captures the transport requirements for Model Context Protocol (MC 2. Client sends `initialize` request (`jsonrpc` 2.0) including: - `protocolVersion` (server must negotiate from supported set). - `capabilities` requested by client (per MCP spec §3.2). - - Optional session metadata (auth headers, agent info). + - Optional session metadata (agent info and capabilities). Bearer tokens are supplied via the transport `Authorization` header, not echoed through MCP payloads. 3. Server response must include: - Chosen `protocolVersion`. - Server `capabilities` object describing supported features. @@ -23,11 +23,11 @@ This document captures the transport requirements for Model Context Protocol (MC ### Current State -- Server advertises supported protocol versions (`2025-06-18`, `2025-03-26`, `2024-11-05`) and negotiates correctly. +- Server advertises supported protocol versions (`2025-11-25`, `2025-06-18`, `2025-03-26`, `2024-11-05`) and negotiates correctly. - `notifications/initialized` event emitted. - Client capability negotiation is persisted per session; `tools.streams`/`tools.cancel` features downgrade when the client opts out. - Ping/heartbeat logic responds to client `ping` frames and enforces configurable idle timeouts derived from transport settings or client overrides. -- Authentication metadata is included in tool responses so downstream orchestrators can forward bearer tokens and trace identifiers. +- Tool responses include only scrubbed auth summaries, such as hashed subject, issuer, scopes, expiry, and token hash. Raw bearer tokens and client `authentication` payloads are not returned. - Negotiated capability flags are exposed via `MCPServer.get_transport_metrics()` for transport telemetry dashboards. ### Required Follow-up diff --git a/docs/model_metadata.md b/docs/model_metadata.md index e07c8bf..cf13df2 100644 --- a/docs/model_metadata.md +++ b/docs/model_metadata.md @@ -47,7 +47,7 @@ This writes Markdown and HTML summaries under `docs/generated/`. See `docs/model - **Task 2.3** will populate cards for TEST, OPERA, and GenRA using the schema, ensuring AD definitions and provenance are complete. - **Task 2.4–2.5** will publish the AD reference data and wire schema validation into CI so regressions are blocked automatically. -Questions or suggestions can be captured in `docs/mcp_ctx_audit.md` for review during the metadata governance workshops. +Questions or suggestions should be captured in GitHub issues or focused documentation PRs during the metadata governance workshops. ## Implementation Notes diff --git a/docs/operations/metrics_integration.md b/docs/operations/metrics_integration.md index aa743a4..b235ff8 100644 --- a/docs/operations/metrics_integration.md +++ b/docs/operations/metrics_integration.md @@ -6,7 +6,10 @@ derived from `MCPServer.get_transport_metrics()`, reporting session counts and capability-negotiation outcomes. ## 1. Prerequisites -- MCP transport running with `/metrics` enabled (FastAPI app exposed on HTTP). +- MCP transport running with `/metrics` enabled via + `EPACOMP_MCP_METRICS_ENABLED=1` (FastAPI app exposed on HTTP). +- A bearer token accepted by the MCP auth policy, unless + `MCP_METRICS_BYPASS_AUTH=1` is deliberately set behind a trusted gateway. - Network connectivity from Prometheus / the OTEL Collector to the transport. 
- Access to the target monitoring configuration repo (GitOps) or cluster. @@ -15,8 +18,10 @@ capability-negotiation outcomes. repository. 2. Replace the `targets` hostname with the service address for your environment (e.g., Kubernetes service DNS or load balancer). -3. Adjust labels (such as `env`, `service`) to match your dashboard naming. -4. Reload Prometheus or commit the change to your GitOps pipeline. +3. Add the required `Authorization: Bearer ` header or configure a + gateway-side scrape identity. +4. Adjust labels (such as `env`, `service`) to match your dashboard naming. +5. Reload Prometheus or commit the change to your GitOps pipeline. Verify: - Open the Prometheus UI (`/graph`) and query `mcp_sessions_total` to confirm @@ -48,8 +53,11 @@ Verify: validation step (already documented in this commit). ## 6. Troubleshooting -- If `/metrics` returns 404, ensure your deployment uses the refreshed - application module (`epacomp_tox.transport.websocket:app`). +- If `/metrics` returns 404, ensure `EPACOMP_MCP_METRICS_ENABLED=1` and that + your deployment uses the refreshed application module + (`epacomp_tox.transport.websocket:app`). +- If `/metrics` returns 401 or 403, check the scrape token issuer, audience, + JWKS, and scopes against `MCP_AUTH_*` settings. - Verify network policies allow the monitoring stack to reach port `8000` (or your chosen bind port). - Enable debug logging on the OTEL collector (`service.telemetry.metrics.level = detailed`) diff --git a/epa_comptox_api_structure.json b/epa_comptox_api_structure.json deleted file mode 100644 index 617f178..0000000 --- a/epa_comptox_api_structure.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "Chemical": { - "batch": "(suffix: str, word: Iterable[str], batch_size: int, bracketed: bool = False)", - "details": "(by: str, word: Union[str, Iterable[str]], subset: Optional[str] = 'all')", - "get": "(suffix: str)", - "msready": "(by: str, word: Optional[str] = None, start: Optional[float] = None, end: Optional[float] = None)", - "post": "(suffix: str, word: str)", - "search": "(by: str, word: Union[str, Iterable[str]])" - }, - "Exposure": { - "batch": "(suffix: str, word: Iterable[str], batch_size: int, bracketed: bool = False)", - "get": "(suffix: str)", - "get_cpdat_vocabulary": "(vocab_name)", - "post": "(suffix: str, word: str)", - "search_cpdat": "(vocab_name, dtxsid)", - "search_exposures": "(by, dtxsid)", - "search_httk": "(dtxsid)", - "search_qsurs": "(dtxsid)" - }, - "Hazard": { - "batch": "(suffix: str, word: Iterable[str], batch_size: int, bracketed: bool = False)", - "batch_search": "(by: str, dtxsid: Iterable[str], summary: bool = True)", - "get": "(suffix: str)", - "post": "(suffix: str, word: str)", - "search": "(by: str, dtxsid: str, summary: bool = True)" - }, - "ChemicalList": { - "batch": "(suffix: str, word: Iterable[str], batch_size: int, bracketed: bool = False)", - "get": "(suffix: str)", - "get_full_list": "(list_name: str)", - "post": "(suffix: str, word: str)", - "public_list_names": "()" - }, - "Cheminformatics": { - "search_toxprints": "(chemical)" - } -} diff --git a/pyproject.toml b/pyproject.toml index adb4058..7d84766 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ dependencies = [ "pydantic>=2.7,<3.0", "pydantic-settings>=2.3,<3.0", "jsonschema>=4.21,<5.0", + "PyJWT[crypto]>=2.8,<3.0", ] [project.optional-dependencies] diff --git a/scientific_engine_bundle.txt b/scientific_engine_bundle.txt deleted file mode 100644 index 1b7f8aa..0000000 --- a/scientific_engine_bundle.txt +++ /dev/null @@ 
-1,6829 +0,0 @@ -EPA CompTox Scientific Engine Bundle - -Generated: 2026-04-11T14:15:01.651033+00:00 -Repo root: /Volumes/Storage/topotox_space_relief_20260220/mcp_epacomp_tox -Selection: core scientific engine, predictive harness, science-facing CTX resources, relevant tests, and model metadata. -Included files: 40 -Source characters (included files only): 238674 -Source lines (included files only): 6652 -Conservative token ceiling at 2 chars/token: 119337 -Rule-of-thumb token estimate at 4 chars/token: 59668 - -Included paths: -- src/epacomp_tox/contracts/__init__.py -- src/epacomp_tox/orchestrator/__init__.py -- src/epacomp_tox/orchestrator/audit.py -- src/epacomp_tox/orchestrator/ctx_data.py -- src/epacomp_tox/orchestrator/evidence.py -- src/epacomp_tox/orchestrator/identifiers.py -- src/epacomp_tox/orchestrator/models.py -- src/epacomp_tox/orchestrator/offline.py -- src/epacomp_tox/orchestrator/predictive.py -- src/epacomp_tox/orchestrator/utils.py -- src/epacomp_tox/orchestrator/workflow.py -- src/epacomp_tox/predictive/__init__.py -- src/epacomp_tox/predictive/base.py -- src/epacomp_tox/predictive/clients.py -- src/epacomp_tox/predictive/genra_service.py -- src/epacomp_tox/predictive/opera_service.py -- src/epacomp_tox/predictive/router.py -- src/epacomp_tox/predictive/test_service.py -- src/epacomp_tox/metadata/__init__.py -- src/epacomp_tox/metadata/applicability.py -- src/epacomp_tox/metadata/model_cards.py -- src/epacomp_tox/metadata/validator.py -- src/epacomp_tox/resources/base.py -- src/epacomp_tox/resources/bioactivity.py -- src/epacomp_tox/resources/chemical.py -- src/epacomp_tox/resources/hazard.py -- src/epacomp_tox/resources/exposure.py -- src/epacomp_tox/resources/cheminformatics.py -- src/epacomp_tox/resources/metadata.py -- tests/test_orchestrator_stages.py -- tests/test_predictive_regression.py -- tests/workflows/test_offline_workflows.py -- tests/test_domain_contracts.py -- tests/test_cross_suite_handoffs.py -- metadata/model_cards/genra_read_across.json -- metadata/model_cards/opera_property.json -- metadata/model_cards/test_consensus.json -- metadata/applicability_domains/genra_read_across_ad.json -- metadata/applicability_domains/opera_property_ad.json -- metadata/applicability_domains/test_consensus_ad.json - -==================================================================================================== - -==================================================================================================== -FILE: src/epacomp_tox/contracts/__init__.py -==================================================================================================== -from __future__ import annotations - -import json -from functools import lru_cache -from pathlib import Path -from typing import Any, Dict, Tuple - -from jsonschema import Draft202012Validator - -SCHEMA_ROOT = Path(__file__).resolve().parents[3] / "docs" / "contracts" / "schemas" - - -class SchemaValidationError(RuntimeError): - """Raised when a payload fails JSON Schema validation.""" - - -def _schema_path(namespace: str, name: str) -> Path: - return SCHEMA_ROOT / namespace / f"{name}.json" - - -@lru_cache(maxsize=128) -def load_schema(namespace: str, name: str) -> Dict[str, Any]: - """Load and cache a JSON Schema by namespace/name.""" - path = _schema_path(namespace, name) - if not path.exists(): - raise FileNotFoundError(f"Schema '{namespace}/{name}' not found at {path}") - with path.open("r", encoding="utf-8") as handle: - return json.load(handle) - - -def validate_payload(payload: Any, *, namespace: str, 
name: str) -> None: - """Validate a payload against the referenced schema.""" - schema = load_schema(namespace, name) - validator = Draft202012Validator(schema) - errors = sorted(validator.iter_errors(payload), key=lambda error: error.path) - if errors: - message = "; ".join(error.message for error in errors) - raise SchemaValidationError(message) - - -def schema_ref(namespace: str, name: str) -> Dict[str, str]: - """Helper to build a schema reference dictionary for tool definitions.""" - return {"namespace": namespace, "name": name} - - -__all__ = ["SchemaValidationError", "load_schema", "schema_ref", "validate_payload"] - -==================================================================================================== -FILE: src/epacomp_tox/orchestrator/__init__.py -==================================================================================================== -"""GenRA orchestration helpers (identifier resolution + CTX data staging).""" - -from .audit import AuditBundleStore -from .ctx_data import CtxDataAssembler, CtxDataAssemblyError -from .evidence import EvidenceSynthesizer -from .identifiers import IdentifierResolutionError, IdentifierResolver -from .models import ( - CtxDataBundle, - EvidenceScore, - EvidenceSynthesis, - GuardrailEvent, - IdentifierResolution, - MetadataTrace, - PredictiveRunResult, - PredictiveStepResult, - PredictiveTask, -) -from .offline import ( - OFFLINE_SCENARIOS, - OfflinePredictiveService, - build_offline_orchestrator, -) -from .predictive import PredictiveCoordinator -from .workflow import GenRAOrchestrator - -__all__ = [ - "CtxDataAssembler", - "CtxDataAssemblyError", - "CtxDataBundle", - "GuardrailEvent", - "IdentifierResolution", - "IdentifierResolutionError", - "IdentifierResolver", - "MetadataTrace", - "PredictiveCoordinator", - "EvidenceSynthesizer", - "AuditBundleStore", - "OFFLINE_SCENARIOS", - "build_offline_orchestrator", - "OfflinePredictiveService", - "GenRAOrchestrator", - "PredictiveRunResult", - "PredictiveStepResult", - "PredictiveTask", - "EvidenceSynthesis", - "EvidenceScore", -] - -==================================================================================================== -FILE: src/epacomp_tox/orchestrator/audit.py -==================================================================================================== -from __future__ import annotations - -import hashlib -import json -from datetime import datetime, timezone -from pathlib import Path -from typing import Dict, Iterable, List, Optional, Tuple, Union - - -class AuditBundleStore: - """Durable storage for orchestrator audit bundles and attachments.""" - - def __init__( - self, base_dir: Union[str, Path], *, retention_days: Optional[int] = None - ) -> None: - self.base_dir = Path(base_dir) - self.base_dir.mkdir(parents=True, exist_ok=True) - self.retention_days = retention_days - - def save( - self, - bundle: Dict[str, any], - *, - attachments: Optional[Dict[str, Union[str, bytes]]] = None, - ) -> Dict[str, any]: - run_id = bundle.get("workflowRunId") - if not run_id: - raise ValueError("Bundle must include 'workflowRunId'.") - - run_dir = self.base_dir / run_id - run_dir.mkdir(parents=True, exist_ok=True) - created_at = datetime.now(timezone.utc).isoformat() - - payload = json.dumps( - bundle, ensure_ascii=False, indent=2, sort_keys=True - ).encode("utf-8") - bundle_path = run_dir / "bundle.json" - bundle_path.write_bytes(payload) - bundle_checksum = hashlib.sha256(payload).hexdigest() - - attachments_meta: List[Dict[str, any]] = [] - if attachments: - 
attachments_dir = run_dir / "attachments" - attachments_dir.mkdir(parents=True, exist_ok=True) - for name, content in attachments.items(): - target = attachments_dir / name - target.parent.mkdir(parents=True, exist_ok=True) - data = content.encode("utf-8") if isinstance(content, str) else content - target.write_bytes(data) - attachments_meta.append( - { - "name": name, - "path": str(target.relative_to(self.base_dir)), - "size": len(data), - "checksum": hashlib.sha256(data).hexdigest(), - } - ) - - metadata = { - "workflowRunId": run_id, - "createdAt": created_at, - "bundlePath": str(bundle_path.relative_to(self.base_dir)), - "bundleChecksum": bundle_checksum, - "attachments": attachments_meta, - "retentionDays": self.retention_days, - } - - (run_dir / "metadata.json").write_text( - json.dumps(metadata, indent=2, sort_keys=True), - encoding="utf-8", - ) - return metadata - - def load_bundle(self, run_id: str) -> Dict[str, any]: - bundle_path = self.base_dir / run_id / "bundle.json" - if not bundle_path.exists(): - raise FileNotFoundError(f"No bundle found for run {run_id}") - return json.loads(bundle_path.read_text(encoding="utf-8")) - - def load_metadata(self, run_id: str) -> Dict[str, any]: - metadata_path = self.base_dir / run_id / "metadata.json" - if not metadata_path.exists(): - raise FileNotFoundError(f"No metadata found for run {run_id}") - return json.loads(metadata_path.read_text(encoding="utf-8")) - - def list_runs(self) -> List[Dict[str, any]]: - runs: List[Dict[str, any]] = [] - for entry in sorted(self.base_dir.iterdir()): - if not entry.is_dir(): - continue - metadata_path = entry / "metadata.json" - if not metadata_path.exists(): - continue - try: - runs.append(json.loads(metadata_path.read_text(encoding="utf-8"))) - except json.JSONDecodeError: - continue - return runs - -==================================================================================================== -FILE: src/epacomp_tox/orchestrator/ctx_data.py -==================================================================================================== -from __future__ import annotations - -import time -from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple - -from ctxpy import CtxApiError -from epacomp_tox.resources.cheminformatics import CheminformaticsResource -from epacomp_tox.resources.exposure import ExposureResource -from epacomp_tox.resources.hazard import HazardResource - -from .models import CtxDataBundle, MetadataTrace -from .utils import sanitize_metadata - - -class CtxDataAssemblyError(RuntimeError): - """Raised when CTX data retrieval fails.""" - - -class CtxDataAssembler: - """Fetch and cache CTX datasets required before GenRA predictive calls.""" - - SCENARIO_OVERRIDES = { - "acute_toxicity": { - "hazard": ["all", "human", "eco"], - "exposure": ["httk"], - }, - "exposure_prioritization": { - "hazard": ["all"], - "exposure": ["pathways", "seem"], - "cpdat": ["fc", "puc"], - }, - "genra_read_across": { - "hazard": ["all"], - "exposure": ["httk", "qsurs"], - "cpdat": ["fc"], - "cheminformatics": True, - }, - } - - def __init__( - self, - *, - hazard_resource: HazardResource, - exposure_resource: ExposureResource, - cheminformatics_resource: Optional[CheminformaticsResource] = None, - hazard_data_types: Sequence[str] = ("all",), - exposure_datasets: Sequence[str] = ("httk",), - cpdat_vocabularies: Sequence[str] = ("fc",), - include_toxprints: bool = False, - cache_ttl: int = 900, - time_fn: Callable[[], float] = time.time, - ) -> None: - self.hazard_resource = hazard_resource - 
self.exposure_resource = exposure_resource - self.cheminformatics_resource = cheminformatics_resource - self.hazard_data_types = tuple(dict.fromkeys(hazard_data_types)) - self.exposure_datasets = tuple(dict.fromkeys(exposure_datasets)) - self.cpdat_vocabularies = tuple(dict.fromkeys(cpdat_vocabularies)) - self.include_toxprints = include_toxprints - self.cache_ttl = max(0, cache_ttl) - self._time_fn = time_fn - self._cache: Dict[ - Tuple[str, Tuple[str, ...], Tuple[str, ...], Tuple[str, ...], bool], - Tuple[float, CtxDataBundle], - ] = {} - - def assemble( - self, - dtxsid: str, - *, - scenarios: Optional[Sequence[str]] = None, - include_cheminformatics: Optional[bool] = None, - hazard_summary: bool = True, - ) -> CtxDataBundle: - """Gather hazard/exposure datasets (with caching) for the orchestrator workflow.""" - normalized_sid = (dtxsid or "").strip().upper() - if not normalized_sid: - raise CtxDataAssemblyError("DTXSID is required for CTX data assembly.") - - scenario_list = sorted( - { - scenario.strip().lower() - for scenario in scenarios or [] - if isinstance(scenario, str) and scenario.strip() - } - ) - - hazard_types = set(self.hazard_data_types) - exposure_types = set(self.exposure_datasets) - cpdat_vocab = set(self.cpdat_vocabularies) - include_toxprints = ( - self.include_toxprints - if include_cheminformatics is None - else include_cheminformatics - ) - - for scenario in scenario_list: - overrides = self.SCENARIO_OVERRIDES.get(scenario) - if not overrides: - continue - hazard_types.update(overrides.get("hazard", [])) - exposure_types.update(overrides.get("exposure", [])) - cpdat_vocab.update(overrides.get("cpdat", [])) - if overrides.get("cheminformatics"): - include_toxprints = True - - # Stable cache key covering config and request - cache_key = ( - normalized_sid, - tuple(sorted(hazard_types)), - tuple(sorted(exposure_types)), - tuple(sorted(cpdat_vocab)), - bool(include_toxprints), - ) - now = self._time_fn() - cached = self._cache.get(cache_key) - if cached and (self.cache_ttl == 0 or now - cached[0] <= self.cache_ttl): - return cached[1].model_copy(update={"cache_hit": True}) - - trace: List[MetadataTrace] = [] - data_gaps: List[str] = [] - hazard_data: Dict[str, List[Dict[str, Any]]] = {} - exposure_data: Dict[str, List[Dict[str, Any]]] = {} - cheminformatics_data: Dict[str, Any] = {} - - # Hazard datasets --------------------------------------------------- - for hazard_type in sorted(hazard_types): - try: - payload = self.hazard_resource.search_hazard( - data_type=hazard_type, - dtxsid=normalized_sid, - summary=hazard_summary, - ) - except CtxApiError as exc: - trace.append( - self._metadata_trace(self.hazard_resource, f"hazard:{hazard_type}") - ) - raise CtxDataAssemblyError( - f"Failed to fetch hazard dataset '{hazard_type}' for {normalized_sid}: {exc}" - ) from exc - hazard_data[hazard_type] = payload - if not payload: - data_gaps.append(f"hazard:{hazard_type}") - trace.append( - self._metadata_trace(self.hazard_resource, f"hazard:{hazard_type}") - ) - - # Exposure datasets ------------------------------------------------- - for exposure_type in sorted(exposure_types): - step_name = f"exposure:{exposure_type}" - try: - payload = self._fetch_exposure_dataset(exposure_type, normalized_sid) - except CtxApiError as exc: - trace.append(self._metadata_trace(self.exposure_resource, step_name)) - raise CtxDataAssemblyError( - f"Failed to fetch exposure dataset '{exposure_type}' for {normalized_sid}: {exc}" - ) from exc - exposure_data[exposure_type] = payload - if not 
payload: - data_gaps.append(step_name) - trace.append(self._metadata_trace(self.exposure_resource, step_name)) - - for vocab in sorted(cpdat_vocab): - step_name = f"exposure:cpdat:{vocab}" - try: - payload = self.exposure_resource.search_cpdat( - vocab_name=vocab, - dtxsids=[normalized_sid], - ) - except CtxApiError as exc: - trace.append(self._metadata_trace(self.exposure_resource, step_name)) - raise CtxDataAssemblyError( - f"Failed to fetch CPDat vocabulary '{vocab}' for {normalized_sid}: {exc}" - ) from exc - exposure_data[f"cpdat:{vocab}"] = payload - if not payload: - data_gaps.append(step_name) - trace.append(self._metadata_trace(self.exposure_resource, step_name)) - - # Cheminformatics --------------------------------------------------- - if include_toxprints: - if not self.cheminformatics_resource: - data_gaps.append("cheminformatics:toxprints") - else: - step_name = "cheminformatics:toxprints" - try: - payload = self.cheminformatics_resource.search_toxprints( - chemical=normalized_sid - ) - except CtxApiError as exc: - trace.append( - self._metadata_trace(self.cheminformatics_resource, step_name) - ) - raise CtxDataAssemblyError( - f"Failed to fetch toxprints for {normalized_sid}: {exc}" - ) from exc - cheminformatics_data["toxprints"] = payload - if not payload: - data_gaps.append(step_name) - trace.append( - self._metadata_trace(self.cheminformatics_resource, step_name) - ) - - bundle = CtxDataBundle( - dtxsid=normalized_sid, - scenarios=scenario_list, - hazard=hazard_data, - exposure=exposure_data, - cheminformatics=cheminformatics_data, - data_gaps=data_gaps, - trace=trace, - ) - if self.cache_ttl: - self._cache[cache_key] = (now, bundle) - return bundle - - # Internal helpers ----------------------------------------------------- - - def _metadata_trace(self, resource: Optional[object], step: str) -> MetadataTrace: - metadata = {} - if resource and hasattr(resource, "get_last_metadata"): - metadata = sanitize_metadata(resource.get_last_metadata()) - return MetadataTrace(step=step, metadata=metadata) - - def _fetch_exposure_dataset( - self, dataset: str, dtxsid: str - ) -> List[Dict[str, Any]]: - dataset = dataset.lower() - if dataset == "httk": - return self.exposure_resource.search_httk(dtxsids=[dtxsid]) - if dataset == "qsurs": - return self.exposure_resource.search_qsurs(dtxsids=[dtxsid]) - if dataset in ("pathways", "mmdb-single", "seem", "seem-demographic"): - return self.exposure_resource.search_exposures( - data_type=dataset, - dtxsids=[dtxsid], - ) - raise CtxDataAssemblyError(f"Unsupported exposure dataset '{dataset}'.") - -==================================================================================================== -FILE: src/epacomp_tox/orchestrator/evidence.py -==================================================================================================== -from __future__ import annotations - -from typing import Iterable, List - -from epacomp_tox.predictive import PredictiveResponse - -from .models import EvidenceScore, EvidenceSynthesis, PredictiveStepResult - - -class EvidenceSynthesizer: - """Compose GenRA evidence grades and narrative summaries.""" - - def synthesize(self, results: Iterable[PredictiveStepResult]) -> EvidenceSynthesis: - steps: List[PredictiveStepResult] = [ - step for step in results if step.status == "success" - ] - if not steps: - return EvidenceSynthesis( - confidence_band="Unavailable", - scores=EvidenceScore( - analogue_coverage=0.0, - evidence_quality=0.0, - predictive_agreement=0.0, - ), - narrative="No successful predictive 
results available for synthesis.", - recommended_actions=[ - "Review applicability domain denials", - "Re-run orchestration after addressing guardrail failures", - ], - ) - - analogue_scores = [ - self._extract_score(step, "analogueCoverage") for step in steps - ] - quality_scores = [ - self._extract_score(step, "evidenceQuality") for step in steps - ] - agreement_scores = [ - self._extract_score(step, "predictiveAgreement") for step in steps - ] - - coverage = sum(analogue_scores) / len(analogue_scores) - evidence_quality = sum(quality_scores) / len(quality_scores) - predictive_agreement = sum(agreement_scores) / len(agreement_scores) - - band = self._resolve_confidence_band( - coverage, evidence_quality, predictive_agreement - ) - narrative = self._build_narrative( - band, coverage, evidence_quality, predictive_agreement - ) - - return EvidenceSynthesis( - confidence_band=band, - scores=EvidenceScore( - analogue_coverage=coverage, - evidence_quality=evidence_quality, - predictive_agreement=predictive_agreement, - ), - narrative=narrative, - recommended_actions=self._recommended_actions(band), - ) - - def _extract_score(self, step: PredictiveStepResult, key: str) -> float: - metadata = step.metadata or {} - value = metadata.get(key) - if isinstance(value, (int, float)): - return float(value) - if key == "predictiveAgreement" and step.prediction: - return float(step.prediction.get("confidence", 0.0)) - return 0.0 - - def _resolve_confidence_band( - self, coverage: float, quality: float, agreement: float - ) -> str: - if min(coverage, quality, agreement) >= 0.8: - return "Robust" - if min(coverage, quality, agreement) >= 0.5: - return "Limited" - return "Unavailable" - - def _build_narrative( - self, band: str, coverage: float, quality: float, agreement: float - ) -> str: - return ( - f"Confidence band: {band}. Analogue coverage={coverage:.2f}, " - f"evidence quality={quality:.2f}, predictive agreement={agreement:.2f}." 
- ) - - def _recommended_actions(self, band: str) -> List[str]: - if band == "Robust": - return [ - "Proceed with automated dossier generation", - "Document rationale for regulatory submission", - ] - if band == "Limited": - return ["Seek SME review", "Augment analogue set or supporting evidence"] - return [ - "Address guardrail failures", - "Acquire additional data or adjust predictor inputs", - ] - -==================================================================================================== -FILE: src/epacomp_tox/orchestrator/identifiers.py -==================================================================================================== -from __future__ import annotations - -import re -import time -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple - -from ctxpy import CtxApiError -from epacomp_tox.resources.chemical import ChemicalResource - -from .models import IdentifierResolution, MetadataTrace -from .utils import sanitize_metadata - - -class IdentifierResolutionError(RuntimeError): - """Raised when chemical identifier normalization fails.""" - - -class IdentifierResolver: - """Resolve user-supplied identifiers into canonical DTXSID records.""" - - _DTXSID_RE = re.compile(r"^DTXSID\d{7}$", re.IGNORECASE) - _TYPE_ALIASES = { - "dtxsid": "dtxsid", - "sid": "dtxsid", - "dsstox": "dtxsid", - "cas": "casrn", - "casrn": "casrn", - "name": "name", - "preferred_name": "name", - "inchikey": "inchikey", - "inchi": "inchikey", - "smiles": "smiles", - } - _SEARCH_ORDER = { - "casrn": ("equals",), - "name": ("equals", "starts-with", "contains"), - "smiles": ("equals", "contains"), - "inchikey": ("equals", "contains"), - } - - def __init__( - self, - *, - chemical_resource: ChemicalResource, - cache_ttl: int = 900, - detail_subset: str = "identifiers", - time_fn: Callable[[], float] = time.time, - ) -> None: - self.chemical_resource = chemical_resource - self.cache_ttl = max(0, cache_ttl) - self.detail_subset = detail_subset - self._time_fn = time_fn - self._cache: Dict[Tuple[str, str], Tuple[float, IdentifierResolution]] = {} - - def resolve( - self, identifier: str, identifier_type: Optional[str] = None - ) -> IdentifierResolution: - """Resolve an identifier to a canonical DTXSID.""" - normalized_value = (identifier or "").strip() - if not normalized_value: - raise IdentifierResolutionError("Identifier value is required.") - - normalized_type = self._normalize_type(identifier_type, normalized_value) - cache_key = (normalized_value.lower(), normalized_type) - cached = self._cache.get(cache_key) - now = self._time_fn() - if cached and (self.cache_ttl == 0 or now - cached[0] <= self.cache_ttl): - return cached[1].model_copy(update={"cache_hit": True}) - - trace: List[MetadataTrace] = [] - warnings: List[str] = [] - matched_record: Dict[str, Any] - detail_record: Dict[str, Any] - - if normalized_type == "dtxsid": - detail_record = self._fetch_details( - identifier=normalized_value, - trace=trace, - stage="chemical.details", - ) - matched_record = detail_record - else: - matched_record = self._search_for_match( - identifier=normalized_value, - identifier_type=normalized_type, - trace=trace, - warnings=warnings, - ) - detail_record = self._fetch_details( - identifier=self._extract_dtxsid(matched_record), - trace=trace, - stage="chemical.details", - ) - - resolution = self._build_resolution( - input_value=normalized_value, - input_type=normalized_type, - matched_record=matched_record, - detail_record=detail_record, - warnings=warnings, - trace=trace, - ) - if 
self.cache_ttl: - self._cache[cache_key] = (now, resolution) - return resolution - - # Internal helpers ----------------------------------------------------- - - def _normalize_type(self, identifier_type: Optional[str], value: str) -> str: - if identifier_type: - key = identifier_type.strip().lower() - if key not in self._TYPE_ALIASES: - raise IdentifierResolutionError( - f"Unsupported identifier type '{identifier_type}'." - ) - return self._TYPE_ALIASES[key] - if self._DTXSID_RE.match(value): - return "dtxsid" - if value.count("-") == 2 and len(value.replace("-", "")) in (5, 6, 7, 8, 9): - return "casrn" - return "name" - - def _metadata_trace(self, stage: str) -> MetadataTrace: - metadata = sanitize_metadata(self.chemical_resource.get_last_metadata()) - return MetadataTrace(step=stage, metadata=metadata) - - def _search_for_match( - self, - *, - identifier: str, - identifier_type: str, - trace: List[MetadataTrace], - warnings: List[str], - ) -> Dict[str, Any]: - search_modes = self._SEARCH_ORDER.get(identifier_type) - if not search_modes: - raise IdentifierResolutionError( - f"Identifier type '{identifier_type}' is not searchable." - ) - - last_error: Optional[Exception] = None - for mode in search_modes: - try: - results = self.chemical_resource.search_chemical( - query=identifier, search_type=mode - ) - trace.append(self._metadata_trace(f"chemical.search:{mode}")) - except CtxApiError as exc: - last_error = exc - trace.append(self._metadata_trace(f"chemical.search:{mode}")) - continue - except Exception as exc: # pragma: no cover - defensive - last_error = exc - trace.append(self._metadata_trace(f"chemical.search:{mode}")) - continue - - candidates = [record for record in results if isinstance(record, dict)] - if not candidates: - continue - if len(candidates) > 1: - warnings.append( - f"Multiple matches found for '{identifier}' using search mode '{mode}'. " - "Using the first result." - ) - return candidates[0] - - if last_error: - raise IdentifierResolutionError( - f"Failed to search for identifier '{identifier}': {last_error}" - ) from last_error - raise IdentifierResolutionError( - f"No CTX record found for identifier '{identifier}'." - ) - - def _fetch_details( - self, - *, - identifier: str, - trace: List[MetadataTrace], - stage: str, - ) -> Dict[str, Any]: - try: - details = self.chemical_resource.get_chemical_details( - identifier=identifier, - id_type="dtxsid", - subset=self.detail_subset, - ) - trace.append(self._metadata_trace(stage)) - if not isinstance(details, dict): - raise IdentifierResolutionError( - f"Unexpected payload when fetching details for '{identifier}'." 
- ) - return details - except CtxApiError as exc: - trace.append(self._metadata_trace(stage)) - raise IdentifierResolutionError( - f"CTX API error retrieving details for '{identifier}': {exc}" - ) from exc - except Exception as exc: # pragma: no cover - defensive - trace.append(self._metadata_trace(stage)) - raise IdentifierResolutionError( - f"Failed to retrieve details for '{identifier}': {exc}" - ) from exc - - def _extract_dtxsid(self, record: Dict[str, Any]) -> str: - for key in ("dtxsid", "DTXSID", "dtxSid", "sid"): - value = record.get(key) - if isinstance(value, str) and value.strip(): - return value.strip() - raise IdentifierResolutionError("Search result did not include a DTXSID.") - - def _build_resolution( - self, - *, - input_value: str, - input_type: str, - matched_record: Dict[str, Any], - detail_record: Dict[str, Any], - warnings: List[str], - trace: List[MetadataTrace], - ) -> IdentifierResolution: - dtxsid = self._extract_dtxsid(detail_record or matched_record) - synonyms = self._extract_synonyms(detail_record) - casrn = self._extract_field( - ("casrn", "cas", "CASRN", "casNumber"), detail_record, matched_record - ) - preferred_name = self._extract_field( - ("preferredName", "preferred_name", "name"), - detail_record, - matched_record, - ) - - return IdentifierResolution( - input_identifier=input_value, - input_type=input_type, - dtxsid=dtxsid, - matched_record=matched_record, - detail_record=detail_record, - preferred_name=preferred_name, - casrn=casrn, - synonyms=synonyms, - warnings=warnings, - trace=trace, - ) - - def _extract_synonyms(self, detail: Dict[str, Any]) -> List[str]: - raw = ( - detail.get("synonyms") or detail.get("synonym") or detail.get("synonymList") - ) - values: Iterable[Any] - if isinstance(raw, (list, tuple)): - values = raw - elif isinstance(raw, str): - values = [raw] - elif isinstance(raw, dict): - values = raw.values() - else: - values = [] - result = [] - for item in values: - if not item: - continue - if isinstance(item, str): - trimmed = item.strip() - if trimmed and trimmed not in result: - result.append(trimmed) - return result - - def _extract_field( - self, - keys: Tuple[str, ...], - detail: Dict[str, Any], - fallback: Dict[str, Any], - ) -> Optional[str]: - for source in (detail, fallback): - for key in keys: - value = source.get(key) - if isinstance(value, str) and value.strip(): - return value.strip() - return None - -==================================================================================================== -FILE: src/epacomp_tox/orchestrator/models.py -==================================================================================================== -from __future__ import annotations - -from typing import Any, Dict, List, Optional - -from pydantic import BaseModel, Field - -from epacomp_tox.predictive import ADCheckResult, PredictiveRequest, PredictiveResponse - - -class MetadataTrace(BaseModel): - """Structured record of transport metadata captured during orchestration.""" - - step: str - metadata: Dict[str, Any] = Field(default_factory=dict) - - -class IdentifierResolution(BaseModel): - """Canonicalized identity data for orchestrator workflows.""" - - input_identifier: str - input_type: str - dtxsid: str - matched_record: Dict[str, Any] = Field(default_factory=dict) - detail_record: Dict[str, Any] = Field(default_factory=dict) - preferred_name: Optional[str] = None - casrn: Optional[str] = None - synonyms: List[str] = Field(default_factory=list) - warnings: List[str] = Field(default_factory=list) - trace: 
List[MetadataTrace] = Field(default_factory=list) - cache_hit: bool = False - - -class CtxDataBundle(BaseModel): - """CTX data payload and provenance captured before predictive stages.""" - - dtxsid: str - scenarios: List[str] = Field(default_factory=list) - hazard: Dict[str, List[Dict[str, Any]]] = Field(default_factory=dict) - exposure: Dict[str, List[Dict[str, Any]]] = Field(default_factory=dict) - cheminformatics: Dict[str, Any] = Field(default_factory=dict) - data_gaps: List[str] = Field(default_factory=list) - trace: List[MetadataTrace] = Field(default_factory=list) - cache_hit: bool = False - - -class PredictiveTask(BaseModel): - """Definition of a predictive call executed during orchestration.""" - - service: str - request: PredictiveRequest - scenario: Optional[str] = None - label: Optional[str] = None - - -class GuardrailEvent(BaseModel): - """Recorded guardrail outcome (denial, warning, or error).""" - - stage: str - component: str - status: str - code: Optional[str] - message: str - confidence: Optional[float] = None - timestamp: str - metadata: Dict[str, Any] = Field(default_factory=dict) - - -class PredictiveStepResult(BaseModel): - """Outcome of an individual predictive service invocation.""" - - service: str - status: str - scenario: Optional[str] = None - label: Optional[str] = None - request: PredictiveRequest - ad: Optional[ADCheckResult] = None - prediction: Optional[Dict[str, Any]] = None - metadata: Dict[str, Any] = Field(default_factory=dict) - error: Optional[str] = None - - -class PredictiveRunResult(BaseModel): - """Combined results for a predictive orchestration stage.""" - - results: List[PredictiveStepResult] = Field(default_factory=list) - guardrails: List[GuardrailEvent] = Field(default_factory=list) - succeeded: bool = True - - -class EvidenceScore(BaseModel): - """Weighted representation of evidence dimensions used in synthesis.""" - - analogue_coverage: float - evidence_quality: float - predictive_agreement: float - - -class EvidenceSynthesis(BaseModel): - """Structured result returned by the evidence grading engine.""" - - confidence_band: str - scores: EvidenceScore - narrative: str - recommended_actions: List[str] = Field(default_factory=list) - -==================================================================================================== -FILE: src/epacomp_tox/orchestrator/offline.py -==================================================================================================== -from __future__ import annotations - -from pathlib import Path -from typing import Any, Callable, Dict, Optional, Sequence - -from ..predictive.base import ADCheckResult, PredictiveRequest, PredictiveServiceBase -from .ctx_data import CtxDataAssembler -from .evidence import EvidenceSynthesizer -from .identifiers import IdentifierResolver -from .predictive import PredictiveCoordinator -from .workflow import GenRAOrchestrator - -OFFLINE_SCENARIOS = [ - "acute_toxicity", - "exposure_prioritization", - "genra_read_across", -] - - -class _OfflineChemicalResource: - def __init__(self) -> None: - self._metadata: Dict[str, Any] = {} - - def search_chemical(self, query: str, search_type: str) -> list[dict[str, Any]]: - self._metadata = {"status": 200} - return [ - { - "dtxsid": "DTXSID0000001", - "preferredName": "Offline Example", - "casrn": "50-00-0", - } - ] - - def get_chemical_details( - self, identifier: str, id_type: str, subset: str = "default" - ) -> dict[str, Any]: - self._metadata = {"status": 200} - return { - "dtxsid": "DTXSID0000001", - "preferredName": 
"Offline Example", - "casrn": "50-00-0", - "synonyms": ["Formaldehyde", "Methanal"], - } - - def get_last_metadata(self) -> Dict[str, Any]: - return dict(self._metadata) - - -class _OfflineHazardResource: - def __init__(self) -> None: - self._metadata: Dict[str, Any] = {} - - def search_hazard( - self, data_type: str, dtxsid: str, summary: bool = True - ) -> list[dict[str, Any]]: - self._metadata = {"status": 200} - return [{"endpoint": "Acute toxicity", "value": "LD50", "source": "Offline"}] - - def get_last_metadata(self) -> Dict[str, Any]: - return dict(self._metadata) - - -class _OfflineExposureResource: - def __init__(self) -> None: - self._metadata: Dict[str, Any] = {} - - def search_httk(self, dtxsids: Sequence[str]) -> list[dict[str, Any]]: - self._metadata = {"status": 200} - return [{"kmp": 1.2, "unit": "1/hr"}] - - def search_cpdat( - self, vocab_name: str, dtxsids: Sequence[str] - ) -> list[dict[str, Any]]: - self._metadata = {"status": 200} - return [{"vocab": vocab_name, "label": "Consumer product"}] - - def search_qsurs(self, dtxsids: Sequence[str]) -> list[dict[str, Any]]: - self._metadata = {"status": 200} - return [{"probability": 0.42}] - - def search_exposures( - self, data_type: str, dtxsids: Sequence[str] - ) -> list[dict[str, Any]]: - self._metadata = {"status": 200} - return [{"dataset": data_type, "value": "offline"}] - - def get_last_metadata(self) -> Dict[str, Any]: - return dict(self._metadata) - - -class _OfflineCheminformaticsResource: - def __init__(self) -> None: - self._metadata: Dict[str, Any] = {} - - def search_toxprints(self, chemical: str) -> dict[str, Any]: - self._metadata = {"status": 200} - return {"toxprints": ["FP_001", "FP_057"]} - - def get_last_metadata(self) -> Dict[str, Any]: - return dict(self._metadata) - - -class OfflinePredictiveService(PredictiveServiceBase): - """Predictive service stub returning deterministic GenRA-like results.""" - - def __init__(self) -> None: - super().__init__( - config={ - "name": "Offline GenRA", - "version": "0.1", - "ad_model_name": "Offline GenRA", - } - ) - - def _predict_impl(self, request: PredictiveRequest) -> Dict[str, Any]: - return { - "prediction": "Read-across suggests low concern.", - "confidence": 0.82, - } - - def _check_ad_impl(self, request: PredictiveRequest) -> ADCheckResult: - return ADCheckResult(in_domain=True, confidence=0.85, details={"analogues": 4}) - - def _build_metadata( - self, request: PredictiveRequest, ad_result: ADCheckResult - ) -> Dict[str, Any]: - metadata = super()._build_metadata(request, ad_result) - metadata.update( - { - "analogueCoverage": 0.88, - "evidenceQuality": 0.74, - "predictiveAgreement": ad_result.confidence, - } - ) - return metadata - - -def build_offline_orchestrator( - *, - persistence_dir: Optional[Path] = None, - clock: Optional[Callable[[], str]] = None, -) -> GenRAOrchestrator: - """Construct an orchestrator wired with offline stub resources.""" - resolver = IdentifierResolver( - chemical_resource=_OfflineChemicalResource(), cache_ttl=0 - ) - assembler = CtxDataAssembler( - hazard_resource=_OfflineHazardResource(), - exposure_resource=_OfflineExposureResource(), - cheminformatics_resource=_OfflineCheminformaticsResource(), - include_toxprints=True, - cache_ttl=0, - ) - predictive_service = OfflinePredictiveService() - coordinator = PredictiveCoordinator({"offline_genra": predictive_service}) - return GenRAOrchestrator( - identifier_resolver=resolver, - ctx_data_assembler=assembler, - predictive_coordinator=coordinator, - 
persistence_dir=persistence_dir, - evidence_synthesizer=EvidenceSynthesizer(), - clock=clock or (lambda: ""), - ) - - -__all__ = [ - "OFFLINE_SCENARIOS", - "build_offline_orchestrator", - "OfflinePredictiveService", -] - -==================================================================================================== -FILE: src/epacomp_tox/orchestrator/predictive.py -==================================================================================================== -from __future__ import annotations - -from datetime import datetime, timezone -from typing import Dict, Iterable, List, Optional - -from epacomp_tox.predictive import ( - ADCheckResult, - PredictiveRequest, - PredictiveResponse, - PredictiveServiceBase, -) - -from .models import ( - GuardrailEvent, - PredictiveRunResult, - PredictiveStepResult, - PredictiveTask, -) - - -class PredictiveCoordinator: - """Coordinate predictive micro-service execution with applicability guardrails.""" - - def __init__( - self, - services: Dict[str, PredictiveServiceBase], - *, - default_require_ad_clearance: bool = True, - stage_name: str = "RunPredictiveModels", - ) -> None: - self._services = dict(services) - self.default_require_ad_clearance = default_require_ad_clearance - self.stage_name = stage_name - - def register_service(self, name: str, service: PredictiveServiceBase) -> None: - """Register or replace a predictive service.""" - self._services[name] = service - - def run( - self, - tasks: Iterable[PredictiveTask], - *, - require_ad_clearance: Optional[bool] = None, - ) -> PredictiveRunResult: - """Execute predictive tasks and aggregate guardrail events.""" - require = ( - self.default_require_ad_clearance - if require_ad_clearance is None - else require_ad_clearance - ) - results: List[PredictiveStepResult] = [] - guardrails: List[GuardrailEvent] = [] - succeeded = True - - for task in tasks: - service = self._ensure_service(task.service) - ad_result: Optional[ADCheckResult] = None - try: - ad_result = service.check_applicability_domain(task.request) - except Exception as exc: # pragma: no cover - defensive - succeeded = False - results.append( - PredictiveStepResult( - service=task.service, - status="error", - scenario=task.scenario, - label=task.label, - request=task.request, - error=str(exc), - ) - ) - guardrails.append( - self._make_guardrail_event( - component=task.service, - status="error", - code=self._resolve_error_code(service), - message=f"Applicability domain check failed ({exc})", - confidence=None, - metadata={"stage": "check_applicability_domain"}, - ) - ) - continue - - policy = self._resolve_policy(service) - if not ad_result.in_domain and (require or policy == "block"): - succeeded = False - guardrails.append( - self._make_guardrail_event( - component=task.service, - status="denied", - code=self._resolve_error_code(service), - message="Applicability domain check failed.", - confidence=ad_result.confidence, - metadata={"policy": policy}, - ) - ) - results.append( - PredictiveStepResult( - service=task.service, - status="denied", - scenario=task.scenario, - label=task.label, - request=task.request, - ad=ad_result, - metadata={"policy": policy}, - ) - ) - continue - - try: - prediction = service.predict(task.request) - except Exception as exc: # pragma: no cover - defensive - succeeded = False - guardrails.append( - self._make_guardrail_event( - component=task.service, - status="error", - code=self._resolve_error_code(service), - message=f"Prediction failed ({exc})", - confidence=ad_result.confidence if ad_result else 
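-                            # ad_result was assigned by the successful AD check
-                            # earlier in this iteration; the fallback is defensive.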
None, - metadata={"policy": policy}, - ) - ) - results.append( - PredictiveStepResult( - service=task.service, - status="error", - scenario=task.scenario, - label=task.label, - request=task.request, - ad=ad_result, - error=str(exc), - metadata={"policy": policy}, - ) - ) - continue - - step_status = "success" - if not prediction.applicability_domain.in_domain: - guardrails.append( - self._make_guardrail_event( - component=task.service, - status="warning", - code=self._resolve_error_code(service), - message="Applicability domain warning.", - confidence=prediction.applicability_domain.confidence, - metadata={"policy": policy}, - ) - ) - if policy == "block": - step_status = "denied" - succeeded = False - - results.append( - PredictiveStepResult( - service=task.service, - status=step_status, - scenario=task.scenario, - label=task.label, - request=task.request, - ad=prediction.applicability_domain, - prediction=prediction.prediction, - metadata=prediction.metadata, - ) - ) - - return PredictiveRunResult( - results=results, guardrails=guardrails, succeeded=succeeded - ) - - # Internal utilities ----------------------------------------------------- - - def _ensure_service(self, name: str) -> PredictiveServiceBase: - if name not in self._services: - raise KeyError(f"Predictive service '{name}' is not registered.") - return self._services[name] - - def _resolve_policy(self, service: PredictiveServiceBase) -> str: - definition = getattr(service, "ad_definition", None) or {} - policy = definition.get("policy") if isinstance(definition, dict) else None - if isinstance(policy, str): - return policy.lower() - return "block" - - def _resolve_error_code(self, service: PredictiveServiceBase) -> Optional[str]: - definition = getattr(service, "ad_definition", None) or {} - if isinstance(definition, dict): - return definition.get("errorCode") - return None - - def _make_guardrail_event( - self, - *, - component: str, - status: str, - message: str, - code: Optional[str], - confidence: Optional[float], - metadata: Optional[Dict[str, str]] = None, - ) -> GuardrailEvent: - timestamp = datetime.now(timezone.utc).isoformat() - return GuardrailEvent( - stage=self.stage_name, - component=component, - status=status, - code=code, - message=message, - confidence=confidence, - timestamp=timestamp, - metadata=metadata or {}, - ) - -==================================================================================================== -FILE: src/epacomp_tox/orchestrator/utils.py -==================================================================================================== -from __future__ import annotations - -from typing import Any, Dict, Optional - - -def sanitize_metadata(metadata: Optional[Dict[str, Any]]) -> Dict[str, Any]: - """ - Convert transport/resource metadata into JSON-serializable primitives. - - ctxpy returns dataclass instances (e.g., RateLimitInfo) inside the metadata - payload. Downstream audit bundles expect plain dictionaries, so this helper - normalizes nested structures while preserving the original keys. 
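-
-    Illustrative doctest (``RateLimitInfo`` here is a local stand-in; any
-    dataclass nested in the metadata is flattened the same way):
-
-        >>> from dataclasses import dataclass
-        >>> @dataclass
-        ... class RateLimitInfo:
-        ...     limit: int
-        ...     remaining: int
-        >>> sanitize_metadata({"rate_limit": RateLimitInfo(100, 42)})
-        {'rate_limit': {'limit': 100, 'remaining': 42}}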
- """ - - def _convert(value: Any) -> Any: - if hasattr(value, "__dataclass_fields__"): - return { - field: getattr(value, field) - for field in value.__dataclass_fields__.keys() # type: ignore[attr-defined] - } - if isinstance(value, dict): - return {key: _convert(val) for key, val in value.items()} - if isinstance(value, (list, tuple)): - return [_convert(item) for item in value] - return value - - if not metadata: - return {} - return {key: _convert(val) for key, val in metadata.items()} - -==================================================================================================== -FILE: src/epacomp_tox/orchestrator/workflow.py -==================================================================================================== -from __future__ import annotations - -import json -from dataclasses import asdict, is_dataclass -from pathlib import Path -from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence -from uuid import uuid4 - -from .audit import AuditBundleStore -from .ctx_data import CtxDataAssembler, CtxDataAssemblyError -from .evidence import EvidenceSynthesizer -from .identifiers import IdentifierResolutionError, IdentifierResolver -from .models import ( - CtxDataBundle, - EvidenceSynthesis, - GuardrailEvent, - IdentifierResolution, - MetadataTrace, - PredictiveRunResult, - PredictiveStepResult, - PredictiveTask, -) -from .predictive import PredictiveCoordinator -from .utils import sanitize_metadata - - -def _serialize(obj: Any) -> Any: - if obj is None: - return None - if hasattr(obj, "model_dump"): - return obj.model_dump() - if is_dataclass(obj): - return asdict(obj) - if isinstance(obj, (list, tuple)): - return [_serialize(item) for item in obj] - if isinstance(obj, dict): - return {key: _serialize(val) for key, val in obj.items()} - return obj - - -class GenRAOrchestrator: - """Controller that ties identifier resolution, CTX data staging, and predictive runs.""" - - def __init__( - self, - *, - identifier_resolver: IdentifierResolver, - ctx_data_assembler: CtxDataAssembler, - predictive_coordinator: PredictiveCoordinator, - persistence_dir: Optional[Path] = None, - clock: Callable[[], str] = lambda: None, - evidence_synthesizer: Optional[EvidenceSynthesizer] = None, - ) -> None: - self.identifier_resolver = identifier_resolver - self.ctx_data_assembler = ctx_data_assembler - self.predictive_coordinator = predictive_coordinator - self.bundle_store = ( - AuditBundleStore(persistence_dir) if persistence_dir else None - ) - self._clock = clock - self.evidence_synthesizer = evidence_synthesizer or EvidenceSynthesizer() - - def run_workflow( - self, - *, - target_identifier: str, - identifier_type: Optional[str] = None, - scenarios: Optional[Sequence[str]] = None, - predictive_plan: Iterable[PredictiveTask], - workflow_run_id: Optional[str] = None, - options: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: - run_id = workflow_run_id or str(uuid4()) - options = options or {} - guardrails: List[GuardrailEvent] = [] - timeline: List[Dict[str, Any]] = [] - - resolution: IdentifierResolution - try: - resolution = self.identifier_resolver.resolve( - target_identifier, identifier_type - ) - timeline.append( - self._timeline_entry("NormalizeIdentifier", resolution.trace) - ) - except IdentifierResolutionError as exc: - guardrails.append( - GuardrailEvent( - stage="NormalizeIdentifier", - component="IdentifierResolver", - status="denied", - code="IDENTIFIER_NOT_RESOLVED", - message=str(exc), - confidence=None, - timestamp=self._clock() or "", - 
-                    metadata={},
-                )
-            )
-            return self._assemble_bundle(
-                run_id=run_id,
-                resolution=None,
-                ctx_bundle=None,
-                predictive_result=None,
-                guardrails=guardrails,
-                timeline=timeline,
-                scenarios=list(scenarios or []),
-                options=options,
-                status="denied",
-            )
-
-        ctx_bundle: CtxDataBundle
-        try:
-            ctx_bundle = self.ctx_data_assembler.assemble(
-                resolution.dtxsid,
-                scenarios=scenarios,
-            )
-            timeline.append(self._timeline_entry("AssembleCtxData", ctx_bundle.trace))
-        except CtxDataAssemblyError as exc:
-            guardrails.append(
-                GuardrailEvent(
-                    stage="AssembleCtxData",
-                    component="CtxDataAssembler",
-                    status="error",
-                    code="CTX_DATA_UNAVAILABLE",
-                    message=str(exc),
-                    confidence=None,
-                    timestamp=self._clock() or "",
-                    metadata={},
-                )
-            )
-            return self._assemble_bundle(
-                run_id=run_id,
-                resolution=resolution,
-                ctx_bundle=None,
-                predictive_result=None,
-                guardrails=guardrails,
-                timeline=timeline,
-                scenarios=list(scenarios or []),
-                options=options,
-                status="error",
-            )
-
-        predictive_result: PredictiveRunResult = self.predictive_coordinator.run(
-            predictive_plan,
-            require_ad_clearance=options.get("requireAdClearance"),
-        )
-        guardrails.extend(predictive_result.guardrails)
-        timeline.append(
-            {
-                "stage": "RunPredictiveModels",
-                "metadata": [
-                    self._result_metadata(step) for step in predictive_result.results
-                ],
-            }
-        )
-
-        status = "success" if predictive_result.succeeded else "error"
-        evidence = self.evidence_synthesizer.synthesize(predictive_result.results)
-
-        bundle = self._assemble_bundle(
-            run_id=run_id,
-            resolution=resolution,
-            ctx_bundle=ctx_bundle,
-            predictive_result=predictive_result,
-            evidence=evidence,
-            guardrails=guardrails,
-            timeline=timeline,
-            scenarios=list(scenarios or []),
-            options=options,
-            status=status,
-        )
-        storage = self._persist_bundle(
-            bundle,
-            ctx_bundle=ctx_bundle,
-            predictive_result=predictive_result,
-            evidence=evidence,
-        )
-        if storage:
-            bundle["storage"] = storage
-        return bundle
-
-    # Internal helpers -----------------------------------------------------
-
-    def _timeline_entry(
-        self, stage: str, trace: Sequence[MetadataTrace]
-    ) -> Dict[str, Any]:
-        return {
-            "stage": stage,
-            "metadata": [_serialize(item) for item in trace],
-        }
-
-    def _result_metadata(self, step: PredictiveStepResult) -> Dict[str, Any]:
-        payload = {
-            "service": step.service,
-            "status": step.status,
-            "scenario": step.scenario,
-            "label": step.label,
-            "metadata": step.metadata,
-        }
-        if step.ad:
-            payload["ad"] = step.ad.model_dump()
-        return payload
-
-    def _assemble_bundle(
-        self,
-        *,
-        run_id: str,
-        resolution: Optional[IdentifierResolution],
-        ctx_bundle: Optional[CtxDataBundle],
-        predictive_result: Optional[PredictiveRunResult],
-        # Default to None so the early "denied"/"error" exits above, which have
-        # no synthesized evidence to pass, do not raise a TypeError.
-        evidence: Optional[EvidenceSynthesis] = None,
-        guardrails: Sequence[GuardrailEvent],
-        timeline: Sequence[Dict[str, Any]],
-        scenarios: List[str],
-        options: Dict[str, Any],
-        status: str,
-    ) -> Dict[str, Any]:
-        bundle: Dict[str, Any] = {
-            "bundleVersion": "0.1",
-            "workflowRunId": run_id,
-            "status": status,
-            "scenarios": scenarios,
-            "options": options,
-            "guardrails": [_serialize(item) for item in guardrails],
-            "timeline": timeline,
-        }
-
-        if resolution:
-            bundle["target"] = {
-                "dtxsid": resolution.dtxsid,
-                "inputIdentifier": {
-                    "value": resolution.input_identifier,
-                    "type": resolution.input_type,
-                },
-                "preferredName": resolution.preferred_name,
-                "casrn": resolution.casrn,
-                "synonyms": resolution.synonyms,
-                "warnings": resolution.warnings,
-            }
-
-        if ctx_bundle:
-            bundle["ctxData"] = {
-                "hazard":
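-                # CtxDataBundle's snake_case fields are re-keyed to camelCase
-                # (e.g. data_gaps -> dataGaps) for the serialized audit bundle.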
ctx_bundle.hazard, - "exposure": ctx_bundle.exposure, - "cheminformatics": ctx_bundle.cheminformatics, - "dataGaps": ctx_bundle.data_gaps, - } - - if predictive_result: - bundle["predictive"] = { - "results": [ - { - "service": step.service, - "status": step.status, - "scenario": step.scenario, - "label": step.label, - "request": step.request.model_dump(), - "ad": step.ad.model_dump() if step.ad else None, - "prediction": step.prediction, - "metadata": sanitize_metadata(step.metadata), - "error": step.error, - } - for step in predictive_result.results - ], - } - - if evidence: - bundle["evidence"] = { - "confidenceBand": evidence.confidence_band, - "scores": evidence.scores.model_dump(), - "narrative": evidence.narrative, - "recommendedActions": evidence.recommended_actions, - } - - return bundle - - def _persist_bundle( - self, - bundle: Dict[str, Any], - *, - ctx_bundle: Optional[CtxDataBundle], - predictive_result: Optional[PredictiveRunResult], - evidence: Optional[EvidenceSynthesis], - ) -> Optional[Dict[str, Any]]: - if not self.bundle_store: - return None - attachments: Dict[str, str] = {} - if ctx_bundle: - attachments["ctx_data.json"] = json.dumps( - _serialize(ctx_bundle), indent=2, sort_keys=True - ) - if predictive_result: - attachments["predictive_results.json"] = json.dumps( - _serialize(predictive_result), - indent=2, - sort_keys=True, - ) - if evidence: - attachments["evidence.json"] = json.dumps( - _serialize(evidence), indent=2, sort_keys=True - ) - return self.bundle_store.save(bundle, attachments=attachments) - -==================================================================================================== -FILE: src/epacomp_tox/predictive/__init__.py -==================================================================================================== -"""Predictive micro-service utilities.""" - -from .base import ( - ADCheckResult, - PredictiveRequest, - PredictiveResponse, - PredictiveServiceBase, -) -from .clients import PredictiveClient -from .genra_service import GenRAService -from .opera_service import OperaPropertyService -from .router import build_predictive_router -from .test_service import TestConsensusPredictiveService - -__all__ = [ - "PredictiveServiceBase", - "PredictiveRequest", - "PredictiveResponse", - "ADCheckResult", - "PredictiveClient", - "TestConsensusPredictiveService", - "OperaPropertyService", - "GenRAService", - "build_predictive_router", -] - -==================================================================================================== -FILE: src/epacomp_tox/predictive/base.py -==================================================================================================== -from __future__ import annotations - -import logging -from abc import ABC, abstractmethod -from typing import Any, Dict, Optional - -from pydantic import BaseModel - -logger = logging.getLogger(__name__) - -from epacomp_tox.metadata.applicability import ApplicabilityDomainStore - - -class PredictiveRequest(BaseModel): - """Base request model for predictive micro-servers.""" - - chemical_identifier: str - identifier_type: str = "dtxsid" - - -class ADCheckResult(BaseModel): - """Standard response for applicability domain evaluations.""" - - in_domain: bool - confidence: float - details: Dict[str, Any] = {} - - -class PredictiveResponse(BaseModel): - """Standardized predictive response envelope.""" - - prediction: Dict[str, Any] - applicability_domain: ADCheckResult - metadata: Dict[str, Any] = {} - - -class PredictiveServiceBase(ABC): - """Shared scaffolding 
for predictive micro-servers.""" - - def __init__( - self, - *, - config: Dict[str, Any], - ad_store: Optional[ApplicabilityDomainStore] = None, - ) -> None: - self.config = config - self.logger = logger.getChild(self.__class__.__name__) - self.ad_store = ad_store or ApplicabilityDomainStore() - self.ad_definition = self._resolve_ad_definition() - - def predict(self, request: PredictiveRequest) -> PredictiveResponse: - """Run applicability domain check, prediction, and assemble response.""" - ad_result = self.check_applicability_domain(request) - policy_metadata = self._apply_ad_policy(request, ad_result) - payload = self._predict_impl(request) - metadata = self._build_metadata(request, ad_result) - metadata.update(policy_metadata) - return PredictiveResponse( - prediction=payload, - applicability_domain=ad_result, - metadata=metadata, - ) - - def check_applicability_domain(self, request: PredictiveRequest) -> ADCheckResult: - """Evaluate whether the request falls within the validated domain.""" - return self._check_ad_impl(request) - - @abstractmethod - def _predict_impl(self, request: PredictiveRequest) -> Dict[str, Any]: - """Model-specific prediction.""" - - @abstractmethod - def _check_ad_impl(self, request: PredictiveRequest) -> ADCheckResult: - """Model-specific AD evaluation.""" - - def _build_metadata( - self, request: PredictiveRequest, ad_result: ADCheckResult - ) -> Dict[str, Any]: - """Hook for adding provenance/telemetry to responses.""" - metadata: Dict[str, Any] = { - "identifier": request.chemical_identifier, - "identifier_type": request.identifier_type, - "model": self.config.get("name"), - "model_version": self.config.get("version"), - } - if self.ad_definition: - metadata["adPolicy"] = self.ad_definition.get("policy") - metadata["adErrorCode"] = self.ad_definition.get("errorCode") - metadata["adDefinition"] = { - "model": self.ad_definition.get("model"), - "version": self.ad_definition.get("version"), - } - return metadata - - def _resolve_ad_definition(self) -> Optional[Dict[str, Any]]: - target = self.config.get("ad_model_name") or self.config.get("name") - if not target: - return None - definition = self.ad_store.get_definition(target) - if not definition: - self.logger.debug("No AD definition found for %s", target) - return definition - - def _apply_ad_policy( - self, request: PredictiveRequest, ad_result: ADCheckResult - ) -> Dict[str, Any]: - definition = self.ad_definition or {} - policy = (definition.get("policy") or "block").lower() - metadata: Dict[str, Any] = {} - if not ad_result.in_domain: - message = ( - f"Applicability domain check failed for {request.chemical_identifier}" - ) - error_code = definition.get("errorCode") - if policy == "block": - raise ValueError(error_code or message) - if policy == "warn": - metadata["adWarning"] = True - metadata["adMessage"] = error_code or message - self.logger.warning("%s", metadata["adMessage"]) - else: - # Unknown policy defaults to block - raise ValueError(error_code or message) - return metadata - -==================================================================================================== -FILE: src/epacomp_tox/predictive/clients.py -==================================================================================================== -from __future__ import annotations - -from abc import ABC, abstractmethod -from typing import Any - -from epacomp_tox.predictive.base import ADCheckResult, PredictiveRequest - - -class PredictiveClient(ABC): - """Minimal client interface for predictive services.""" - - 
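-    # Concrete adapters below (GenRAClient, OperaClient, TestClient) wrap their
-    # respective backends behind this two-method contract, keeping the
-    # PredictiveServiceBase subclasses transport-agnostic.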
@abstractmethod - def predict(self, request: PredictiveRequest) -> dict[str, Any]: - """Execute model prediction.""" - - @abstractmethod - def check_applicability_domain(self, request: PredictiveRequest) -> ADCheckResult: - """Evaluate applicability domain for the request.""" - -==================================================================================================== -FILE: src/epacomp_tox/predictive/genra_service.py -==================================================================================================== -from __future__ import annotations - -from typing import Any, Dict, Optional - -from epacomp_tox.metadata.applicability import ApplicabilityDomainStore -from epacomp_tox.predictive.base import ( - ADCheckResult, - PredictiveRequest, - PredictiveServiceBase, -) -from epacomp_tox.predictive.clients import PredictiveClient - - -class GenRAClient(PredictiveClient): - """Wrapper interface for GenRA analogue search + prediction service.""" - - def __init__(self, client: Any) -> None: - self.client = client - - def predict(self, request: PredictiveRequest) -> Dict[str, Any]: - return self.client.predict( - chemical=request.chemical_identifier, - identifier_type=request.identifier_type, - ) - - def check_applicability_domain(self, request: PredictiveRequest) -> ADCheckResult: - result = self.client.check_applicability_domain( - chemical=request.chemical_identifier, - identifier_type=request.identifier_type, - ) - return ADCheckResult( - in_domain=result.get("in_domain", False), - confidence=result.get("confidence", 0.0), - details=result, - ) - - -class GenRAService(PredictiveServiceBase): - """Predictive service wrapper for the GenRA read-across workflow.""" - - def __init__( - self, - *, - config: Dict[str, Any], - client: Optional[PredictiveClient] = None, - ad_store: Optional[ApplicabilityDomainStore] = None, - ) -> None: - super().__init__(config=config, ad_store=ad_store) - self.client = client - - def _ensure_client(self) -> PredictiveClient: - if self.client is None: - raise RuntimeError("GenRA client not configured") - return self.client - - def _predict_impl(self, request: PredictiveRequest) -> Dict[str, Any]: - client = self._ensure_client() - return client.predict(request) - - def _check_ad_impl(self, request: PredictiveRequest) -> ADCheckResult: - client = self._ensure_client() - return client.check_applicability_domain(request) - -==================================================================================================== -FILE: src/epacomp_tox/predictive/opera_service.py -==================================================================================================== -from __future__ import annotations - -from typing import Any, Dict, Optional - -from epacomp_tox.metadata.applicability import ApplicabilityDomainStore -from epacomp_tox.predictive.base import ( - ADCheckResult, - PredictiveRequest, - PredictiveServiceBase, -) -from epacomp_tox.predictive.clients import PredictiveClient - - -class OperaClient(PredictiveClient): - """Wrapper around OPERA CLI/API integration.""" - - def __init__(self, client: Any) -> None: - self.client = client - - def predict(self, request: PredictiveRequest) -> Dict[str, Any]: - payload = self.client.predict_property( - chemical=request.chemical_identifier, - identifier_type=request.identifier_type, - ) - return payload - - def check_applicability_domain(self, request: PredictiveRequest) -> ADCheckResult: - result = self.client.check_applicability_domain( - chemical=request.chemical_identifier, - 
identifier_type=request.identifier_type, - ) - return ADCheckResult( - in_domain=result.get("in_domain", False), - confidence=result.get("confidence", 0.0), - details=result, - ) - - -class OperaPropertyService(PredictiveServiceBase): - """Predictive service wrapper for OPERA property models.""" - - def __init__( - self, - *, - config: Dict[str, Any], - client: Optional[PredictiveClient] = None, - ad_store: Optional[ApplicabilityDomainStore] = None, - ) -> None: - super().__init__(config=config, ad_store=ad_store) - self.client = client - - def _ensure_client(self) -> PredictiveClient: - if self.client is None: - raise RuntimeError("OPERA client not configured") - return self.client - - def _predict_impl(self, request: PredictiveRequest) -> Dict[str, Any]: - client = self._ensure_client() - return client.predict(request) - - def _check_ad_impl(self, request: PredictiveRequest) -> ADCheckResult: - client = self._ensure_client() - return client.check_applicability_domain(request) - -==================================================================================================== -FILE: src/epacomp_tox/predictive/router.py -==================================================================================================== -from __future__ import annotations - -from typing import Callable, Dict, Optional - -from fastapi import APIRouter, Depends, HTTPException, status - -from epacomp_tox.contracts import validate_payload -from epacomp_tox.predictive.base import ( - ADCheckResult, - PredictiveRequest, - PredictiveResponse, - PredictiveServiceBase, -) - -PREDICT_RESPONSE_SCHEMA = ("predictive", "predict.response.schema") -AD_RESPONSE_SCHEMA = ("predictive", "ad_check.response.schema") - - -def build_predictive_router( - *, - service_factory: Callable[[], PredictiveServiceBase], - prefix: str = "", - tags: Optional[list[str]] = None, -) -> APIRouter: - """Construct a router exposing predict and AD check endpoints.""" - router = APIRouter(prefix=prefix, tags=tags or ["predictive"]) - - async def get_service() -> PredictiveServiceBase: - return service_factory() - - @router.post( - "/predict", - response_model=PredictiveResponse, - summary="Run predictive model with applicability domain enforcement", - ) - async def predict_endpoint( - body: PredictiveRequest, service: PredictiveServiceBase = Depends(get_service) - ) -> PredictiveResponse: - try: - response = service.predict(body) - validate_payload( - response.model_dump(), - namespace=PREDICT_RESPONSE_SCHEMA[0], - name=PREDICT_RESPONSE_SCHEMA[1], - ) - return response - except ValueError as exc: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc) - ) from exc - - @router.post( - "/check_applicability_domain", - response_model=ADCheckResult, - summary="Evaluate applicability domain for the given request", - ) - async def ad_endpoint( - body: PredictiveRequest, service: PredictiveServiceBase = Depends(get_service) - ) -> ADCheckResult: - result = service.check_applicability_domain(body) - validate_payload( - result.model_dump(), - namespace=AD_RESPONSE_SCHEMA[0], - name=AD_RESPONSE_SCHEMA[1], - ) - return result - - return router - -==================================================================================================== -FILE: src/epacomp_tox/predictive/test_service.py -==================================================================================================== -from __future__ import annotations - -from typing import Any, Dict, Optional - -from ctxpy import CtxApiError -from 
epacomp_tox.metadata.applicability import ApplicabilityDomainStore -from epacomp_tox.predictive.base import ( - ADCheckResult, - PredictiveRequest, - PredictiveServiceBase, -) -from epacomp_tox.predictive.clients import PredictiveClient - - -class TestClient(PredictiveClient): - """Wrapper around ctxpy TEST client.""" - - def __init__(self, client: Any) -> None: - self.client = client - - def predict(self, request: PredictiveRequest) -> Dict[str, Any]: - try: - return self.client.predict( - chemical=request.chemical_identifier, - identifier_type=request.identifier_type, - ) - except CtxApiError as exc: # pragma: no cover - passthrough - raise ValueError(f"TEST prediction failed: {exc}") from exc - - def check_applicability_domain(self, request: PredictiveRequest) -> ADCheckResult: - result = self.client.check_applicability_domain( - chemical=request.chemical_identifier, - identifier_type=request.identifier_type, - ) - return ADCheckResult( - in_domain=result.get("in_domain", False), - confidence=result.get("confidence", 0.0), - details=result, - ) - - -class TestConsensusPredictiveService(PredictiveServiceBase): - """Predictive service wrapper for TEST consensus toxicity models.""" - - def __init__( - self, - *, - config: Dict[str, Any], - client: Optional[PredictiveClient] = None, - ad_store: Optional[ApplicabilityDomainStore] = None, - ) -> None: - super().__init__(config=config, ad_store=ad_store) - self.client = client - - def _ensure_client(self) -> PredictiveClient: - if self.client is None: - raise RuntimeError("TEST client not configured") - return self.client - - def _predict_impl(self, request: PredictiveRequest) -> Dict[str, Any]: - client = self._ensure_client() - payload = client.predict(request) - return payload - - def _check_ad_impl(self, request: PredictiveRequest) -> ADCheckResult: - client = self._ensure_client() - return client.check_applicability_domain(request) - - -TestConsensusPredictiveService.__test__ = False - -==================================================================================================== -FILE: src/epacomp_tox/metadata/__init__.py -==================================================================================================== -"""Metadata utilities for CompTox model cards.""" - -from .model_cards import ModelCardFilter, ModelCardStore - -__all__ = ["ModelCardStore", "ModelCardFilter"] - -==================================================================================================== -FILE: src/epacomp_tox/metadata/applicability.py -==================================================================================================== -from __future__ import annotations - -import json -from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Tuple - -DEFAULT_AD_DIR = Path(Path.cwd(), "metadata", "applicability_domains") - - -class ApplicabilityDomainStore: - """File-backed access to applicability domain reference data.""" - - def __init__(self, directory: Optional[Path] = None): - self.directory = Path(directory or DEFAULT_AD_DIR) - self.directory.mkdir(parents=True, exist_ok=True) - - def list_definitions( - self, - *, - limit: Optional[int] = None, - cursor: Optional[str] = None, - ) -> Tuple[List[Dict[str, Any]], Optional[str]]: - entries = list(self._iter_defs()) - start = int(cursor) if cursor else 0 - end = start + limit if limit else None - page = entries[start:end] - next_cursor = None - if end is not None and end < len(entries): - next_cursor = str(end) - return page, next_cursor - - def 
get_definition(self, model_name: str) -> Optional[Dict[str, Any]]: - model_name_lower = model_name.lower() - for entry in self._iter_defs(): - if entry["model"].lower() == model_name_lower: - return entry - return None - - def _iter_defs(self) -> Iterable[Dict[str, Any]]: - for path in sorted(self.directory.glob("*.json")): - try: - payload = json.loads(path.read_text(encoding="utf-8")) - except ( - OSError, - json.JSONDecodeError, - ): # pragma: no cover - logged upstream - continue - payload["path"] = str(path) - yield payload - -==================================================================================================== -FILE: src/epacomp_tox/metadata/model_cards.py -==================================================================================================== -from __future__ import annotations - -import hashlib -import json -from dataclasses import dataclass -from datetime import datetime -from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Tuple - -DEFAULT_MODEL_CARD_DIR = Path(Path.cwd(), "metadata", "model_cards") - - -@dataclass -class ModelCardFilter: - model_name: Optional[str] = None - endpoint_contains: Optional[str] = None - compliance: Optional[str] = None # "approved" or "draft" - - -class ModelCardStore: - """Simple file-backed store for CompTox model cards.""" - - def __init__(self, directory: Optional[Path] = None): - self.directory = Path(directory or DEFAULT_MODEL_CARD_DIR) - self.directory.mkdir(parents=True, exist_ok=True) - - def list_cards( - self, - *, - filters: Optional[ModelCardFilter] = None, - limit: Optional[int] = None, - cursor: Optional[str] = None, - ) -> Tuple[List[Dict[str, Any]], Optional[str]]: - entries = list(self._iter_cards()) - filtered = self._apply_filters(entries, filters) - start = int(cursor) if cursor else 0 - end = start + limit if limit else None - page = filtered[start:end] - next_cursor = None - if end is not None and end < len(filtered): - next_cursor = str(end) - return page, next_cursor - - def _iter_cards(self) -> Iterable[Dict[str, Any]]: - for path in sorted(self.directory.glob("*.json")): - try: - raw = path.read_text(encoding="utf-8") - payload = json.loads(raw) - except ( - OSError, - json.JSONDecodeError, - ): # pragma: no cover - logged upstream - continue - checksum = hashlib.sha256(raw.encode("utf-8")).hexdigest() - stat = path.stat() - yield { - "card": payload, - "checksum": checksum, - "path": str(path), - "lastModified": datetime.fromtimestamp(stat.st_mtime).isoformat(), - } - - @staticmethod - def _apply_filters( - entries: Iterable[Dict[str, Any]], filters: Optional[ModelCardFilter] - ) -> List[Dict[str, Any]]: - if not filters: - return list(entries) - result: List[Dict[str, Any]] = [] - for entry in entries: - card = entry["card"] - if filters.model_name: - model_name = card.get("modelDetails", {}).get("name", "") - if filters.model_name.lower() not in model_name.lower(): - continue - if filters.endpoint_contains: - endpoint = ( - card.get("oecdValidationPrinciples", {}) - .get("definedEndpoint", {}) - .get("description", "") - ) - if filters.endpoint_contains.lower() not in endpoint.lower(): - continue - if filters.compliance: - status = _compute_compliance_status(card) - if status != filters.compliance.lower(): - continue - result.append(entry) - return result - - -def _compute_compliance_status(card: Dict[str, Any]) -> str: - review = card.get("provenance", {}).get("reviewStatus", {}) - approved_by = review.get("approvedBy", []) - if approved_by: - return "approved" - 
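-    # No recorded approvers means the card has not cleared review.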
return "draft" - -==================================================================================================== -FILE: src/epacomp_tox/metadata/validator.py -==================================================================================================== -from __future__ import annotations - -import json -from pathlib import Path -from typing import Iterable, List - -from jsonschema import ValidationError, validate - -DEFAULT_SCHEMA_PATH = Path("schemas/comptox_model_card.schema.json") -DEFAULT_CARDS_DIR = Path("metadata/model_cards") -DEFAULT_AD_DIR = Path("metadata/applicability_domains") - - -class MetadataValidationError(Exception): - """Raised when metadata validation fails.""" - - -def validate_model_cards( - *, - cards_dir: Path = DEFAULT_CARDS_DIR, - schema_path: Path = DEFAULT_SCHEMA_PATH, -) -> None: - schema = json.loads(schema_path.read_text(encoding="utf-8")) - errors: List[str] = [] - for path in sorted(cards_dir.glob("*.json")): - try: - payload = json.loads(path.read_text(encoding="utf-8")) - validate(instance=payload, schema=schema) - except (ValidationError, json.JSONDecodeError) as exc: - errors.append(f"{path}: {exc}") - if errors: - raise MetadataValidationError("\n".join(errors)) - - -def validate_applicability_domains(*, directory: Path = DEFAULT_AD_DIR) -> None: - required_fields = {"model", "version", "criteria", "policy"} - errors: List[str] = [] - for path in sorted(directory.glob("*.json")): - try: - payload = json.loads(path.read_text(encoding="utf-8")) - except json.JSONDecodeError as exc: - errors.append(f"{path}: invalid JSON: {exc}") - continue - missing = required_fields - set(payload.keys()) - if missing: - errors.append(f"{path}: missing fields {sorted(missing)}") - if not isinstance(payload.get("criteria"), list): - errors.append(f"{path}: 'criteria' must be a list") - if errors: - raise MetadataValidationError("\n".join(errors)) - - -def validate_all() -> None: - validate_model_cards() - validate_applicability_domains() - -==================================================================================================== -FILE: src/epacomp_tox/resources/base.py -==================================================================================================== -import random -import time -from abc import ABC, abstractmethod -from typing import Any, Callable, Dict, List, Optional - -from ctxpy import CtxApiError -from epacomp_tox.config import get_retry_config -from epacomp_tox.validators import ensure_list, ensure_object, to_serializable - - -class BaseResource(ABC): - """ - Base class for all MCP resources. - - A resource represents a collection of related data and functionality - from the EPA CompTox APIs. - """ - - def __init__(self, api_key: str): - """ - Initialize the resource. - - Args: - api_key: EPA CompTox API key. - """ - self.api_key = api_key - self._last_metadata: Dict[str, Any] = {} - - def _with_retry( - self, - fn: Callable[[], Any], - *, - retries: Optional[int] = None, - base_delay: Optional[float] = None, - ) -> Any: - """ - Call a function with basic exponential backoff and jitter on transient errors. - - Retries on generic Exceptions to avoid tight coupling to underlying HTTP client types. 
- """ - if retries is None or base_delay is None: - r, b = get_retry_config() - retries = retries if retries is not None else r - base_delay = base_delay if base_delay is not None else b - attempt = 0 - while True: - try: - result = fn() - self._capture_last_metadata() - return result - except CtxApiError as exc: - self._last_metadata = { - "status": exc.status, - "request_id": exc.request_id, - "rate_limit": exc.rate_limit, - "retry_after": exc.retry_after, - } - attempt += 1 - if attempt > retries or not exc.retryable: - raise - sleep_for = base_delay * (2 ** (attempt - 1)) - sleep_for = sleep_for * (0.8 + random.random() * 0.4) - time.sleep(sleep_for) - except Exception as e: - attempt += 1 - if attempt > retries: - raise - # Exponential backoff with jitter - sleep_for = base_delay * (2 ** (attempt - 1)) - sleep_for = sleep_for * (0.8 + random.random() * 0.4) - time.sleep(sleep_for) - - def _ensure_list(self, value: Any) -> List[Any]: - """Normalize value into a list that is JSON-serializable.""" - serialized = to_serializable(value) - return ensure_list(serialized) - - def _ensure_object(self, value: Any, *, allow_list: bool = False) -> Dict[str, Any]: - """Normalize value into a mapping; optionally wrap list responses.""" - serialized = to_serializable(value) - return ensure_object(serialized, allow_list=allow_list) - - def _capture_last_metadata(self) -> None: - client = getattr(self, "client", None) - if client is not None and hasattr(client, "last_metadata"): - self._last_metadata = client.last_metadata - - def get_last_metadata(self) -> Dict[str, Any]: - """Return metadata captured from the most recent CTX API call.""" - return self._last_metadata - - @property - @abstractmethod - def name(self) -> str: - """Get the resource name.""" - pass - - @property - @abstractmethod - def description(self) -> str: - """Get the resource description.""" - pass - - @abstractmethod - def get_tools(self) -> List[Dict[str, Any]]: - """ - Get a list of tools provided by this resource. - - Returns: - List of tool definitions. - """ - pass - - def has_tool(self, tool_name: str) -> bool: - """ - Check if this resource provides the given tool. - - Args: - tool_name: Name of the tool to check. - - Returns: - True if the tool is provided by this resource, False otherwise. - """ - return any(tool["name"] == tool_name for tool in self.get_tools()) - - @abstractmethod - def execute_tool(self, tool_name: str, parameters: Dict[str, Any]) -> Any: - """ - Execute a tool with the given parameters. - - Args: - tool_name: Name of the tool to execute. - parameters: Parameters for the tool. - - Returns: - Tool execution result. - - Raises: - ValueError: If the tool is not found or parameters are invalid. 
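-
-        Example (illustrative; tool names and parameters come from the
-        concrete resource's get_tools() catalog, and the DTXSID is a
-        placeholder)::
-
-            resource.execute_tool("get_bioactivity_aed", {"dtxsid": "DTXSID0000001"})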
- """ - pass - -==================================================================================================== -FILE: src/epacomp_tox/resources/bioactivity.py -==================================================================================================== -import logging -from typing import Any, Dict, List, Optional - -import ctxpy as ctx -from epacomp_tox.contracts import schema_ref -from epacomp_tox.validators import to_serializable - -from .base import BaseResource - -logger = logging.getLogger(__name__) - - -class BioactivityResource(BaseResource): - """MCP resource exposing CTX Bioactivity endpoints.""" - - @property - def name(self) -> str: - return "bioactivity" - - @property - def description(self) -> str: - return "Access to ToxCast/Tox21 bioactivity data, assays, models, and AOP crosswalks" - - def __init__(self, api_key: str): - super().__init__(api_key) - - # Increase upstream timeout for slow queries - UPSTREAM_TIMEOUT = 120.0 - try: - self.client = ctx.Bioactivity(x_api_key=api_key, timeout=UPSTREAM_TIMEOUT) - logger.info( - f"Successfully initialized ctx.Bioactivity with timeout={UPSTREAM_TIMEOUT}s" - ) - except TypeError as e: - logger.warning( - f"Could not set timeout for ctx.Bioactivity (TypeError: {e}). Using default timeout." - ) - self.client = ctx.Bioactivity(x_api_key=api_key) - - def get_tools(self) -> List[Dict[str, Any]]: - tools: List[Dict[str, Any]] = [ - { - "name": "search_bioactivity_terms", - "description": "Search bioactivity terms by prefix, exact match, or substring", - "parameters": { - "type": "object", - "properties": { - "search_type": { - "type": "string", - "enum": ["equals", "starts-with", "contains"], - "description": "Search mode to use", - }, - "value": { - "type": "string", - "description": "Term to search for", - }, - }, - "required": ["search_type", "value"], - }, - }, - { - "name": "get_bioactivity_summary_by_dtxsid", - "description": "Fetch bioactivity summary data for a chemical", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "DSSTox Substance Identifier", - } - }, - "required": ["dtxsid"], - }, - }, - { - "name": "get_bioactivity_summary_by_aeid", - "description": "Fetch bioactivity summary data for an assay endpoint ID (AEID)", - "parameters": { - "type": "object", - "properties": { - "aeid": { - "type": "string", - "description": "Assay endpoint identifier", - } - }, - "required": ["aeid"], - }, - }, - { - "name": "get_bioactivity_summary_by_tissue", - "description": "Fetch bioactivity summary data for a chemical in a specific tissue", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "DSSTox Substance Identifier", - }, - "tissue": { - "type": "string", - "description": "Tissue of origin (e.g., liver)", - }, - }, - "required": ["dtxsid", "tissue"], - }, - }, - { - "name": "get_bioactivity_data", - "description": "Retrieve detailed bioactivity data for a single identifier", - "parameters": { - "type": "object", - "properties": { - "identifier_type": { - "type": "string", - "enum": ["spid", "m4id", "dtxsid", "aeid"], - "description": "Identifier category", - }, - "identifier": { - "type": "string", - "description": "Identifier value", - }, - "projection": { - "type": "string", - "description": "Optional projection (e.g., toxcast-summary-plot)", - }, - }, - "required": ["identifier_type", "identifier"], - }, - }, - { - "name": "batch_get_bioactivity_data", - "description": "Batch fetch bioactivity data for 
multiple identifiers", - "parameters": { - "type": "object", - "properties": { - "identifier_type": { - "type": "string", - "enum": ["spid", "m4id", "dtxsid", "aeid"], - "description": "Identifier category", - }, - "identifiers": { - "type": "array", - "items": {"type": "string"}, - "minItems": 1, - "description": "Identifiers to request (max 200 per batch)", - }, - }, - "required": ["identifier_type", "identifiers"], - }, - }, - { - "name": "get_bioactivity_aed", - "description": "Retrieve Activity Exposure Distribution (AED) data for a chemical", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "DSSTox Substance Identifier", - } - }, - "required": ["dtxsid"], - }, - }, - { - "name": "batch_get_bioactivity_aed", - "description": "Batch retrieve AED data for multiple chemicals", - "parameters": { - "type": "object", - "properties": { - "dtxsids": { - "type": "array", - "items": {"type": "string"}, - "minItems": 1, - "description": "DSSTox IDs to request (max 200 per batch)", - } - }, - "required": ["dtxsids"], - }, - }, - { - "name": "get_bioactivity_assay", - "description": "Retrieve assay annotations or lists (by AEID, gene, single-concentration, or all)", - "parameters": { - "type": "object", - "properties": { - "mode": { - "type": "string", - "enum": ["all", "aeid", "gene", "single-concentration"], - "description": "Assay query type", - }, - "aeid": { - "type": "string", - "description": "Assay endpoint ID (required for aeid and single-concentration modes)", - }, - "gene_symbol": { - "type": "string", - "description": "Gene symbol (required for gene mode)", - }, - }, - "required": ["mode"], - }, - }, - { - "name": "batch_get_bioactivity_assay_annotations", - "description": "Batch retrieve assay annotations for AEIDs", - "parameters": { - "type": "object", - "properties": { - "aeids": { - "type": "array", - "items": {"type": "string"}, - "minItems": 1, - "description": "List of assay endpoint IDs", - } - }, - "required": ["aeids"], - }, - }, - { - "name": "get_bioactivity_assay_count", - "description": "Return the total count of available assays", - "parameters": {"type": "object", "properties": {}}, - }, - { - "name": "get_bioactivity_assay_chemicals", - "description": "Get chemicals associated with an assay endpoint", - "parameters": { - "type": "object", - "properties": { - "aeid": { - "type": "string", - "description": "Assay endpoint ID", - } - }, - "required": ["aeid"], - }, - }, - { - "name": "get_bioactivity_aop", - "description": "Retrieve adverse outcome pathway mappings", - "parameters": { - "type": "object", - "properties": { - "lookup_type": { - "type": "string", - "enum": ["toxcast-aeid", "event-number", "entrez-gene-id"], - "description": "AOP lookup type", - }, - "identifier": { - "type": "string", - "description": "Identifier value matching the lookup type", - }, - }, - "required": ["lookup_type", "identifier"], - }, - }, - { - "name": "get_bioactivity_analytical_qc", - "description": "Retrieve analytical QC data for a chemical", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "DSSTox Substance Identifier", - } - }, - "required": ["dtxsid"], - }, - }, - ] - - list_schema = schema_ref("common", "list_generic.response.schema") - schema_map = { - "search_bioactivity_terms": ( - "bioactivity", - "search_bioactivity_terms.response.schema", - ), - "get_bioactivity_summary_by_dtxsid": ( - "bioactivity", - "get_bioactivity_summary_by_dtxsid.response.schema", - ), 
- "get_bioactivity_assay": ( - "bioactivity", - "get_bioactivity_assay.response.schema", - ), - "get_bioactivity_aop": ( - "bioactivity", - "get_bioactivity_aop.response.schema", - ), - "get_bioactivity_assay_count": ("common", "object.response.schema"), - } - for tool in tools: - schema_info = schema_map.get(tool["name"]) - if schema_info: - tool["responseSchemaRef"] = schema_ref(*schema_info) - else: - tool["responseSchemaRef"] = list_schema - - # Ensure outputSchema is populated from the reference - if "responseSchemaRef" in tool: - from epacomp_tox.contracts import load_schema - - ref = tool["responseSchemaRef"] - tool["outputSchema"] = load_schema(ref["namespace"], ref["name"]) - - return tools - - def execute_tool(self, tool_name: str, parameters: Dict[str, Any]) -> Any: - if tool_name == "search_bioactivity_terms": - return self.search_bioactivity_terms( - search_type=parameters["search_type"], - value=parameters["value"], - ) - if tool_name == "get_bioactivity_summary_by_dtxsid": - return self.get_bioactivity_summary_by_dtxsid(parameters["dtxsid"]) - if tool_name == "get_bioactivity_summary_by_aeid": - return self.get_bioactivity_summary_by_aeid(parameters["aeid"]) - if tool_name == "get_bioactivity_summary_by_tissue": - return self.get_bioactivity_summary_by_tissue( - dtxsid=parameters["dtxsid"], - tissue=parameters["tissue"], - ) - if tool_name == "get_bioactivity_data": - return self.get_bioactivity_data( - identifier_type=parameters["identifier_type"], - identifier=parameters["identifier"], - projection=parameters.get("projection"), - ) - if tool_name == "batch_get_bioactivity_data": - return self.batch_get_bioactivity_data( - identifier_type=parameters["identifier_type"], - identifiers=parameters["identifiers"], - ) - if tool_name == "get_bioactivity_aed": - return self.get_bioactivity_aed(parameters["dtxsid"]) - if tool_name == "batch_get_bioactivity_aed": - return self.batch_get_bioactivity_aed(parameters["dtxsids"]) - if tool_name == "get_bioactivity_assay": - return self.get_bioactivity_assay( - mode=parameters["mode"], - aeid=parameters.get("aeid"), - gene_symbol=parameters.get("gene_symbol"), - ) - if tool_name == "batch_get_bioactivity_assay_annotations": - return self.batch_get_bioactivity_assay_annotations(parameters["aeids"]) - if tool_name == "get_bioactivity_assay_count": - return self.get_bioactivity_assay_count() - if tool_name == "get_bioactivity_assay_chemicals": - return self.get_bioactivity_assay_chemicals(parameters["aeid"]) - if tool_name == "get_bioactivity_aop": - return self.get_bioactivity_aop( - lookup_type=parameters["lookup_type"], - identifier=parameters["identifier"], - ) - if tool_name == "get_bioactivity_analytical_qc": - return self.get_bioactivity_analytical_qc(parameters["dtxsid"]) - raise ValueError(f"Unknown tool: {tool_name}") - - # Tool implementations ------------------------------------------------- - - def search_bioactivity_terms(self, search_type: str, value: str) -> List[Any]: - result = self._with_retry(lambda: self.client.search(search_type, value)) - return self._ensure_list(result) - - def get_bioactivity_models( - self, dtxsid: str, model: Optional[str] = None - ) -> List[Any]: - kwargs = {"dtxsid": dtxsid} - if model is not None: - kwargs["model"] = model - result = self._with_retry( - lambda: self.client.models_by_dtxsid_and_name(**kwargs) - ) - else: - result = self._with_retry(lambda: self.client.models_by_dtxsid(**kwargs)) - return self._ensure_list(result) - - def get_bioactivity_summary_by_dtxsid(self, dtxsid: str) -> 
List[Any]: - result = self._with_retry(lambda: self.client.data_summary_by_dtxsid(dtxsid)) - return self._ensure_list(result) - - def get_bioactivity_summary_by_aeid(self, aeid: str) -> List[Any]: - result = self._with_retry(lambda: self.client.data_summary_by_aeid(aeid)) - return self._ensure_list(result) - - def get_bioactivity_summary_by_tissue(self, dtxsid: str, tissue: str) -> List[Any]: - result = self._with_retry( - lambda: self.client.data_summary_by_tissue(dtxsid, tissue) - ) - return self._ensure_list(result) - - def get_bioactivity_data( - self, - identifier_type: str, - identifier: str, - projection: Optional[str] = None, - ) -> List[Any]: - norm = identifier_type.strip().lower() - kwargs = {"identifier": identifier} - if projection is not None: - kwargs["projection"] = projection - - if norm == "spid": - result = self._with_retry( - lambda: self.client.data_by_spid(kwargs["identifier"]) - ) - elif norm == "m4id": - result = self._with_retry( - lambda: self.client.data_by_m4id(kwargs["identifier"]) - ) - elif norm == "dtxsid": - result = self._with_retry(lambda: self.client.data_by_dtxsid(**kwargs)) - elif norm == "aeid": - result = self._with_retry(lambda: self.client.data_by_aeid(**kwargs)) - else: - raise ValueError( - "identifier_type must be one of spid, m4id, dtxsid, or aeid" - ) - return self._ensure_list(result) - - def batch_get_bioactivity_data( - self, identifier_type: str, identifiers: List[str] - ) -> List[Any]: - clean = [value for value in identifiers if value] - if not clean: - return [] - result = self._with_retry( - lambda: self.client.data_batch(identifier_type, clean) - ) - return self._ensure_list(result) - - def get_bioactivity_aed(self, dtxsid: str) -> List[Any]: - result = self._with_retry(lambda: self.client.aed_by_dtxsid(dtxsid)) - return self._ensure_list(result) - - def batch_get_bioactivity_aed(self, dtxsids: List[str]) -> List[Any]: - clean = [value for value in dtxsids if value] - if not clean: - return [] - result = self._with_retry(lambda: self.client.aed_batch(clean)) - return self._ensure_list(result) - - def get_bioactivity_assay( - self, - mode: str, - aeid: Optional[str] = None, - gene_symbol: Optional[str] = None, - ) -> Any: - normalized = mode.strip().lower() - kwargs = {} - if normalized == "all": - result = self._with_retry(self.client.assays_all) - elif normalized == "aeid": - if aeid is None: - raise ValueError("aeid is required when mode='aeid'") - kwargs["aeid"] = aeid - result = self._with_retry(lambda: self.client.assay_by_aeid(**kwargs)) - elif normalized == "single-concentration": - if aeid is None: - raise ValueError("aeid is required when mode='single-concentration'") - kwargs["aeid"] = aeid - result = self._with_retry( - lambda: self.client.assay_single_conc_by_aeid(**kwargs) - ) - elif normalized == "gene": - if gene_symbol is None: - raise ValueError("gene_symbol is required when mode='gene'") - kwargs["gene_symbol"] = gene_symbol - result = self._with_retry(lambda: self.client.assay_by_gene(**kwargs)) - else: - raise ValueError( - "mode must be one of all, aeid, single-concentration, or gene" - ) - return to_serializable(result) - - def batch_get_bioactivity_assay_annotations(self, aeids: List[str]) -> List[Any]: - clean = [value for value in aeids if value] - if not clean: - return [] - result = self._with_retry(lambda: self.client.assay_batch(clean)) - return self._ensure_list(result) - - def get_bioactivity_assay_count(self) -> Any: - result = self._with_retry(self.client.assay_count) - return to_serializable(result) - 
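-     # Illustrative note (not part of the tool catalog): the execute_tool
-     # dispatch above reduces each MCP tools/call to a single retried ctxpy
-     # client call. Assuming `resource` is an initialized BioactivityResource
-     # (the name is illustrative), an assay-count request flows as:
-     #
-     #     resource.execute_tool("get_bioactivity_assay_count", {})
-     #     # -> self._with_retry(self.client.assay_count)
-     #     # -> to_serializable(result)
-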
-     def get_bioactivity_assay_chemicals(self, aeid: str) -> List[Any]:
-         result = self._with_retry(lambda: self.client.assay_chemicals_by_aeid(aeid))
-         return self._ensure_list(result)
-
-     def get_bioactivity_aop(self, lookup_type: str, identifier: str) -> List[Any]:
-         norm = lookup_type.strip().lower()
-         if norm == "toxcast-aeid":
-             result = self._with_retry(
-                 lambda: self.client.aop_by_toxcast_aeid(identifier)
-             )
-         elif norm == "event-number":
-             result = self._with_retry(
-                 lambda: self.client.aop_by_event_number(identifier)
-             )
-         elif norm == "entrez-gene-id":
-             result = self._with_retry(
-                 lambda: self.client.aop_by_entrez_gene(identifier)
-             )
-         else:
-             raise ValueError(
-                 "lookup_type must be one of toxcast-aeid, event-number, or entrez-gene-id"
-             )
-         return self._ensure_list(result)
-
-     def get_bioactivity_analytical_qc(self, dtxsid: str) -> List[Any]:
-         result = self._with_retry(lambda: self.client.analytical_qc_by_dtxsid(dtxsid))
-         return self._ensure_list(result)
-
-====================================================================================================
-FILE: src/epacomp_tox/resources/chemical.py
-====================================================================================================
-import base64
-import logging
-from typing import Any, Dict, List, Optional
-
-import ctxpy as ctx
-from epacomp_tox.contracts import schema_ref
-from epacomp_tox.validators import to_serializable
-
-from .base import BaseResource
-
-logger = logging.getLogger(__name__)
-
-
-class ChemicalResource(BaseResource):
-    """
-    MCP resource for EPA CompTox chemical data.
-
-    Provides access to chemical structures, nomenclature, IDs, and properties.
-    """
-
-    @property
-    def name(self) -> str:
-        return "chemical"
-
-    @property
-    def description(self) -> str:
-        return "Access to chemical structures, nomenclature, IDs, and properties"
-
-    def __init__(self, api_key: str):
-        """
-        Initialize the chemical resource.
-
-        Args:
-            api_key: EPA CompTox API key.
-        """
-        super().__init__(api_key)
-
-        # The ctxpy default timeout is too short for complex queries, so use a
-        # generous upstream timeout (120 seconds).
-        UPSTREAM_TIMEOUT = 120.0
-
-        try:
-            # Attempt to initialize the client with the increased timeout.
-            # This assumes the ctxpy library accepts a 'timeout' argument.
-            self.client = ctx.Chemical(x_api_key=api_key, timeout=UPSTREAM_TIMEOUT)
-            logger.info(
-                f"Successfully initialized ctx.Chemical with timeout={UPSTREAM_TIMEOUT}s"
-            )
-        except TypeError as e:
-            # If ctxpy does not accept the 'timeout' argument, it raises a TypeError.
-            # Fall back to the original initialization and log a warning.
-            logger.warning(
-                f"Could not set timeout for ctx.Chemical (TypeError: {e}). Using default timeout. "
-                "Timeouts may still occur for slow queries. Check ctxpy documentation/version."
-            )
-            self.client = ctx.Chemical(x_api_key=api_key)
-
-    def get_tools(self) -> List[Dict[str, Any]]:
-        """
-        Get a list of tools provided by this resource.
-
-        Returns:
-            List of tool definitions.
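-
-        Example (illustrative only; assumes a valid CTX API key):
-
-            tools = ChemicalResource(api_key="...").get_tools()
-            names = {tool["name"] for tool in tools}
-            assert "search_chemical" in names and "search_msready" in names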
- """ - tools: List[Dict[str, Any]] = [ - { - "name": "search_chemical", - "description": "Search for chemicals by name, CAS-RN, or other identifiers", - "parameters": { - "type": "object", - "properties": { - "query": {"type": "string", "description": "Search term"}, - "search_type": { - "type": "string", - "description": "Search type: equals, starts-with, or contains", - "enum": ["equals", "starts-with", "contains"], - }, - }, - "required": ["query", "search_type"], - }, - }, - { - "name": "batch_search_chemical", - "description": "Batch search for chemicals using a list of identifiers", - "parameters": { - "type": "object", - "properties": { - "identifiers": { - "type": "array", - "items": {"type": "string"}, - "description": "Identifiers to search (DTXSIDs, CASRNs, names, etc.)", - } - }, - "required": ["identifiers"], - }, - }, - { - "name": "get_chemical_details", - "description": "Get detailed information about a chemical", - "parameters": { - "type": "object", - "properties": { - "identifier": { - "type": "string", - "description": "Chemical identifier (DTXSID or DTXCID)", - }, - "id_type": { - "type": "string", - "description": "Type of identifier", - "enum": ["dtxsid", "dtxcid"], - }, - "subset": { - "type": "string", - "description": "Optional subset selector for details", - "enum": [ - "default", - "all", - "details", - "identifiers", - "structures", - "nta", - ], - "default": "default", - }, - }, - "required": ["identifier", "id_type"], - }, - }, - { - "name": "batch_get_chemical_details", - "description": "Get detailed information about multiple chemicals", - "parameters": { - "type": "object", - "properties": { - "identifiers": { - "type": "array", - "items": {"type": "string"}, - "description": "List of chemical identifiers", - }, - "id_type": { - "type": "string", - "description": "Type of identifier", - "enum": ["dtxsid", "dtxcid"], - }, - "subset": { - "type": "string", - "description": "Optional subset selector for details", - "enum": [ - "default", - "all", - "details", - "identifiers", - "structures", - "nta", - ], - "default": "default", - }, - }, - "required": ["identifiers", "id_type"], - }, - }, - { - "name": "search_msready", - "description": "Search for chemicals by MS-ready properties", - "parameters": { - "type": "object", - "properties": { - "search_type": { - "type": "string", - "description": "Type of MS-ready search", - "enum": ["dtxcid", "formula", "mass-range"], - }, - "query": { - "type": "string", - "description": "Search term for dtxcid or formula", - }, - "mass_start": { - "type": "number", - "description": "Start of mass range for mass-range search", - }, - "mass_end": { - "type": "number", - "description": "End of mass range for mass-range search", - }, - }, - "required": ["search_type"], - }, - }, - ] - - # Property endpoints are not available in the current ctxpy client; excluded to avoid runtime 500s. 
- tools.extend( - [ - { - "name": "get_chemical_fate_summary", - "description": "Retrieve environmental fate summary for a chemical", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "DSSTox Substance Identifier", - }, - "property_name": { - "type": "string", - "description": "Optional fate property filter", - }, - }, - "required": ["dtxsid"], - }, - }, - { - "name": "get_chemical_fate_details", - "description": "Retrieve detailed environmental fate data for a chemical", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "DSSTox Substance Identifier", - } - }, - "required": ["dtxsid"], - }, - }, - { - "name": "get_chemical_extra_data", - "description": "Fetch extra chemical data (functional use, use cases, etc.)", - "parameters": { - "type": "object", - "properties": { - "dtxsids": { - "type": "array", - "items": {"type": "string"}, - "minItems": 1, - "description": "List of DSSTox Substance Identifiers", - } - }, - "required": ["dtxsids"], - }, - }, - { - "name": "opsin_convert_name", - "description": "Convert a systematic name using OPSIN", - "parameters": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "Systematic IUPAC name", - }, - "output_format": { - "type": "string", - "enum": ["smiles", "inchikey", "inchi"], - "description": "Desired representation", - }, - }, - "required": ["name", "output_format"], - }, - }, - { - "name": "indigo_convert_molfile", - "description": "Convert a molfile using Indigo toolkit endpoints", - "parameters": { - "type": "object", - "properties": { - "molfile": { - "type": "string", - "description": "Molfile contents (V2000/V3000)", - }, - "output_format": { - "type": "string", - "enum": [ - "smiles", - "inchikey", - "inchi", - "mol_v2000", - "mol_v3000", - "mol_weight", - "canonical_smiles", - ], - "description": "Desired transformation", - }, - }, - "required": ["molfile", "output_format"], - }, - }, - ] - ) - - schema_map = { - "search_chemical": ("chemical", "search_chemical.response.schema"), - "batch_search_chemical": ("chemical", "search_chemical.response.schema"), - "get_chemical_details": ("common", "object.response.schema"), - "batch_get_chemical_details": ("common", "list_generic.response.schema"), - "search_msready": ("common", "list_generic.response.schema"), - "get_chemical_fate_summary": ("common", "object.response.schema"), - "get_chemical_fate_details": ("common", "object.response.schema"), - "get_chemical_extra_data": ("common", "list_generic.response.schema"), - "opsin_convert_name": ("chemical", "opsin_convert.response.schema"), - "indigo_convert_molfile": ("chemical", "indigo_convert.response.schema"), - } - - for tool in tools: - schema_info = schema_map.get(tool["name"]) - if schema_info: - tool["responseSchemaRef"] = schema_ref(*schema_info) - - # Ensure outputSchema is populated from the reference - if "responseSchemaRef" in tool: - from epacomp_tox.contracts import load_schema - - ref = tool["responseSchemaRef"] - tool["outputSchema"] = load_schema(ref["namespace"], ref["name"]) - - return tools - - def execute_tool(self, tool_name: str, parameters: Dict[str, Any]) -> Any: - """ - Execute a tool with the given parameters. - - Args: - tool_name: Name of the tool to execute. - parameters: Parameters for the tool. - - Returns: - Tool execution result. - - Raises: - ValueError: If the tool is not found or parameters are invalid. 
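-
-        Example (illustrative; `resource` is an initialized ChemicalResource,
-        and DTXSID7020182 is bisphenol A):
-
-            resource.execute_tool(
-                "get_chemical_details",
-                {"identifier": "DTXSID7020182", "id_type": "dtxsid"},
-            )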
- """ - if tool_name == "search_chemical": - return self.search_chemical( - query=parameters["query"], - search_type=parameters["search_type"], - ) - if tool_name == "batch_search_chemical": - return self.batch_search_chemical( - identifiers=parameters["identifiers"], - ) - if tool_name == "get_chemical_details": - return self.get_chemical_details( - identifier=parameters["identifier"], - id_type=parameters["id_type"], - subset=parameters.get("subset", "default"), - ) - if tool_name == "batch_get_chemical_details": - return self.batch_get_chemical_details( - identifiers=parameters["identifiers"], - id_type=parameters["id_type"], - subset=parameters.get("subset", "default"), - ) - if tool_name == "search_msready": - return self.search_msready( - search_type=parameters["search_type"], - query=parameters.get("query"), - mass_start=parameters.get("mass_start"), - mass_end=parameters.get("mass_end"), - ) - if tool_name == "get_chemical_fate_summary": - return self.get_chemical_fate_summary( - dtxsid=parameters["dtxsid"], - property_name=parameters.get("property_name"), - ) - if tool_name == "get_chemical_fate_details": - return self.get_chemical_fate_details(parameters["dtxsid"]) - if tool_name == "get_chemical_extra_data": - return self.get_chemical_extra_data(parameters["dtxsids"]) - if tool_name == "opsin_convert_name": - return self.opsin_convert_name( - name=parameters["name"], - output_format=parameters["output_format"], - ) - if tool_name == "indigo_convert_molfile": - return self.indigo_convert_molfile( - molfile=parameters["molfile"], - output_format=parameters["output_format"], - ) - raise ValueError(f"Unknown tool: {tool_name}") - - def search_chemical(self, query: str, search_type: str) -> List[Dict[str, Any]]: - """Search for chemicals by name, CAS-RN, or other identifiers.""" - result = self._with_retry( - lambda: self.client.search(by=search_type, word=query) - ) - return self._ensure_list(result) - - def batch_search_chemical(self, identifiers: List[str]) -> List[Dict[str, Any]]: - """Batch search for multiple chemical identifiers.""" - identifiers = [item for item in identifiers if item] - if not identifiers: - return [] - result = self._with_retry( - lambda: self.client.search(by="batch", word=identifiers) - ) - return self._ensure_list(result) - - def get_chemical_details( - self, identifier: str, id_type: str, subset: str = "default" - ) -> Dict[str, Any]: - """Get detailed information about a single chemical.""" - result = self._with_retry( - lambda: self.client.details(by=id_type, word=identifier, subset=subset) - ) - return self._ensure_object(result) - - def batch_get_chemical_details( - self, identifiers: List[str], id_type: str, subset: str = "default" - ) -> List[Dict[str, Any]]: - """Get detailed information about multiple chemicals.""" - identifiers = [item for item in identifiers if item] - if not identifiers: - return [] - result = self._with_retry( - lambda: self.client.details(by="batch", word=identifiers, subset=subset) - ) - return self._ensure_list(result) - - def search_msready( - self, - search_type: str, - query: Optional[str] = None, - mass_start: Optional[float] = None, - mass_end: Optional[float] = None, - ) -> List[Dict[str, Any]]: - """Search for chemicals by MS-ready properties or mass range.""" - normalized = search_type.strip().lower() - kwargs = {} - if normalized == "mass-range": - if mass_start is not None: - kwargs["start"] = mass_start - if mass_end is not None: - kwargs["end"] = mass_end - result = self._with_retry(lambda: 
self.client.msready(by="mass", **kwargs)) - else: - if query is not None: - kwargs["word"] = query - result = self._with_retry( - lambda: self.client.msready(by=search_type, **kwargs) - ) - return self._ensure_list(result) - - def _raise_properties_unavailable(self, tool_name: str) -> None: - """Helper to surface a clear error when property endpoints are unavailable.""" - raise NotImplementedError( - f"Chemical property tool '{tool_name}' is disabled: ctxpy client does not expose property endpoints." - ) - - def get_chemical_property_summary( - self, dtxsid: str, property_name: Optional[str] = None - ) -> Any: - self._raise_properties_unavailable("get_chemical_property_summary") - - def get_chemical_predicted_properties(self, dtxsid: str) -> List[Dict[str, Any]]: - self._raise_properties_unavailable("get_chemical_predicted_properties") - - def batch_get_chemical_predicted_properties( - self, dtxsids: List[str] - ) -> List[Dict[str, Any]]: - self._raise_properties_unavailable("batch_get_chemical_predicted_properties") - - def get_chemical_predicted_properties_by_range( - self, property_id: str, start: float, end: float - ) -> List[Dict[str, Any]]: - self._raise_properties_unavailable("get_chemical_predicted_properties_by_range") - - def get_chemical_experimental_properties(self, dtxsid: str) -> List[Dict[str, Any]]: - self._raise_properties_unavailable("get_chemical_experimental_properties") - - def batch_get_chemical_experimental_properties( - self, dtxsids: List[str] - ) -> List[Dict[str, Any]]: - self._raise_properties_unavailable("batch_get_chemical_experimental_properties") - - def get_chemical_experimental_properties_by_range( - self, property_name: str, start: float, end: float - ) -> List[Dict[str, Any]]: - self._raise_properties_unavailable( - "get_chemical_experimental_properties_by_range" - ) - - def list_chemical_property_names(self, property_type: str) -> List[str]: - self._raise_properties_unavailable("list_chemical_property_names") - - def get_chemical_fate_summary( - self, dtxsid: str, property_name: Optional[str] = None - ) -> Any: - kwargs = {"dtxsid": dtxsid} - if property_name is not None: - kwargs["prop_name"] = property_name - - result = self._with_retry(lambda: self.client.fate_summary(**kwargs)) - return to_serializable(result) - - def get_chemical_fate_details(self, dtxsid: str) -> Any: - result = self._with_retry(lambda: self.client.fate_details(dtxsid)) - return to_serializable(result) - - def get_chemical_extra_data(self, dtxsids: List[str]) -> List[Any]: - identifiers = [sid for sid in dtxsids if sid] - if not identifiers: - return [] - result = self._with_retry(lambda: self.client.extra_data_batch(identifiers)) - return self._ensure_list(result) - - def check_chemical_ghs_links( - self, source: str, dtxsids: List[str] - ) -> Dict[str, Any]: - identifiers = [sid for sid in dtxsids if sid] - if not identifiers: - return {"source": source, "results": []} - result = self._with_retry( - lambda: self.client.ghs_check_batch(source, identifiers) - ) - return { - "source": source, - "results": self._ensure_list(result), - } - - def opsin_convert_name(self, name: str, output_format: str) -> Dict[str, Any]: - result = self._with_retry( - lambda: self.client.opsin_convert(name, output=output_format) - ) - return { - "name": name, - "outputFormat": output_format, - "value": to_serializable(result), - } - - def indigo_convert_molfile( - self, molfile: str, output_format: str - ) -> Dict[str, Any]: - result = self._with_retry( - lambda: self.client.indigo_convert(molfile, 
output=output_format)
-        )
-        converted = to_serializable(result)
-        return {
-            "outputFormat": output_format,
-            "value": converted,
-        }
-
-    def get_chemical_structure_file(
-        self,
-        identifier_type: str,
-        identifier: str,
-        file_format: str,
-        image_format: Optional[str] = None,
-    ) -> Dict[str, Any]:
-        kwargs = {
-            "identifier_type": identifier_type,
-            "identifier": identifier,
-            "file_format": file_format,
-        }
-        if image_format is not None:
-            kwargs["image_format"] = image_format
-
-        payload = self._with_retry(lambda: self.client.structure_file(**kwargs))
-        metadata = self.get_last_metadata()
-        content_type = metadata.get("content_type") if metadata else None
-
-        if isinstance(payload, bytes):
-            # Binary payloads (e.g. MOL or image files) are returned
-            # base64-encoded; base64 is imported at module scope.
-            data = base64.b64encode(payload).decode("ascii")
-            encoding = "base64"
-        else:
-            data = to_serializable(payload)
-            encoding = "utf-8"
-
-        response: Dict[str, Any] = {
-            "identifier": identifier,
-            "identifierType": identifier_type,
-            "fileFormat": file_format,
-            "encoding": encoding,
-            "data": data,
-            "length": len(payload) if isinstance(payload, (bytes, str)) else None,
-        }
-        if content_type:
-            response["contentType"] = content_type
-        if file_format.lower() == "image":
-            response["imageFormat"] = (image_format or "PNG").upper()
-        return response
-
-====================================================================================================
-FILE: src/epacomp_tox/resources/hazard.py
-====================================================================================================
-import logging
-from typing import Any, Dict, List
-
-import ctxpy as ctx
-from epacomp_tox.contracts import schema_ref
-
-from .base import BaseResource
-
-logger = logging.getLogger(__name__)
-
-
-class HazardResource(BaseResource):
-    """MCP resource exposing CTX hazard datasets (ToxValDB, ToxRefDB, cancer, genetox, ADME/IVIVE, IRIS, PPRTV, HAWC)."""
-
-    _DATA_TYPE_ENUM = [
-        "all",
-        "hazard",
-        "toxval",
-        "human",
-        "eco",
-        "skin-eye",
-        "cancer",
-        "genetox",
-        "adme",
-        "toxref",
-        "iris",
-        "pprtv",
-        "hawc",
-    ]
-
-    @staticmethod
-    def _schema(namespace: str, name: str) -> Dict[str, str]:
-        return schema_ref(namespace, name)
-
-    @property
-    def name(self) -> str:
-        return "hazard"
-
-    @property
-    def description(self) -> str:
-        return (
-            "Access to hazard datasets from the CTX APIs, including ToxValDB, ToxRefDB, cancer, "
-            "genetox, ADME/IVIVE, IRIS, PPRTV, and HAWC link mappers."
-        )
-
-    def __init__(self, api_key: str):
-        super().__init__(api_key)
-        # Increase upstream timeout for slow queries
-        UPSTREAM_TIMEOUT = 120.0
-        try:
-            self.client = ctx.Hazard(x_api_key=api_key, timeout=UPSTREAM_TIMEOUT)
-            logger.info(
-                f"Successfully initialized ctx.Hazard with timeout={UPSTREAM_TIMEOUT}s"
-            )
-        except TypeError as e:
-            logger.warning(
-                f"Could not set timeout for ctx.Hazard (TypeError: {e}). Using default timeout."
- ) - self.client = ctx.Hazard(x_api_key=api_key) - - def _clean_identifiers(self, identifiers: List[str]) -> List[str]: - return [ - value.strip() - for value in identifiers - if isinstance(value, str) and value.strip() - ] - - def get_tools(self) -> List[Dict[str, Any]]: - tools: List[Dict[str, Any]] = [ - { - "name": "search_hazard", - "description": "Search for hazard data by DTXSID across ToxValDB, ToxRefDB, cancer, genetox, ADME/IVIVE, IRIS, PPRTV, or HAWC datasets.", - "parameters": { - "type": "object", - "properties": { - "data_type": { - "type": "string", - "description": "Hazard dataset to query.", - "enum": self._DATA_TYPE_ENUM, - }, - "dtxsid": { - "type": "string", - "description": "Chemical identifier (DTXSID).", - }, - "summary": { - "type": "boolean", - "description": "Whether to request summary (vs. detailed) data when supported.", - "default": True, - }, - }, - "required": ["data_type", "dtxsid"], - }, - }, - { - "name": "batch_search_hazard", - "description": "Batch hazard lookup for multiple DTXSIDs for the selected dataset.", - "parameters": { - "type": "object", - "properties": { - "data_type": { - "type": "string", - "description": "Hazard dataset to query.", - "enum": self._DATA_TYPE_ENUM, - }, - "dtxsids": { - "type": "array", - "items": {"type": "string"}, - "minItems": 1, - "description": "List of chemical identifiers (DTXSIDs).", - }, - "summary": { - "type": "boolean", - "description": "Whether to request summary (vs. detailed) data when supported.", - "default": True, - }, - }, - "required": ["data_type", "dtxsids"], - }, - }, - { - "name": "get_hazard_toxval", - "description": "Retrieve full ToxValDB hazard data for a single chemical.", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "Chemical identifier (DTXSID).", - } - }, - "required": ["dtxsid"], - }, - }, - { - "name": "batch_get_hazard_toxval", - "description": "Retrieve ToxValDB hazard data for multiple chemicals.", - "parameters": { - "type": "object", - "properties": { - "dtxsids": { - "type": "array", - "items": {"type": "string"}, - "minItems": 1, - "description": "Chemical identifiers (DTXSIDs).", - } - }, - "required": ["dtxsids"], - }, - }, - { - "name": "get_hazard_skin_eye", - "description": "Retrieve skin and eye hazard data for a single chemical.", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "Chemical identifier (DTXSID).", - } - }, - "required": ["dtxsid"], - }, - }, - { - "name": "batch_get_hazard_skin_eye", - "description": "Retrieve skin and eye hazard data for multiple chemicals.", - "parameters": { - "type": "object", - "properties": { - "dtxsids": { - "type": "array", - "items": {"type": "string"}, - "minItems": 1, - "description": "Chemical identifiers (DTXSIDs).", - } - }, - "required": ["dtxsids"], - }, - }, - { - "name": "get_hazard_cancer_summary", - "description": "Retrieve cancer hazard summary for a single chemical.", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "Chemical identifier (DTXSID).", - } - }, - "required": ["dtxsid"], - }, - }, - { - "name": "batch_get_hazard_cancer_summary", - "description": "Retrieve cancer hazard summary for multiple chemicals.", - "parameters": { - "type": "object", - "properties": { - "dtxsids": { - "type": "array", - "items": {"type": "string"}, - "minItems": 1, - "description": "Chemical identifiers (DTXSIDs).", - } - }, - "required": ["dtxsids"], - }, - }, - { 
- "name": "get_hazard_genetox_summary", - "description": "Retrieve genotoxicity summary data for a chemical.", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "Chemical identifier (DTXSID).", - } - }, - "required": ["dtxsid"], - }, - }, - { - "name": "batch_get_hazard_genetox_summary", - "description": "Retrieve genotoxicity summary data for multiple chemicals.", - "parameters": { - "type": "object", - "properties": { - "dtxsids": { - "type": "array", - "items": {"type": "string"}, - "minItems": 1, - "description": "Chemical identifiers (DTXSIDs).", - } - }, - "required": ["dtxsids"], - }, - }, - { - "name": "get_hazard_genetox_details", - "description": "Retrieve genotoxicity detailed data for a chemical.", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "Chemical identifier (DTXSID).", - } - }, - "required": ["dtxsid"], - }, - }, - { - "name": "batch_get_hazard_genetox_details", - "description": "Retrieve genotoxicity detailed data for multiple chemicals.", - "parameters": { - "type": "object", - "properties": { - "dtxsids": { - "type": "array", - "items": {"type": "string"}, - "minItems": 1, - "description": "Chemical identifiers (DTXSIDs).", - } - }, - "required": ["dtxsids"], - }, - }, - { - "name": "get_hazard_adme_ivive", - "description": "Retrieve ADME/IVIVE hazard data for a chemical.", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "Chemical identifier (DTXSID).", - } - }, - "required": ["dtxsid"], - }, - }, - { - "name": "get_hazard_pprtv", - "description": "Retrieve PPRTV hazard data for a chemical.", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "Chemical identifier (DTXSID).", - } - }, - "required": ["dtxsid"], - }, - }, - { - "name": "get_hazard_iris", - "description": "Retrieve IRIS hazard data for a chemical.", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "Chemical identifier (DTXSID).", - } - }, - "required": ["dtxsid"], - }, - }, - { - "name": "get_hazard_hawc", - "description": "Retrieve HAWC link mapper data for a chemical.", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "Chemical identifier (DTXSID).", - } - }, - "required": ["dtxsid"], - }, - }, - { - "name": "get_hazard_toxref", - "description": "Retrieve ToxRefDB data (summary, data, effects, or observations) by DTXSID, study ID, or study type.", - "parameters": { - "type": "object", - "properties": { - "dataset": { - "type": "string", - "enum": ["summary", "data", "effects", "observations"], - "description": "ToxRefDB dataset to query.", - }, - "lookup_type": { - "type": "string", - "enum": ["dtxsid", "study-id", "study-type"], - "description": "Lookup mode for the query.", - }, - "value": { - "type": "string", - "description": "Identifier corresponding to the selected lookup type.", - }, - }, - "required": ["dataset", "lookup_type", "value"], - }, - }, - { - "name": "batch_get_hazard_toxref", - "description": "Batch retrieve ToxRefDB data by DTXSID.", - "parameters": { - "type": "object", - "properties": { - "dtxsids": { - "type": "array", - "items": {"type": "string"}, - "minItems": 1, - "description": "Chemical identifiers (DTXSIDs).", - } - }, - "required": ["dtxsids"], - }, - }, - ] - for tool in tools: - if tool["name"] == "search_hazard": - 
tool["responseSchemaRef"] = self._schema( - "hazard", "search_hazard.response.schema" - ) - elif tool["name"] == "batch_search_hazard": - tool["responseSchemaRef"] = self._schema( - "hazard", "batch_search_hazard.response.schema" - ) - else: - tool["responseSchemaRef"] = self._schema( - "common", "list_generic.response.schema" - ) - - # Ensure outputSchema is populated from the reference - if "responseSchemaRef" in tool: - from epacomp_tox.contracts import load_schema - - ref = tool["responseSchemaRef"] - tool["outputSchema"] = load_schema(ref["namespace"], ref["name"]) - - return tools - - def execute_tool(self, tool_name: str, parameters: Dict[str, Any]) -> Any: - handlers = { - "search_hazard": lambda params: self.search_hazard( - data_type=params["data_type"], - dtxsid=params["dtxsid"], - summary=params.get("summary", True), - ), - "batch_search_hazard": lambda params: self.batch_search_hazard( - data_type=params["data_type"], - dtxsids=params["dtxsids"], - summary=params.get("summary", True), - ), - "get_hazard_toxval": lambda params: self.get_hazard_toxval( - params["dtxsid"] - ), - "batch_get_hazard_toxval": lambda params: self.batch_get_hazard_toxval( - params["dtxsids"] - ), - "get_hazard_skin_eye": lambda params: self.get_hazard_skin_eye( - params["dtxsid"] - ), - "batch_get_hazard_skin_eye": lambda params: self.batch_get_hazard_skin_eye( - params["dtxsids"] - ), - "get_hazard_cancer_summary": lambda params: self.get_hazard_cancer_summary( - params["dtxsid"] - ), - "batch_get_hazard_cancer_summary": lambda params: self.batch_get_hazard_cancer_summary( - params["dtxsids"] - ), - "get_hazard_genetox_summary": lambda params: self.get_hazard_genetox_summary( - params["dtxsid"] - ), - "batch_get_hazard_genetox_summary": lambda params: self.batch_get_hazard_genetox_summary( - params["dtxsids"] - ), - "get_hazard_genetox_details": lambda params: self.get_hazard_genetox_details( - params["dtxsid"] - ), - "batch_get_hazard_genetox_details": lambda params: self.batch_get_hazard_genetox_details( - params["dtxsids"] - ), - "get_hazard_adme_ivive": lambda params: self.get_hazard_adme_ivive( - params["dtxsid"] - ), - "get_hazard_pprtv": lambda params: self.get_hazard_pprtv(params["dtxsid"]), - "get_hazard_iris": lambda params: self.get_hazard_iris(params["dtxsid"]), - "get_hazard_hawc": lambda params: self.get_hazard_hawc(params["dtxsid"]), - "get_hazard_toxref": lambda params: self.get_hazard_toxref( - dataset=params["dataset"], - lookup_type=params["lookup_type"], - value=params["value"], - ), - "batch_get_hazard_toxref": lambda params: self.batch_get_hazard_toxref( - params["dtxsids"] - ), - } - - try: - handler = handlers[tool_name] - except KeyError as exc: # pragma: no cover - defensive - raise ValueError(f"Unknown tool: {tool_name}") from exc - return handler(parameters) - - def search_hazard( - self, data_type: str, dtxsid: str, summary: bool = True - ) -> List[Dict[str, Any]]: - """ - Search hazard datasets for a chemical. - - Args: - data_type: Hazard dataset to query (all, hazard, toxval, human, eco, skin-eye, cancer, genetox, adme, toxref, iris, pprtv, hawc). - dtxsid: Chemical identifier (DTXSID). - summary: Whether to request summary data when the API supports a detail toggle. - - Returns: - List of hazard data records. 
- """ - result = self._with_retry( - lambda: self.client.search(by=data_type, dtxsid=dtxsid, summary=summary) - ) - return self._ensure_list(result) - - def batch_search_hazard( - self, - data_type: str, - dtxsids: List[str], - summary: bool = True, - ) -> Dict[str, List[Dict[str, Any]]]: - """ - Search hazard datasets for multiple chemicals. - - Args: - data_type: Hazard dataset to query. - dtxsids: List of DTXSIDs. - summary: Whether to request summary data when supported. - - Returns: - Mapping of DTXSID to hazard records. - """ - cleaned = self._clean_identifiers(dtxsids) - if not cleaned: - return {} - payload = self._with_retry( - lambda: self.client.batch_search( - by=data_type, dtxsid=cleaned, summary=summary - ) - ) - normalized = self._ensure_object(payload) - return {key: self._ensure_list(value) for key, value in normalized.items()} - - def get_hazard_toxval(self, dtxsid: str) -> List[Dict[str, Any]]: - payload = self._with_retry(lambda: self.client.toxval(dtxsid=dtxsid)) - return self._ensure_list(payload) - - def batch_get_hazard_toxval(self, dtxsids: List[str]) -> List[Dict[str, Any]]: - cleaned = self._clean_identifiers(dtxsids) - if not cleaned: - return [] - payload = self._with_retry(lambda: self.client.toxval_batch(dtxsids=cleaned)) - return self._ensure_list(payload) - - def get_hazard_skin_eye(self, dtxsid: str) -> List[Dict[str, Any]]: - payload = self._with_retry(lambda: self.client.skin_eye(dtxsid=dtxsid)) - return self._ensure_list(payload) - - def batch_get_hazard_skin_eye(self, dtxsids: List[str]) -> List[Dict[str, Any]]: - cleaned = self._clean_identifiers(dtxsids) - if not cleaned: - return [] - payload = self._with_retry(lambda: self.client.skin_eye_batch(dtxsids=cleaned)) - return self._ensure_list(payload) - - def get_hazard_cancer_summary(self, dtxsid: str) -> List[Dict[str, Any]]: - payload = self._with_retry(lambda: self.client.cancer_summary(dtxsid=dtxsid)) - return self._ensure_list(payload) - - def batch_get_hazard_cancer_summary( - self, dtxsids: List[str] - ) -> List[Dict[str, Any]]: - cleaned = self._clean_identifiers(dtxsids) - if not cleaned: - return [] - payload = self._with_retry( - lambda: self.client.cancer_summary_batch(dtxsids=cleaned) - ) - return self._ensure_list(payload) - - def get_hazard_genetox_summary(self, dtxsid: str) -> List[Dict[str, Any]]: - payload = self._with_retry(lambda: self.client.genetox_summary(dtxsid=dtxsid)) - return self._ensure_list(payload) - - def batch_get_hazard_genetox_summary( - self, dtxsids: List[str] - ) -> List[Dict[str, Any]]: - cleaned = self._clean_identifiers(dtxsids) - if not cleaned: - return [] - payload = self._with_retry( - lambda: self.client.genetox_summary_batch(dtxsids=cleaned) - ) - return self._ensure_list(payload) - - def get_hazard_genetox_details(self, dtxsid: str) -> List[Dict[str, Any]]: - payload = self._with_retry(lambda: self.client.genetox_details(dtxsid=dtxsid)) - return self._ensure_list(payload) - - def batch_get_hazard_genetox_details( - self, dtxsids: List[str] - ) -> List[Dict[str, Any]]: - cleaned = self._clean_identifiers(dtxsids) - if not cleaned: - return [] - payload = self._with_retry( - lambda: self.client.genetox_details_batch(dtxsids=cleaned) - ) - return self._ensure_list(payload) - - def get_hazard_adme_ivive(self, dtxsid: str) -> List[Dict[str, Any]]: - payload = self._with_retry(lambda: self.client.adme_ivive(dtxsid=dtxsid)) - return self._ensure_list(payload) - - def get_hazard_pprtv(self, dtxsid: str) -> List[Dict[str, Any]]: - payload = 
self._with_retry(lambda: self.client.pprtv(dtxsid=dtxsid)) - return self._ensure_list(payload) - - def get_hazard_iris(self, dtxsid: str) -> List[Dict[str, Any]]: - payload = self._with_retry(lambda: self.client.iris(dtxsid=dtxsid)) - return self._ensure_list(payload) - - def get_hazard_hawc(self, dtxsid: str) -> List[Dict[str, Any]]: - payload = self._with_retry(lambda: self.client.hawc(dtxsid=dtxsid)) - return self._ensure_list(payload) - - def get_hazard_toxref( - self, dataset: str, lookup_type: str, value: str - ) -> List[Dict[str, Any]]: - payload = self._with_retry( - lambda: self.client.toxref(dataset=dataset, lookup=lookup_type, value=value) - ) - return self._ensure_list(payload) - - def batch_get_hazard_toxref(self, dtxsids: List[str]) -> List[Dict[str, Any]]: - cleaned = self._clean_identifiers(dtxsids) - if not cleaned: - return [] - payload = self._with_retry(lambda: self.client.toxref_batch(dtxsids=cleaned)) - return self._ensure_list(payload) - -==================================================================================================== -FILE: src/epacomp_tox/resources/exposure.py -==================================================================================================== -import logging -from typing import Any, Dict, List, Optional, Sequence - -import ctxpy as ctx -from epacomp_tox.contracts import schema_ref - -from .base import BaseResource - -logger = logging.getLogger(__name__) - - -class ExposureResource(BaseResource): - """MCP resource for EPA CompTox exposure data.""" - - @property - def name(self) -> str: - return "exposure" - - @property - def description(self) -> str: - return "Access to SEEM predictions, CPDat product data, HTTK, MMDB monitoring, and CCD datasets" - - def __init__(self, api_key: str): - super().__init__(api_key) - # Increase upstream timeout for slow queries - UPSTREAM_TIMEOUT = 120.0 - try: - self.client = ctx.Exposure(x_api_key=api_key, timeout=UPSTREAM_TIMEOUT) - logger.info( - f"Successfully initialized ctx.Exposure with timeout={UPSTREAM_TIMEOUT}s" - ) - except TypeError as e: - logger.warning( - f"Could not set timeout for ctx.Exposure (TypeError: {e}). Using default timeout." 
- ) - self.client = ctx.Exposure(x_api_key=api_key) - - # ------------------------------------------------------------------ - # Tool catalog - # ------------------------------------------------------------------ - def get_tools(self) -> List[Dict[str, Any]]: - tools: List[Dict[str, Any]] = [ - { - "name": "search_cpdat", - "description": "Search historical CPDat data (functional use, product use categories, or list presence) for chemicals", - "parameters": { - "type": "object", - "properties": { - "vocab_name": { - "type": "string", - "enum": ["fc", "puc", "lpk"], - "description": "Vocabulary domain to query: functional use (fc), product use categories (puc), or list presence keywords (lpk)", - }, - "dtxsid": { - "type": "string", - "description": "Optional single DSSTox ID", - }, - "dtxsids": { - "type": "array", - "items": {"type": "string"}, - "description": "Optional list of DSSTox IDs (max 200 per batch)", - }, - }, - "required": ["vocab_name"], - }, - }, - { - "name": "search_httk", - "description": "Search for high-throughput toxicokinetics (HTTK) data", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "Optional single DSSTox ID", - }, - "dtxsids": { - "type": "array", - "items": {"type": "string"}, - "description": "Optional list of DSSTox IDs", - }, - }, - "required": [], - }, - }, - { - "name": "get_cpdat_vocabulary", - "description": "Return CPDat controlled vocabulary values (functional use, product use categories, or list presence tags)", - "parameters": { - "type": "object", - "properties": { - "vocab_name": { - "type": "string", - "enum": ["fc", "puc", "lpk"], - "description": "Vocabulary domain to list", - } - }, - "required": ["vocab_name"], - }, - }, - { - "name": "search_qsurs", - "description": "Retrieve QSUR model functional-use probability predictions", - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "Optional single DSSTox ID", - }, - "dtxsids": { - "type": "array", - "items": {"type": "string"}, - "description": "Optional list of DSSTox IDs", - }, - }, - "required": [], - }, - }, - { - "name": "search_exposures", - "description": "Backwards-compatible exposure search across pathways/MMDB/SEEM datasets", - "parameters": { - "type": "object", - "properties": { - "data_type": { - "type": "string", - "enum": [ - "pathways", - "mmdb-single", - "seem", - "seem-demographic", - ], - "description": "Legacy exposure dataset selector", - }, - "dtxsid": { - "type": "string", - "description": "Optional single DSSTox ID", - }, - "dtxsids": { - "type": "array", - "items": {"type": "string"}, - "description": "Optional list of DSSTox IDs", - }, - }, - "required": ["data_type"], - }, - }, - ] - - # Additional granular tools (single-item retrievals) - tools.extend( - [ - _single_id_tool( - "get_seem_general", "Fetch SEEM general exposure predictions" - ), - _batch_id_tool( - "batch_get_seem_general", - "Batch fetch SEEM general exposure predictions", - ), - _single_id_tool( - "get_seem_demographic", - "Fetch SEEM demographic exposure predictions", - ), - _batch_id_tool( - "batch_get_seem_demographic", - "Batch fetch SEEM demographic exposure predictions", - ), - _single_id_tool( - "get_exposure_product_data", "Retrieve CPDat product data" - ), - _batch_id_tool( - "batch_get_exposure_product_data", "Batch fetch CPDat product data" - ), - _no_param_tool( - "list_exposure_product_puc", "List product use categories (PUC)" - ), - _single_id_tool( - 
"get_exposure_list_presence", "Retrieve list presence data" - ), - _batch_id_tool( - "batch_get_exposure_list_presence", "Batch fetch list presence data" - ), - _no_param_tool( - "list_exposure_list_presence_tags", "List list-presence tags" - ), - _single_id_tool("get_exposure_httk", "Retrieve HTTK data"), - _batch_id_tool("batch_get_exposure_httk", "Batch fetch HTTK data"), - _single_id_tool( - "get_exposure_functional_use", - "Retrieve reported functional use data", - ), - _batch_id_tool( - "batch_get_exposure_functional_use", - "Batch fetch reported functional use data", - ), - _single_id_tool( - "get_exposure_functional_use_probability", - "Retrieve functional use probability predictions", - ), - _no_param_tool( - "list_exposure_functional_use_categories", - "List functional use categories", - ), - _single_id_tool( - "get_exposure_ccd_puc", "Retrieve CCD Product Use Category data" - ), - _single_id_tool( - "get_exposure_ccd_production_volume", - "Retrieve CCD production volume data", - ), - _single_id_tool( - "get_exposure_ccd_monitoring_data", - "Retrieve CCD biomonitoring data", - ), - _single_id_tool( - "get_exposure_ccd_keywords", "Retrieve CCD general use keywords" - ), - _single_id_tool( - "get_exposure_ccd_functional_use", - "Retrieve CCD reported functional use data", - ), - _single_id_tool( - "get_exposure_ccd_chem_weight_fractions", - "Retrieve CCD chemical weight fractions data", - ), - _str_param_tool( - "get_exposure_mmdb_single_sample_by_medium", - "medium", - "Retrieve MMDB single-sample data filtered by medium", - ), - _single_id_tool( - "get_exposure_mmdb_single_sample_by_dtxsid", - "Retrieve MMDB single-sample data", - ), - _no_param_tool( - "list_exposure_mmdb_mediums", "List MMDB medium categories" - ), - _str_param_tool( - "get_exposure_mmdb_aggregate_by_medium", - "medium", - "Retrieve MMDB aggregate records filtered by medium", - ), - _single_id_tool( - "get_exposure_mmdb_aggregate_by_dtxsid", - "Retrieve MMDB aggregate records", - ), - ] - ) - - for tool in tools: - schema_map = { - "search_cpdat": ("exposure", "search_cpdat.response.schema"), - "search_httk": ("exposure", "search_httk.response.schema"), - "get_exposure_httk": ( - "exposure", - "get_exposure_httk.response.schema", - ), - } - schema_info = schema_map.get( - tool["name"], ("common", "list_generic.response.schema") - ) - tool["responseSchemaRef"] = schema_ref(*schema_info) - - # Ensure outputSchema is populated from the reference - if "responseSchemaRef" in tool: - from epacomp_tox.contracts import load_schema - - ref = tool["responseSchemaRef"] - tool["outputSchema"] = load_schema(ref["namespace"], ref["name"]) - - return tools - - # ------------------------------------------------------------------ - # Tool execution - # ------------------------------------------------------------------ - def execute_tool(self, tool_name: str, parameters: Dict[str, Any]) -> Any: - # Legacy handlers ------------------------------------------------- - if tool_name == "search_cpdat": - vocab = parameters["vocab_name"] - identifiers = self._resolve_identifiers( - parameters.get("dtxsid"), - parameters.get("dtxsids"), - ) - return self.search_cpdat(vocab, identifiers) - - if tool_name == "search_httk": - identifiers = self._resolve_identifiers( - parameters.get("dtxsid"), - parameters.get("dtxsids"), - ) - return self.search_httk(identifiers) - - if tool_name == "get_cpdat_vocabulary": - return self.get_cpdat_vocabulary(parameters["vocab_name"]) - - if tool_name == "search_qsurs": - identifiers = 
self._resolve_identifiers( - parameters.get("dtxsid"), - parameters.get("dtxsids"), - ) - return self.search_qsurs(identifiers) - - if tool_name == "search_exposures": - identifiers = self._resolve_identifiers( - parameters.get("dtxsid"), - parameters.get("dtxsids"), - ) - return self.search_exposures(parameters["data_type"], identifiers) - - # Granular handlers ---------------------------------------------- - handler_map = { - "get_seem_general": lambda p: self.get_seem_general(p["dtxsid"]), - "batch_get_seem_general": lambda p: self.batch_get_seem_general( - p["dtxsids"] - ), - "get_seem_demographic": lambda p: self.get_seem_demographic(p["dtxsid"]), - "batch_get_seem_demographic": lambda p: self.batch_get_seem_demographic( - p["dtxsids"] - ), - "get_exposure_product_data": lambda p: self.get_exposure_product_data( - p["dtxsid"] - ), - "batch_get_exposure_product_data": lambda p: self.batch_get_exposure_product_data( - p["dtxsids"] - ), - "list_exposure_product_puc": lambda p: self.list_exposure_product_puc(), - "get_exposure_list_presence": lambda p: self.get_exposure_list_presence( - p["dtxsid"] - ), - "batch_get_exposure_list_presence": lambda p: self.batch_get_exposure_list_presence( - p["dtxsids"] - ), - "list_exposure_list_presence_tags": lambda p: self.list_exposure_list_presence_tags(), - "get_exposure_httk": lambda p: self.get_exposure_httk(p["dtxsid"]), - "batch_get_exposure_httk": lambda p: self.batch_get_exposure_httk( - p["dtxsids"] - ), - "get_exposure_functional_use": lambda p: self.get_exposure_functional_use( - p["dtxsid"] - ), - "batch_get_exposure_functional_use": lambda p: self.batch_get_exposure_functional_use( - p["dtxsids"] - ), - "get_exposure_functional_use_probability": lambda p: self.get_exposure_functional_use_probability( - p["dtxsid"] - ), - "list_exposure_functional_use_categories": lambda p: self.list_exposure_functional_use_categories(), - "get_exposure_ccd_puc": lambda p: self.get_exposure_ccd_puc(p["dtxsid"]), - "get_exposure_ccd_production_volume": lambda p: self.get_exposure_ccd_production_volume( - p["dtxsid"] - ), - "get_exposure_ccd_monitoring_data": lambda p: self.get_exposure_ccd_monitoring_data( - p["dtxsid"] - ), - "get_exposure_ccd_keywords": lambda p: self.get_exposure_ccd_keywords( - p["dtxsid"] - ), - "get_exposure_ccd_functional_use": lambda p: self.get_exposure_ccd_functional_use( - p["dtxsid"] - ), - "get_exposure_ccd_chem_weight_fractions": lambda p: self.get_exposure_ccd_chem_weight_fractions( - p["dtxsid"] - ), - "get_exposure_mmdb_single_sample_by_medium": lambda p: self.get_exposure_mmdb_single_sample_by_medium( - p["medium"] - ), - "get_exposure_mmdb_single_sample_by_dtxsid": lambda p: self.get_exposure_mmdb_single_sample_by_dtxsid( - p["dtxsid"] - ), - "list_exposure_mmdb_mediums": lambda p: self.list_exposure_mmdb_mediums(), - "get_exposure_mmdb_aggregate_by_medium": lambda p: self.get_exposure_mmdb_aggregate_by_medium( - p["medium"] - ), - "get_exposure_mmdb_aggregate_by_dtxsid": lambda p: self.get_exposure_mmdb_aggregate_by_dtxsid( - p["dtxsid"] - ), - } - - try: - handler = handler_map[tool_name] - except KeyError as exc: - raise ValueError(f"Unknown tool: {tool_name}") from exc - return handler(parameters) - - # ------------------------------------------------------------------ - # Helper utilities - # ------------------------------------------------------------------ - def _resolve_identifiers( - self, - single: Optional[str], - multiple: Optional[Sequence[str]], - ) -> List[str]: - identifiers: List[str] = [] - if multiple: 
- identifiers.extend([item for item in multiple if item]) - if single: - identifiers.append(single) - identifiers = [item for item in identifiers if item] - if not identifiers: - raise ValueError("At least one DSSTox identifier must be provided.") - return identifiers - - # ------------------------------------------------------------------ - # Legacy tool implementations - # ------------------------------------------------------------------ - def search_cpdat( - self, vocab_name: str, dtxsids: Sequence[str] - ) -> List[Dict[str, Any]]: - results: List[Dict[str, Any]] = [] - for sid in dtxsids: - payload = self._with_retry( - lambda sid=sid: self.client.search_cpdat(vocab_name, sid) - ) - results.extend(self._ensure_list(payload)) - return results - - def search_httk(self, dtxsids: Sequence[str]) -> List[Dict[str, Any]]: - results: List[Dict[str, Any]] = [] - for sid in dtxsids: - payload = self._with_retry(lambda sid=sid: self.client.search_httk(sid)) - results.extend(self._ensure_list(payload)) - return results - - def get_cpdat_vocabulary(self, vocab_name: str) -> List[Any]: - payload = self._with_retry(lambda: self.client.get_cpdat_vocabulary(vocab_name)) - return self._ensure_list(payload) - - def search_qsurs(self, dtxsids: Sequence[str]) -> List[Dict[str, Any]]: - results: List[Dict[str, Any]] = [] - for sid in dtxsids: - payload = self._with_retry(lambda sid=sid: self.client.search_qsurs(sid)) - results.extend(self._ensure_list(payload)) - return results - - def search_exposures(self, data_type: str, dtxsids: Sequence[str]) -> List[Any]: - if not dtxsids: - raise ValueError("At least one DSSTox identifier must be provided.") - results: List[Any] = [] - for sid in dtxsids: - payload = self._with_retry( - lambda sid=sid: self.client.search_exposures(data_type, sid) - ) - results.extend(self._ensure_list(payload)) - return results - - # ------------------------------------------------------------------ - # SEEM helpers - # ------------------------------------------------------------------ - def get_seem_general(self, dtxsid: str) -> List[Dict[str, Any]]: - result = self._with_retry(lambda: self.client.seem_general(dtxsid)) - return self._ensure_list(result) - - def batch_get_seem_general(self, dtxsids: Sequence[str]) -> List[Any]: - identifiers = [sid for sid in dtxsids if sid] - if not identifiers: - return [] - result = self._with_retry(lambda: self.client.seem_general_batch(identifiers)) - return self._ensure_list(result) - - def get_seem_demographic(self, dtxsid: str) -> List[Dict[str, Any]]: - result = self._with_retry(lambda: self.client.seem_demographic(dtxsid)) - return self._ensure_list(result) - - def batch_get_seem_demographic(self, dtxsids: Sequence[str]) -> List[Any]: - identifiers = [sid for sid in dtxsids if sid] - if not identifiers: - return [] - result = self._with_retry( - lambda: self.client.seem_demographic_batch(identifiers) - ) - return self._ensure_list(result) - - # ------------------------------------------------------------------ - # CPDat product + list presence helpers - # ------------------------------------------------------------------ - def get_exposure_product_data(self, dtxsid: str) -> List[Dict[str, Any]]: - result = self._with_retry(lambda: self.client.product_data(dtxsid)) - return self._ensure_list(result) - - def batch_get_exposure_product_data(self, dtxsids: Sequence[str]) -> List[Any]: - identifiers = [sid for sid in dtxsids if sid] - if not identifiers: - return [] - result = self._with_retry(lambda: 
self.client.product_data_batch(identifiers)) - return self._ensure_list(result) - - def list_exposure_product_puc(self) -> List[Any]: - result = self._with_retry(self.client.product_data_puc) - return self._ensure_list(result) - - def get_exposure_list_presence(self, dtxsid: str) -> List[Dict[str, Any]]: - result = self._with_retry(lambda: self.client.list_presence(dtxsid)) - return self._ensure_list(result) - - def batch_get_exposure_list_presence(self, dtxsids: Sequence[str]) -> List[Any]: - identifiers = [sid for sid in dtxsids if sid] - if not identifiers: - return [] - result = self._with_retry(lambda: self.client.list_presence_batch(identifiers)) - return self._ensure_list(result) - - def list_exposure_list_presence_tags(self) -> List[Any]: - result = self._with_retry(self.client.list_presence_tags) - return self._ensure_list(result) - - # ------------------------------------------------------------------ - # HTTK + functional use helpers - # ------------------------------------------------------------------ - def get_exposure_httk(self, dtxsid: str) -> List[Dict[str, Any]]: - result = self._with_retry(lambda: self.client.httk(dtxsid)) - return self._ensure_list(result) - - def batch_get_exposure_httk(self, dtxsids: Sequence[str]) -> List[Any]: - identifiers = [sid for sid in dtxsids if sid] - if not identifiers: - return [] - result = self._with_retry(lambda: self.client.httk_batch(identifiers)) - return self._ensure_list(result) - - def get_exposure_functional_use(self, dtxsid: str) -> List[Dict[str, Any]]: - result = self._with_retry(lambda: self.client.functional_use(dtxsid)) - return self._ensure_list(result) - - def batch_get_exposure_functional_use(self, dtxsids: Sequence[str]) -> List[Any]: - identifiers = [sid for sid in dtxsids if sid] - if not identifiers: - return [] - result = self._with_retry(lambda: self.client.functional_use_batch(identifiers)) - return self._ensure_list(result) - - def get_exposure_functional_use_probability( - self, dtxsid: str - ) -> List[Dict[str, Any]]: - result = self._with_retry( - lambda: self.client.functional_use_probability(dtxsid) - ) - return self._ensure_list(result) - - def list_exposure_functional_use_categories(self) -> List[Any]: - result = self._with_retry(self.client.functional_use_categories) - return self._ensure_list(result) - - # ------------------------------------------------------------------ - # CCD helpers - # ------------------------------------------------------------------ - def get_exposure_ccd_puc(self, dtxsid: str) -> List[Dict[str, Any]]: - result = self._with_retry(lambda: self.client.ccd_puc(dtxsid)) - return self._ensure_list(result) - - def get_exposure_ccd_production_volume(self, dtxsid: str) -> List[Dict[str, Any]]: - result = self._with_retry(lambda: self.client.ccd_production_volume(dtxsid)) - return self._ensure_list(result) - - def get_exposure_ccd_monitoring_data(self, dtxsid: str) -> List[Dict[str, Any]]: - result = self._with_retry(lambda: self.client.ccd_monitoring_data(dtxsid)) - return self._ensure_list(result) - - def get_exposure_ccd_keywords(self, dtxsid: str) -> List[Dict[str, Any]]: - result = self._with_retry(lambda: self.client.ccd_keywords(dtxsid)) - return self._ensure_list(result) - - def get_exposure_ccd_functional_use(self, dtxsid: str) -> List[Dict[str, Any]]: - result = self._with_retry(lambda: self.client.ccd_functional_use(dtxsid)) - return self._ensure_list(result) - - def get_exposure_ccd_chem_weight_fractions( - self, dtxsid: str - ) -> List[Dict[str, Any]]: - result = 
self._with_retry(lambda: self.client.ccd_chem_weight_fractions(dtxsid)) - return self._ensure_list(result) - - # ------------------------------------------------------------------ - # MMDB helpers - # ------------------------------------------------------------------ - def get_exposure_mmdb_single_sample_by_medium( - self, medium: str - ) -> List[Dict[str, Any]]: - result = self._with_retry( - lambda: self.client.mmdb_single_sample_by_medium(medium) - ) - return self._ensure_list(result) - - def get_exposure_mmdb_single_sample_by_dtxsid( - self, dtxsid: str - ) -> List[Dict[str, Any]]: - result = self._with_retry( - lambda: self.client.mmdb_single_sample_by_dtxsid(dtxsid) - ) - return self._ensure_list(result) - - def list_exposure_mmdb_mediums(self) -> List[Any]: - result = self._with_retry(self.client.mmdb_mediums) - return self._ensure_list(result) - - def get_exposure_mmdb_aggregate_by_medium( - self, medium: str - ) -> List[Dict[str, Any]]: - result = self._with_retry(lambda: self.client.mmdb_aggregate_by_medium(medium)) - return self._ensure_list(result) - - def get_exposure_mmdb_aggregate_by_dtxsid( - self, dtxsid: str - ) -> List[Dict[str, Any]]: - result = self._with_retry(lambda: self.client.mmdb_aggregate_by_dtxsid(dtxsid)) - return self._ensure_list(result) - - -# ---------------------------------------------------------------------- -# Utility helpers for tool definitions -# ---------------------------------------------------------------------- -def _single_id_tool(name: str, description: str) -> Dict[str, Any]: - return { - "name": name, - "description": description, - "parameters": { - "type": "object", - "properties": { - "dtxsid": { - "type": "string", - "description": "DSSTox Substance Identifier", - } - }, - "required": ["dtxsid"], - }, - } - - -def _batch_id_tool(name: str, description: str) -> Dict[str, Any]: - return { - "name": name, - "description": description, - "parameters": { - "type": "object", - "properties": { - "dtxsids": { - "type": "array", - "items": {"type": "string"}, - "minItems": 1, - "description": "List of DSSTox Substance Identifiers", - } - }, - "required": ["dtxsids"], - }, - } - - -def _no_param_tool(name: str, description: str) -> Dict[str, Any]: - return { - "name": name, - "description": description, - "parameters": {"type": "object", "properties": {}}, - } - - -def _str_param_tool(name: str, field: str, description: str) -> Dict[str, Any]: - return { - "name": name, - "description": description, - "parameters": { - "type": "object", - "properties": { - field: { - "type": "string", - "description": field.replace("_", " ").capitalize(), - } - }, - "required": [field], - }, - } - -==================================================================================================== -FILE: src/epacomp_tox/resources/cheminformatics.py -==================================================================================================== -from typing import Any, Dict, List - -import ctxpy as ctx -from epacomp_tox.contracts import schema_ref -from epacomp_tox.validators import to_serializable - -from .base import BaseResource - - -class CheminformaticsResource(BaseResource): - """ - MCP resource for EPA CompTox cheminformatics tools. - - Provides access to ToxPrint chemotypes and other cheminformatics tools. 
- """ - - @property - def name(self) -> str: - return "cheminformatics" - - @property - def description(self) -> str: - return "Access to ToxPrint chemotypes and other cheminformatics tools" - - def __init__(self, api_key: str): - """ - Initialize the cheminformatics resource. - - Args: - api_key: EPA CompTox API key. - """ - super().__init__(api_key) - # No specific client for cheminformatics, using functions directly - - def get_tools(self) -> List[Dict[str, Any]]: - """ - Get a list of tools provided by this resource. - - Returns: - List of tool definitions. - """ - tools: List[Dict[str, Any]] = [] - # ToxPrint tools disabled: endpoints not available on new CTX API - return tools - - def execute_tool(self, tool_name: str, parameters: Dict[str, Any]) -> Any: - """ - Execute a tool with the given parameters. - - Args: - tool_name: Name of the tool to execute. - parameters: Parameters for the tool. - - Returns: - Tool execution result. - - Raises: - ValueError: If the tool is not found or parameters are invalid. - """ - raise ValueError(f"Unknown tool: {tool_name}") - - def search_toxprints(self, chemical: str) -> Dict[str, Any]: - """ - Search for ToxPrint chemotypes for a chemical. - - Args: - chemical: Chemical identifier (DTXSID, DTXCID, or SMILES). - - Returns: - ToxPrint chemotypes. - """ - results = self._with_retry(lambda: ctx.search_toxprints(chemical=chemical)) - return to_serializable(results) - - def batch_search_toxprints(self, chemicals: List[str]) -> Dict[str, Any]: - """ - Search for ToxPrint chemotypes for multiple chemicals. - - Args: - chemicals: List of chemical identifiers (DTXSIDs, DTXCIDs, or SMILES). - - Returns: - ToxPrint chemotypes for multiple chemicals. - """ - results = self._with_retry(lambda: ctx.search_toxprints(chemical=chemicals)) - return to_serializable(results) - -==================================================================================================== -FILE: src/epacomp_tox/resources/metadata.py -==================================================================================================== -from __future__ import annotations - -from typing import Any, Dict, List - -from epacomp_tox.contracts import schema_ref -from epacomp_tox.metadata import ModelCardFilter, ModelCardStore -from epacomp_tox.metadata.applicability import ApplicabilityDomainStore -from epacomp_tox.resources.base import BaseResource - - -class MetadataResource(BaseResource): - """Resource exposing model metadata and applicability domain definitions.""" - - def __init__( - self, - api_key: str = "", - *, - store: ModelCardStore | None = None, - ad_store: ApplicabilityDomainStore | None = None, - ): - super().__init__(api_key) - self.store = store or ModelCardStore() - self.ad_store = ad_store or ApplicabilityDomainStore() - - @property - def name(self) -> str: - return "metadata" - - @property - def description(self) -> str: - return "Model cards, applicability domain definitions, and provenance metadata" - - def get_tools(self) -> List[Dict[str, Any]]: - return [ - { - "name": "metadata_get_model_card", - "description": "Retrieve CompTox model cards with optional filters and pagination", - "inputSchema": { - "type": "object", - "properties": { - "model_name": {"type": "string"}, - "endpoint": {"type": "string"}, - "compliance": { - "type": "string", - "enum": ["approved", "draft"], - }, - "limit": {"type": "integer", "minimum": 1, "maximum": 100}, - "cursor": {"type": "string"}, - }, - }, - "outputSchema": { - "type": "object", - "properties": { - "modelCards": {"type": 
"array"}, - "nextCursor": {"type": ["string", "null"]}, - }, - }, - "responseSchemaRef": schema_ref( - "metadata", "model_cards.response.schema" - ), - "outputSchema": { - "type": "object", - "properties": { - "modelCards": {"type": "array"}, - "nextCursor": {"type": ["string", "null"]}, - }, - }, - }, - { - "name": "metadata_list_applicability_domain", - "description": "List applicability domain reference definitions", - "inputSchema": { - "type": "object", - "properties": { - "limit": {"type": "integer", "minimum": 1, "maximum": 100}, - "cursor": {"type": "string"}, - }, - }, - "outputSchema": { - "type": "object", - "properties": { - "applicabilityDomains": {"type": "array"}, - "nextCursor": {"type": ["string", "null"]}, - }, - }, - "responseSchemaRef": schema_ref( - "metadata", "applicability_list.response.schema" - ), - "outputSchema": { - "type": "object", - "properties": { - "applicabilityDomains": {"type": "array"}, - "nextCursor": {"type": ["string", "null"]}, - }, - }, - }, - { - "name": "metadata_get_applicability_domain", - "description": "Fetch applicability domain configuration for a specific model", - "inputSchema": { - "type": "object", - "properties": { - "model_name": {"type": "string"}, - }, - "required": ["model_name"], - }, - "outputSchema": { - "type": "object", - "properties": { - "model": {"type": "string"}, - "version": {"type": "string"}, - "criteria": {"type": "array"}, - "policy": {"type": "string"}, - "errorCode": {"type": "string"}, - "references": {"type": "array"}, - }, - }, - "responseSchemaRef": schema_ref( - "metadata", "applicability_detail.response.schema" - ), - "outputSchema": { - "type": "object", - "properties": { - "model": {"type": "string"}, - "version": {"type": "string"}, - "criteria": {"type": "array"}, - "policy": {"type": "string"}, - "errorCode": {"type": ["string", "null"]}, - "references": {"type": "array"}, - }, - }, - }, - ] - - def execute_tool(self, tool_name: str, parameters: Dict[str, Any]) -> Any: - if tool_name == "metadata_get_model_card": - filters = ModelCardFilter( - model_name=parameters.get("model_name"), - endpoint_contains=parameters.get("endpoint"), - compliance=parameters.get("compliance"), - ) - limit = parameters.get("limit") - cursor = parameters.get("cursor") - cards, next_cursor = self.store.list_cards( - filters=filters, limit=limit, cursor=cursor - ) - payload = [] - for item in cards: - data = { - "card": item["card"], - "checksum": item["checksum"], - "lastModified": item["lastModified"], - } - payload.append(data) - return { - "modelCards": payload, - "nextCursor": next_cursor, - } - - if tool_name == "metadata_list_applicability_domain": - limit = parameters.get("limit") - cursor = parameters.get("cursor") - defs, next_cursor = self.ad_store.list_definitions( - limit=limit, cursor=cursor - ) - return { - "applicabilityDomains": defs, - "nextCursor": next_cursor, - } - - if tool_name == "metadata_get_applicability_domain": - model_name = parameters["model_name"] - definition = self.ad_store.get_definition(model_name) - if not definition: - raise ValueError(f"No applicability domain found for {model_name}") - return definition - - raise ValueError(f"Unknown tool: {tool_name}") - -==================================================================================================== -FILE: tests/test_orchestrator_stages.py -==================================================================================================== -from __future__ import annotations - -from unittest import mock - -import pytest - -from 
ctxpy import RateLimitInfo -from epacomp_tox.orchestrator.ctx_data import CtxDataAssembler -from epacomp_tox.orchestrator.identifiers import ( - IdentifierResolutionError, - IdentifierResolver, -) -from epacomp_tox.orchestrator.models import PredictiveTask -from epacomp_tox.orchestrator.predictive import PredictiveCoordinator -from epacomp_tox.orchestrator.workflow import GenRAOrchestrator -from epacomp_tox.predictive import ( - ADCheckResult, - PredictiveRequest, - PredictiveServiceBase, -) -from epacomp_tox.resources.cheminformatics import CheminformaticsResource -from epacomp_tox.resources.exposure import ExposureResource -from epacomp_tox.resources.hazard import HazardResource - - -def _rate_limit( - limit: int = 120, remaining: int = 119, reset: int = 60 -) -> RateLimitInfo: - return RateLimitInfo(limit=limit, remaining=remaining, reset=reset) - - -class _StubADStore: - def __init__(self, definition): - self._definition = definition - - def get_definition(self, _model_name): - return self._definition - - -class _StubPredictiveService(PredictiveServiceBase): - def __init__(self, *, name: str, ad_results, payloads, ad_definition): - super().__init__( - config={"name": name, "version": "1.0"}, - ad_store=_StubADStore(ad_definition), - ) - self._ad_results = list(ad_results) - self._payloads = list(payloads) - self._last_ad_result = self._ad_results[-1] if self._ad_results else None - self.ad_checks = 0 - self.predictions = 0 - - def _predict_impl(self, request: PredictiveRequest): - self.predictions += 1 - if not self._payloads: - raise RuntimeError("No payload configured") - value = self._payloads.pop(0) - # retain last value so repeated predict calls can reuse when necessary - self._payloads.append(value) - return value - - def _check_ad_impl(self, request: PredictiveRequest) -> ADCheckResult: - self.ad_checks += 1 - if self._ad_results: - result = self._ad_results.pop(0) - self._last_ad_result = result - self._ad_results.append(result) - return result - if self._last_ad_result is None: - raise RuntimeError("No AD result configured") - return self._last_ad_result - - -def test_identifier_resolver_caches_and_sanitizes_metadata(): - chemical_resource = mock.Mock() - chemical_resource.search_chemical.return_value = [ - { - "dtxsid": "DTXSID0000001", - "preferredName": "Example Chemical", - "casrn": "50-00-0", - } - ] - chemical_resource.get_chemical_details.return_value = { - "dtxsid": "DTXSID0000001", - "preferredName": "Example Chemical", - "casrn": "50-00-0", - "synonyms": ["example chemical", "Formaldehyde"], - } - chemical_resource.get_last_metadata.side_effect = [ - {"status": 200, "rate_limit": _rate_limit()}, - {"status": 200, "request_id": "req-chem-1"}, - ] - - resolver = IdentifierResolver(chemical_resource=chemical_resource, cache_ttl=120) - - result = resolver.resolve("50-00-0", identifier_type="casrn") - assert result.dtxsid == "DTXSID0000001" - assert result.cache_hit is False - assert "Formaldehyde" in result.synonyms - assert result.trace[0].metadata["rate_limit"]["limit"] == 120 - - cached = resolver.resolve("50-00-0", identifier_type="casrn") - assert cached.cache_hit is True - assert chemical_resource.search_chemical.call_count == 1 - assert chemical_resource.get_chemical_details.call_count == 1 - # No additional metadata calls when serving from cache - assert chemical_resource.get_last_metadata.call_count == 2 - - -def test_identifier_resolver_raises_when_not_found(): - chemical_resource = mock.Mock() - chemical_resource.search_chemical.return_value = [] - 
chemical_resource.get_last_metadata.return_value = {} - - resolver = IdentifierResolver(chemical_resource=chemical_resource) - with pytest.raises(IdentifierResolutionError): - resolver.resolve("UNKNOWN", identifier_type="name") - - -def _mock_resource(resource_cls): - return mock.create_autospec(resource_cls, instance=True) - - -def test_ctx_data_assembler_fetches_datasets_and_uses_cache(): - hazard_resource = _mock_resource(HazardResource) - exposure_resource = _mock_resource(ExposureResource) - cheminformatics_resource = _mock_resource(CheminformaticsResource) - - hazard_resource.search_hazard.return_value = [{"hazard": "record"}] - hazard_resource.get_last_metadata.side_effect = lambda: { - "status": 200, - "rate_limit": _rate_limit(100, 98, 30), - } - - exposure_resource.search_httk.return_value = [{"httk": "value"}] - exposure_resource.search_qsurs.return_value = [] - exposure_resource.search_cpdat.return_value = [{"fc": "industrial"}] - exposure_resource.get_last_metadata.side_effect = lambda: { - "status": 200, - "request_id": "req-exp", - } - - cheminformatics_resource.search_toxprints.return_value = {"fingerprints": ["FP1"]} - cheminformatics_resource.get_last_metadata.return_value = {} - - assembler = CtxDataAssembler( - hazard_resource=hazard_resource, - exposure_resource=exposure_resource, - cheminformatics_resource=cheminformatics_resource, - hazard_data_types=("all",), - exposure_datasets=("httk",), - cpdat_vocabularies=("fc",), - include_toxprints=False, - cache_ttl=300, - ) - - bundle = assembler.assemble("dtxsid0001234", scenarios=["genra_read_across"]) - assert bundle.cache_hit is False - assert bundle.hazard["all"][0]["hazard"] == "record" - assert bundle.exposure["httk"][0]["httk"] == "value" - assert bundle.exposure["cpdat:fc"][0]["fc"] == "industrial" - assert "exposure:qsurs" in bundle.data_gaps # qsurs returned empty - assert bundle.cheminformatics["toxprints"]["fingerprints"] == ["FP1"] - assert bundle.trace[0].metadata["rate_limit"]["limit"] == 100 - - # Cached execution should avoid additional upstream calls - cached = assembler.assemble("dtxsid0001234", scenarios=["genra_read_across"]) - assert cached.cache_hit is True - assert hazard_resource.search_hazard.call_count == 1 - assert exposure_resource.search_httk.call_count == 1 - assert exposure_resource.search_qsurs.call_count == 1 - assert exposure_resource.search_cpdat.call_count == 1 - assert cheminformatics_resource.search_toxprints.call_count == 1 - - -def test_ctx_data_assembler_marks_toxprint_gap_when_resource_missing(): - hazard_resource = _mock_resource(HazardResource) - hazard_resource.search_hazard.return_value = [] - hazard_resource.get_last_metadata.return_value = {} - - exposure_resource = _mock_resource(ExposureResource) - exposure_resource.get_last_metadata.return_value = {} - - assembler = CtxDataAssembler( - hazard_resource=hazard_resource, - exposure_resource=exposure_resource, - cheminformatics_resource=None, - hazard_data_types=("all",), - exposure_datasets=(), - cpdat_vocabularies=(), - include_toxprints=True, - cache_ttl=0, - ) - - bundle = assembler.assemble("DTXSID9999999") - assert "cheminformatics:toxprints" in bundle.data_gaps - assert "hazard:all" in bundle.data_gaps - - -def test_predictive_coordinator_success_flow(): - ad = ADCheckResult(in_domain=True, confidence=0.9) - service = _StubPredictiveService( - name="Stub", - ad_results=[ad], - payloads=[{"value": 42}], - ad_definition={ - "model": "Stub", - "version": "1", - "policy": "block", - "errorCode": "STUB_AD_FAIL", - }, - ) - 
coordinator = PredictiveCoordinator({"stub": service}) - task = PredictiveTask( - service="stub", request=PredictiveRequest(chemical_identifier="DTXSID0001") - ) - - result = coordinator.run([task]) - - assert result.succeeded is True - assert len(result.guardrails) == 0 - assert result.results[0].prediction == {"value": 42} - assert service.ad_checks >= 2 - assert service.predictions == 1 - - -def test_predictive_coordinator_blocks_on_ad_failure(): - ad = ADCheckResult(in_domain=False, confidence=0.3) - service = _StubPredictiveService( - name="Blocked", - ad_results=[ad], - payloads=[{"value": 1}], - ad_definition={ - "model": "Blocked", - "version": "1", - "policy": "block", - "errorCode": "BLOCKED_AD", - }, - ) - coordinator = PredictiveCoordinator({"blocked": service}) - task = PredictiveTask( - service="blocked", request=PredictiveRequest(chemical_identifier="DTXSID0002") - ) - - result = coordinator.run([task], require_ad_clearance=True) - - assert result.succeeded is False - assert result.results[0].status == "denied" - assert result.guardrails[0].status == "denied" - assert result.guardrails[0].code == "BLOCKED_AD" - # predict never invoked when AD fails hard - assert service.predictions == 0 - - -def test_predictive_coordinator_warn_policy_continues(): - ad = ADCheckResult(in_domain=False, confidence=0.55) - service = _StubPredictiveService( - name="Warning", - ad_results=[ad], - payloads=[{"value": 7}], - ad_definition={ - "model": "Warning", - "version": "1", - "policy": "warn", - "errorCode": "WARN_AD", - }, - ) - coordinator = PredictiveCoordinator( - {"warning": service}, default_require_ad_clearance=False - ) - task = PredictiveTask( - service="warning", request=PredictiveRequest(chemical_identifier="DTXSID0003") - ) - - result = coordinator.run([task]) - - assert result.succeeded is True - assert result.results[0].status == "success" - assert len(result.guardrails) == 1 - assert result.guardrails[0].status == "warning" - assert result.guardrails[0].code == "WARN_AD" - assert service.predictions == 1 - - -def test_genra_orchestrator_successful_bundle(tmp_path): - hazard_resource = _mock_resource(HazardResource) - exposure_resource = _mock_resource(ExposureResource) - cheminformatics_resource = _mock_resource(CheminformaticsResource) - chemical_resource = mock.Mock() - - hazard_resource.search_hazard.return_value = [{"hazard": 1}] - hazard_resource.get_last_metadata.return_value = {} - exposure_resource.search_httk.return_value = [{"httk": 2}] - exposure_resource.search_cpdat.return_value = [{"fc": "cat"}] - exposure_resource.get_last_metadata.return_value = {} - cheminformatics_resource.search_toxprints.return_value = {"toxprints": []} - cheminformatics_resource.get_last_metadata.return_value = {} - - chemical_resource.search_chemical.return_value = [ - {"dtxsid": "DTXSID0000001", "preferredName": "Example"} - ] - chemical_resource.get_chemical_details.return_value = { - "dtxsid": "DTXSID0000001", - "preferredName": "Example", - "casrn": "50-00-0", - } - chemical_resource.get_last_metadata.return_value = {} - - resolver = IdentifierResolver(chemical_resource=chemical_resource, cache_ttl=0) - assembler = CtxDataAssembler( - hazard_resource=hazard_resource, - exposure_resource=exposure_resource, - cheminformatics_resource=cheminformatics_resource, - include_toxprints=False, - cache_ttl=0, - ) - predictive_service = _StubPredictiveService( - name="Stub", - ad_results=[ADCheckResult(in_domain=True, confidence=0.9)], - payloads=[{"prediction": "ok"}], - ad_definition={ - 
"model": "Stub", - "version": "1", - "policy": "block", - "errorCode": "GENRA_AD_FAIL", - }, - ) - coordinator = PredictiveCoordinator({"stub": predictive_service}) - orchestrator = GenRAOrchestrator( - identifier_resolver=resolver, - ctx_data_assembler=assembler, - predictive_coordinator=coordinator, - persistence_dir=tmp_path, - clock=lambda: "2025-03-26T00:00:00Z", - ) - - bundle = orchestrator.run_workflow( - target_identifier="50-00-0", - identifier_type="casrn", - scenarios=["genra_read_across"], - predictive_plan=[ - PredictiveTask( - service="stub", - request=PredictiveRequest(chemical_identifier="DTXSID0000001"), - ) - ], - ) - - assert bundle["status"] == "success" - assert bundle["target"]["dtxsid"] == "DTXSID0000001" - assert bundle["ctxData"]["hazard"]["all"][0]["hazard"] == 1 - assert bundle["predictive"]["results"][0]["prediction"] == {"prediction": "ok"} - assert bundle["evidence"]["confidenceBand"] in {"Robust", "Limited", "Unavailable"} - run_dir = tmp_path / bundle["workflowRunId"] - bundle_path = run_dir / "bundle.json" - metadata_path = run_dir / "metadata.json" - attachments_dir = run_dir / "attachments" - assert bundle_path.exists() - assert metadata_path.exists() - assert (attachments_dir / "ctx_data.json").exists() - assert (attachments_dir / "predictive_results.json").exists() - assert (attachments_dir / "evidence.json").exists() - assert len(bundle["storage"]["attachments"]) >= 3 - assert bundle["storage"]["bundleChecksum"] - - -def test_predictive_coordinator_records_prediction_errors(): - ad = ADCheckResult(in_domain=True, confidence=0.8) - service = _StubPredictiveService( - name="Error", - ad_results=[ad], - payloads=[], # triggers runtime error inside predict - ad_definition={ - "model": "Error", - "version": "1", - "policy": "block", - "errorCode": "ERR_AD", - }, - ) - coordinator = PredictiveCoordinator({"error": service}) - task = PredictiveTask( - service="error", request=PredictiveRequest(chemical_identifier="DTXSID0004") - ) - - result = coordinator.run([task]) - - assert result.succeeded is False - assert result.results[0].status == "error" - assert result.guardrails[0].status == "error" - assert "No payload" in result.results[0].error - -==================================================================================================== -FILE: tests/test_predictive_regression.py -==================================================================================================== -from __future__ import annotations - -import json -from pathlib import Path -from typing import Any, Dict - -from fastapi import FastAPI -from fastapi.testclient import TestClient - -from epacomp_tox.metadata.applicability import ApplicabilityDomainStore -from epacomp_tox.predictive import ( - ADCheckResult, - OperaPropertyService, - PredictiveRequest, - PredictiveServiceBase, - TestConsensusPredictiveService, - build_predictive_router, -) -from epacomp_tox.predictive.clients import PredictiveClient - - -class StubClient(PredictiveClient): - def __init__(self, *, response, in_domain: bool, confidence: float = 0.9): - self.response = response - self.in_domain = in_domain - self.confidence = confidence - - def predict(self, request: PredictiveRequest): - return self.response - - def check_applicability_domain(self, request: PredictiveRequest) -> ADCheckResult: - return ADCheckResult(in_domain=self.in_domain, confidence=self.confidence) - - -def _write_ad( - tmp_path: Path, name: str, policy: str, error_code: str | None = None -) -> ApplicabilityDomainStore: - directory = 
tmp_path / "ad" - directory.mkdir() - payload = { - "model": name, - "version": "1", - "criteria": [], - "policy": policy, - } - if error_code: - payload["errorCode"] = error_code - (directory / "entry.json").write_text(json.dumps(payload)) - return ApplicabilityDomainStore(directory=directory) - - -def _create_client(app) -> TestClient: - return TestClient(app) - - -class _SchemaStubService(PredictiveServiceBase): - def __init__(self) -> None: - super().__init__( - config={ - "name": "schema-stub", - "version": "0.0.1", - } - ) - - def _predict_impl(self, request: PredictiveRequest) -> Dict[str, Any]: - return {"value": 42, "identifier": request.chemical_identifier} - - def _check_ad_impl(self, request: PredictiveRequest) -> ADCheckResult: - return ADCheckResult( - in_domain=True, confidence=0.99, details={"policy": "allow"} - ) - - -def test_block_policy_returns_error(tmp_path: Path) -> None: - ad_store = _write_ad( - tmp_path, "TEST Consensus Acute Toxicity", "block", "TEST_AD_FAIL" - ) - service = TestConsensusPredictiveService( - config={ - "name": "TEST Consensus Acute Toxicity", - "version": "5.2.0", - "ad_model_name": "TEST Consensus Acute Toxicity", - }, - client=StubClient(response={"value": 1.23}, in_domain=False), - ad_store=ad_store, - ) - router = build_predictive_router(service_factory=lambda: service, prefix="/test") - app = FastAPI() - app.include_router(router) - client = _create_client(app) - response = client.post("/test/predict", json={"chemical_identifier": "DTXSID1"}) - assert response.status_code == 400 - assert "TEST_AD_FAIL" in response.json()["detail"] - - -def test_warn_policy_allows_response(tmp_path: Path) -> None: - ad_store = _write_ad( - tmp_path, "OPERA Property Predictions", "warn", "OPERA_AD_WARN" - ) - service = OperaPropertyService( - config={ - "name": "OPERA Property Predictions", - "version": "3.6.1", - "ad_model_name": "OPERA Property Predictions", - }, - client=StubClient(response={"value": 0.5}, in_domain=False), - ad_store=ad_store, - ) - router = build_predictive_router(service_factory=lambda: service, prefix="/opera") - app = FastAPI() - app.include_router(router) - client = _create_client(app) - response = client.post("/opera/predict", json={"chemical_identifier": "DTXSID2"}) - assert response.status_code == 200 - body = response.json() - assert body["metadata"]["adWarning"] is True - assert "OPERA_AD_WARN" in body["metadata"]["adMessage"] - - -def test_predictive_router_validates_responses(monkeypatch) -> None: - service = _SchemaStubService() - router = build_predictive_router(service_factory=lambda: service, prefix="/schema") - app = FastAPI() - app.include_router(router) - client = _create_client(app) - - recorded: list[tuple[str, str]] = [] - - def _fake_validate(payload, *, namespace, name): # type: ignore[override] - recorded.append((namespace, name)) - - monkeypatch.setattr( - "epacomp_tox.predictive.router.validate_payload", _fake_validate - ) - - resp = client.post("/schema/predict", json={"chemical_identifier": "DTXSID3"}) - assert resp.status_code == 200 - ad_resp = client.post( - "/schema/check_applicability_domain", json={"chemical_identifier": "DTXSID3"} - ) - assert ad_resp.status_code == 200 - - assert ("predictive", "predict.response.schema") in recorded - assert ("predictive", "ad_check.response.schema") in recorded - -==================================================================================================== -FILE: tests/workflows/test_offline_workflows.py 
-====================================================================================================
-from __future__ import annotations
-
-from pathlib import Path
-from typing import Any, Dict, List
-
-import pytest
-
-from epacomp_tox import PredictiveRequest, PredictiveTask
-from epacomp_tox.orchestrator.offline import (
-    OFFLINE_SCENARIOS,
-    build_offline_orchestrator,
-)
-
-
-def _sanitize_bundle(bundle: Dict[str, Any]) -> Dict[str, Any]:
-    predictive = bundle["predictive"]["results"][0]
-    metadata = predictive["metadata"]
-    return {
-        "status": bundle["status"],
-        "scenarios": bundle.get("scenarios"),
-        "target": {
-            "dtxsid": bundle["target"]["dtxsid"],
-            "preferredName": bundle["target"].get("preferredName"),
-            "casrn": bundle["target"].get("casrn"),
-            "synonyms": sorted(bundle["target"].get("synonyms", [])),
-        },
-        "guardrails": bundle.get("guardrails", []),
-        "ctxData": {
-            "hazardEndpoints": [
-                item.get("endpoint")
-                for item in bundle["ctxData"]["hazard"].get("all", [])
-            ],
-            "exposureKeys": sorted(bundle["ctxData"]["exposure"].keys()),
-            "cheminformaticsKeys": sorted(bundle["ctxData"]["cheminformatics"].keys()),
-            "dataGaps": bundle["ctxData"].get("dataGaps", []),
-        },
-        "predictive": {
-            "service": predictive["service"],
-            "status": predictive["status"],
-            "scenario": predictive.get("scenario"),
-            "prediction": predictive["prediction"],
-            "ad": predictive["ad"],
-            "metadata": {
-                "model": metadata.get("model"),
-                "model_version": metadata.get("model_version"),
-                "identifier": metadata.get("identifier"),
-                "identifier_type": metadata.get("identifier_type"),
-                "analogueCoverage": metadata.get("analogueCoverage"),
-                "evidenceQuality": metadata.get("evidenceQuality"),
-                "predictiveAgreement": metadata.get("predictiveAgreement"),
-            },
-        },
-        "evidence": {
-            "confidenceBand": bundle["evidence"].get("confidenceBand"),
-            "scores": bundle["evidence"].get("scores"),
-            "recommendedActions": bundle["evidence"].get("recommendedActions"),
-        },
-    }
-
-
-def _expected_snapshot(scenario: str) -> Dict[str, Any]:
-    exposure_lookup = {
-        "acute_toxicity": ["cpdat:fc", "httk"],
-        "exposure_prioritization": [
-            "cpdat:fc",
-            "cpdat:puc",
-            "httk",
-            "pathways",
-            "seem",
-        ],
-        "genra_read_across": ["cpdat:fc", "httk", "qsurs"],
-    }
-    return {
-        "status": "success",
-        "scenarios": [scenario],
-        "target": {
-            "dtxsid": "DTXSID0000001",
-            "preferredName": "Offline Example",
-            "casrn": "50-00-0",
-            "synonyms": ["Formaldehyde", "Methanal"],
-        },
-        "guardrails": [],
-        "ctxData": {
-            "hazardEndpoints": ["Acute toxicity"],
-            "exposureKeys": exposure_lookup[scenario],
-            "cheminformaticsKeys": ["toxprints"],
-            "dataGaps": [],
-        },
-        "predictive": {
-            "service": "offline_genra",
-            "status": "success",
-            "scenario": scenario,
-            "prediction": {
-                "prediction": "Read-across suggests low concern.",
-                "confidence": 0.82,
-            },
-            "ad": {
-                "in_domain": True,
-                "confidence": 0.85,
-                "details": {"analogues": 4},
-            },
-            "metadata": {
-                "model": "Offline GenRA",
-                "model_version": "0.1",
-                "identifier": "DTXSID0000001",
-                "identifier_type": "dtxsid",
-                "analogueCoverage": 0.88,
-                "evidenceQuality": 0.74,
-                "predictiveAgreement": 0.85,
-            },
-        },
-        "evidence": {
-            "confidenceBand": "Limited",
-            "scores": {
-                "analogue_coverage": 0.88,
-                "evidence_quality": 0.74,
-                "predictive_agreement": 0.85,
-            },
-            "recommendedActions": [
-                "Seek SME review",
-                "Augment analogue set or supporting evidence",
-            ],
-        },
-    }
-
-
-@pytest.mark.parametrize("scenario", OFFLINE_SCENARIOS)
-def test_offline_orchestrator_scenarios(tmp_path: 
Path, scenario: str) -> None: - orchestrator = build_offline_orchestrator( - persistence_dir=tmp_path, - clock=lambda: "2025-03-26T00:00:00Z", - ) - bundle = orchestrator.run_workflow( - target_identifier="50-00-0", - identifier_type="casrn", - scenarios=[scenario], - predictive_plan=[ - PredictiveTask( - service="offline_genra", - scenario=scenario, - request=PredictiveRequest(chemical_identifier="DTXSID0000001"), - ) - ], - ) - - sanitized = _sanitize_bundle(bundle) - assert sanitized == _expected_snapshot(scenario) - - run_dir = tmp_path / bundle["workflowRunId"] - bundle_path = run_dir / "bundle.json" - metadata_path = run_dir / "metadata.json" - attachments_dir = run_dir / "attachments" - - assert bundle_path.exists() - assert metadata_path.exists() - assert attachments_dir.exists() - - attachment_names = {path.name for path in attachments_dir.iterdir()} - assert {"ctx_data.json", "predictive_results.json", "evidence.json"}.issubset( - attachment_names - ) - - storage_meta = bundle.get("storage") or {} - assert storage_meta.get("bundlePath") == str(bundle_path.relative_to(tmp_path)) - assert storage_meta.get("bundleChecksum") - - -def test_audit_bundle_store_lists_runs(tmp_path: Path) -> None: - orchestrator = build_offline_orchestrator( - persistence_dir=tmp_path, - clock=lambda: "2025-03-26T00:00:00Z", - ) - bundle_ids: List[str] = [] - for scenario in OFFLINE_SCENARIOS: - result = orchestrator.run_workflow( - target_identifier="50-00-0", - identifier_type="casrn", - scenarios=[scenario], - predictive_plan=[ - PredictiveTask( - service="offline_genra", - scenario=scenario, - request=PredictiveRequest(chemical_identifier="DTXSID0000001"), - ) - ], - ) - bundle_ids.append(result["workflowRunId"]) - - store = orchestrator.bundle_store - assert store is not None - runs = store.list_runs() - assert len(runs) == len(OFFLINE_SCENARIOS) - retrieved_ids = {row["workflowRunId"] for row in runs} - assert retrieved_ids == set(bundle_ids) - -==================================================================================================== -FILE: tests/test_domain_contracts.py -==================================================================================================== -from __future__ import annotations - -import json -from pathlib import Path - -from jsonschema import Draft202012Validator - -from epacomp_tox.resources.bioactivity import BioactivityResource -from epacomp_tox.resources.exposure import ExposureResource -from epacomp_tox.resources.hazard import HazardResource -from epacomp_tox.resources.interop import InteropResource - -SCHEMA_PATHS = [ - Path("docs/contracts/schemas/hazard/search_hazard.response.schema.json"), - Path("docs/contracts/schemas/hazard/batch_search_hazard.response.schema.json"), - Path("docs/contracts/schemas/exposure/search_cpdat.response.schema.json"), - Path("docs/contracts/schemas/exposure/search_httk.response.schema.json"), - Path("docs/contracts/schemas/exposure/get_exposure_httk.response.schema.json"), - Path( - "docs/contracts/schemas/bioactivity/search_bioactivity_terms.response.schema.json" - ), - Path( - "docs/contracts/schemas/bioactivity/get_bioactivity_summary_by_dtxsid.response.schema.json" - ), - Path( - "docs/contracts/schemas/bioactivity/get_bioactivity_assay.response.schema.json" - ), - Path("docs/contracts/schemas/bioactivity/get_bioactivity_aop.response.schema.json"), - Path("docs/contracts/schemas/workflow/aop_linkage_summary.response.schema.json"), - Path("docs/contracts/schemas/workflow/pbpk_context_bundle.response.schema.json"), - 
Path("docs/contracts/schemas/workflow/comptox_evidence_pack.response.schema.json"), -] - - -def _load_json(path: Path) -> dict: - with path.open("r", encoding="utf-8") as handle: - return json.load(handle) - - -def _tool_map(resource) -> dict[str, dict]: - return {tool["name"]: tool for tool in resource.get_tools()} - - -def test_domain_response_schemas_are_valid() -> None: - for path in SCHEMA_PATHS: - Draft202012Validator.check_schema(_load_json(path)) - - -def test_hazard_tools_use_domain_specific_response_schemas() -> None: - tools = _tool_map(HazardResource(api_key="fake")) - assert tools["search_hazard"]["responseSchemaRef"] == { - "namespace": "hazard", - "name": "search_hazard.response.schema", - } - assert tools["batch_search_hazard"]["responseSchemaRef"] == { - "namespace": "hazard", - "name": "batch_search_hazard.response.schema", - } - - -def test_exposure_tools_use_domain_specific_response_schemas() -> None: - tools = _tool_map(ExposureResource(api_key="fake")) - assert tools["search_cpdat"]["responseSchemaRef"] == { - "namespace": "exposure", - "name": "search_cpdat.response.schema", - } - assert tools["search_httk"]["responseSchemaRef"] == { - "namespace": "exposure", - "name": "search_httk.response.schema", - } - assert tools["get_exposure_httk"]["responseSchemaRef"] == { - "namespace": "exposure", - "name": "get_exposure_httk.response.schema", - } - - -def test_bioactivity_tools_use_domain_specific_response_schemas() -> None: - tools = _tool_map(BioactivityResource(api_key="fake")) - assert tools["search_bioactivity_terms"]["responseSchemaRef"] == { - "namespace": "bioactivity", - "name": "search_bioactivity_terms.response.schema", - } - assert tools["get_bioactivity_summary_by_dtxsid"]["responseSchemaRef"] == { - "namespace": "bioactivity", - "name": "get_bioactivity_summary_by_dtxsid.response.schema", - } - assert tools["get_bioactivity_assay"]["responseSchemaRef"] == { - "namespace": "bioactivity", - "name": "get_bioactivity_assay.response.schema", - } - assert tools["get_bioactivity_aop"]["responseSchemaRef"] == { - "namespace": "bioactivity", - "name": "get_bioactivity_aop.response.schema", - } - - -def test_workflow_tools_use_domain_specific_response_schemas() -> None: - tools = _tool_map(InteropResource(api_key="fake")) - assert tools["assemble_comptox_evidence_pack"]["responseSchemaRef"] == { - "namespace": "workflow", - "name": "comptox_evidence_pack.response.schema", - } - assert tools["build_aop_linkage_summary"]["responseSchemaRef"] == { - "namespace": "workflow", - "name": "aop_linkage_summary.response.schema", - } - assert tools["build_pbpk_context_bundle"]["responseSchemaRef"] == { - "namespace": "workflow", - "name": "pbpk_context_bundle.response.schema", - } - -==================================================================================================== -FILE: tests/test_cross_suite_handoffs.py -==================================================================================================== -from __future__ import annotations - -import json -from pathlib import Path - -from tests.interop_test_support import ( - build_interop_resource, - sanitize_aop_handoff, - sanitize_pbpk_handoff, - validate_portable_schema, -) - -FIXTURES_DIR = Path(__file__).parent / "fixtures" / "cross_suite" - - -def _load_fixture(name: str) -> dict: - return json.loads((FIXTURES_DIR / name).read_text(encoding="utf-8")) - - -def test_comptox_to_aop_handoff_matches_fixture() -> None: - interop = build_interop_resource() - result = interop.execute_tool( - 
"build_aop_linkage_summary", - {"dtxsid": "DTXSID7020182", "max_assays": 5}, - ) - - validate_portable_schema("aopLinkageSummary.v1.json", result) - assert sanitize_aop_handoff(result) == _load_fixture("comptox_to_aop_handoff.json") - - -def test_comptox_to_pbpk_handoff_matches_fixture() -> None: - interop = build_interop_resource() - result = interop.execute_tool( - "build_pbpk_context_bundle", - {"dtxsid": "DTXSID7020182"}, - ) - - validate_portable_schema("pbpkContextBundle.v1.json", result) - assert sanitize_pbpk_handoff(result) == _load_fixture( - "comptox_to_pbpk_handoff.json" - ) - -==================================================================================================== -FILE: metadata/model_cards/genra_read_across.json -==================================================================================================== -{ - "schemaVersion": "1.0", - "modelDetails": { - "name": "GenRA Read-Across Workflow", - "version": "2.1.0", - "modelType": "Read-Across", - "description": "Generalized read-across workflow combining analogue search, evidence weighting, and prediction synthesis.", - "developers": [ - { - "name": "EPA Computational Toxicology" - } - ], - "organizations": [ - "US EPA" - ], - "releaseDate": "2025-03-05", - "license": "EPA Terms of Use" - }, - "intendedUse": { - "summary": "Supports regulatory read-across decisions for data gap filling and hazard assessment.", - "inScope": [ - "Organic chemicals with available ToxCast/ToxVal analogues" - ], - "outOfScope": [ - "Chemicals lacking sufficient analogue coverage", - "Mixtures" - ], - "limitations": [ - "Requires SME review when analogue similarity < 0.7." - ], - "warnings": [ - "Document evidence narrative before external submission." - ], - "regulatoryPrograms": [ - "TSCA New Chemicals", - "OECD Cooperative Chemicals Assessment" - ] - }, - "oecdValidationPrinciples": { - "definedEndpoint": { - "description": "Endpoints inherited from analogue dataset (e.g., repeat-dose toxicity LOAEL)", - "unit": "varies by endpoint" - }, - "unambiguousAlgorithm": { - "summary": "Analogue search using structural fingerprints, evidence scoring across data streams, Bayesian-weighted prediction aggregation.", - "methodClass": "Read-Across", - "implementation": "GenRA Service 2.1", - "references": [ - { - "citation": "Patlewicz et al. 
2015", - "doi": "10.1093/toxsci/kfv169" - } - ] - }, - "definedApplicabilityDomain": { - "summary": "Assess analogue availability, structural similarity, and metadata completeness before generating predictions.", - "relatedTools": [ - "genra.check_applicability_domain" - ], - "references": [ - { - "citation": "GenRA Technical Manual 2025" - } - ] - }, - "goodnessOfFitMetrics": { - "internalValidation": [ - { - "name": "Coverage", - "value": 0.78, - "dataset": "historical read-across cases" - } - ], - "externalValidation": [ - { - "name": "Accuracy", - "value": 0.72, - "dataset": "case studies" - }, - { - "name": "Precision", - "value": 0.69, - "dataset": "case studies" - } - ] - }, - "mechanisticInterpretation": { - "summary": "Evidence weighting prioritizes analogues sharing mode-of-action descriptors and toxicity pathways.", - "confidence": "moderate" - } - }, - "trainingData": { - "dataset": { - "name": "GenRA Analogue Library 2025", - "source": "EPA CompTox", - "description": "Curated analogue relationships with experimental endpoints" - }, - "records": 1200, - "chemicalCount": 850, - "descriptorCount": 60, - "preprocessing": "Harmonized identifiers, removal of conflicting analogue evidence, assignment of mode-of-action tags." - }, - "evaluationData": { - "datasets": [ - { - "name": "GenRA Case Studies", - "source": "EPA Internal", - "description": "Historical regulatory read-across decisions" - } - ], - "validationApproach": "Leave-one-target-out analogue removal", - "metrics": [ - { - "name": "Balanced Accuracy", - "value": 0.71, - "dataset": "case studies" - }, - { - "name": "Coverage", - "value": 0.76, - "dataset": "case studies" - } - ], - "applicabilityDomainCoverage": 0.82 - }, - "applicabilityDomain": { - "summary": "Composite checks for analogue similarity, data completeness, and evidence diversity.", - "criteria": [ - { - "type": "similarity", - "description": "At least three structural analogues with Tanimoto similarity >= 0.7.", - "parameters": { - "threshold": 0.7, - "minAnalogues": 3 - } - }, - { - "type": "coverage", - "description": "Analogues must span at least two evidence domains (in vivo, in vitro, in silico).", - "parameters": { - "minDomains": 2 - } - }, - { - "type": "expert_rule", - "description": "Mode-of-action tags must align across selected analogues.", - "parameters": { - "allowableMismatch": 1 - } - } - ], - "enforcement": { - "mcpTools": [ - "genra.check_applicability_domain" - ], - "policy": "block", - "errorCodes": [ - "GENRA_AD_FAIL" - ] - }, - "confidenceBands": [ - { - "label": "Robust", - "minConfidence": 0.8, - "actions": [ - "Eligible for automated dossier generation" - ] - }, - { - "label": "Limited", - "minConfidence": 0.5, - "actions": [ - "Requires SME justification and documentation" - ] - } - ] - }, - "ethicalConsiderations": { - "risks": [ - "Analogues may introduce hidden biases when evidence base is uneven." - ], - "mitigations": [ - "Require documentation of analogue selection rationale and SME oversight." - ], - "humanOversight": "SME approval mandated for final predictions and evidence narratives." 
- }, - "provenance": { - "sourceRepositories": [ - "https://github.com/epa/genra" - ], - "build": { - "id": "genra-build-2025-03-05", - "timestamp": "2025-03-05T09:15:00Z", - "environment": "EPA CICD" - }, - "checksum": { - "algorithm": "SHA256", - "value": "3ce4ec4983d3e7c6b2089b967679f5fc293096750293eb98d2b211f780a1f95e" - }, - "reviewStatus": { - "approvedBy": [ - { - "name": "Regulatory Affairs Read-Across Committee" - } - ], - "approvalDate": "2025-03-10" - } - } -} - -==================================================================================================== -FILE: metadata/model_cards/opera_property.json -==================================================================================================== -{ - "schemaVersion": "1.0", - "modelDetails": { - "name": "OPERA Property Predictions", - "version": "3.6.1", - "modelType": "QSAR", - "description": "Predicts physicochemical properties (LogP, water solubility, vapor pressure) using OPERA ensemble models.", - "developers": [ - { - "name": "NIEHS NICEATM" - }, - { - "name": "EPA Computational Toxicology" - } - ], - "organizations": [ - "US EPA", - "NIEHS" - ], - "releaseDate": "2025-02-20", - "license": "OPERA EULA" - }, - "intendedUse": { - "summary": "Supports exposure assessment workflows requiring physicochemical property estimates for organic chemicals.", - "inScope": [ - "Neutral organic chemicals", - "Screening-level exposure modelling" - ], - "outOfScope": [ - "Inorganic substances", - "Highly ionized species" - ], - "limitations": [ - "Predictions outside training descriptor ranges may be unreliable." - ], - "warnings": [ - "Verify units when integrating with downstream PBPK models." - ], - "regulatoryPrograms": [ - "TSCA Existing Chemicals", - "REACH dossier support" - ] - }, - "oecdValidationPrinciples": { - "definedEndpoint": { - "description": "LogP, water solubility (log mol/L), vapor pressure (log Pa)", - "unit": "log scale" - }, - "unambiguousAlgorithm": { - "summary": "Random forest and support vector regression ensembles with descriptor selection.", - "methodClass": "Ensemble", - "implementation": "OPERA CLI 3.6", - "references": [ - { - "citation": "Mansouri et al. 
2018", - "doi": "10.1021/acs.jcim.7b00524" - } - ] - }, - "definedApplicabilityDomain": { - "summary": "Combines leverage statistics with similarity to nearest neighbors in descriptor space.", - "relatedTools": [ - "opera.check_applicability_domain" - ], - "references": [ - { - "citation": "OPERA Technical Documentation 2024" - } - ] - }, - "goodnessOfFitMetrics": { - "internalValidation": [ - { - "name": "R2", - "value": 0.92, - "dataset": "training", - "description": "LogP" - } - ], - "externalValidation": [ - { - "name": "RMSE", - "value": 0.31, - "dataset": "external", - "description": "LogP", - "units": "log" - }, - { - "name": "RMSE", - "value": 0.45, - "dataset": "external", - "description": "Water Solubility", - "units": "log mol/L" - } - ] - }, - "mechanisticInterpretation": { - "summary": "Descriptors capture polar surface area, hydrogen bonding, and fragment counts aligned with property trends.", - "confidence": "moderate" - } - }, - "trainingData": { - "dataset": { - "name": "OPERA Training Library 2024", - "source": "EPA CompTox", - "description": "Consolidated experimental property measurements" - }, - "records": 2500, - "chemicalCount": 2200, - "descriptorCount": 45, - "preprocessing": "Standardized structures (neutralized), removal of salts, descriptor scaling.", - "classBalance": "Continuous endpoints" - }, - "evaluationData": { - "datasets": [ - { - "name": "OPERA External Validation", - "source": "EPA CompTox", - "description": "Hold-out dataset of curated property measurements" - } - ], - "validationApproach": "80/20 train-test split with 5-fold cross-validation", - "metrics": [ - { - "name": "MAE", - "value": 0.28, - "dataset": "external", - "description": "LogP" - }, - { - "name": "R2", - "value": 0.85, - "dataset": "external", - "description": "Vapor Pressure" - } - ], - "applicabilityDomainCoverage": 0.9 - }, - "applicabilityDomain": { - "summary": "Descriptor range checks plus nearest-neighbor similarity enforced before prediction delivery.", - "criteria": [ - { - "type": "descriptor_range", - "description": "Each descriptor must fall within training min/max after scaling.", - "parameters": { - "mode": "min_max" - } - }, - { - "type": "similarity", - "description": "Average Tanimoto similarity to top 5 training neighbors >= 0.6.", - "parameters": { - "threshold": 0.6, - "neighbors": 5 - } - } - ], - "enforcement": { - "mcpTools": [ - "opera.check_applicability_domain" - ], - "policy": "warn", - "errorCodes": [ - "OPERA_AD_WARN" - ] - }, - "confidenceBands": [ - { - "label": "High", - "minConfidence": 0.75, - "actions": [ - "Auto-approve" - ] - }, - { - "label": "Low", - "minConfidence": 0.5, - "actions": [ - "Escalate to SME" - ] - } - ], - "references": [ - { - "citation": "OPERA Manual 2025" - } - ] - }, - "ethicalConsiderations": { - "risks": [ - "Limited coverage for highly polar or reactive chemicals." - ], - "mitigations": [ - "Flag low-confidence predictions for manual review." 
- ] - }, - "provenance": { - "sourceRepositories": [ - "https://github.com/kmansouri/OPERA" - ], - "build": { - "id": "opera-build-2025-02-20", - "timestamp": "2025-02-20T14:30:00Z", - "environment": "GitHub Actions" - }, - "checksum": { - "algorithm": "SHA256", - "value": "79af18b3515e9a1d69037e2a154c7c6088cf3fae8c388ff901abdadf5a304a52" - }, - "reviewStatus": { - "approvedBy": [], - "notes": "Pending SME review" - } - } -} - -==================================================================================================== -FILE: metadata/model_cards/test_consensus.json -==================================================================================================== -{ - "schemaVersion": "1.0", - "modelDetails": { - "name": "TEST Consensus Acute Toxicity", - "version": "5.2.0", - "modelType": "QSAR", - "description": "Predicts acute aquatic toxicity using consensus of TEST models.", - "developers": [ - { - "name": "EPA Computational Toxicology" - } - ], - "organizations": [ - "US EPA" - ], - "releaseDate": "2025-01-15", - "license": "EPA Terms of Use" - }, - "intendedUse": { - "summary": "Supports screening-level acute aquatic toxicity assessments for organic chemicals.", - "inScope": [ - "Non-ionic organic chemicals", - "Screening-level prioritization" - ], - "outOfScope": [ - "Ionic species", - "Metals" - ], - "limitations": [ - "Do not apply to mixtures without expert review." - ], - "warnings": [ - "Use applicability domain checks prior to decision-making." - ], - "regulatoryPrograms": [ - "TSCA New Chemicals" - ] - }, - "oecdValidationPrinciples": { - "definedEndpoint": { - "description": "96-hour fathead minnow LC50", - "unit": "mg/L", - "speciesOrSystem": "Pimephales promelas" - }, - "unambiguousAlgorithm": { - "summary": "Consensus of multiple QSAR models combining regression and classification outputs.", - "methodClass": "Ensemble", - "implementation": "TEST v5.2", - "references": [ - { - "citation": "TEST user manual 2024" - } - ] - }, - "definedApplicabilityDomain": { - "summary": "Leverage and descriptor range checks against training set.", - "references": [ - { - "citation": "Mansouri et al. 2018", - "doi": "10.1021/acs.jcim.7b00524" - } - ], - "relatedTools": [ - "test.check_applicability_domain" - ] - }, - "goodnessOfFitMetrics": { - "internalValidation": [ - { - "name": "R2", - "value": 0.81, - "dataset": "training" - } - ], - "externalValidation": [ - { - "name": "Q2", - "value": 0.74, - "dataset": "external" - }, - { - "name": "RMSE", - "value": 0.45, - "units": "log10" - } - ] - }, - "mechanisticInterpretation": { - "summary": "Descriptors capture hydrophobicity and molecular size consistent with narcosis mode of action.", - "confidence": "moderate" - } - }, - "trainingData": { - "dataset": { - "name": "TEST Training Set 2024", - "source": "EPA CompTox", - "description": "Curated LC50 dataset for freshwater species" - }, - "records": 580, - "chemicalCount": 560, - "descriptorCount": 35, - "preprocessing": "Standardization of chemical identifiers and removal of salts." 
-  },
-  "evaluationData": {
-    "datasets": [
-      {
-        "name": "Fathead Minnow External",
-        "source": "EPA AQUIRE",
-        "description": "Independent validation dataset"
-      }
-    ],
-    "validationApproach": "Hold-out external validation",
-    "metrics": [
-      {
-        "name": "RMSE",
-        "value": 0.52,
-        "dataset": "external",
-        "units": "log10"
-      }
-    ],
-    "applicabilityDomainCoverage": 0.88
-  },
-  "applicabilityDomain": {
-    "summary": "Combines leverage thresholds with descriptor range checks.",
-    "criteria": [
-      {
-        "type": "descriptor_range",
-        "description": "All descriptors must fall within 5th-95th percentile of training set.",
-        "parameters": {
-          "percentileLower": 0.05,
-          "percentileUpper": 0.95
-        }
-      },
-      {
-        "type": "similarity",
-        "description": "Tanimoto similarity to nearest neighbor must exceed 0.65.",
-        "parameters": {
-          "threshold": 0.65
-        }
-      }
-    ],
-    "enforcement": {
-      "mcpTools": [
-        "test.check_applicability_domain"
-      ],
-      "policy": "block",
-      "errorCodes": [
-        "TEST_AD_FAIL"
-      ]
-    },
-    "confidenceBands": [
-      {
-        "label": "High",
-        "minConfidence": 0.8,
-        "actions": [
-          "Eligible for automated workflow"
-        ]
-      },
-      {
-        "label": "Moderate",
-        "minConfidence": 0.6,
-        "actions": [
-          "Requires SME review"
-        ]
-      }
-    ]
-  },
-  "ethicalConsiderations": {
-    "risks": [
-      "Model is biased toward narcosis-class chemicals."
-    ],
-    "mitigations": [
-      "Flag predictions with low similarity for SME review."
-    ],
-    "humanOversight": "Regulatory reviewer must approve high-impact predictions."
-  },
-  "provenance": {
-    "sourceRepositories": [
-      "https://github.com/epa/test"
-    ],
-    "build": {
-      "id": "build-2025-01-15",
-      "timestamp": "2025-01-15T10:00:00Z",
-      "environment": "GitHub Actions"
-    },
-    "checksum": {
-      "algorithm": "SHA256",
-      "value": "4a2a288f4f9b15727ea63a2c70a786844bab608d75d0d70fd0d0d7e0dad32f90"
-    },
-    "reviewStatus": {
-      "approvedBy": [
-        {
-          "name": "Regulatory Affairs"
-        }
-      ],
-      "approvalDate": "2025-02-01"
-    }
-  }
-}
-
-====================================================================================================
-FILE: metadata/applicability_domains/genra_read_across_ad.json
-====================================================================================================
-{
-  "model": "GenRA Read-Across Workflow",
-  "version": "2.1.0",
-  "criteria": [
-    {
-      "type": "similarity",
-      "metric": "tanimoto",
-      "threshold": 0.7,
-      "minAnalogues": 3
-    },
-    {
-      "type": "coverage",
-      "requirements": ["in vivo", "in vitro"],
-      "minimumDomains": 2
-    },
-    {
-      "type": "expert_rule",
-      "rule": "Mode of action tags must align",
-      "allowableMismatch": 1
-    }
-  ],
-  "policy": "block",
-  "errorCode": "GENRA_AD_FAIL",
-  "references": [
-    {
-      "citation": "GenRA Technical Manual 2025"
-    }
-  ]
-}
-
-====================================================================================================
-FILE: metadata/applicability_domains/opera_property_ad.json
-====================================================================================================
-{
-  "model": "OPERA Property Predictions",
-  "version": "3.6.1",
-  "criteria": [
-    {
-      "type": "descriptor_range",
-      "descriptors": ["atomCount", "bondCount", "polarSurfaceArea"],
-      "range": {"mode": "min_max"}
-    },
-    {
-      "type": "similarity",
-      "metric": "tanimoto",
-      "threshold": 0.6,
-      "neighbors": 5
-    }
-  ],
-  "policy": "warn",
-  "errorCode": "OPERA_AD_WARN",
-  "references": [
-    {
-      "citation": "OPERA Manual 2025"
-    }
-  ]
-}
-
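The applicability-domain files in this directory all reduce to the same small contract: a list of criteria plus a policy of "block" or "warn" and an errorCode. A minimal sketch of how a consumer might act on that pair, assuming only the JSON layout shown here; the enforce_ad_policy helper below is an illustrative name, not an API from this package (the real services route this through PredictiveServiceBase):

import json
from pathlib import Path
from typing import Any, Dict, Optional, Tuple


def enforce_ad_policy(
    definition: Dict[str, Any], in_domain: bool
) -> Tuple[bool, Optional[str]]:
    """Map an AD check outcome onto the block/warn semantics of these files.

    Hypothetical helper for illustration only; it mirrors the policy and
    errorCode fields of the JSON definitions, not the package's own code.
    """
    if in_domain:
        return True, None  # in-domain: no guardrail triggered
    code = definition.get("errorCode")
    if definition.get("policy") == "block":
        # "block": the prediction must not be served (e.g. surfaced as HTTP 400).
        raise ValueError(f"Applicability domain check failed: {code}")
    # "warn": serve the prediction but attach the warning code to the response.
    return True, code


definition = json.loads(
    Path("metadata/applicability_domains/opera_property_ad.json").read_text()
)
allowed, warning = enforce_ad_policy(definition, in_domain=False)
# OPERA's policy is "warn", so allowed is True and warning == "OPERA_AD_WARN".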
-====================================================================================================
-FILE: metadata/applicability_domains/test_consensus_ad.json
-====================================================================================================
-{
-  "model": "TEST Consensus Acute Toxicity",
-  "version": "5.2.0",
-  "criteria": [
-    {
-      "type": "descriptor_range",
-      "descriptors": ["logS", "logP", "LUMO", "polarSurfaceArea"],
-      "range": {"lowerPercentile": 0.05, "upperPercentile": 0.95}
-    },
-    {
-      "type": "similarity",
-      "metric": "tanimoto",
-      "threshold": 0.65,
-      "fingerprint": "pubchem"
-    }
-  ],
-  "policy": "block",
-  "errorCode": "TEST_AD_FAIL",
-  "references": [
-    {
-      "citation": "Mansouri et al. 2018",
-      "doi": "10.1021/acs.jcim.7b00524"
-    }
-  ]
-}
diff --git a/scripts/manual/README.md b/scripts/manual/README.md
new file mode 100644
index 0000000..6d5f2a3
--- /dev/null
+++ b/scripts/manual/README.md
@@ -0,0 +1,7 @@
+Manual helper scripts for ad hoc local verification and debugging; they live outside the public package surface.
+
+- `start_epa_mcp.sh`: starts the local MCP server for manual testing.
+- `test_epa_mcp_curl.sh`, `test_mcp_http.sh`, `test_legacy_uri.sh`: shell-based smoke checks for the transport layer.
+- `epa_tool_runner.py`: JSON-RPC helper for direct `tools/call` execution against a local server.
+- `test_api.py`, `test_chlorpyrifos_analysis.py`: one-off API probing scripts kept for manual diagnosis.
+- `extract_api_structure.py`: captures a local CTX client method snapshot into ignored `artifacts/`.
diff --git a/epa_tool_runner.py b/scripts/manual/epa_tool_runner.py
similarity index 100%
rename from epa_tool_runner.py
rename to scripts/manual/epa_tool_runner.py
diff --git a/extract_api_structure.py b/scripts/manual/extract_api_structure.py
similarity index 83%
rename from extract_api_structure.py
rename to scripts/manual/extract_api_structure.py
index 81c25f2..6ce4f4a 100644
--- a/extract_api_structure.py
+++ b/scripts/manual/extract_api_structure.py
@@ -1,7 +1,9 @@
-import os
-import ctxpy as ctx
 import inspect
 import json
+import os
+from pathlib import Path
+
+import ctxpy as ctx
 
 # Initialize with API key from environment
 api_key = os.environ.get('CTX_API_KEY') or os.environ.get('EPA_COMPTOX_API_KEY')
@@ -57,8 +59,12 @@ def extract_class_methods(cls, instance=None):
     'search_toxprints': str(inspect.signature(ctx.search_toxprints))
 }
 
-# Save to file
-with open('epa_comptox_api_structure.json', 'w') as f:
+# Save to the repo-level ignored artifacts/ path so ad hoc snapshots stay out of the public tree.
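+# (Path(__file__).resolve().parents[2] climbs scripts/manual/ -> scripts/ -> repo
+# root, so the snapshot lands in <repo>/artifacts/ regardless of the caller's CWD.)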
+repo_root = Path(__file__).resolve().parents[2] +output_path = repo_root / "artifacts" / "epa_comptox_api_structure.json" +output_path.parent.mkdir(parents=True, exist_ok=True) + +with output_path.open("w") as f: json.dump(api_structure, f, indent=2) -print('API structure extracted and saved to epa_comptox_api_structure.json') +print(f"API structure extracted and saved to {output_path}") diff --git a/test_api.py b/scripts/manual/test_api.py similarity index 100% rename from test_api.py rename to scripts/manual/test_api.py diff --git a/test_chlorpyrifos_analysis.py b/scripts/manual/test_chlorpyrifos_analysis.py similarity index 100% rename from test_chlorpyrifos_analysis.py rename to scripts/manual/test_chlorpyrifos_analysis.py diff --git a/src/epacomp_tox/assets.py b/src/epacomp_tox/assets.py new file mode 100644 index 0000000..f437675 --- /dev/null +++ b/src/epacomp_tox/assets.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import json +from importlib import resources +from typing import Any, Dict, Iterable, Optional + +DATA_PACKAGE = "epacomp_tox.data" + + +def data_root() -> Any: + """Return the packaged runtime data root.""" + return resources.files(DATA_PACKAGE) + + +def data_file(*parts: str) -> Any: + """Return a Traversable for a packaged runtime data file or directory.""" + current = data_root() + for part in parts: + current = current.joinpath(part) + return current + + +def read_json(*parts: str) -> Dict[str, Any]: + """Read JSON from a packaged runtime data file.""" + return json.loads(data_file(*parts).read_text(encoding="utf-8")) + + +def iter_data_files( + *parts: str, suffix: Optional[str] = None, recursive: bool = False +) -> Iterable[Any]: + """Iterate packaged runtime data files in deterministic name order.""" + base = data_file(*parts) + if not base.is_dir(): + return + for entry in sorted(base.iterdir(), key=lambda item: item.name): + if entry.name.startswith("."): + continue + if entry.is_dir(): + if recursive: + yield from iter_data_files( + *parts, entry.name, suffix=suffix, recursive=True + ) + continue + if suffix is None or entry.name.endswith(suffix): + yield entry diff --git a/src/epacomp_tox/contracts/__init__.py b/src/epacomp_tox/contracts/__init__.py index ec3e866..c947a42 100644 --- a/src/epacomp_tox/contracts/__init__.py +++ b/src/epacomp_tox/contracts/__init__.py @@ -2,30 +2,30 @@ import json from functools import lru_cache -from pathlib import Path from typing import Any, Dict, Tuple from jsonschema import Draft202012Validator -SCHEMA_ROOT = Path(__file__).resolve().parents[3] / "docs" / "contracts" / "schemas" +from epacomp_tox.assets import data_file class SchemaValidationError(RuntimeError): """Raised when a payload fails JSON Schema validation.""" -def _schema_path(namespace: str, name: str) -> Path: - return SCHEMA_ROOT / namespace / f"{name}.json" +def _schema_resource(namespace: str, name: str) -> Any: + return data_file("contracts", "schemas", namespace, f"{name}.json") @lru_cache(maxsize=128) def load_schema(namespace: str, name: str) -> Dict[str, Any]: """Load and cache a JSON Schema by namespace/name.""" - path = _schema_path(namespace, name) - if not path.exists(): - raise FileNotFoundError(f"Schema '{namespace}/{name}' not found at {path}") - with path.open("r", encoding="utf-8") as handle: - return json.load(handle) + resource = _schema_resource(namespace, name) + if not resource.is_file(): + raise FileNotFoundError( + f"Schema '{namespace}/{name}' not found in package data" + ) + return 
json.loads(resource.read_text(encoding="utf-8")) def validate_payload(payload: Any, *, namespace: str, name: str) -> None: diff --git a/src/epacomp_tox/data/__init__.py b/src/epacomp_tox/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/epacomp_tox/data/contracts/schemas/bioactivity/get_bioactivity_aop.response.schema.json b/src/epacomp_tox/data/contracts/schemas/bioactivity/get_bioactivity_aop.response.schema.json new file mode 100644 index 0000000..c384e2b --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/bioactivity/get_bioactivity_aop.response.schema.json @@ -0,0 +1,53 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "bioactivity.get_bioactivity_aop.response", + "description": "Adverse outcome pathway mapping records returned from CompTox bioactivity crosswalk queries.", + "type": "array", + "items": { + "type": "object", + "description": "A CompTox AOP mapping record.", + "additionalProperties": true, + "properties": { + "aopId": { + "type": [ + "string", + "number", + "null" + ] + }, + "eventNumber": { + "type": [ + "string", + "number", + "null" + ] + }, + "eventType": { + "type": [ + "string", + "null" + ] + }, + "eventLabel": { + "type": [ + "string", + "null" + ] + }, + "aeid": { + "type": [ + "string", + "number", + "null" + ] + }, + "entrezGeneId": { + "type": [ + "string", + "number", + "null" + ] + } + } + } +} diff --git a/src/epacomp_tox/data/contracts/schemas/bioactivity/get_bioactivity_assay.response.schema.json b/src/epacomp_tox/data/contracts/schemas/bioactivity/get_bioactivity_assay.response.schema.json new file mode 100644 index 0000000..024c5aa --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/bioactivity/get_bioactivity_assay.response.schema.json @@ -0,0 +1,62 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "bioactivity.get_bioactivity_assay.response", + "description": "Assay annotation payload returned for all-assay, single AEID, single-concentration, or gene-scoped bioactivity assay queries.", + "anyOf": [ + { + "type": "object", + "description": "Structured assay payload returned by the upstream API.", + "additionalProperties": true, + "properties": { + "aeid": { + "type": [ + "string", + "number", + "null" + ] + }, + "assayName": { + "type": [ + "string", + "null" + ] + }, + "geneSymbol": { + "type": [ + "string", + "null" + ] + } + } + }, + { + "type": "array", + "items": { + "type": "object", + "description": "An assay annotation record.", + "additionalProperties": true, + "properties": { + "aeid": { + "type": [ + "string", + "number", + "null" + ] + }, + "assayName": { + "type": [ + "string", + "null" + ] + }, + "geneSymbol": { + "type": [ + "string", + "null" + ] + } + } + } + } + ] +} diff --git a/src/epacomp_tox/data/contracts/schemas/bioactivity/get_bioactivity_summary_by_dtxsid.response.schema.json b/src/epacomp_tox/data/contracts/schemas/bioactivity/get_bioactivity_summary_by_dtxsid.response.schema.json new file mode 100644 index 0000000..40d14c9 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/bioactivity/get_bioactivity_summary_by_dtxsid.response.schema.json @@ -0,0 +1,52 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "bioactivity.get_bioactivity_summary_by_dtxsid.response", + "description": "Bioactivity summary records for a chemical queried by DTXSID.", + "type": "array", + "items": { + "type": "object", + "description": "A bioactivity summary record associated with the requested chemical.", + 
"additionalProperties": true, + "properties": { + "dtxsid": { + "type": [ + "string", + "null" + ] + }, + "aeid": { + "type": [ + "string", + "number", + "null" + ] + }, + "assayName": { + "type": [ + "string", + "null" + ] + }, + "geneSymbol": { + "type": [ + "string", + "null" + ] + }, + "hitcall": { + "type": [ + "boolean", + "number", + "null" + ] + }, + "ac50": { + "type": [ + "number", + "string", + "null" + ] + } + } + } +} diff --git a/src/epacomp_tox/data/contracts/schemas/bioactivity/search_bioactivity_terms.response.schema.json b/src/epacomp_tox/data/contracts/schemas/bioactivity/search_bioactivity_terms.response.schema.json new file mode 100644 index 0000000..7b5b373 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/bioactivity/search_bioactivity_terms.response.schema.json @@ -0,0 +1,39 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "bioactivity.search_bioactivity_terms.response", + "description": "Bioactivity search-term results returned for exact, prefix, or substring lookups.", + "type": "array", + "items": { + "anyOf": [ + { + "type": "string", + "description": "A matched bioactivity term." + }, + { + "type": "object", + "description": "A matched bioactivity term record when the upstream API returns structured results.", + "additionalProperties": true, + "properties": { + "term": { + "type": [ + "string", + "null" + ] + }, + "displayName": { + "type": [ + "string", + "null" + ] + }, + "category": { + "type": [ + "string", + "null" + ] + } + } + } + ] + } +} diff --git a/src/epacomp_tox/data/contracts/schemas/chemical/ghs_links.response.schema.json b/src/epacomp_tox/data/contracts/schemas/chemical/ghs_links.response.schema.json new file mode 100644 index 0000000..b4091c8 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/chemical/ghs_links.response.schema.json @@ -0,0 +1,19 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "chemical.check_chemical_ghs_links.response", + "type": "object", + "required": ["source", "results"], + "properties": { + "source": { + "type": "string" + }, + "results": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + } + }, + "additionalProperties": false +} diff --git a/src/epacomp_tox/data/contracts/schemas/chemical/indigo_convert.response.schema.json b/src/epacomp_tox/data/contracts/schemas/chemical/indigo_convert.response.schema.json new file mode 100644 index 0000000..e01ec5a --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/chemical/indigo_convert.response.schema.json @@ -0,0 +1,13 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "chemical.indigo_convert.response", + "type": "object", + "required": ["outputFormat", "value"], + "properties": { + "outputFormat": { + "type": "string" + }, + "value": {} + }, + "additionalProperties": false +} diff --git a/src/epacomp_tox/data/contracts/schemas/chemical/opsin_convert.response.schema.json b/src/epacomp_tox/data/contracts/schemas/chemical/opsin_convert.response.schema.json new file mode 100644 index 0000000..11b0000 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/chemical/opsin_convert.response.schema.json @@ -0,0 +1,16 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "chemical.opsin_convert.response", + "type": "object", + "required": ["name", "outputFormat", "value"], + "properties": { + "name": { + "type": "string" + }, + "outputFormat": { + "type": "string" + }, + "value": {} + }, + "additionalProperties": 
false +} diff --git a/src/epacomp_tox/data/contracts/schemas/chemical/resolve_chemical_identifier.response.schema.json b/src/epacomp_tox/data/contracts/schemas/chemical/resolve_chemical_identifier.response.schema.json new file mode 100644 index 0000000..f8d1498 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/chemical/resolve_chemical_identifier.response.schema.json @@ -0,0 +1,72 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "chemical.resolve_chemical_identifier.response", + "description": "Deterministic identifier-resolution result for a single chemical input.", + "type": "object", + "additionalProperties": false, + "required": [ + "status", + "inputIdentifier", + "inputType", + "canonicalDtxsid", + "preferredName", + "casrn", + "searchModeUsed", + "candidateCount", + "candidates", + "warnings" + ], + "properties": { + "status": { + "type": "string", + "enum": ["resolved", "ambiguous", "not_found"] + }, + "inputIdentifier": { + "type": "string" + }, + "inputType": { + "type": "string" + }, + "canonicalDtxsid": { + "type": ["string", "null"] + }, + "preferredName": { + "type": ["string", "null"] + }, + "casrn": { + "type": ["string", "null"] + }, + "searchModeUsed": { + "type": ["string", "null"] + }, + "candidateCount": { + "type": "integer", + "minimum": 0 + }, + "candidates": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "dtxsid": {"type": ["string", "null"]}, + "dtxcid": {"type": ["string", "null"]}, + "casrn": {"type": ["string", "null"]}, + "preferredName": {"type": ["string", "null"]}, + "smiles": {"type": ["string", "null"]}, + "searchName": {"type": ["string", "null"]}, + "searchValue": {"type": ["string", "null"]}, + "rank": {"type": ["integer", "null"]}, + "synonyms": { + "type": "array", + "items": {"type": "string"} + } + } + } + }, + "warnings": { + "type": "array", + "items": {"type": "string"} + } + } +} diff --git a/src/epacomp_tox/data/contracts/schemas/chemical/search_chemical.response.schema.json b/src/epacomp_tox/data/contracts/schemas/chemical/search_chemical.response.schema.json new file mode 100644 index 0000000..c98988d --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/chemical/search_chemical.response.schema.json @@ -0,0 +1,53 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "chemical.search_chemical.response", + "description": "A list of chemicals matching the search criteria.", + "type": "array", + "items": { + "type": "object", + "description": "A single chemical record from the search results.", + "properties": { + "dtxsid": { + "type": ["string", "null"], + "description": "DSSTox Substance Identifier - unique identifier for the chemical substance" + }, + "dtxcid": { + "type": ["string", "null"], + "description": "DSSTox Chemical Identifier - unique identifier for the chemical structure" + }, + "casrn": { + "type": ["string", "null"], + "description": "CAS Registry Number - Chemical Abstracts Service registry number" + }, + "preferredName": { + "type": ["string", "null"], + "description": "Preferred chemical name in the database" + }, + "smiles": { + "type": ["string", "null"], + "description": "SMILES (Simplified Molecular Input Line Entry System) representation of the chemical structure" + }, + "hasStructureImage": { + "type": "integer", + "description": "Flag indicating if a structure image is available (1 = yes, 0 = no)" + }, + "isMarkush": { + "type": "boolean", + "description": "Indicates if this is a Markush 
structure (generic chemical structure)" + }, + "searchName": { + "type": "string", + "description": "The field name that was searched" + }, + "searchValue": { + "type": "string", + "description": "The value that was searched for" + }, + "rank": { + "type": "integer", + "description": "Search result ranking score" + } + }, + "required": ["dtxsid", "preferredName", "rank"] + } +} diff --git a/src/epacomp_tox/data/contracts/schemas/chemical/structure_file.response.schema.json b/src/epacomp_tox/data/contracts/schemas/chemical/structure_file.response.schema.json new file mode 100644 index 0000000..360ae55 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/chemical/structure_file.response.schema.json @@ -0,0 +1,40 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "chemical.structure_file.response", + "type": "object", + "required": [ + "encoding", + "data", + "identifier", + "identifierType", + "fileFormat" + ], + "properties": { + "encoding": { + "type": "string", + "enum": ["base64", "utf-8"] + }, + "data": { + "type": "string" + }, + "contentType": { + "type": "string" + }, + "identifier": { + "type": "string" + }, + "identifierType": { + "type": "string" + }, + "fileFormat": { + "type": "string" + }, + "imageFormat": { + "type": "string" + }, + "length": { + "type": ["integer", "null"] + } + }, + "additionalProperties": false +} diff --git a/src/epacomp_tox/data/contracts/schemas/cheminformatics/toxprints.response.schema.json b/src/epacomp_tox/data/contracts/schemas/cheminformatics/toxprints.response.schema.json new file mode 100644 index 0000000..8f60ec1 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/cheminformatics/toxprints.response.schema.json @@ -0,0 +1,7 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "cheminformatics.toxprints.response", + "type": "object", + "description": "Generic mapping returned by ToxPrint searches; keys and shapes depend on upstream service.", + "additionalProperties": true +} diff --git a/src/epacomp_tox/data/contracts/schemas/common/list_generic.response.schema.json b/src/epacomp_tox/data/contracts/schemas/common/list_generic.response.schema.json new file mode 100644 index 0000000..2a351d9 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/common/list_generic.response.schema.json @@ -0,0 +1,7 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "common.list_generic.response", + "description": "Generic array schema used for MCP tools that return lists of records or scalar values.", + "type": "array", + "items": {} +} diff --git a/src/epacomp_tox/data/contracts/schemas/common/mapping_list_generic.response.schema.json b/src/epacomp_tox/data/contracts/schemas/common/mapping_list_generic.response.schema.json new file mode 100644 index 0000000..930f5f8 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/common/mapping_list_generic.response.schema.json @@ -0,0 +1,10 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "common.mapping_list_generic.response", + "description": "Schema for responses keyed by identifier with array payloads.", + "type": "object", + "additionalProperties": { + "type": "array", + "items": {} + } +} diff --git a/src/epacomp_tox/data/contracts/schemas/common/object.response.schema.json b/src/epacomp_tox/data/contracts/schemas/common/object.response.schema.json new file mode 100644 index 0000000..f40726e --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/common/object.response.schema.json @@ -0,0 
+1,7 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "common.object.response", + "description": "Generic object schema for MCP tools returning structured mappings.", + "type": "object", + "additionalProperties": true +} diff --git a/src/epacomp_tox/data/contracts/schemas/common/object_or_list.response.schema.json b/src/epacomp_tox/data/contracts/schemas/common/object_or_list.response.schema.json new file mode 100644 index 0000000..312aa61 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/common/object_or_list.response.schema.json @@ -0,0 +1,15 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "common.object_or_list.response", + "description": "Schema allowing tools to return either an object or an array of records.", + "anyOf": [ + { + "type": "object", + "additionalProperties": true + }, + { + "type": "array", + "items": {} + } + ] +} diff --git a/src/epacomp_tox/data/contracts/schemas/exposure/get_exposure_httk.response.schema.json b/src/epacomp_tox/data/contracts/schemas/exposure/get_exposure_httk.response.schema.json new file mode 100644 index 0000000..4283a9d --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/exposure/get_exposure_httk.response.schema.json @@ -0,0 +1,63 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "exposure.get_exposure_httk.response", + "description": "Detailed HTTK records for a single chemical retrieved from the dedicated exposure HTTK endpoint.", + "type": "array", + "items": { + "type": "object", + "description": "A detailed HTTK record.", + "additionalProperties": true, + "properties": { + "dtxsid": { + "type": [ + "string", + "null" + ] + }, + "casrn": { + "type": [ + "string", + "null" + ] + }, + "preferredName": { + "type": [ + "string", + "null" + ] + }, + "species": { + "type": [ + "string", + "null" + ] + }, + "parameter": { + "type": [ + "string", + "null" + ] + }, + "value": { + "type": [ + "number", + "string", + "null" + ] + }, + "unit": { + "type": [ + "string", + "null" + ] + }, + "model": { + "type": [ + "string", + "null" + ], + "description": "HTTK model or parameter family when present." + } + } + } +} diff --git a/src/epacomp_tox/data/contracts/schemas/exposure/search_cpdat.response.schema.json b/src/epacomp_tox/data/contracts/schemas/exposure/search_cpdat.response.schema.json new file mode 100644 index 0000000..df348ec --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/exposure/search_cpdat.response.schema.json @@ -0,0 +1,53 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "exposure.search_cpdat.response", + "description": "CPDat records returned for functional use, product use category, or list-presence searches.", + "type": "array", + "items": { + "type": "object", + "description": "A CPDat exposure record.", + "additionalProperties": true, + "properties": { + "dtxsid": { + "type": [ + "string", + "null" + ], + "description": "DSSTox substance identifier when present." + }, + "casrn": { + "type": [ + "string", + "null" + ] + }, + "preferredName": { + "type": [ + "string", + "null" + ] + }, + "functionalUse": { + "type": [ + "string", + "null" + ], + "description": "Functional use term when returned by the chosen vocabulary." + }, + "productUseCategory": { + "type": [ + "string", + "null" + ], + "description": "Product use category when returned by the chosen vocabulary." 
+ }, + "listPresence": { + "type": [ + "string", + "null" + ], + "description": "List-presence tag or keyword when returned by the chosen vocabulary." + } + } + } +} diff --git a/src/epacomp_tox/data/contracts/schemas/exposure/search_httk.response.schema.json b/src/epacomp_tox/data/contracts/schemas/exposure/search_httk.response.schema.json new file mode 100644 index 0000000..993284b --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/exposure/search_httk.response.schema.json @@ -0,0 +1,57 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "exposure.search_httk.response", + "description": "HTTK records returned by the CompTox exposure search surface.", + "type": "array", + "items": { + "type": "object", + "description": "A high-throughput toxicokinetic record.", + "additionalProperties": true, + "properties": { + "dtxsid": { + "type": [ + "string", + "null" + ] + }, + "casrn": { + "type": [ + "string", + "null" + ] + }, + "preferredName": { + "type": [ + "string", + "null" + ] + }, + "species": { + "type": [ + "string", + "null" + ] + }, + "parameter": { + "type": [ + "string", + "null" + ], + "description": "HTTK parameter name when the payload is parameterized." + }, + "value": { + "type": [ + "number", + "string", + "null" + ] + }, + "unit": { + "type": [ + "string", + "null" + ] + } + } + } +} diff --git a/src/epacomp_tox/data/contracts/schemas/hazard/batch_search_hazard.response.schema.json b/src/epacomp_tox/data/contracts/schemas/hazard/batch_search_hazard.response.schema.json new file mode 100644 index 0000000..70ac005 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/hazard/batch_search_hazard.response.schema.json @@ -0,0 +1,59 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "hazard.batch_search_hazard.response", + "description": "Mapping from DTXSID to hazard dataset records for a batch hazard lookup.", + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "type": "object", + "description": "A hazard record returned for a specific DTXSID in the batch request.", + "additionalProperties": true, + "properties": { + "dtxsid": { + "type": [ + "string", + "null" + ] + }, + "casrn": { + "type": [ + "string", + "null" + ] + }, + "preferredName": { + "type": [ + "string", + "null" + ] + }, + "source": { + "type": [ + "string", + "null" + ] + }, + "effect": { + "type": [ + "string", + "null" + ] + }, + "value": { + "type": [ + "number", + "string", + "null" + ] + }, + "unit": { + "type": [ + "string", + "null" + ] + } + } + } + } +} diff --git a/src/epacomp_tox/data/contracts/schemas/hazard/search_hazard.response.schema.json b/src/epacomp_tox/data/contracts/schemas/hazard/search_hazard.response.schema.json new file mode 100644 index 0000000..6cd8b57 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/hazard/search_hazard.response.schema.json @@ -0,0 +1,63 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "hazard.search_hazard.response", + "description": "Hazard dataset records returned for a single chemical lookup across ToxValDB, ToxRefDB, cancer, genetox, ADME/IVIVE, IRIS, PPRTV, or HAWC selectors.", + "type": "array", + "items": { + "type": "object", + "description": "A hazard record from the selected CompTox hazard dataset.", + "additionalProperties": true, + "properties": { + "dtxsid": { + "type": [ + "string", + "null" + ], + "description": "DSSTox substance identifier when present in the upstream payload." 
+ }, + "casrn": { + "type": [ + "string", + "null" + ], + "description": "CAS Registry Number when present." + }, + "preferredName": { + "type": [ + "string", + "null" + ], + "description": "Preferred chemical name when present." + }, + "source": { + "type": [ + "string", + "null" + ], + "description": "Source dataset, agency, or citation label." + }, + "effect": { + "type": [ + "string", + "null" + ], + "description": "Reported effect or endpoint label when applicable." + }, + "value": { + "type": [ + "number", + "string", + "null" + ], + "description": "Reported hazard value when applicable." + }, + "unit": { + "type": [ + "string", + "null" + ], + "description": "Unit associated with the reported value." + } + } + } +} diff --git a/src/epacomp_tox/data/contracts/schemas/manifest/get_contract_manifest.response.schema.json b/src/epacomp_tox/data/contracts/schemas/manifest/get_contract_manifest.response.schema.json new file mode 100644 index 0000000..2ce17b4 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/manifest/get_contract_manifest.response.schema.json @@ -0,0 +1,153 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://toxmcp.org/schemas/contracts/manifest/get_contract_manifest.response.schema.json", + "title": "get_contract_manifest response", + "type": "object", + "properties": { + "server": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "title": { "type": "string" }, + "version": { "type": "string" }, + "resourceCount": { "type": "integer", "minimum": 0 }, + "toolCount": { "type": "integer", "minimum": 0 }, + "transportEndpoints": { + "type": "array", + "items": { "type": "string" } + } + }, + "required": ["name", "title", "version", "resourceCount", "toolCount", "transportEndpoints"], + "additionalProperties": true + }, + "publicBoundary": { + "type": "object", + "properties": { + "primaryRole": { "type": "string" }, + "screeningRole": { "type": "string" }, + "experimentalModules": { + "type": "array", + "items": { "type": "string" } + }, + "notOwnedByCompToxMcp": { + "type": "array", + "items": { "type": "string" } + } + }, + "required": ["primaryRole", "screeningRole", "experimentalModules", "notOwnedByCompToxMcp"], + "additionalProperties": true + }, + "resources": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "description": { "type": "string" }, + "url": { "type": "string" }, + "toolNames": { + "type": "array", + "items": { "type": "string" } + } + }, + "required": ["name", "description", "url", "toolNames"], + "additionalProperties": true + } + }, + "tools": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "resource": { "type": "string" }, + "hasOutputSchema": { "type": "boolean" }, + "responseSchemaRef": { + "type": ["object", "null"], + "properties": { + "namespace": { "type": "string" }, + "name": { "type": "string" } + }, + "required": ["namespace", "name"], + "additionalProperties": false + } + }, + "required": ["name", "resource", "hasOutputSchema"], + "additionalProperties": true + } + }, + "portableObjectSchemas": { + "type": "array", + "items": { + "type": "object", + "properties": { + "file": { "type": "string" }, + "title": { "type": "string" }, + "schemaId": { "type": "string" }, + "exampleFile": { "type": "string" } + }, + "required": ["file"], + "additionalProperties": true + } + }, + "responseSchemas": { + "type": "array", + "items": { + "type": "object", + "properties": { + 
"namespace": { "type": "string" }, + "file": { "type": "string" }, + "path": { "type": "string" } + }, + "required": ["namespace", "file", "path"], + "additionalProperties": false + } + }, + "publicContractReferences": { + "type": "object", + "properties": { + "interop": { + "type": "array", + "items": { "$ref": "#/$defs/contractReference" } + }, + "screeningPrioritization": { + "type": "array", + "items": { "$ref": "#/$defs/contractReference" } + } + }, + "required": ["interop", "screeningPrioritization"], + "additionalProperties": false + } + }, + "required": [ + "server", + "publicBoundary", + "resources", + "tools", + "portableObjectSchemas", + "responseSchemas", + "publicContractReferences" + ], + "additionalProperties": true, + "$defs": { + "contractReference": { + "type": "object", + "properties": { + "toolName": { "type": "string" }, + "responseSchemaRef": { + "type": "object", + "properties": { + "namespace": { "type": "string" }, + "name": { "type": "string" } + }, + "required": ["namespace", "name"], + "additionalProperties": false + }, + "portableSchema": { "type": "string" }, + "exampleFile": { "type": "string" } + }, + "required": ["toolName", "responseSchemaRef"], + "additionalProperties": true + } + } +} diff --git a/src/epacomp_tox/data/contracts/schemas/metadata/applicability_detail.response.schema.json b/src/epacomp_tox/data/contracts/schemas/metadata/applicability_detail.response.schema.json new file mode 100644 index 0000000..5195e64 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/metadata/applicability_detail.response.schema.json @@ -0,0 +1,41 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "metadata.get_applicability_domain.response", + "type": "object", + "required": ["model", "version", "criteria", "policy"], + "properties": { + "model": {"type": "string"}, + "version": {"type": "string"}, + "criteria": { + "type": "array", + "items": {"type": "object", "additionalProperties": true} + }, + "policy": {"type": "string"}, + "errorCode": {"type": ["string", "null"]}, + "documentedCriteria": { + "type": "array", + "items": {"type": "object", "additionalProperties": true} + }, + "delegatedCriteria": { + "type": "array", + "items": {"type": "object", "additionalProperties": true} + }, + "locallyEnforcedCriteria": { + "type": "array", + "items": {"type": "string"} + }, + "enforcementLocation": { + "type": "string", + "enum": ["delegated-service", "local-engine"] + }, + "guardrailStatus": { + "type": "object", + "additionalProperties": true + }, + "references": { + "type": "array", + "items": {"type": "object", "additionalProperties": true} + } + }, + "additionalProperties": true +} diff --git a/src/epacomp_tox/data/contracts/schemas/metadata/applicability_list.response.schema.json b/src/epacomp_tox/data/contracts/schemas/metadata/applicability_list.response.schema.json new file mode 100644 index 0000000..2934c9f --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/metadata/applicability_list.response.schema.json @@ -0,0 +1,14 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "metadata.list_applicability_domain.response", + "type": "object", + "required": ["applicabilityDomains", "nextCursor"], + "properties": { + "applicabilityDomains": { + "type": "array", + "items": {"type": "object", "additionalProperties": true} + }, + "nextCursor": {"type": ["string", "null"]} + }, + "additionalProperties": false +} diff --git a/src/epacomp_tox/data/contracts/schemas/metadata/model_cards.response.schema.json 
b/src/epacomp_tox/data/contracts/schemas/metadata/model_cards.response.schema.json new file mode 100644 index 0000000..e46dfe7 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/metadata/model_cards.response.schema.json @@ -0,0 +1,43 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "metadata.get_model_card.response", + "type": "object", + "required": ["modelCards", "nextCursor"], + "properties": { + "modelCards": { + "type": "array", + "items": { + "type": "object", + "required": ["card", "checksum", "lastModified"], + "properties": { + "card": {"type": "object"}, + "checksum": {"type": "string"}, + "lastModified": {"type": "string"}, + "documentedCriteria": { + "type": "array", + "items": {"type": "object", "additionalProperties": true} + }, + "delegatedCriteria": { + "type": "array", + "items": {"type": "object", "additionalProperties": true} + }, + "locallyEnforcedCriteria": { + "type": "array", + "items": {"type": "string"} + }, + "enforcementLocation": { + "type": "string", + "enum": ["delegated-service", "local-engine"] + }, + "guardrailStatus": { + "type": "object", + "additionalProperties": true + } + }, + "additionalProperties": true + } + }, + "nextCursor": {"type": ["string", "null"]} + }, + "additionalProperties": false +} diff --git a/src/epacomp_tox/data/contracts/schemas/predictive/ad_check.response.schema.json b/src/epacomp_tox/data/contracts/schemas/predictive/ad_check.response.schema.json new file mode 100644 index 0000000..618a875 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/predictive/ad_check.response.schema.json @@ -0,0 +1,12 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "predictive.check_applicability_domain.response", + "type": "object", + "required": ["in_domain", "confidence", "details"], + "properties": { + "in_domain": {"type": "boolean"}, + "confidence": {"type": "number"}, + "details": {"type": "object", "additionalProperties": true} + }, + "additionalProperties": true +} diff --git a/src/epacomp_tox/data/contracts/schemas/predictive/predict.response.schema.json b/src/epacomp_tox/data/contracts/schemas/predictive/predict.response.schema.json new file mode 100644 index 0000000..c4df305 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/predictive/predict.response.schema.json @@ -0,0 +1,32 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "predictive.predict.response", + "type": "object", + "required": ["prediction", "applicability_domain", "metadata"], + "properties": { + "prediction": { + "type": "object", + "additionalProperties": true + }, + "applicability_domain": { + "$ref": "#/$defs/adCheck" + }, + "metadata": { + "type": "object", + "additionalProperties": true + } + }, + "$defs": { + "adCheck": { + "type": "object", + "required": ["in_domain", "confidence", "details"], + "properties": { + "in_domain": {"type": "boolean"}, + "confidence": {"type": "number"}, + "details": {"type": "object", "additionalProperties": true} + }, + "additionalProperties": true + } + }, + "additionalProperties": false +} diff --git a/src/epacomp_tox/data/contracts/schemas/risk/prioritize_risk_signals.response.schema.json b/src/epacomp_tox/data/contracts/schemas/risk/prioritize_risk_signals.response.schema.json new file mode 100644 index 0000000..2221254 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/risk/prioritize_risk_signals.response.schema.json @@ -0,0 +1,135 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": 
"https://toxmcp.org/schemas/contracts/risk/prioritize_risk_signals.response.schema.json", + "title": "prioritize_risk_signals response", + "type": "object", + "properties": { + "chemicalRef": { + "type": "object", + "properties": { + "dtxsid": { "type": "string" }, + "preferredName": { "type": "string" }, + "casrn": { "type": "string" } + }, + "required": ["dtxsid", "preferredName"], + "additionalProperties": true + }, + "identityResolution": { + "type": "object", + "additionalProperties": true + }, + "hazardSignal": { + "type": "object", + "properties": { + "recordCount": { "type": "integer", "minimum": 0 }, + "sourceTool": { "type": "string" }, + "selectedMetric": { + "type": ["object", "null"], + "additionalProperties": true + } + }, + "required": ["recordCount", "sourceTool"], + "additionalProperties": true + }, + "exposureSignal": { + "type": "object", + "properties": { + "seem": { "$ref": "#/$defs/evidenceSlice" }, + "httk": { "$ref": "#/$defs/evidenceSlice" }, + "mmdb": { "$ref": "#/$defs/evidenceSlice" }, + "cpdat": { "$ref": "#/$defs/evidenceSlice" } + }, + "required": ["seem", "httk", "mmdb", "cpdat"], + "additionalProperties": false + }, + "prioritization": { + "type": "object", + "properties": { + "priorityBand": { + "type": "string", + "enum": ["higher", "moderate", "lower", "inconclusive"] + }, + "marginOfExposure": { "type": ["number", "null"] }, + "hazardPointOfDeparture": { "type": ["number", "null"] }, + "hazardUnit": { "type": ["string", "null"] }, + "exposureEstimate": { "type": ["number", "null"] }, + "exposureUnit": { "type": ["string", "null"] }, + "signalDirection": { "type": "string" }, + "priorityHeuristic": { + "type": "object", + "additionalProperties": { "type": "string" } + }, + "basis": { "type": "string" }, + "supportingSignals": { + "type": "array", + "items": { "type": "string" } + }, + "caveats": { + "type": "array", + "items": { "type": "string" } + } + }, + "required": [ + "priorityBand", + "marginOfExposure", + "signalDirection", + "basis", + "caveats" + ], + "additionalProperties": true + }, + "knownDataGaps": { + "type": "array", + "items": { "type": "string" } + }, + "limitations": { + "type": "array", + "items": { "type": "string" } + }, + "generatedFromTools": { + "type": "array", + "items": { "type": "string" } + }, + "provenanceSummary": { + "type": "object", + "properties": { + "generatedBy": { "type": "string" }, + "generatedAt": { "type": "string" }, + "sourceCount": { "type": "integer", "minimum": 0 }, + "sourceTools": { + "type": "array", + "items": { "type": "string" } + }, + "identityMode": { "type": "string" } + }, + "required": ["generatedBy", "generatedAt", "sourceCount", "sourceTools", "identityMode"], + "additionalProperties": true + } + }, + "required": [ + "chemicalRef", + "hazardSignal", + "exposureSignal", + "prioritization", + "knownDataGaps", + "limitations", + "generatedFromTools", + "provenanceSummary" + ], + "additionalProperties": true, + "$defs": { + "evidenceSlice": { + "type": "object", + "properties": { + "recordCount": { "type": "integer", "minimum": 0 }, + "sourceTool": { "type": "string" }, + "selectedMetrics": { + "type": ["object", "null"], + "additionalProperties": true + } + }, + "required": ["recordCount", "sourceTool"], + "additionalProperties": true + } + } +} diff --git a/src/epacomp_tox/data/contracts/schemas/workflow/aop_linkage_summary.response.schema.json b/src/epacomp_tox/data/contracts/schemas/workflow/aop_linkage_summary.response.schema.json new file mode 100644 index 0000000..24797d8 --- /dev/null +++ 
b/src/epacomp_tox/data/contracts/schemas/workflow/aop_linkage_summary.response.schema.json @@ -0,0 +1,125 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "workflow.aop_linkage_summary.response", + "description": "CompTox-side AOP linkage summary prepared for downstream mechanistic consumers.", + "type": "object", + "additionalProperties": false, + "required": [ + "chemicalRef", + "lookupMode", + "mappings", + "supportingAssays", + "confidence", + "provenance" + ], + "properties": { + "chemicalRef": { + "type": "object", + "additionalProperties": true, + "required": [ + "dtxsid", + "preferredName" + ], + "properties": { + "dtxsid": { + "type": "string" + }, + "preferredName": { + "type": "string" + }, + "casrn": { + "type": [ + "string", + "null" + ] + } + } + }, + "lookupMode": { + "type": "string" + }, + "mappings": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + "supportingAssays": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + "confidence": { + "type": "object", + "additionalProperties": true, + "required": [ + "score" + ], + "properties": { + "score": { + "type": "number" + } + } + }, + "identityResolution": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + "data": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + "knownDataGaps": { + "type": "array", + "items": {"type": "string"} + }, + "limitations": { + "type": "array", + "items": {"type": "string"} + }, + "generatedFromTools": { + "type": "array", + "items": {"type": "string"} + }, + "provenanceSummary": { + "type": "object", + "additionalProperties": true + }, + "metadata": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + "provenance": { + "type": "object", + "additionalProperties": true, + "required": [ + "sourceMcp", + "generatedAt", + "sources" + ], + "properties": { + "sourceMcp": { + "type": "string" + }, + "generatedAt": { + "type": "string" + }, + "sources": { + "type": "array" + } + } + } + } +} diff --git a/src/epacomp_tox/data/contracts/schemas/workflow/comptox_evidence_pack.response.schema.json b/src/epacomp_tox/data/contracts/schemas/workflow/comptox_evidence_pack.response.schema.json new file mode 100644 index 0000000..7aaa3ed --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/workflow/comptox_evidence_pack.response.schema.json @@ -0,0 +1,127 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "workflow.comptox_evidence_pack.response", + "description": "Portable CompTox evidence pack assembled from identity, hazard, exposure, bioactivity, AOP, and PBPK-context slices.", + "type": "object", + "additionalProperties": false, + "required": [ + "chemicalIdentity", + "metadata", + "audit", + "semanticCoverage" + ], + "properties": { + "chemicalIdentity": { + "type": "object", + "additionalProperties": true, + "required": [ + "dtxsid", + "preferredName", + "provenance" + ] + }, + "hazardEvidenceSummary": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + "exposureEvidenceSummary": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + "bioactivityEvidenceSummary": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + "aopLinkageSummary": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + "pbpkContextBundle": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + 
"identityResolution": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + "data": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + "knownDataGaps": { + "type": "array", + "items": {"type": "string"} + }, + "limitations": { + "type": "array", + "items": {"type": "string"} + }, + "generatedFromTools": { + "type": "array", + "items": {"type": "string"} + }, + "provenanceSummary": { + "type": "object", + "additionalProperties": true + }, + "metadata": { + "type": "object", + "additionalProperties": true, + "required": [ + "packId", + "sourceMcp", + "createdAt", + "suiteRole" + ] + }, + "mcpMetadata": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + "audit": { + "type": "object", + "additionalProperties": true, + "required": [ + "generatedAt", + "generatedBy", + "sourceTools" + ] + }, + "semanticCoverage": { + "type": "object", + "additionalProperties": true, + "required": [ + "identity", + "hazard", + "exposure", + "bioactivity", + "aopLinkage", + "pbpkContext" + ] + } + } +} diff --git a/src/epacomp_tox/data/contracts/schemas/workflow/pbpk_context_bundle.response.schema.json b/src/epacomp_tox/data/contracts/schemas/workflow/pbpk_context_bundle.response.schema.json new file mode 100644 index 0000000..3e748d3 --- /dev/null +++ b/src/epacomp_tox/data/contracts/schemas/workflow/pbpk_context_bundle.response.schema.json @@ -0,0 +1,126 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "workflow.pbpk_context_bundle.response", + "description": "CompTox-side PBPK context package prepared for downstream PBPK workflows.", + "type": "object", + "additionalProperties": false, + "required": [ + "chemicalIdentityRef", + "httkSlice", + "hazardAdmeIviveSlice", + "exposureHints", + "modelCardRefs", + "provenance", + "handoffTarget" + ], + "properties": { + "chemicalIdentityRef": { + "type": "object", + "additionalProperties": true, + "required": [ + "dtxsid", + "preferredName", + "provenance" + ], + "properties": { + "dtxsid": { + "type": "string" + }, + "preferredName": { + "type": "string" + }, + "provenance": { + "type": "object" + } + } + }, + "httkSlice": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + "hazardAdmeIviveSlice": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + "exposureHints": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + "modelCardRefs": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + "identityResolution": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + "data": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + "knownDataGaps": { + "type": "array", + "items": {"type": "string"} + }, + "limitations": { + "type": "array", + "items": {"type": "string"} + }, + "generatedFromTools": { + "type": "array", + "items": {"type": "string"} + }, + "provenanceSummary": { + "type": "object", + "additionalProperties": true + }, + "metadata": { + "type": [ + "object", + "null" + ], + "additionalProperties": true + }, + "provenance": { + "type": "object", + "additionalProperties": true, + "required": [ + "sourceMcp", + "generatedAt", + "sources" + ], + "properties": { + "sourceMcp": { + "type": "string" + }, + "generatedAt": { + "type": "string" + }, + "sources": { + "type": "array" + } + } + }, + "handoffTarget": { + "type": "string" + } + } +} diff --git 
a/src/epacomp_tox/data/metadata/applicability_domains/README.md b/src/epacomp_tox/data/metadata/applicability_domains/README.md new file mode 100644 index 0000000..9a9eff9 --- /dev/null +++ b/src/epacomp_tox/data/metadata/applicability_domains/README.md @@ -0,0 +1,8 @@ +# Applicability Domain Reference Data + +Machine-readable definitions for applicability domain (AD) guardrails. Each JSON file aligns with a predictive model card and provides detailed parameters used by `PredictiveServiceBase` implementations during AD enforcement. + +Conventions: +- One JSON per model (e.g., `test_consensus_ad.json`, `opera_property_ad.json`, `genra_read_across_ad.json`). +- Each file contains descriptors, thresholds, similarity parameters, and references. +- Files should be versioned alongside model cards and validated in CI (Task 2.5). diff --git a/src/epacomp_tox/data/metadata/applicability_domains/genra_read_across_ad.json b/src/epacomp_tox/data/metadata/applicability_domains/genra_read_across_ad.json new file mode 100644 index 0000000..1d8be0b --- /dev/null +++ b/src/epacomp_tox/data/metadata/applicability_domains/genra_read_across_ad.json @@ -0,0 +1,29 @@ +{ + "model": "GenRA Read-Across Workflow", + "version": "2.1.0", + "criteria": [ + { + "type": "similarity", + "metric": "tanimoto", + "threshold": 0.7, + "minAnalogues": 3 + }, + { + "type": "coverage", + "requirements": ["in vivo", "in vitro"], + "minimumDomains": 2 + }, + { + "type": "expert_rule", + "rule": "Mode of action tags must align", + "allowableMismatch": 1 + } + ], + "policy": "block", + "errorCode": "GENRA_AD_FAIL", + "references": [ + { + "citation": "GenRA Technical Manual 2025" + } + ] +} diff --git a/src/epacomp_tox/data/metadata/applicability_domains/opera_property_ad.json b/src/epacomp_tox/data/metadata/applicability_domains/opera_property_ad.json new file mode 100644 index 0000000..cd5ad98 --- /dev/null +++ b/src/epacomp_tox/data/metadata/applicability_domains/opera_property_ad.json @@ -0,0 +1,24 @@ +{ + "model": "OPERA Property Predictions", + "version": "3.6.1", + "criteria": [ + { + "type": "descriptor_range", + "descriptors": ["atomCount", "bondCount", "polarSurfaceArea"], + "range": {"mode": "min_max"} + }, + { + "type": "similarity", + "metric": "tanimoto", + "threshold": 0.6, + "neighbors": 5 + } + ], + "policy": "warn", + "errorCode": "OPERA_AD_WARN", + "references": [ + { + "citation": "OPERA Manual 2025" + } + ] +} diff --git a/src/epacomp_tox/data/metadata/applicability_domains/test_consensus_ad.json b/src/epacomp_tox/data/metadata/applicability_domains/test_consensus_ad.json new file mode 100644 index 0000000..21b9efe --- /dev/null +++ b/src/epacomp_tox/data/metadata/applicability_domains/test_consensus_ad.json @@ -0,0 +1,25 @@ +{ + "model": "TEST Consensus Acute Toxicity", + "version": "5.2.0", + "criteria": [ + { + "type": "descriptor_range", + "descriptors": ["logS", "logP", "LUMO", "polarSurfaceArea"], + "range": {"lowerPercentile": 0.05, "upperPercentile": 0.95} + }, + { + "type": "similarity", + "metric": "tanimoto", + "threshold": 0.65, + "fingerprint": "pubchem" + } + ], + "policy": "block", + "errorCode": "TEST_AD_FAIL", + "references": [ + { + "citation": "Mansouri et al. 
2018", + "doi": "10.1021/acs.jcim.7b00524" + } + ] +} diff --git a/src/epacomp_tox/data/metadata/model_cards/genra_read_across.json b/src/epacomp_tox/data/metadata/model_cards/genra_read_across.json new file mode 100644 index 0000000..8a88c9a --- /dev/null +++ b/src/epacomp_tox/data/metadata/model_cards/genra_read_across.json @@ -0,0 +1,209 @@ +{ + "schemaVersion": "1.0", + "modelDetails": { + "name": "GenRA Read-Across Workflow", + "version": "2.1.0", + "modelType": "Read-Across", + "description": "Generalized read-across workflow combining analogue search, evidence weighting, and prediction synthesis.", + "developers": [ + { + "name": "EPA Computational Toxicology" + } + ], + "organizations": [ + "US EPA" + ], + "releaseDate": "2025-03-05", + "license": "EPA Terms of Use" + }, + "intendedUse": { + "summary": "Supports regulatory read-across decisions for data gap filling and hazard assessment.", + "inScope": [ + "Organic chemicals with available ToxCast/ToxVal analogues" + ], + "outOfScope": [ + "Chemicals lacking sufficient analogue coverage", + "Mixtures" + ], + "limitations": [ + "Requires SME review when analogue similarity < 0.7." + ], + "warnings": [ + "Document evidence narrative before external submission." + ], + "regulatoryPrograms": [ + "TSCA New Chemicals", + "OECD Cooperative Chemicals Assessment" + ] + }, + "oecdValidationPrinciples": { + "definedEndpoint": { + "description": "Endpoints inherited from analogue dataset (e.g., repeat-dose toxicity LOAEL)", + "unit": "varies by endpoint" + }, + "unambiguousAlgorithm": { + "summary": "Analogue search using structural fingerprints, evidence scoring across data streams, Bayesian-weighted prediction aggregation.", + "methodClass": "Read-Across", + "implementation": "GenRA Service 2.1", + "references": [ + { + "citation": "Patlewicz et al. 2015", + "doi": "10.1093/toxsci/kfv169" + } + ] + }, + "definedApplicabilityDomain": { + "summary": "Assess analogue availability, structural similarity, and metadata completeness before generating predictions.", + "relatedTools": [ + "genra.check_applicability_domain" + ], + "references": [ + { + "citation": "GenRA Technical Manual 2025" + } + ] + }, + "goodnessOfFitMetrics": { + "internalValidation": [ + { + "name": "Coverage", + "value": 0.78, + "dataset": "historical read-across cases" + } + ], + "externalValidation": [ + { + "name": "Accuracy", + "value": 0.72, + "dataset": "case studies" + }, + { + "name": "Precision", + "value": 0.69, + "dataset": "case studies" + } + ] + }, + "mechanisticInterpretation": { + "summary": "Evidence weighting prioritizes analogues sharing mode-of-action descriptors and toxicity pathways.", + "confidence": "moderate" + } + }, + "trainingData": { + "dataset": { + "name": "GenRA Analogue Library 2025", + "source": "EPA CompTox", + "description": "Curated analogue relationships with experimental endpoints" + }, + "records": 1200, + "chemicalCount": 850, + "descriptorCount": 60, + "preprocessing": "Harmonized identifiers, removal of conflicting analogue evidence, assignment of mode-of-action tags." 
+ }, + "evaluationData": { + "datasets": [ + { + "name": "GenRA Case Studies", + "source": "EPA Internal", + "description": "Historical regulatory read-across decisions" + } + ], + "validationApproach": "Leave-one-target-out analogue removal", + "metrics": [ + { + "name": "Balanced Accuracy", + "value": 0.71, + "dataset": "case studies" + }, + { + "name": "Coverage", + "value": 0.76, + "dataset": "case studies" + } + ], + "applicabilityDomainCoverage": 0.82 + }, + "applicabilityDomain": { + "summary": "Composite checks for analogue similarity, data completeness, and evidence diversity.", + "criteria": [ + { + "type": "similarity", + "description": "At least three structural analogues with Tanimoto similarity >= 0.7.", + "parameters": { + "threshold": 0.7, + "minAnalogues": 3 + } + }, + { + "type": "coverage", + "description": "Analogues must span at least two evidence domains (in vivo, in vitro, in silico).", + "parameters": { + "minDomains": 2 + } + }, + { + "type": "expert_rule", + "description": "Mode-of-action tags must align across selected analogues.", + "parameters": { + "allowableMismatch": 1 + } + } + ], + "enforcement": { + "mcpTools": [ + "genra.check_applicability_domain" + ], + "policy": "block", + "errorCodes": [ + "GENRA_AD_FAIL" + ] + }, + "confidenceBands": [ + { + "label": "Robust", + "minConfidence": 0.8, + "actions": [ + "Eligible for automated dossier generation" + ] + }, + { + "label": "Limited", + "minConfidence": 0.5, + "actions": [ + "Requires SME justification and documentation" + ] + } + ] + }, + "ethicalConsiderations": { + "risks": [ + "Analogues may introduce hidden biases when evidence base is uneven." + ], + "mitigations": [ + "Require documentation of analogue selection rationale and SME oversight." + ], + "humanOversight": "SME approval mandated for final predictions and evidence narratives." + }, + "provenance": { + "sourceRepositories": [ + "https://github.com/epa/genra" + ], + "build": { + "id": "genra-build-2025-03-05", + "timestamp": "2025-03-05T09:15:00Z", + "environment": "EPA CICD" + }, + "checksum": { + "algorithm": "SHA256", + "value": "3ce4ec4983d3e7c6b2089b967679f5fc293096750293eb98d2b211f780a1f95e" + }, + "reviewStatus": { + "approvedBy": [ + { + "name": "Regulatory Affairs Read-Across Committee" + } + ], + "approvalDate": "2025-03-10" + } + } +} \ No newline at end of file diff --git a/src/epacomp_tox/data/metadata/model_cards/opera_property.json b/src/epacomp_tox/data/metadata/model_cards/opera_property.json new file mode 100644 index 0000000..edc6439 --- /dev/null +++ b/src/epacomp_tox/data/metadata/model_cards/opera_property.json @@ -0,0 +1,215 @@ +{ + "schemaVersion": "1.0", + "modelDetails": { + "name": "OPERA Property Predictions", + "version": "3.6.1", + "modelType": "QSAR", + "description": "Predicts physicochemical properties (LogP, water solubility, vapor pressure) using OPERA ensemble models.", + "developers": [ + { + "name": "NIEHS NICEATM" + }, + { + "name": "EPA Computational Toxicology" + } + ], + "organizations": [ + "US EPA", + "NIEHS" + ], + "releaseDate": "2025-02-20", + "license": "OPERA EULA" + }, + "intendedUse": { + "summary": "Supports exposure assessment workflows requiring physicochemical property estimates for organic chemicals.", + "inScope": [ + "Neutral organic chemicals", + "Screening-level exposure modelling" + ], + "outOfScope": [ + "Inorganic substances", + "Highly ionized species" + ], + "limitations": [ + "Predictions outside training descriptor ranges may be unreliable." 
+ ], + "warnings": [ + "Verify units when integrating with downstream PBPK models." + ], + "regulatoryPrograms": [ + "TSCA Existing Chemicals", + "REACH dossier support" + ] + }, + "oecdValidationPrinciples": { + "definedEndpoint": { + "description": "LogP, water solubility (log mol/L), vapor pressure (log Pa)", + "unit": "log scale" + }, + "unambiguousAlgorithm": { + "summary": "Random forest and support vector regression ensembles with descriptor selection.", + "methodClass": "Ensemble", + "implementation": "OPERA CLI 3.6", + "references": [ + { + "citation": "Mansouri et al. 2018", + "doi": "10.1021/acs.jcim.7b00524" + } + ] + }, + "definedApplicabilityDomain": { + "summary": "Combines leverage statistics with similarity to nearest neighbors in descriptor space.", + "relatedTools": [ + "opera.check_applicability_domain" + ], + "references": [ + { + "citation": "OPERA Technical Documentation 2024" + } + ] + }, + "goodnessOfFitMetrics": { + "internalValidation": [ + { + "name": "R2", + "value": 0.92, + "dataset": "training", + "description": "LogP" + } + ], + "externalValidation": [ + { + "name": "RMSE", + "value": 0.31, + "dataset": "external", + "description": "LogP", + "units": "log" + }, + { + "name": "RMSE", + "value": 0.45, + "dataset": "external", + "description": "Water Solubility", + "units": "log mol/L" + } + ] + }, + "mechanisticInterpretation": { + "summary": "Descriptors capture polar surface area, hydrogen bonding, and fragment counts aligned with property trends.", + "confidence": "moderate" + } + }, + "trainingData": { + "dataset": { + "name": "OPERA Training Library 2024", + "source": "EPA CompTox", + "description": "Consolidated experimental property measurements" + }, + "records": 2500, + "chemicalCount": 2200, + "descriptorCount": 45, + "preprocessing": "Standardized structures (neutralized), removal of salts, descriptor scaling.", + "classBalance": "Continuous endpoints" + }, + "evaluationData": { + "datasets": [ + { + "name": "OPERA External Validation", + "source": "EPA CompTox", + "description": "Hold-out dataset of curated property measurements" + } + ], + "validationApproach": "80/20 train-test split with 5-fold cross-validation", + "metrics": [ + { + "name": "MAE", + "value": 0.28, + "dataset": "external", + "description": "LogP" + }, + { + "name": "R2", + "value": 0.85, + "dataset": "external", + "description": "Vapor Pressure" + } + ], + "applicabilityDomainCoverage": 0.9 + }, + "applicabilityDomain": { + "summary": "Descriptor range checks plus nearest-neighbor similarity enforced before prediction delivery.", + "criteria": [ + { + "type": "descriptor_range", + "description": "Each descriptor must fall within training min/max after scaling.", + "parameters": { + "mode": "min_max" + } + }, + { + "type": "similarity", + "description": "Average Tanimoto similarity to top 5 training neighbors >= 0.6.", + "parameters": { + "threshold": 0.6, + "neighbors": 5 + } + } + ], + "enforcement": { + "mcpTools": [ + "opera.check_applicability_domain" + ], + "policy": "warn", + "errorCodes": [ + "OPERA_AD_WARN" + ] + }, + "confidenceBands": [ + { + "label": "High", + "minConfidence": 0.75, + "actions": [ + "Auto-approve" + ] + }, + { + "label": "Low", + "minConfidence": 0.5, + "actions": [ + "Escalate to SME" + ] + } + ], + "references": [ + { + "citation": "OPERA Manual 2025" + } + ] + }, + "ethicalConsiderations": { + "risks": [ + "Limited coverage for highly polar or reactive chemicals." + ], + "mitigations": [ + "Flag low-confidence predictions for manual review." 
+ ] + }, + "provenance": { + "sourceRepositories": [ + "https://github.com/kmansouri/OPERA" + ], + "build": { + "id": "opera-build-2025-02-20", + "timestamp": "2025-02-20T14:30:00Z", + "environment": "GitHub Actions" + }, + "checksum": { + "algorithm": "SHA256", + "value": "79af18b3515e9a1d69037e2a154c7c6088cf3fae8c388ff901abdadf5a304a52" + }, + "reviewStatus": { + "approvedBy": [], + "notes": "Pending SME review" + } + } +} \ No newline at end of file diff --git a/src/epacomp_tox/data/metadata/model_cards/test_consensus.json b/src/epacomp_tox/data/metadata/model_cards/test_consensus.json new file mode 100644 index 0000000..c92aed0 --- /dev/null +++ b/src/epacomp_tox/data/metadata/model_cards/test_consensus.json @@ -0,0 +1,199 @@ +{ + "schemaVersion": "1.0", + "modelDetails": { + "name": "TEST Consensus Acute Toxicity", + "version": "5.2.0", + "modelType": "QSAR", + "description": "Predicts acute aquatic toxicity using consensus of TEST models.", + "developers": [ + { + "name": "EPA Computational Toxicology" + } + ], + "organizations": [ + "US EPA" + ], + "releaseDate": "2025-01-15", + "license": "EPA Terms of Use" + }, + "intendedUse": { + "summary": "Supports screening-level acute aquatic toxicity assessments for organic chemicals.", + "inScope": [ + "Non-ionic organic chemicals", + "Screening-level prioritization" + ], + "outOfScope": [ + "Ionic species", + "Metals" + ], + "limitations": [ + "Do not apply to mixtures without expert review." + ], + "warnings": [ + "Use applicability domain checks prior to decision-making." + ], + "regulatoryPrograms": [ + "TSCA New Chemicals" + ] + }, + "oecdValidationPrinciples": { + "definedEndpoint": { + "description": "96-hour fathead minnow LC50", + "unit": "mg/L", + "speciesOrSystem": "Pimephales promelas" + }, + "unambiguousAlgorithm": { + "summary": "Consensus of multiple QSAR models combining regression and classification outputs.", + "methodClass": "Ensemble", + "implementation": "TEST v5.2", + "references": [ + { + "citation": "TEST user manual 2024" + } + ] + }, + "definedApplicabilityDomain": { + "summary": "Leverage and descriptor range checks against training set.", + "references": [ + { + "citation": "Mansouri et al. 2018", + "doi": "10.1021/acs.jcim.7b00524" + } + ], + "relatedTools": [ + "test.check_applicability_domain" + ] + }, + "goodnessOfFitMetrics": { + "internalValidation": [ + { + "name": "R2", + "value": 0.81, + "dataset": "training" + } + ], + "externalValidation": [ + { + "name": "Q2", + "value": 0.74, + "dataset": "external" + }, + { + "name": "RMSE", + "value": 0.45, + "units": "log10" + } + ] + }, + "mechanisticInterpretation": { + "summary": "Descriptors capture hydrophobicity and molecular size consistent with narcosis mode of action.", + "confidence": "moderate" + } + }, + "trainingData": { + "dataset": { + "name": "TEST Training Set 2024", + "source": "EPA CompTox", + "description": "Curated LC50 dataset for freshwater species" + }, + "records": 580, + "chemicalCount": 560, + "descriptorCount": 35, + "preprocessing": "Standardization of chemical identifiers and removal of salts." 
+ }, + "evaluationData": { + "datasets": [ + { + "name": "Fathead Minnow External", + "source": "EPA AQUIRE", + "description": "Independent validation dataset" + } + ], + "validationApproach": "Hold-out external validation", + "metrics": [ + { + "name": "RMSE", + "value": 0.52, + "dataset": "external", + "units": "log10" + } + ], + "applicabilityDomainCoverage": 0.88 + }, + "applicabilityDomain": { + "summary": "Combines leverage thresholds with descriptor range checks.", + "criteria": [ + { + "type": "descriptor_range", + "description": "All descriptors must fall within 5th-95th percentile of training set.", + "parameters": { + "percentileLower": 0.05, + "percentileUpper": 0.95 + } + }, + { + "type": "similarity", + "description": "Tanimoto similarity to nearest neighbor must exceed 0.65.", + "parameters": { + "threshold": 0.65 + } + } + ], + "enforcement": { + "mcpTools": [ + "test.check_applicability_domain" + ], + "policy": "block", + "errorCodes": [ + "TEST_AD_FAIL" + ] + }, + "confidenceBands": [ + { + "label": "High", + "minConfidence": 0.8, + "actions": [ + "Eligible for automated workflow" + ] + }, + { + "label": "Moderate", + "minConfidence": 0.6, + "actions": [ + "Requires SME review" + ] + } + ] + }, + "ethicalConsiderations": { + "risks": [ + "Model is biased toward narcosis-class chemicals." + ], + "mitigations": [ + "Flag predictions with low similarity for SME review." + ], + "humanOversight": "Regulatory reviewer must approve high-impact predictions." + }, + "provenance": { + "sourceRepositories": [ + "https://github.com/epa/test" + ], + "build": { + "id": "build-2025-01-15", + "timestamp": "2025-01-15T10:00:00Z", + "environment": "GitHub Actions" + }, + "checksum": { + "algorithm": "SHA256", + "value": "4a2a288f4f9b15727ea63a2c70a786844bab608d75d0d70fd0d0d7e0dad32f90" + }, + "reviewStatus": { + "approvedBy": [ + { + "name": "Regulatory Affairs" + } + ], + "approvalDate": "2025-02-01" + } + } +} \ No newline at end of file diff --git a/src/epacomp_tox/data/schemas/README.md b/src/epacomp_tox/data/schemas/README.md new file mode 100644 index 0000000..aa375cc --- /dev/null +++ b/src/epacomp_tox/data/schemas/README.md @@ -0,0 +1,28 @@ +# Portable CompTox Schemas + +The `schemas/` directory publishes portable evidence objects for cross-suite handoff. + +- `docs/contracts/schemas/`: MCP response wrappers for live tool responses. +- `schemas/`: portable objects that downstream MCPs and orchestrators can consume without depending on a specific transport call. + +Portable schema versions are intentionally independent from package patch releases. +For example, a package cleanup release may tighten docs, tests, or release tooling +without changing the `*.v1.json` portable object family. + +Current portable objects: + +- `chemicalIdentityRecord.v1.json` +- `hazardEvidenceSummary.v1.json` +- `exposureEvidenceSummary.v1.json` +- `bioactivityEvidenceSummary.v1.json` +- `aopLinkageSummary.v1.json` +- `pbpkContextBundle.v1.json` +- `comptoxEvidencePack.v1.json` +- `comptox_model_card.schema.json` + +Design rules: + +- Objects are lean and composable. +- CompTox owns evidence ingress and handoff packaging, not downstream AOP semantics or PBPK execution outputs. +- Model-card semantics are reused from `comptox_model_card.schema.json` instead of cloned into a second schema family. +- Example instances live under `schemas/examples/` and are validated in tests. 
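+
+As an illustration of the last rule, here is a minimal validation sketch (not the
+repository's actual test code; it assumes the `jsonschema` and `referencing`
+packages and the directory layout described above):
+
+```python
+import json
+from pathlib import Path
+
+from jsonschema import Draft202012Validator  # requires jsonschema >= 4.18
+from referencing import Registry, Resource
+
+SCHEMA_DIR = Path("src/epacomp_tox/data/schemas")  # assumed location
+
+
+def load(path: Path) -> dict:
+    return json.loads(path.read_text())
+
+
+# Register each portable schema under its "$id" so cross-schema "$ref" URLs
+# (e.g. comptoxEvidencePack.v1.json referencing the other portable objects)
+# resolve locally, without network access.
+registry = Registry()
+for schema_path in SCHEMA_DIR.glob("*.json"):
+    schema = load(schema_path)
+    if "$id" in schema:
+        registry = registry.with_resource(
+            uri=schema["$id"], resource=Resource.from_contents(schema)
+        )
+
+schema = load(SCHEMA_DIR / "chemicalIdentityRecord.v1.json")
+example = load(SCHEMA_DIR / "examples" / "chemicalIdentityRecord.example.json")
+Draft202012Validator(schema, registry=registry).validate(example)  # raises on failure
+```
+
+The exact test harness may differ; the invariant is that every `*.example.json`
+instance must validate against its corresponding `*.v1.json` schema.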
diff --git a/src/epacomp_tox/data/schemas/aopLinkageSummary.v1.json b/src/epacomp_tox/data/schemas/aopLinkageSummary.v1.json new file mode 100644 index 0000000..55c1c43 --- /dev/null +++ b/src/epacomp_tox/data/schemas/aopLinkageSummary.v1.json @@ -0,0 +1,232 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://epa.gov/comptox/schemas/aopLinkageSummary.v1.json", + "title": "CompTox AOP Linkage Summary v1", + "description": "Portable CompTox-side AOP linkage object for downstream mechanistic consumers.", + "type": "object", + "additionalProperties": false, + "required": [ + "chemicalRef", + "lookupMode", + "mappings", + "supportingAssays", + "confidence", + "provenance" + ], + "properties": { + "chemicalRef": { + "$ref": "#/$defs/chemicalRef" + }, + "lookupMode": { + "type": "string", + "enum": [ + "chemical", + "dtxsid", + "assay", + "target" + ] + }, + "mappings": { + "type": "array", + "items": { + "$ref": "#/$defs/mapping" + } + }, + "supportingAssays": { + "type": "array", + "items": { + "$ref": "#/$defs/assayReference" + } + }, + "confidence": { + "$ref": "#/$defs/confidence" + }, + "identityResolution": { + "type": ["object", "null"], + "additionalProperties": true + }, + "knownDataGaps": { + "type": "array", + "items": {"type": "string"} + }, + "limitations": { + "type": "array", + "items": {"type": "string"} + }, + "generatedFromTools": { + "type": "array", + "items": {"type": "string"} + }, + "provenanceSummary": { + "type": "object", + "additionalProperties": true + }, + "provenance": { + "$ref": "#/$defs/provenance" + } + }, + "$defs": { + "chemicalRef": { + "type": "object", + "additionalProperties": false, + "required": [ + "dtxsid", + "preferredName" + ], + "properties": { + "dtxsid": { + "type": "string", + "pattern": "^DTXSID[0-9A-Z]+$" + }, + "preferredName": { + "type": "string" + }, + "casrn": { + "type": [ + "string", + "null" + ] + } + } + }, + "mapping": { + "type": "object", + "additionalProperties": false, + "required": [ + "aopId", + "eventType", + "eventLabel" + ], + "properties": { + "aopId": { + "type": "string" + }, + "aopTitle": { + "type": "string" + }, + "keyEventId": { + "type": "string" + }, + "eventType": { + "type": "string" + }, + "eventLabel": { + "type": "string" + }, + "relationship": { + "type": "string" + }, + "evidenceDirection": { + "type": "string" + }, + "confidence": { + "type": "number", + "minimum": 0, + "maximum": 1 + } + } + }, + "assayReference": { + "type": "object", + "additionalProperties": false, + "required": [ + "aeid", + "assayName" + ], + "properties": { + "aeid": { + "type": "string" + }, + "assayName": { + "type": "string" + }, + "targetName": { + "type": "string" + } + } + }, + "confidence": { + "type": "object", + "additionalProperties": false, + "required": [ + "score" + ], + "properties": { + "score": { + "type": "number", + "minimum": 0, + "maximum": 1 + }, + "band": { + "type": "string" + }, + "basis": { + "type": "string" + } + } + }, + "sourceRecord": { + "type": "object", + "additionalProperties": false, + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string" + }, + "toolName": { + "type": "string" + }, + "url": { + "type": "string", + "format": "uri" + }, + "retrievedAt": { + "type": "string", + "format": "date-time" + }, + "citation": { + "type": "string" + } + } + }, + "provenance": { + "type": "object", + "additionalProperties": false, + "required": [ + "sourceMcp", + "generatedAt", + "sources" + ], + "properties": { + "sourceMcp": { + "type": "string" + }, + 
"generatedAt": { + "type": "string", + "format": "date-time" + }, + "generatedBy": { + "type": "string" + }, + "traceId": { + "type": "string" + }, + "sources": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/$defs/sourceRecord" + } + }, + "notes": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } +} diff --git a/src/epacomp_tox/data/schemas/bioactivityEvidenceSummary.v1.json b/src/epacomp_tox/data/schemas/bioactivityEvidenceSummary.v1.json new file mode 100644 index 0000000..60b3ca8 --- /dev/null +++ b/src/epacomp_tox/data/schemas/bioactivityEvidenceSummary.v1.json @@ -0,0 +1,252 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://epa.gov/comptox/schemas/bioactivityEvidenceSummary.v1.json", + "title": "CompTox Bioactivity Evidence Summary v1", + "description": "Portable summary of CompTox bioactivity, assay, target, and AOP crosswalk information.", + "type": "object", + "additionalProperties": false, + "required": [ + "chemicalRef", + "summary", + "assays", + "targets", + "aopMappings", + "provenance" + ], + "properties": { + "chemicalRef": { + "$ref": "#/$defs/chemicalRef" + }, + "summary": { + "$ref": "#/$defs/summary" + }, + "assays": { + "type": "array", + "items": { + "$ref": "#/$defs/assaySummary" + } + }, + "targets": { + "type": "array", + "items": { + "$ref": "#/$defs/targetSummary" + } + }, + "aopMappings": { + "type": "array", + "items": { + "$ref": "#/$defs/aopMapping" + } + }, + "provenance": { + "$ref": "#/$defs/provenance" + } + }, + "$defs": { + "chemicalRef": { + "type": "object", + "additionalProperties": false, + "required": [ + "dtxsid", + "preferredName" + ], + "properties": { + "dtxsid": { + "type": "string", + "pattern": "^DTXSID[0-9A-Z]+$" + }, + "preferredName": { + "type": "string" + }, + "casrn": { + "type": [ + "string", + "null" + ] + } + } + }, + "summary": { + "type": "object", + "additionalProperties": false, + "required": [ + "assayCount", + "targetCount" + ], + "properties": { + "assayCount": { + "type": "integer", + "minimum": 0 + }, + "activeAssayCount": { + "type": "integer", + "minimum": 0 + }, + "targetCount": { + "type": "integer", + "minimum": 0 + }, + "referenceAssaySet": { + "type": "string" + }, + "notes": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "assaySummary": { + "type": "object", + "additionalProperties": false, + "required": [ + "aeid", + "assayName" + ], + "properties": { + "aeid": { + "type": "string" + }, + "assayName": { + "type": "string" + }, + "assayComponent": { + "type": "string" + }, + "activityDirection": { + "type": "string" + }, + "activityValue": { + "type": [ + "number", + "string", + "null" + ] + }, + "unit": { + "type": "string" + }, + "hitCall": { + "type": [ + "boolean", + "null" + ] + } + } + }, + "targetSummary": { + "type": "object", + "additionalProperties": false, + "required": [ + "targetName" + ], + "properties": { + "targetName": { + "type": "string" + }, + "geneSymbol": { + "type": "string" + }, + "targetFamily": { + "type": "string" + }, + "assayCount": { + "type": "integer", + "minimum": 0 + } + } + }, + "aopMapping": { + "type": "object", + "additionalProperties": false, + "required": [ + "aopId", + "eventType", + "eventLabel" + ], + "properties": { + "aopId": { + "type": "string" + }, + "aopTitle": { + "type": "string" + }, + "eventType": { + "type": "string" + }, + "eventLabel": { + "type": "string" + }, + "confidence": { + "type": "number", + "minimum": 0, + "maximum": 1 + } + } + }, + "sourceRecord": { + 
"type": "object", + "additionalProperties": false, + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string" + }, + "toolName": { + "type": "string" + }, + "url": { + "type": "string", + "format": "uri" + }, + "retrievedAt": { + "type": "string", + "format": "date-time" + }, + "citation": { + "type": "string" + } + } + }, + "provenance": { + "type": "object", + "additionalProperties": false, + "required": [ + "sourceMcp", + "generatedAt", + "sources" + ], + "properties": { + "sourceMcp": { + "type": "string" + }, + "generatedAt": { + "type": "string", + "format": "date-time" + }, + "generatedBy": { + "type": "string" + }, + "traceId": { + "type": "string" + }, + "sources": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/$defs/sourceRecord" + } + }, + "notes": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } +} diff --git a/src/epacomp_tox/data/schemas/chemicalIdentityRecord.v1.json b/src/epacomp_tox/data/schemas/chemicalIdentityRecord.v1.json new file mode 100644 index 0000000..a3d13c5 --- /dev/null +++ b/src/epacomp_tox/data/schemas/chemicalIdentityRecord.v1.json @@ -0,0 +1,123 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://epa.gov/comptox/schemas/chemicalIdentityRecord.v1.json", + "title": "CompTox Chemical Identity Record v1", + "description": "Portable chemical identity object for cross-suite evidence handoff.", + "type": "object", + "additionalProperties": false, + "required": [ + "dtxsid", + "preferredName", + "provenance" + ], + "properties": { + "dtxsid": { + "type": "string", + "pattern": "^DTXSID[0-9A-Z]+$", + "description": "DSSTox substance identifier." + }, + "preferredName": { + "type": "string", + "description": "Preferred CompTox chemical name." + }, + "casrn": { + "type": [ + "string", + "null" + ], + "description": "CAS Registry Number when available." + }, + "inchikey": { + "type": [ + "string", + "null" + ], + "description": "Standard InChIKey when available." + }, + "smiles": { + "type": [ + "string", + "null" + ], + "description": "Canonical or source-provided SMILES when available." + }, + "synonyms": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Useful alternate names for downstream matching." 
+ }, + "provenance": { + "$ref": "#/$defs/provenance" + } + }, + "$defs": { + "sourceRecord": { + "type": "object", + "additionalProperties": false, + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string" + }, + "toolName": { + "type": "string" + }, + "url": { + "type": "string", + "format": "uri" + }, + "retrievedAt": { + "type": "string", + "format": "date-time" + }, + "citation": { + "type": "string" + }, + "license": { + "type": "string" + } + } + }, + "provenance": { + "type": "object", + "additionalProperties": false, + "required": [ + "sourceMcp", + "generatedAt", + "sources" + ], + "properties": { + "sourceMcp": { + "type": "string" + }, + "generatedAt": { + "type": "string", + "format": "date-time" + }, + "generatedBy": { + "type": "string" + }, + "traceId": { + "type": "string" + }, + "sources": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/$defs/sourceRecord" + } + }, + "notes": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } +} diff --git a/src/epacomp_tox/data/schemas/comptoxEvidencePack.v1.json b/src/epacomp_tox/data/schemas/comptoxEvidencePack.v1.json new file mode 100644 index 0000000..f1cf001 --- /dev/null +++ b/src/epacomp_tox/data/schemas/comptoxEvidencePack.v1.json @@ -0,0 +1,251 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://epa.gov/comptox/schemas/comptoxEvidencePack.v1.json", + "title": "CompTox Evidence Pack v1", + "description": "Portable cross-domain evidence package assembled from CompTox retrieval outputs.", + "type": "object", + "additionalProperties": false, + "required": [ + "chemicalIdentity", + "metadata", + "audit", + "semanticCoverage" + ], + "properties": { + "chemicalIdentity": { + "$ref": "https://epa.gov/comptox/schemas/chemicalIdentityRecord.v1.json" + }, + "hazardEvidenceSummary": { + "oneOf": [ + { + "$ref": "https://epa.gov/comptox/schemas/hazardEvidenceSummary.v1.json" + }, + { + "type": "null" + } + ] + }, + "exposureEvidenceSummary": { + "oneOf": [ + { + "$ref": "https://epa.gov/comptox/schemas/exposureEvidenceSummary.v1.json" + }, + { + "type": "null" + } + ] + }, + "bioactivityEvidenceSummary": { + "oneOf": [ + { + "$ref": "https://epa.gov/comptox/schemas/bioactivityEvidenceSummary.v1.json" + }, + { + "type": "null" + } + ] + }, + "aopLinkageSummary": { + "oneOf": [ + { + "$ref": "https://epa.gov/comptox/schemas/aopLinkageSummary.v1.json" + }, + { + "type": "null" + } + ] + }, + "pbpkContextBundle": { + "oneOf": [ + { + "$ref": "https://epa.gov/comptox/schemas/pbpkContextBundle.v1.json" + }, + { + "type": "null" + } + ] + }, + "identityResolution": { + "type": ["object", "null"], + "additionalProperties": true + }, + "knownDataGaps": { + "type": "array", + "items": {"type": "string"} + }, + "limitations": { + "type": "array", + "items": {"type": "string"} + }, + "generatedFromTools": { + "type": "array", + "items": {"type": "string"} + }, + "provenanceSummary": { + "type": "object", + "additionalProperties": true + }, + "metadata": { + "$ref": "#/$defs/metadata" + }, + "audit": { + "$ref": "#/$defs/audit" + }, + "semanticCoverage": { + "$ref": "#/$defs/semanticCoverage" + } + }, + "$defs": { + "metadata": { + "type": "object", + "additionalProperties": false, + "required": [ + "packId", + "sourceMcp", + "createdAt", + "suiteRole" + ], + "properties": { + "packId": { + "type": "string" + }, + "sourceMcp": { + "type": "string" + }, + "createdAt": { + "type": "string", + "format": "date-time" + }, + "suiteRole": { + "type": "string", + 
"const": "evidence-federation" + }, + "downstreamConsumers": { + "type": "array", + "items": { + "type": "string" + } + }, + "modelCardRefs": { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "https://epa.gov/comptox/schemas/comptox-model-card.schema.json" + }, + { + "$ref": "#/$defs/modelCardReference" + } + ] + } + } + } + }, + "modelCardReference": { + "type": "object", + "additionalProperties": false, + "required": [ + "modelName", + "modelVersion" + ], + "properties": { + "modelName": { + "type": "string" + }, + "modelVersion": { + "type": "string" + }, + "endpoint": { + "type": "string" + }, + "cardUri": { + "type": "string", + "format": "uri" + }, + "limitations": { + "type": "array", + "items": {"type": "string"} + }, + "warnings": { + "type": "array", + "items": {"type": "string"} + } + } + }, + "audit": { + "type": "object", + "additionalProperties": false, + "required": [ + "generatedAt", + "generatedBy", + "sourceTools" + ], + "properties": { + "generatedAt": { + "type": "string", + "format": "date-time" + }, + "generatedBy": { + "type": "string" + }, + "requestId": { + "type": "string" + }, + "sourceTools": { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + }, + "notes": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "semanticCoverage": { + "type": "object", + "additionalProperties": false, + "required": [ + "identity", + "hazard", + "exposure", + "bioactivity", + "aopLinkage", + "pbpkContext" + ], + "properties": { + "identity": { + "$ref": "#/$defs/coverageLevel" + }, + "hazard": { + "$ref": "#/$defs/coverageLevel" + }, + "exposure": { + "$ref": "#/$defs/coverageLevel" + }, + "bioactivity": { + "$ref": "#/$defs/coverageLevel" + }, + "aopLinkage": { + "$ref": "#/$defs/coverageLevel" + }, + "pbpkContext": { + "$ref": "#/$defs/coverageLevel" + } + } + }, + "coverageLevel": { + "type": "string", + "enum": [ + "none", + "linked", + "summary", + "detailed" + ] + } + } +} diff --git a/src/epacomp_tox/data/schemas/comptox_model_card.schema.json b/src/epacomp_tox/data/schemas/comptox_model_card.schema.json new file mode 100644 index 0000000..7d988fd --- /dev/null +++ b/src/epacomp_tox/data/schemas/comptox_model_card.schema.json @@ -0,0 +1,658 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://epa.gov/comptox/schemas/comptox-model-card.schema.json", + "title": "CompTox ModelCard", + "description": "Machine-readable CompTox model card aligned with OECD QSAR validation principles and MCP requirements.", + "type": "object", + "required": [ + "schemaVersion", + "modelDetails", + "intendedUse", + "oecdValidationPrinciples", + "trainingData", + "evaluationData", + "applicabilityDomain", + "ethicalConsiderations", + "provenance" + ], + "additionalProperties": true, + "properties": { + "schemaVersion": { + "type": "string", + "pattern": "^v?\\d+\\.\\d+(\\.\\d+)?$", + "description": "Semantic version of the CompTox model card schema used to validate this document." + }, + "modelDetails": { + "type": "object", + "description": "Core identity and lifecycle information for the model.", + "required": [ + "name", + "version", + "description", + "modelType", + "developers", + "organizations", + "releaseDate" + ], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Canonical model name (e.g., TEST Consensus Acute Toxicity)." + }, + "version": { + "type": "string", + "description": "Model semantic version or tagged release identifier." 
+ }, + "modelType": { + "type": "string", + "description": "High-level classification for the model (e.g., QSAR, Read-Across, PBPK, Analogue-Search)." + }, + "description": { + "type": "string", + "description": "Short overview of the model purpose and capabilities." + }, + "developers": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/person" }, + "description": "Primary model developers or maintainers." + }, + "organizations": { + "type": "array", + "minItems": 1, + "items": { "type": "string" }, + "description": "Affiliated organizations sponsoring or maintaining the model." + }, + "releaseDate": { + "type": "string", + "format": "date", + "description": "Date when this model version was released to production." + }, + "lastUpdated": { + "type": "string", + "format": "date", + "description": "Date of the latest material update to this model card or implementation." + }, + "license": { + "type": "string", + "description": "License governing model distribution or use." + }, + "tags": { + "type": "array", + "items": { "type": "string" }, + "description": "Keywords describing the domain, endpoints, or regulatory programs." + }, + "documentation": { + "type": "array", + "items": { "$ref": "#/$defs/reference" }, + "description": "Links to human-readable technical documentation, publications, or SOPs." + } + } + }, + "intendedUse": { + "type": "object", + "description": "Specific use cases, audiences, and limitations for the model.", + "required": ["summary", "inScope", "outOfScope"], + "additionalProperties": false, + "properties": { + "summary": { + "type": "string", + "description": "Narrative summary of intended purpose and regulatory context." + }, + "inScope": { + "type": "array", + "items": { "type": "string" }, + "description": "Explicitly supported use cases, decision contexts, or chemical classes." + }, + "outOfScope": { + "type": "array", + "items": { "type": "string" }, + "description": "Use cases, populations, or chemicals for which the model should not be applied." + }, + "limitations": { + "type": "array", + "items": { "type": "string" }, + "description": "Known scientific or technical limitations relevant to agents or reviewers." + }, + "warnings": { + "type": "array", + "items": { "type": "string" }, + "description": "Cautionary statements that must be surfaced alongside predictions." + }, + "regulatoryPrograms": { + "type": "array", + "items": { "type": "string" }, + "description": "Regulatory programs or guidance documents under which this model is accepted." + } + } + }, + "oecdValidationPrinciples": { + "type": "object", + "description": "Compliance details for the OECD five principles of QSAR validation.", + "required": [ + "definedEndpoint", + "unambiguousAlgorithm", + "definedApplicabilityDomain", + "goodnessOfFitMetrics", + "mechanisticInterpretation" + ], + "additionalProperties": false, + "properties": { + "definedEndpoint": { + "type": "object", + "required": ["description", "unit"], + "additionalProperties": false, + "properties": { + "description": { + "type": "string", + "description": "Precise description of the biological or physicochemical endpoint predicted by the model." + }, + "unit": { + "type": "string", + "description": "Measurement unit associated with the endpoint, if applicable." + }, + "speciesOrSystem": { + "type": "string", + "description": "Species, biological system, or experimental context used to measure the endpoint." 
+ }, + "references": { + "type": "array", + "items": { "$ref": "#/$defs/reference" }, + "description": "Supporting references defining the endpoint." + } + } + }, + "unambiguousAlgorithm": { + "type": "object", + "required": ["summary"], + "additionalProperties": false, + "properties": { + "summary": { + "type": "string", + "description": "Concise description of the algorithm, statistical method, or machine learning architecture." + }, + "methodClass": { + "type": "string", + "description": "High-level classification (e.g., linear regression, random forest, k-NN, read-across)." + }, + "implementation": { + "type": "string", + "description": "Primary implementation reference (e.g., software package, repository URL)." + }, + "references": { + "type": "array", + "items": { "$ref": "#/$defs/reference" }, + "description": "Peer-reviewed publications or technical reports detailing the algorithm." + } + } + }, + "definedApplicabilityDomain": { + "type": "object", + "description": "Summary of AD approach; detailed machine-readable definition is in the top-level applicabilityDomain section.", + "required": ["summary"], + "additionalProperties": false, + "properties": { + "summary": { + "type": "string", + "description": "Narrative summary of the applicability domain technique(s)." + }, + "references": { + "type": "array", + "items": { "$ref": "#/$defs/reference" }, + "description": "Citations supporting the AD methodology." + }, + "relatedTools": { + "type": "array", + "items": { "type": "string" }, + "description": "Names of MCP tools that enforce this AD (e.g., test.check_applicability_domain)." + } + } + }, + "goodnessOfFitMetrics": { + "type": "object", + "description": "Quantitative performance metrics for internal and external validation.", + "additionalProperties": false, + "properties": { + "internalValidation": { + "type": "array", + "items": { "$ref": "#/$defs/metric" }, + "description": "Metrics derived from training or cross-validation." + }, + "externalValidation": { + "type": "array", + "items": { "$ref": "#/$defs/metric" }, + "description": "Metrics derived from external or hold-out datasets." + }, + "applicabilityDomainMetrics": { + "type": "array", + "items": { "$ref": "#/$defs/metric" }, + "description": "Metrics specific to AD performance (coverage, false positive rate, etc.)." + } + } + }, + "mechanisticInterpretation": { + "type": "object", + "required": ["summary"], + "additionalProperties": false, + "properties": { + "summary": { + "type": "string", + "description": "Explanation of mechanistic plausibility or descriptor relevance." + }, + "confidence": { + "type": "string", + "description": "Assessment of strength of mechanistic evidence (e.g., strong, moderate, speculative)." + }, + "references": { + "type": "array", + "items": { "$ref": "#/$defs/reference" }, + "description": "Supporting publications or expert assessments." + } + } + } + } + }, + "trainingData": { + "type": "object", + "description": "Provenance and characteristics of the training dataset.", + "required": ["dataset", "records", "chemicalCount"], + "additionalProperties": false, + "properties": { + "dataset": { "$ref": "#/$defs/dataset" }, + "records": { + "type": "integer", + "minimum": 0, + "description": "Total number of records (rows) used for training." + }, + "chemicalCount": { + "type": "integer", + "minimum": 0, + "description": "Number of unique chemicals represented." 
+ }, + "descriptorCount": { + "type": "integer", + "minimum": 0, + "description": "Number of descriptors/features used by the model." + }, + "preprocessing": { + "type": "string", + "description": "Data curation and preprocessing steps applied prior to model training." + }, + "classBalance": { + "type": "string", + "description": "Summary of class balance or distribution characteristics, if applicable." + } + } + }, + "evaluationData": { + "type": "object", + "description": "Details for external validation datasets and methodologies.", + "required": ["datasets", "validationApproach", "metrics"], + "additionalProperties": false, + "properties": { + "datasets": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/dataset" }, + "description": "External datasets used for validation." + }, + "validationApproach": { + "type": "string", + "description": "Description of the validation methodology (e.g., k-fold CV, external test set)." + }, + "metrics": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/metric" }, + "description": "Quantitative performance metrics derived from evaluation datasets." + }, + "applicabilityDomainCoverage": { + "type": "number", + "minimum": 0, + "maximum": 1, + "description": "Fraction of evaluation dataset predictions that fell within the applicability domain." + } + } + }, + "applicabilityDomain": { + "type": "object", + "description": "Machine-readable applicability domain definition used to enforce guardrails.", + "required": ["summary", "criteria", "enforcement"], + "additionalProperties": false, + "properties": { + "summary": { + "type": "string", + "description": "Overall description of the AD methodology and its role in the workflow." + }, + "criteria": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#/$defs/applicabilityCriterion" }, + "description": "List of criteria that define the applicability domain boundaries." + }, + "enforcement": { + "type": "object", + "required": ["mcpTools"], + "additionalProperties": false, + "properties": { + "mcpTools": { + "type": "array", + "items": { "type": "string" }, + "description": "Names of MCP tools responsible for enforcing the AD checks." + }, + "policy": { + "type": "string", + "description": "Policy guidance controlling how out-of-domain results are handled (e.g., block, warn)." + }, + "errorCodes": { + "type": "array", + "items": { "type": "string" }, + "description": "Expected error codes or statuses emitted when AD checks fail." + } + } + }, + "confidenceBands": { + "type": "array", + "items": { "$ref": "#/$defs/confidenceBand" }, + "description": "Optional mapping of AD confidence to downstream decision guidance." + }, + "references": { + "type": "array", + "items": { "$ref": "#/$defs/reference" }, + "description": "Citations describing the AD algorithm or validation." + } + } + }, + "ethicalConsiderations": { + "type": "object", + "description": "Bias, fairness, or ethical implications of the model.", + "required": ["risks"], + "additionalProperties": false, + "properties": { + "risks": { + "type": "array", + "items": { "type": "string" }, + "description": "Known ethical or bias-related risks to surface to users." + }, + "mitigations": { + "type": "array", + "items": { "type": "string" }, + "description": "Mitigation strategies or guidance for interpreting model outputs." + }, + "humanOversight": { + "type": "string", + "description": "Description of required human oversight or SME review." 
+ } + } + }, + "provenance": { + "type": "object", + "description": "Provenance, version control, and audit metadata.", + "required": ["sourceRepositories", "build", "checksum"], + "additionalProperties": false, + "properties": { + "sourceRepositories": { + "type": "array", + "items": { "type": "string", "format": "uri" }, + "description": "Repositories, data stores, or registries containing canonical model assets." + }, + "build": { + "type": "object", + "required": ["id", "timestamp"], + "additionalProperties": false, + "properties": { + "id": { + "type": "string", + "description": "Build identifier or commit hash used to produce the deployable artifact." + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the build was produced." + }, + "environment": { + "type": "string", + "description": "Description of build environment (e.g., GitHub Actions workflow, container digest)." + } + } + }, + "checksum": { + "type": "object", + "required": ["algorithm", "value"], + "additionalProperties": false, + "properties": { + "algorithm": { + "type": "string", + "description": "Hash algorithm used (e.g., SHA256)." + }, + "value": { + "type": "string", + "description": "Checksum value for the model artifact or dataset bundle." + } + } + }, + "versionHistory": { + "type": "array", + "items": { + "type": "object", + "required": ["version", "changes"], + "additionalProperties": false, + "properties": { + "version": { "type": "string" }, + "changes": { + "type": "array", + "items": { "type": "string" } + }, + "date": { + "type": "string", + "format": "date" + } + } + }, + "description": "Chronological record of major updates to the model or card." + }, + "reviewStatus": { + "type": "object", + "properties": { + "approvedBy": { + "type": "array", + "items": { "$ref": "#/$defs/person" }, + "description": "Stakeholders who have reviewed/approved the model card." + }, + "approvalDate": { + "type": "string", + "format": "date" + }, + "notes": { + "type": "string" + } + } + } + } + }, + "notes": { + "type": "string", + "description": "Free-form notes or additional context not captured elsewhere." + } + }, + "$defs": { + "person": { + "type": "object", + "required": ["name"], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Full name of the person." + }, + "affiliation": { + "type": "string", + "description": "Organization or institution." + }, + "email": { + "type": "string", + "format": "email", + "description": "Contact email for follow-up questions." + }, + "orcid": { + "type": "string", + "pattern": "^\\d{4}-\\d{4}-\\d{4}-\\d{3}[\\dX]$", + "description": "ORCID identifier, if available." + } + } + }, + "reference": { + "type": "object", + "required": ["citation"], + "additionalProperties": false, + "properties": { + "citation": { + "type": "string", + "description": "Human-readable citation." + }, + "doi": { + "type": "string", + "description": "Digital object identifier." + }, + "url": { + "type": "string", + "format": "uri", + "description": "Resolvable URL for the reference." + }, + "notes": { + "type": "string", + "description": "Context or relevance of the reference." + } + } + }, + "metric": { + "type": "object", + "required": ["name", "value"], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Metric name (e.g., R2, RMSE, MAE)." + }, + "value": { + "type": "number", + "description": "Numeric value for the metric." 
+ }, + "dataset": { + "type": "string", + "description": "Dataset name or split associated with the metric." + }, + "units": { + "type": "string", + "description": "Measurement units, if applicable." + }, + "description": { + "type": "string", + "description": "Additional context for the metric calculation." + } + } + }, + "dataset": { + "type": "object", + "required": ["name", "source"], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Dataset name or identifier." + }, + "source": { + "type": "string", + "description": "Data source or repository from which the dataset was obtained." + }, + "description": { + "type": "string", + "description": "Narrative description of the dataset contents." + }, + "accessUrl": { + "type": "string", + "format": "uri", + "description": "URL where the dataset can be accessed or requested." + }, + "license": { + "type": "string", + "description": "License governing dataset usage." + }, + "recordCount": { + "type": "integer", + "minimum": 0, + "description": "Number of records in the dataset." + }, + "timeRange": { + "type": "object", + "properties": { + "start": { + "type": "string", + "format": "date" + }, + "end": { + "type": "string", + "format": "date" + } + } + } + } + }, + "applicabilityCriterion": { + "type": "object", + "required": ["type", "description"], + "additionalProperties": false, + "properties": { + "type": { + "type": "string", + "enum": ["descriptor_range", "similarity", "distance", "coverage", "expert_rule", "other"], + "description": "Category of applicability domain criterion." + }, + "description": { + "type": "string", + "description": "Human-readable explanation of the criterion." + }, + "parameters": { + "type": "object", + "description": "Machine-readable parameters defining the criterion (thresholds, descriptor names, etc.)." + }, + "threshold": { + "type": "number", + "description": "Primary numeric threshold, when applicable." + }, + "unit": { + "type": "string", + "description": "Unit associated with the threshold." + }, + "references": { + "type": "array", + "items": { "$ref": "#/$defs/reference" }, + "description": "References backing the criterion rationale." + } + } + }, + "confidenceBand": { + "type": "object", + "required": ["label", "minConfidence", "actions"], + "additionalProperties": false, + "properties": { + "label": { + "type": "string", + "description": "Human-readable label for the confidence band (e.g., High, Moderate, Low)." + }, + "minConfidence": { + "type": "number", + "minimum": 0, + "maximum": 1, + "description": "Minimum confidence score (0-1) included in the band." + }, + "actions": { + "type": "array", + "items": { "type": "string" }, + "description": "Recommended actions or policy guidance when a prediction falls in this band." 
+ } + } + } + } +} diff --git a/src/epacomp_tox/data/schemas/examples/aopLinkageSummary.example.json b/src/epacomp_tox/data/schemas/examples/aopLinkageSummary.example.json new file mode 100644 index 0000000..6a3e171 --- /dev/null +++ b/src/epacomp_tox/data/schemas/examples/aopLinkageSummary.example.json @@ -0,0 +1,50 @@ +{ + "chemicalRef": { + "dtxsid": "DTXSID7020182", + "preferredName": "Bisphenol A", + "casrn": "80-05-7" + }, + "lookupMode": "dtxsid", + "mappings": [ + { + "aopId": "AOP:42", + "aopTitle": "Estrogen receptor activation leading to reproductive effects", + "keyEventId": "KE:1735", + "eventType": "molecular_initiating_event", + "eventLabel": "Activation of estrogen receptor", + "relationship": "linked_via_assay_mapping", + "evidenceDirection": "supports", + "confidence": 0.81 + } + ], + "supportingAssays": [ + { + "aeid": "AEID:1856", + "assayName": "ATG_ERa_TRANS_up", + "targetName": "Estrogen receptor alpha" + } + ], + "confidence": { + "score": 0.79, + "band": "moderate", + "basis": "Single-target concordance across linked ToxCast assays and AOP crosswalk coverage." + }, + "provenance": { + "sourceMcp": "epacomp-tox-mcp", + "generatedAt": "2026-03-21T10:00:00Z", + "generatedBy": "build_aop_linkage_summary", + "traceId": "ctx-example-aop-001", + "sources": [ + { + "name": "CompTox Bioactivity AOP mapping", + "toolName": "get_bioactivity_aop", + "url": "https://comptox.epa.gov/dashboard", + "retrievedAt": "2026-03-21T09:59:10Z", + "citation": "EPA CompTox AOP crosswalk endpoint" + } + ], + "notes": [ + "CompTox-side linkage object only; downstream AOP normalization belongs in aop-mcp." + ] + } +} diff --git a/src/epacomp_tox/data/schemas/examples/bioactivityEvidenceSummary.example.json b/src/epacomp_tox/data/schemas/examples/bioactivityEvidenceSummary.example.json new file mode 100644 index 0000000..3d872c4 --- /dev/null +++ b/src/epacomp_tox/data/schemas/examples/bioactivityEvidenceSummary.example.json @@ -0,0 +1,77 @@ +{ + "chemicalRef": { + "dtxsid": "DTXSID7020182", + "preferredName": "Bisphenol A", + "casrn": "80-05-7" + }, + "summary": { + "assayCount": 3, + "activeAssayCount": 2, + "targetCount": 2, + "referenceAssaySet": "toxcast-phase-iii", + "notes": [ + "Example summary highlighting endocrine-relevant activity." 
+ ] + }, + "assays": [ + { + "aeid": "AEID:1856", + "assayName": "ATG_ERa_TRANS_up", + "assayComponent": "estrogen receptor alpha transcriptional activation", + "activityDirection": "up", + "activityValue": 0.48, + "unit": "uM", + "hitCall": true + }, + { + "aeid": "AEID:1234", + "assayName": "TOX21_AR_BLA_Antagonist_ratio", + "assayComponent": "androgen receptor antagonist", + "activityDirection": "down", + "activityValue": 5.1, + "unit": "uM", + "hitCall": false + } + ], + "targets": [ + { + "targetName": "Estrogen receptor alpha", + "geneSymbol": "ESR1", + "targetFamily": "nuclear receptor", + "assayCount": 2 + }, + { + "targetName": "Estrogen receptor beta", + "geneSymbol": "ESR2", + "targetFamily": "nuclear receptor", + "assayCount": 1 + } + ], + "aopMappings": [ + { + "aopId": "AOP:42", + "aopTitle": "Estrogen receptor activation leading to reproductive effects", + "eventType": "molecular_initiating_event", + "eventLabel": "Activation of estrogen receptor", + "confidence": 0.81 + } + ], + "provenance": { + "sourceMcp": "epacomp-tox-mcp", + "generatedAt": "2026-03-21T10:00:00Z", + "generatedBy": "assemble_comptox_evidence_pack", + "traceId": "ctx-example-bioactivity-001", + "sources": [ + { + "name": "CompTox Bioactivity APIs", + "toolName": "get_bioactivity_summary_by_dtxsid", + "url": "https://comptox.epa.gov/dashboard", + "retrievedAt": "2026-03-21T09:59:00Z", + "citation": "EPA CompTox bioactivity endpoints" + } + ], + "notes": [ + "Illustrative bioactivity summary object." + ] + } +} diff --git a/src/epacomp_tox/data/schemas/examples/chemicalIdentityRecord.example.json b/src/epacomp_tox/data/schemas/examples/chemicalIdentityRecord.example.json new file mode 100644 index 0000000..bc7e01f --- /dev/null +++ b/src/epacomp_tox/data/schemas/examples/chemicalIdentityRecord.example.json @@ -0,0 +1,29 @@ +{ + "dtxsid": "DTXSID7020182", + "preferredName": "Bisphenol A", + "casrn": "80-05-7", + "inchikey": "IISBACLAFKSPIT-UHFFFAOYSA-N", + "smiles": "CC(C)(C1=CC=C(C=C1)O)C2=CC=C(C=C2)O", + "synonyms": [ + "BPA", + "4,4'-Isopropylidenediphenol" + ], + "provenance": { + "sourceMcp": "epacomp-tox-mcp", + "generatedAt": "2026-03-21T10:00:00Z", + "generatedBy": "assemble_comptox_evidence_pack", + "traceId": "ctx-example-chemical-001", + "sources": [ + { + "name": "CompTox Chemicals Dashboard API", + "toolName": "search_chemical", + "url": "https://comptox.epa.gov/dashboard", + "retrievedAt": "2026-03-21T09:58:00Z", + "citation": "EPA CompTox Chemicals Dashboard" + } + ], + "notes": [ + "Example identity record for portable handoff validation."
+ ] + } +} diff --git a/src/epacomp_tox/data/schemas/examples/comptoxEvidencePack.example.json b/src/epacomp_tox/data/schemas/examples/comptoxEvidencePack.example.json new file mode 100644 index 0000000..cfb3eb2 --- /dev/null +++ b/src/epacomp_tox/data/schemas/examples/comptoxEvidencePack.example.json @@ -0,0 +1,366 @@ +{ + "chemicalIdentity": { + "dtxsid": "DTXSID7020182", + "preferredName": "Bisphenol A", + "casrn": "80-05-7", + "inchikey": "IISBACLAFKSPIT-UHFFFAOYSA-N", + "smiles": "CC(C)(C1=CC=C(C=C1)O)C2=CC=C(C=C2)O", + "synonyms": [ + "BPA" + ], + "provenance": { + "sourceMcp": "epacomp-tox-mcp", + "generatedAt": "2026-03-21T10:00:00Z", + "generatedBy": "assemble_comptox_evidence_pack", + "traceId": "ctx-example-pack-chemical-001", + "sources": [ + { + "name": "CompTox Chemicals Dashboard API", + "toolName": "search_chemical", + "url": "https://comptox.epa.gov/dashboard", + "retrievedAt": "2026-03-21T09:58:00Z", + "citation": "EPA CompTox Chemicals Dashboard" + } + ], + "notes": [ + "Embedded identity record." + ] + } + }, + "hazardEvidenceSummary": { + "chemicalRef": { + "dtxsid": "DTXSID7020182", + "preferredName": "Bisphenol A", + "casrn": "80-05-7" + }, + "datasets": [ + { + "dataset": "toxval", + "summaryLevel": "summary", + "recordCount": 1, + "records": [ + { + "effect": "NOEL", + "value": 40, + "unit": "mg/kg-day" + } + ], + "sourceTool": "search_hazard", + "retrievedAt": "2026-03-21T09:58:00Z" + } + ], + "keyFindings": [ + { + "statement": "A repeated-dose oral NOEL is available in ToxValDB.", + "sourceDataset": "toxval", + "endpoint": "NOEL", + "value": 40, + "unit": "mg/kg-day" + } + ], + "references": [ + { + "citation": "EPA CompTox hazard datasets for Bisphenol A.", + "url": "https://comptox.epa.gov/dashboard" + } + ], + "provenance": { + "sourceMcp": "epacomp-tox-mcp", + "generatedAt": "2026-03-21T10:00:00Z", + "generatedBy": "assemble_comptox_evidence_pack", + "traceId": "ctx-example-pack-hazard-001", + "sources": [ + { + "name": "CompTox Hazard APIs", + "toolName": "search_hazard", + "url": "https://comptox.epa.gov/dashboard", + "retrievedAt": "2026-03-21T09:58:00Z", + "citation": "EPA CompTox hazard endpoints" + } + ] + }, + "requestMetadata": { + "sourceTools": [ + "search_hazard" + ], + "requestedAt": "2026-03-21T09:58:00Z", + "summaryOnly": true + } + }, + "exposureEvidenceSummary": { + "chemicalRef": { + "dtxsid": "DTXSID7020182", + "preferredName": "Bisphenol A", + "casrn": "80-05-7" + }, + "cpdat": { + "recordCount": 1, + "records": [ + { + "productUseCategory": "Food contact material" + } + ], + "sourceTool": "search_cpdat", + "retrievedAt": "2026-03-21T09:58:30Z" + }, + "seem": null, + "httk": { + "recordCount": 1, + "records": [ + { + "fractionUnboundPlasma": 0.06 + } + ], + "sourceTool": "search_httk", + "retrievedAt": "2026-03-21T09:59:00Z" + }, + "mmdb": null, + "qsurs": { + "recordCount": 1, + "records": [ + { + "useDescriptor": "plasticizer", + "probability": 0.62 + } + ], + "sourceTool": "search_qsurs", + "retrievedAt": "2026-03-21T09:59:30Z" + }, + "provenance": { + "sourceMcp": "epacomp-tox-mcp", + "generatedAt": "2026-03-21T10:00:00Z", + "generatedBy": "assemble_comptox_evidence_pack", + "traceId": "ctx-example-pack-exposure-001", + "sources": [ + { + "name": "CompTox Exposure APIs", + "toolName": "search_cpdat", + "url": "https://comptox.epa.gov/dashboard", + "retrievedAt": "2026-03-21T09:58:30Z", + "citation": "EPA CompTox exposure endpoints" + } + ] + } + }, + "bioactivityEvidenceSummary": { + "chemicalRef": { + "dtxsid": "DTXSID7020182",
"preferredName": "Bisphenol A", + "casrn": "80-05-7" + }, + "summary": { + "assayCount": 2, + "activeAssayCount": 1, + "targetCount": 1 + }, + "assays": [ + { + "aeid": "AEID:1856", + "assayName": "ATG_ERa_TRANS_up", + "activityDirection": "up", + "activityValue": 0.48, + "unit": "uM", + "hitCall": true + } + ], + "targets": [ + { + "targetName": "Estrogen receptor alpha", + "geneSymbol": "ESR1", + "targetFamily": "nuclear receptor", + "assayCount": 1 + } + ], + "aopMappings": [ + { + "aopId": "AOP:42", + "eventType": "molecular_initiating_event", + "eventLabel": "Activation of estrogen receptor", + "confidence": 0.81 + } + ], + "provenance": { + "sourceMcp": "epacomp-tox-mcp", + "generatedAt": "2026-03-21T10:00:00Z", + "generatedBy": "assemble_comptox_evidence_pack", + "traceId": "ctx-example-pack-bioactivity-001", + "sources": [ + { + "name": "CompTox Bioactivity APIs", + "toolName": "get_bioactivity_summary_by_dtxsid", + "url": "https://comptox.epa.gov/dashboard", + "retrievedAt": "2026-03-21T09:59:00Z", + "citation": "EPA CompTox bioactivity endpoints" + } + ] + } + }, + "aopLinkageSummary": { + "chemicalRef": { + "dtxsid": "DTXSID7020182", + "preferredName": "Bisphenol A", + "casrn": "80-05-7" + }, + "lookupMode": "dtxsid", + "mappings": [ + { + "aopId": "AOP:42", + "eventType": "molecular_initiating_event", + "eventLabel": "Activation of estrogen receptor", + "confidence": 0.81 + } + ], + "supportingAssays": [ + { + "aeid": "AEID:1856", + "assayName": "ATG_ERa_TRANS_up" + } + ], + "confidence": { + "score": 0.79, + "basis": "Example CompTox linkage confidence." + }, + "provenance": { + "sourceMcp": "epacomp-tox-mcp", + "generatedAt": "2026-03-21T10:00:00Z", + "generatedBy": "build_aop_linkage_summary", + "traceId": "ctx-example-pack-aop-001", + "sources": [ + { + "name": "CompTox AOP mapping", + "toolName": "get_bioactivity_aop", + "url": "https://comptox.epa.gov/dashboard", + "retrievedAt": "2026-03-21T09:59:10Z", + "citation": "EPA CompTox AOP crosswalk endpoint" + } + ] + } + }, + "pbpkContextBundle": { + "chemicalIdentityRef": { + "dtxsid": "DTXSID7020182", + "preferredName": "Bisphenol A", + "casrn": "80-05-7", + "inchikey": "IHQYDGMOQILFNV-UHFFFAOYSA-N", + "smiles": "CC(C)(C1=CC=C(C=C1)O)C2=CC=C(C=C2)O", + "synonyms": [ + "BPA" + ], + "provenance": { + "sourceMcp": "epacomp-tox-mcp", + "generatedAt": "2026-03-21T10:00:00Z", + "generatedBy": "build_pbpk_context_bundle", + "traceId": "ctx-example-pack-pbpk-chemical-001", + "sources": [ + { + "name": "CompTox Chemicals Dashboard API", + "toolName": "search_chemical", + "url": "https://comptox.epa.gov/dashboard", + "retrievedAt": "2026-03-21T09:58:00Z", + "citation": "EPA CompTox Chemicals Dashboard" + } + ] + } + }, + "httkSlice": { + "recordCount": 1, + "records": [ + { + "fractionUnboundPlasma": 0.06 + } + ], + "sourceTool": "get_exposure_httk", + "retrievedAt": "2026-03-21T09:59:00Z", + "selectedMetrics": { + "fractionUnboundPlasma": 0.06 + } + }, + "hazardAdmeIviveSlice": { + "recordCount": 1, + "records": [ + { + "intrinsicClearance": 12.4 + } + ], + "sourceTool": "get_hazard_adme_ivive", + "retrievedAt": "2026-03-21T09:59:20Z", + "selectedMetrics": { + "intrinsicClearance": 12.4 + } + }, + "exposureHints": [ + { + "hintType": "population_exposure", + "value": 0.02, + "unit": "mg/kg-day", + "source": "SEEM" + } + ], + "modelCardRefs": [ + { + "modelName": "CompTox HTTK 3-Compartment Model", + "modelVersion": "1.0.0", + "endpoint": "screening-level internal concentration", + "cardUri": 
"https://github.com/ToxMCP/comptox-mcp/blob/main/schemas/comptox_model_card.schema.json" + } + ], + "provenance": { + "sourceMcp": "epacomp-tox-mcp", + "generatedAt": "2026-03-21T10:00:00Z", + "generatedBy": "build_pbpk_context_bundle", + "traceId": "ctx-example-pack-pbpk-001", + "sources": [ + { + "name": "CompTox Exposure and Hazard APIs", + "toolName": "get_exposure_httk", + "url": "https://comptox.epa.gov/dashboard", + "retrievedAt": "2026-03-21T09:59:00Z", + "citation": "EPA CompTox HTTK and ADME endpoints" + } + ] + }, + "handoffTarget": "pbpk-mcp" + }, + "metadata": { + "packId": "comptox-pack-bpa-001", + "sourceMcp": "epacomp-tox-mcp", + "createdAt": "2026-03-21T10:00:00Z", + "suiteRole": "evidence-federation", + "downstreamConsumers": [ + "aop-mcp", + "pbpk-mcp" + ], + "modelCardRefs": [ + { + "modelName": "CompTox HTTK 3-Compartment Model", + "modelVersion": "1.0.0", + "endpoint": "screening-level internal concentration", + "cardUri": "https://github.com/ToxMCP/comptox-mcp/blob/main/schemas/comptox_model_card.schema.json" + } + ] + }, + "audit": { + "generatedAt": "2026-03-21T10:00:00Z", + "generatedBy": "assemble_comptox_evidence_pack", + "requestId": "comptox-pack-request-001", + "sourceTools": [ + "search_chemical", + "search_hazard", + "search_cpdat", + "get_bioactivity_summary_by_dtxsid", + "get_bioactivity_aop", + "get_exposure_httk" + ], + "notes": [ + "Illustrative pack showing the intended portable handoff shape." + ] + }, + "semanticCoverage": { + "identity": "detailed", + "hazard": "summary", + "exposure": "summary", + "bioactivity": "summary", + "aopLinkage": "linked", + "pbpkContext": "summary" + } +} diff --git a/src/epacomp_tox/data/schemas/examples/exposureEvidenceSummary.example.json b/src/epacomp_tox/data/schemas/examples/exposureEvidenceSummary.example.json new file mode 100644 index 0000000..57d66e5 --- /dev/null +++ b/src/epacomp_tox/data/schemas/examples/exposureEvidenceSummary.example.json @@ -0,0 +1,82 @@ +{ + "chemicalRef": { + "dtxsid": "DTXSID7020182", + "preferredName": "Bisphenol A", + "casrn": "80-05-7" + }, + "cpdat": { + "recordCount": 1, + "records": [ + { + "productUseCategory": "Food contact material", + "presence": "reported" + } + ], + "sourceTool": "search_cpdat", + "retrievedAt": "2026-03-21T09:58:30Z" + }, + "seem": { + "recordCount": 1, + "records": [ + { + "population": "adult", + "medianExposure": 0.02, + "unit": "mg/kg-day" + } + ], + "sourceTool": "get_seem_general", + "retrievedAt": "2026-03-21T09:58:45Z" + }, + "httk": { + "recordCount": 1, + "records": [ + { + "model": "httk-3compartment", + "clint": 12.4, + "clintUnit": "uL/min/10^6 hepatocytes" + } + ], + "sourceTool": "search_httk", + "retrievedAt": "2026-03-21T09:59:00Z" + }, + "mmdb": { + "recordCount": 1, + "records": [ + { + "medium": "drinking_water", + "detectionFrequency": 0.18 + } + ], + "sourceTool": "get_exposure_mmdb_aggregate_by_dtxsid", + "retrievedAt": "2026-03-21T09:59:15Z" + }, + "qsurs": { + "recordCount": 1, + "records": [ + { + "useDescriptor": "plasticizer", + "probability": 0.62 + } + ], + "sourceTool": "search_qsurs", + "retrievedAt": "2026-03-21T09:59:30Z" + }, + "provenance": { + "sourceMcp": "epacomp-tox-mcp", + "generatedAt": "2026-03-21T10:00:00Z", + "generatedBy": "assemble_comptox_evidence_pack", + "traceId": "ctx-example-exposure-001", + "sources": [ + { + "name": "CompTox Exposure APIs", + "toolName": "search_cpdat", + "url": "https://comptox.epa.gov/dashboard", + "retrievedAt": "2026-03-21T09:58:30Z", + "citation": "EPA CompTox exposure endpoints" 
+ } + ], + "notes": [ + "Illustrative exposure summary object." + ] + } +} diff --git a/src/epacomp_tox/data/schemas/examples/hazardEvidenceSummary.example.json b/src/epacomp_tox/data/schemas/examples/hazardEvidenceSummary.example.json new file mode 100644 index 0000000..84799d1 --- /dev/null +++ b/src/epacomp_tox/data/schemas/examples/hazardEvidenceSummary.example.json @@ -0,0 +1,91 @@ +{ + "chemicalRef": { + "dtxsid": "DTXSID7020182", + "preferredName": "Bisphenol A", + "casrn": "80-05-7" + }, + "datasets": [ + { + "dataset": "toxval", + "summaryLevel": "summary", + "recordCount": 2, + "records": [ + { + "effect": "NOEL", + "value": 40, + "unit": "mg/kg-day", + "species": "Rat" + }, + { + "effect": "LOAEL", + "value": 120, + "unit": "mg/kg-day", + "species": "Rat" + } + ], + "sourceTool": "search_hazard", + "retrievedAt": "2026-03-21T09:58:00Z" + }, + { + "dataset": "cancer", + "summaryLevel": "summary", + "recordCount": 1, + "records": [ + { + "agency": "IARC", + "classification": "Not classifiable as to carcinogenicity to humans" + } + ], + "sourceTool": "search_hazard", + "retrievedAt": "2026-03-21T09:58:00Z" + } + ], + "keyFindings": [ + { + "statement": "ToxValDB includes repeated-dose oral values for Bisphenol A.", + "sourceDataset": "toxval", + "endpoint": "NOEL", + "value": 40, + "unit": "mg/kg-day", + "confidence": 0.77 + }, + { + "statement": "Cancer summary records are sparse relative to general hazard coverage.", + "sourceDataset": "cancer", + "context": "Use as a supporting line of evidence rather than a sole hazard basis." + } + ], + "references": [ + { + "citation": "EPA CompTox Chemicals Dashboard hazard datasets for Bisphenol A.", + "url": "https://comptox.epa.gov/dashboard" + } + ], + "provenance": { + "sourceMcp": "epacomp-tox-mcp", + "generatedAt": "2026-03-21T10:00:00Z", + "generatedBy": "assemble_comptox_evidence_pack", + "traceId": "ctx-example-hazard-001", + "sources": [ + { + "name": "CompTox Hazard APIs", + "toolName": "search_hazard", + "url": "https://comptox.epa.gov/dashboard", + "retrievedAt": "2026-03-21T09:58:00Z", + "citation": "EPA CompTox hazard endpoints" + } + ], + "notes": [ + "Illustrative hazard summary object." + ] + }, + "requestMetadata": { + "sourceTools": [ + "search_hazard", + "get_hazard_toxval" + ], + "requestedAt": "2026-03-21T09:58:00Z", + "summaryOnly": true, + "requestId": "hazard-example-request-001" + } +} diff --git a/src/epacomp_tox/data/schemas/examples/pbpkContextBundle.example.json b/src/epacomp_tox/data/schemas/examples/pbpkContextBundle.example.json new file mode 100644 index 0000000..c71dcd4 --- /dev/null +++ b/src/epacomp_tox/data/schemas/examples/pbpkContextBundle.example.json @@ -0,0 +1,214 @@ +{ + "chemicalIdentityRef": { + "dtxsid": "DTXSID7020182", + "preferredName": "Bisphenol A", + "casrn": "80-05-7", + "inchikey": "IISBACLAFKSPIT-UHFFFAOYSA-N", + "smiles": "CC(C)(C1=CC=C(C=C1)O)C2=CC=C(C=C2)O", + "synonyms": [ + "BPA", + "4,4'-Isopropylidenediphenol" + ], + "provenance": { + "sourceMcp": "epacomp-tox-mcp", + "generatedAt": "2026-03-21T10:00:00Z", + "generatedBy": "build_pbpk_context_bundle", + "traceId": "ctx-example-pbpk-chemical-001", + "sources": [ + { + "name": "CompTox Chemicals Dashboard API", + "toolName": "search_chemical", + "url": "https://comptox.epa.gov/dashboard", + "retrievedAt": "2026-03-21T09:58:00Z", + "citation": "EPA CompTox Chemicals Dashboard" + } + ], + "notes": [ + "Embedded identity record for PBPK handoff."
+ ] + } + }, + "httkSlice": { + "recordCount": 1, + "records": [ + { + "model": "httk-3compartment", + "species": "human", + "fractionUnboundPlasma": 0.06, + "intrinsicClearance": 12.4 + } + ], + "sourceTool": "get_exposure_httk", + "retrievedAt": "2026-03-21T09:59:00Z", + "selectedMetrics": { + "fractionUnboundPlasma": 0.06, + "intrinsicClearance": 12.4, + "clearanceUnit": "uL/min/10^6 hepatocytes" + } + }, + "hazardAdmeIviveSlice": { + "recordCount": 1, + "records": [ + { + "assay": "hepatocyte_clearance", + "value": 12.4, + "unit": "uL/min/10^6 hepatocytes" + } + ], + "sourceTool": "get_hazard_adme_ivive", + "retrievedAt": "2026-03-21T09:59:20Z", + "selectedMetrics": { + "primaryAdmeSignal": "hepatocyte_clearance" + } + }, + "exposureHints": [ + { + "hintType": "population_exposure", + "value": 0.02, + "unit": "mg/kg-day", + "source": "SEEM", + "context": "Adult central tendency estimate." + }, + { + "hintType": "product_presence", + "value": "food_contact_material", + "source": "CPDat" + } + ], + "modelCardRefs": [ + { + "schemaVersion": "1.0", + "modelDetails": { + "name": "CompTox HTTK 3-Compartment Model", + "version": "1.0.0", + "modelType": "PBPK", + "description": "Illustrative model card for an HTTK-derived PBPK handoff example.", + "developers": [ + { + "name": "EPA CompTox Team" + } + ], + "organizations": [ + "U.S. EPA" + ], + "releaseDate": "2025-01-01" + }, + "intendedUse": { + "summary": "Provide a transportable model description for downstream PBPK qualification review.", + "inScope": [ + "Screening-level internal exposure context" + ], + "outOfScope": [ + "Final PBPK qualification decisions" + ], + "limitations": [ + "Example model card embedded for schema validation only." + ], + "warnings": [ + "Not a substitute for pbpk-mcp qualification outputs." + ], + "regulatoryPrograms": [ + "NGRA workflow support" + ] + }, + "oecdValidationPrinciples": { + "definedEndpoint": { + "description": "Internal concentration time-course surrogates", + "unit": "mg/L" + }, + "unambiguousAlgorithm": { + "summary": "Three-compartment HTTK parameterization." + }, + "definedApplicabilityDomain": { + "summary": "Applies when chemical-specific HTTK parameters are available." + }, + "goodnessOfFitMetrics": {}, + "mechanisticInterpretation": { + "summary": "Mechanistic interpretation derives from toxicokinetic parameterization." + } + }, + "trainingData": { + "dataset": { + "name": "HTTK parameter tables", + "source": "CompTox" + }, + "records": 1, + "chemicalCount": 1 + }, + "evaluationData": { + "datasets": [ + { + "name": "Illustrative evaluation set", + "source": "CompTox" + } + ], + "validationApproach": "Reference example", + "metrics": [ + { + "name": "coverage", + "value": 1.0 + } + ] + }, + "applicabilityDomain": { + "summary": "Requires a resolvable chemical identity and available HTTK parameter slice.", + "criteria": [ + { + "type": "coverage", + "description": "HTTK parameters must be available." + } + ], + "enforcement": { + "mcpTools": [ + "build_pbpk_context_bundle" + ] + } + }, + "ethicalConsiderations": { + "risks": [ + "Users may overinterpret screening-level context as a final PBPK decision." 
+ ] + }, + "provenance": { + "sourceRepositories": [ + "https://github.com/ToxMCP/comptox-mcp" + ], + "build": { + "id": "example-build", + "timestamp": "2026-03-21T10:00:00Z" + }, + "checksum": { + "algorithm": "SHA256", + "value": "examplechecksum" + }, + "reviewStatus": { + "approvedBy": [ + { + "name": "QA Reviewer" + } + ], + "approvalDate": "2026-03-21" + } + } + } + ], + "provenance": { + "sourceMcp": "epacomp-tox-mcp", + "generatedAt": "2026-03-21T10:00:00Z", + "generatedBy": "build_pbpk_context_bundle", + "traceId": "ctx-example-pbpk-001", + "sources": [ + { + "name": "CompTox Exposure and Hazard APIs", + "toolName": "get_exposure_httk", + "url": "https://comptox.epa.gov/dashboard", + "retrievedAt": "2026-03-21T09:59:00Z", + "citation": "EPA CompTox HTTK and ADME endpoints" + } + ], + "notes": [ + "This bundle provides context only; it does not emit internal exposure estimates." + ] + }, + "handoffTarget": "pbpk-mcp" +} diff --git a/src/epacomp_tox/data/schemas/exposureEvidenceSummary.v1.json b/src/epacomp_tox/data/schemas/exposureEvidenceSummary.v1.json new file mode 100644 index 0000000..e08bcf3 --- /dev/null +++ b/src/epacomp_tox/data/schemas/exposureEvidenceSummary.v1.json @@ -0,0 +1,191 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://epa.gov/comptox/schemas/exposureEvidenceSummary.v1.json", + "title": "CompTox Exposure Evidence Summary v1", + "description": "Portable summary of exposure and HTTK evidence retrieved from CompTox.", + "type": "object", + "additionalProperties": false, + "required": [ + "chemicalRef", + "cpdat", + "seem", + "httk", + "mmdb", + "qsurs", + "provenance" + ], + "properties": { + "chemicalRef": { + "$ref": "#/$defs/chemicalRef" + }, + "cpdat": { + "oneOf": [ + { + "$ref": "#/$defs/evidenceSlice" + }, + { + "type": "null" + } + ] + }, + "seem": { + "oneOf": [ + { + "$ref": "#/$defs/evidenceSlice" + }, + { + "type": "null" + } + ] + }, + "httk": { + "oneOf": [ + { + "$ref": "#/$defs/evidenceSlice" + }, + { + "type": "null" + } + ] + }, + "mmdb": { + "oneOf": [ + { + "$ref": "#/$defs/evidenceSlice" + }, + { + "type": "null" + } + ] + }, + "qsurs": { + "oneOf": [ + { + "$ref": "#/$defs/evidenceSlice" + }, + { + "type": "null" + } + ] + }, + "provenance": { + "$ref": "#/$defs/provenance" + } + }, + "$defs": { + "chemicalRef": { + "type": "object", + "additionalProperties": false, + "required": [ + "dtxsid", + "preferredName" + ], + "properties": { + "dtxsid": { + "type": "string", + "pattern": "^DTXSID[0-9A-Z]+$" + }, + "preferredName": { + "type": "string" + }, + "casrn": { + "type": [ + "string", + "null" + ] + } + } + }, + "evidenceSlice": { + "type": "object", + "additionalProperties": false, + "required": [ + "recordCount", + "records" + ], + "properties": { + "recordCount": { + "type": "integer", + "minimum": 0 + }, + "records": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + "sourceTool": { + "type": "string" + }, + "retrievedAt": { + "type": "string", + "format": "date-time" + } + } + }, + "sourceRecord": { + "type": "object", + "additionalProperties": false, + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string" + }, + "toolName": { + "type": "string" + }, + "url": { + "type": "string", + "format": "uri" + }, + "retrievedAt": { + "type": "string", + "format": "date-time" + }, + "citation": { + "type": "string" + } + } + }, + "provenance": { + "type": "object", + "additionalProperties": false, + "required": [ + "sourceMcp", + "generatedAt", 
+ "sources" + ], + "properties": { + "sourceMcp": { + "type": "string" + }, + "generatedAt": { + "type": "string", + "format": "date-time" + }, + "generatedBy": { + "type": "string" + }, + "traceId": { + "type": "string" + }, + "sources": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/$defs/sourceRecord" + } + }, + "notes": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } +} diff --git a/src/epacomp_tox/data/schemas/hazardEvidenceSummary.v1.json b/src/epacomp_tox/data/schemas/hazardEvidenceSummary.v1.json new file mode 100644 index 0000000..8998fff --- /dev/null +++ b/src/epacomp_tox/data/schemas/hazardEvidenceSummary.v1.json @@ -0,0 +1,248 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://epa.gov/comptox/schemas/hazardEvidenceSummary.v1.json", + "title": "CompTox Hazard Evidence Summary v1", + "description": "Portable summary of hazard evidence retrieved from CompTox hazard datasets.", + "type": "object", + "additionalProperties": false, + "required": [ + "chemicalRef", + "datasets", + "keyFindings", + "references", + "provenance", + "requestMetadata" + ], + "properties": { + "chemicalRef": { + "$ref": "#/$defs/chemicalRef" + }, + "datasets": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/$defs/datasetSlice" + } + }, + "keyFindings": { + "type": "array", + "items": { + "$ref": "#/$defs/keyFinding" + } + }, + "references": { + "type": "array", + "items": { + "$ref": "https://epa.gov/comptox/schemas/comptox-model-card.schema.json#/$defs/reference" + } + }, + "provenance": { + "$ref": "#/$defs/provenance" + }, + "requestMetadata": { + "$ref": "#/$defs/requestMetadata" + } + }, + "$defs": { + "chemicalRef": { + "type": "object", + "additionalProperties": false, + "required": [ + "dtxsid", + "preferredName" + ], + "properties": { + "dtxsid": { + "type": "string", + "pattern": "^DTXSID[0-9A-Z]+$" + }, + "preferredName": { + "type": "string" + }, + "casrn": { + "type": [ + "string", + "null" + ] + } + } + }, + "datasetSlice": { + "type": "object", + "additionalProperties": false, + "required": [ + "dataset", + "summaryLevel", + "recordCount" + ], + "properties": { + "dataset": { + "type": "string", + "enum": [ + "toxval", + "toxref", + "cancer", + "genetox", + "adme_ivive", + "iris", + "pprtv", + "hawc" + ] + }, + "summaryLevel": { + "type": "string", + "enum": [ + "summary", + "detail", + "mixed" + ] + }, + "recordCount": { + "type": "integer", + "minimum": 0 + }, + "records": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + "sourceTool": { + "type": "string" + }, + "retrievedAt": { + "type": "string", + "format": "date-time" + } + } + }, + "keyFinding": { + "type": "object", + "additionalProperties": false, + "required": [ + "statement", + "sourceDataset" + ], + "properties": { + "statement": { + "type": "string" + }, + "sourceDataset": { + "type": "string" + }, + "endpoint": { + "type": "string" + }, + "value": { + "type": [ + "number", + "string", + "null" + ] + }, + "unit": { + "type": "string" + }, + "confidence": { + "type": "number", + "minimum": 0, + "maximum": 1 + }, + "context": { + "type": "string" + } + } + }, + "sourceRecord": { + "type": "object", + "additionalProperties": false, + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string" + }, + "toolName": { + "type": "string" + }, + "url": { + "type": "string", + "format": "uri" + }, + "retrievedAt": { + "type": "string", + "format": "date-time" + }, + "citation": { + 
"type": "string" + } + } + }, + "provenance": { + "type": "object", + "additionalProperties": false, + "required": [ + "sourceMcp", + "generatedAt", + "sources" + ], + "properties": { + "sourceMcp": { + "type": "string" + }, + "generatedAt": { + "type": "string", + "format": "date-time" + }, + "generatedBy": { + "type": "string" + }, + "traceId": { + "type": "string" + }, + "sources": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/$defs/sourceRecord" + } + }, + "notes": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "requestMetadata": { + "type": "object", + "additionalProperties": false, + "required": [ + "sourceTools", + "requestedAt" + ], + "properties": { + "sourceTools": { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + }, + "requestedAt": { + "type": "string", + "format": "date-time" + }, + "summaryOnly": { + "type": "boolean" + }, + "requestId": { + "type": "string" + } + } + } + } +} diff --git a/src/epacomp_tox/data/schemas/pbpkContextBundle.v1.json b/src/epacomp_tox/data/schemas/pbpkContextBundle.v1.json new file mode 100644 index 0000000..520e691 --- /dev/null +++ b/src/epacomp_tox/data/schemas/pbpkContextBundle.v1.json @@ -0,0 +1,245 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://epa.gov/comptox/schemas/pbpkContextBundle.v1.json", + "title": "CompTox PBPK Context Bundle v1", + "description": "Portable CompTox-side context package for downstream PBPK workflows.", + "type": "object", + "additionalProperties": false, + "required": [ + "chemicalIdentityRef", + "httkSlice", + "hazardAdmeIviveSlice", + "exposureHints", + "modelCardRefs", + "provenance", + "handoffTarget" + ], + "properties": { + "chemicalIdentityRef": { + "$ref": "https://epa.gov/comptox/schemas/chemicalIdentityRecord.v1.json" + }, + "httkSlice": { + "oneOf": [ + { + "$ref": "#/$defs/evidenceSlice" + }, + { + "type": "null" + } + ] + }, + "hazardAdmeIviveSlice": { + "oneOf": [ + { + "$ref": "#/$defs/evidenceSlice" + }, + { + "type": "null" + } + ] + }, + "exposureHints": { + "type": "array", + "items": { + "$ref": "#/$defs/exposureHint" + } + }, + "modelCardRefs": { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "https://epa.gov/comptox/schemas/comptox-model-card.schema.json" + }, + { + "$ref": "#/$defs/modelCardReference" + } + ] + } + }, + "identityResolution": { + "type": ["object", "null"], + "additionalProperties": true + }, + "knownDataGaps": { + "type": "array", + "items": {"type": "string"} + }, + "limitations": { + "type": "array", + "items": {"type": "string"} + }, + "generatedFromTools": { + "type": "array", + "items": {"type": "string"} + }, + "provenanceSummary": { + "type": "object", + "additionalProperties": true + }, + "provenance": { + "$ref": "#/$defs/provenance" + }, + "handoffTarget": { + "type": "string", + "const": "pbpk-mcp" + } + }, + "$defs": { + "evidenceSlice": { + "type": "object", + "additionalProperties": false, + "required": [ + "recordCount", + "records" + ], + "properties": { + "recordCount": { + "type": "integer", + "minimum": 0 + }, + "records": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": true + } + }, + "sourceTool": { + "type": "string" + }, + "retrievedAt": { + "type": "string", + "format": "date-time" + }, + "selectedMetrics": { + "type": "object", + "additionalProperties": true + } + } + }, + "exposureHint": { + "type": "object", + "additionalProperties": false, + "required": [ + "hintType", + "value", + "source" + ], + 
"properties": { + "hintType": { + "type": "string" + }, + "value": { + "type": [ + "number", + "string" + ] + }, + "unit": { + "type": "string" + }, + "source": { + "type": "string" + }, + "context": { + "type": "string" + } + } + }, + "modelCardReference": { + "type": "object", + "additionalProperties": false, + "required": [ + "modelName", + "modelVersion" + ], + "properties": { + "modelName": { + "type": "string" + }, + "modelVersion": { + "type": "string" + }, + "endpoint": { + "type": "string" + }, + "cardUri": { + "type": "string", + "format": "uri" + }, + "limitations": { + "type": "array", + "items": {"type": "string"} + }, + "warnings": { + "type": "array", + "items": {"type": "string"} + } + } + }, + "sourceRecord": { + "type": "object", + "additionalProperties": false, + "required": [ + "name" + ], + "properties": { + "name": { + "type": "string" + }, + "toolName": { + "type": "string" + }, + "url": { + "type": "string", + "format": "uri" + }, + "retrievedAt": { + "type": "string", + "format": "date-time" + }, + "citation": { + "type": "string" + } + } + }, + "provenance": { + "type": "object", + "additionalProperties": false, + "required": [ + "sourceMcp", + "generatedAt", + "sources" + ], + "properties": { + "sourceMcp": { + "type": "string" + }, + "generatedAt": { + "type": "string", + "format": "date-time" + }, + "generatedBy": { + "type": "string" + }, + "traceId": { + "type": "string" + }, + "sources": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/$defs/sourceRecord" + } + }, + "notes": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } +} diff --git a/src/epacomp_tox/metadata/applicability.py b/src/epacomp_tox/metadata/applicability.py index 39034bb..9dc1706 100644 --- a/src/epacomp_tox/metadata/applicability.py +++ b/src/epacomp_tox/metadata/applicability.py @@ -4,15 +4,22 @@ from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Tuple -DEFAULT_AD_DIR = Path(Path.cwd(), "metadata", "applicability_domains") +from epacomp_tox.assets import data_file + +DEFAULT_AD_DIR = data_file("metadata", "applicability_domains") class ApplicabilityDomainStore: """File-backed access to applicability domain reference data.""" def __init__(self, directory: Optional[Path] = None): - self.directory = Path(directory or DEFAULT_AD_DIR) - self.directory.mkdir(parents=True, exist_ok=True) + if directory is None: + self.directory = DEFAULT_AD_DIR + self._filesystem_backed = False + else: + self.directory = Path(directory) + self.directory.mkdir(parents=True, exist_ok=True) + self._filesystem_backed = True def list_definitions( self, @@ -37,7 +44,19 @@ def get_definition(self, model_name: str) -> Optional[Dict[str, Any]]: return None def _iter_defs(self) -> Iterable[Dict[str, Any]]: - for path in sorted(self.directory.glob("*.json")): + paths = ( + sorted(self.directory.glob("*.json")) + if self._filesystem_backed + else sorted( + ( + entry + for entry in self.directory.iterdir() + if entry.is_file() and entry.name.endswith(".json") + ), + key=lambda entry: entry.name, + ) + ) + for path in paths: try: payload = json.loads(path.read_text(encoding="utf-8")) except ( @@ -45,5 +64,11 @@ def _iter_defs(self) -> Iterable[Dict[str, Any]]: json.JSONDecodeError, ): # pragma: no cover - logged upstream continue - payload["path"] = str(path) + if self._filesystem_backed: + payload["path"] = str(path) + else: + payload["path"] = ( + "package://epacomp_tox.data/metadata/" + f"applicability_domains/{path.name}" + ) yield payload diff --git 
a/src/epacomp_tox/metadata/model_cards.py b/src/epacomp_tox/metadata/model_cards.py index 73162c8..75122c9 100644 --- a/src/epacomp_tox/metadata/model_cards.py +++ b/src/epacomp_tox/metadata/model_cards.py @@ -7,7 +7,10 @@ from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Tuple -DEFAULT_MODEL_CARD_DIR = Path(Path.cwd(), "metadata", "model_cards") +from epacomp_tox.assets import data_file + +DEFAULT_MODEL_CARD_DIR = data_file("metadata", "model_cards") +PACKAGED_LAST_MODIFIED = "1970-01-01T00:00:00+00:00" @dataclass @@ -21,8 +24,13 @@ class ModelCardStore: """Simple file-backed store for CompTox model cards.""" def __init__(self, directory: Optional[Path] = None): - self.directory = Path(directory or DEFAULT_MODEL_CARD_DIR) - self.directory.mkdir(parents=True, exist_ok=True) + if directory is None: + self.directory = DEFAULT_MODEL_CARD_DIR + self._filesystem_backed = False + else: + self.directory = Path(directory) + self.directory.mkdir(parents=True, exist_ok=True) + self._filesystem_backed = True def list_cards( self, @@ -42,7 +50,19 @@ def list_cards( return page, next_cursor def _iter_cards(self) -> Iterable[Dict[str, Any]]: - for path in sorted(self.directory.glob("*.json")): + paths = ( + sorted(self.directory.glob("*.json")) + if self._filesystem_backed + else sorted( + ( + entry + for entry in self.directory.iterdir() + if entry.is_file() and entry.name.endswith(".json") + ), + key=lambda entry: entry.name, + ) + ) + for path in paths: try: raw = path.read_text(encoding="utf-8") payload = json.loads(raw) @@ -52,12 +72,20 @@ def _iter_cards(self) -> Iterable[Dict[str, Any]]: ): # pragma: no cover - logged upstream continue checksum = hashlib.sha256(raw.encode("utf-8")).hexdigest() - stat = path.stat() + if self._filesystem_backed: + stat = path.stat() + last_modified = datetime.fromtimestamp(stat.st_mtime).isoformat() + path_value = str(path) + else: + last_modified = PACKAGED_LAST_MODIFIED + path_value = ( + f"package://epacomp_tox.data/metadata/model_cards/{path.name}" + ) yield { "card": payload, "checksum": checksum, - "path": str(path), - "lastModified": datetime.fromtimestamp(stat.st_mtime).isoformat(), + "path": path_value, + "lastModified": last_modified, } @staticmethod diff --git a/src/epacomp_tox/orchestrator/audit.py b/src/epacomp_tox/orchestrator/audit.py index 113026d..aeb8709 100644 --- a/src/epacomp_tox/orchestrator/audit.py +++ b/src/epacomp_tox/orchestrator/audit.py @@ -2,10 +2,14 @@ import hashlib import json +import re +import time from datetime import datetime, timezone -from pathlib import Path +from pathlib import Path, PurePosixPath from typing import Dict, Iterable, List, Optional, Tuple, Union +SAFE_PATH_COMPONENT = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_.-]{0,127}$") + class AuditBundleStore: """Durable storage for orchestrator audit bundles and attachments.""" @@ -13,7 +17,7 @@ class AuditBundleStore: def __init__( self, base_dir: Union[str, Path], *, retention_days: Optional[int] = None ) -> None: - self.base_dir = Path(base_dir) + self.base_dir = Path(base_dir).resolve() self.base_dir.mkdir(parents=True, exist_ok=True) self.retention_days = retention_days @@ -26,8 +30,9 @@ def save( run_id = bundle.get("workflowRunId") if not run_id: raise ValueError("Bundle must include 'workflowRunId'.") + run_id = self._safe_component(str(run_id), "workflowRunId") - run_dir = self.base_dir / run_id + run_dir = self._resolve_under_base(self.base_dir / run_id) run_dir.mkdir(parents=True, exist_ok=True) created_at = 
datetime.now(timezone.utc).isoformat() @@ -35,7 +40,7 @@ def save( bundle, ensure_ascii=False, indent=2, sort_keys=True ).encode("utf-8") bundle_path = run_dir / "bundle.json" - bundle_path.write_bytes(payload) + self._atomic_write(bundle_path, payload) bundle_checksum = hashlib.sha256(payload).hexdigest() attachments_meta: List[Dict[str, any]] = [] @@ -43,13 +48,13 @@ def save( attachments_dir = run_dir / "attachments" attachments_dir.mkdir(parents=True, exist_ok=True) for name, content in attachments.items(): - target = attachments_dir / name + safe_name, target = self._safe_attachment_path(attachments_dir, name) target.parent.mkdir(parents=True, exist_ok=True) data = content.encode("utf-8") if isinstance(content, str) else content - target.write_bytes(data) + self._atomic_write(target, data) attachments_meta.append( { - "name": name, + "name": safe_name, "path": str(target.relative_to(self.base_dir)), "size": len(data), "checksum": hashlib.sha256(data).hexdigest(), @@ -71,9 +76,9 @@ def save( } metadata_path = run_dir / "metadata.json" - metadata_path.write_text( - json.dumps(metadata, indent=2, sort_keys=True), - encoding="utf-8", + self._atomic_write( + metadata_path, + json.dumps(metadata, indent=2, sort_keys=True).encode("utf-8"), ) # Update chain manifest with latest hash @@ -100,9 +105,9 @@ def _update_chain_manifest( "updatedAt": created_at, } try: - chain_manifest_path.write_text( - json.dumps(manifest, indent=2, sort_keys=True), - encoding="utf-8", + self._atomic_write( + chain_manifest_path, + json.dumps(manifest, indent=2, sort_keys=True).encode("utf-8"), ) except OSError: # pragma: no cover - defensive pass @@ -122,7 +127,9 @@ def verify_chain(self) -> Tuple[bool, List[str]]: errors.append(f"Run {run_id}: previous hash mismatch") # Recompute bundle hash from file - bundle_path = self.base_dir / meta.get("bundlePath", "") + bundle_path = self._resolve_under_base( + self.base_dir / meta.get("bundlePath", "") + ) if bundle_path.exists(): computed = hashlib.sha256(bundle_path.read_bytes()).hexdigest() if computed != meta.get("bundleChecksum"): @@ -135,13 +142,19 @@ def verify_chain(self) -> Tuple[bool, List[str]]: return (not errors, errors) def load_bundle(self, run_id: str) -> Dict[str, any]: - bundle_path = self.base_dir / run_id / "bundle.json" + safe_run_id = self._safe_component(str(run_id), "workflowRunId") + bundle_path = self._resolve_under_base( + self.base_dir / safe_run_id / "bundle.json" + ) if not bundle_path.exists(): raise FileNotFoundError(f"No bundle found for run {run_id}") return json.loads(bundle_path.read_text(encoding="utf-8")) def load_metadata(self, run_id: str) -> Dict[str, any]: - metadata_path = self.base_dir / run_id / "metadata.json" + safe_run_id = self._safe_component(str(run_id), "workflowRunId") + metadata_path = self._resolve_under_base( + self.base_dir / safe_run_id / "metadata.json" + ) if not metadata_path.exists(): raise FileNotFoundError(f"No metadata found for run {run_id}") return json.loads(metadata_path.read_text(encoding="utf-8")) @@ -159,3 +172,38 @@ def list_runs(self) -> List[Dict[str, any]]: except json.JSONDecodeError: continue return runs + + @staticmethod + def _safe_component(value: str, label: str) -> str: + if not SAFE_PATH_COMPONENT.match(value) or ".." 
in value: + raise ValueError(f"Unsafe {label}: {value!r}") + return value + + def _resolve_under_base(self, path: Path) -> Path: + resolved = path.resolve() + try: + resolved.relative_to(self.base_dir) + except ValueError as exc: + raise ValueError("Resolved audit path escapes store root.") from exc + return resolved + + def _safe_attachment_path( + self, attachments_dir: Path, name: Union[str, Path] + ) -> Tuple[str, Path]: + raw_name = str(name).replace("\\", "/") + relative = PurePosixPath(raw_name) + if relative.is_absolute() or not relative.parts: + raise ValueError(f"Unsafe attachment name: {raw_name!r}") + safe_parts = [ + self._safe_component(part, "attachment path component") + for part in relative.parts + ] + safe_name = "/".join(safe_parts) + target = self._resolve_under_base(attachments_dir.joinpath(*safe_parts)) + return safe_name, target + + @staticmethod + def _atomic_write(path: Path, payload: bytes) -> None: + tmp_path = path.with_name(f".{path.name}.{time.time_ns()}.tmp") + tmp_path.write_bytes(payload) + tmp_path.replace(path) diff --git a/src/epacomp_tox/resources/manifest.py b/src/epacomp_tox/resources/manifest.py index f481fef..010ce4e 100644 --- a/src/epacomp_tox/resources/manifest.py +++ b/src/epacomp_tox/resources/manifest.py @@ -3,6 +3,7 @@ from pathlib import Path from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional +from epacomp_tox.assets import data_file, iter_data_files from epacomp_tox.contracts import schema_ref from .base import BaseResource @@ -23,7 +24,7 @@ def __init__( ) -> None: super().__init__(api_key) self._server_getter = server_getter - self._repo_root = repo_root or Path(__file__).resolve().parents[3] + self._repo_root = Path(repo_root) if repo_root is not None else None @property def name(self) -> str: @@ -167,8 +168,12 @@ def _tool_entries(self, server: "MCPServer") -> List[Dict[str, Any]]: def _portable_schema_entries(self) -> List[Dict[str, Any]]: entries: List[Dict[str, Any]] = [] - schemas_dir = self._repo_root / "schemas" - for path in sorted(schemas_dir.glob("*.json")): + paths = ( + sorted((self._repo_root / "schemas").glob("*.json")) + if self._repo_root is not None + else list(iter_data_files("schemas", suffix=".json")) + ) + for path in paths: if path.name.startswith("."): continue data = self._load_json(path) @@ -187,13 +192,26 @@ def _portable_schema_entries(self) -> List[Dict[str, Any]]: def _response_schema_entries(self) -> List[Dict[str, Any]]: entries: List[Dict[str, Any]] = [] - schemas_root = self._repo_root / "docs" / "contracts" / "schemas" - for path in sorted(schemas_root.glob("*/*.json")): + paths = ( + sorted( + (self._repo_root / "docs" / "contracts" / "schemas").glob("*/*.json") + ) + if self._repo_root is not None + else list( + iter_data_files("contracts", "schemas", suffix=".json", recursive=True) + ) + ) + for path in paths: + relative_path = ( + str(path.relative_to(self._repo_root)) + if self._repo_root is not None + else f"docs/contracts/schemas/{path.parent.name}/{path.name}" + ) entries.append( { "namespace": path.parent.name, "file": path.name, - "path": str(path.relative_to(self._repo_root)), + "path": relative_path, } ) return entries @@ -216,9 +234,16 @@ def _contract_reference( def _portable_example_for(self, schema_file: str) -> Optional[str]: stem = schema_file.replace(".v1.json", "") - candidate = self._repo_root / "schemas" / "examples" / f"{stem}.example.json" - if candidate.exists(): - return str(candidate.relative_to(self._repo_root)) + if self._repo_root is not None: + candidate 
= ( + self._repo_root / "schemas" / "examples" / f"{stem}.example.json" + ) + if candidate.exists(): + return str(candidate.relative_to(self._repo_root)) + return None + candidate = data_file("schemas", "examples", f"{stem}.example.json") + if candidate.is_file(): + return f"schemas/examples/{stem}.example.json" return None @staticmethod diff --git a/src/epacomp_tox/server.py b/src/epacomp_tox/server.py index a5c8a01..44c303f 100644 --- a/src/epacomp_tox/server.py +++ b/src/epacomp_tox/server.py @@ -7,6 +7,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional, Tuple +from jsonschema.exceptions import ValidationError as JsonSchemaValidationError from pydantic import ValidationError from ctxpy import CtxApiError, RateLimitInfo @@ -19,6 +20,14 @@ from epacomp_tox.validators import to_serializable +class ToolInputValidationError(ValueError): + """Raised when tool input fails advertised JSON Schema validation.""" + + +class ToolOutputValidationError(RuntimeError): + """Raised when structuredContent fails advertised JSON Schema validation.""" + + class MCPServer: """ Model Context Protocol (MCP) server for EPA CompTox data. @@ -149,8 +158,9 @@ def execute_tool(self, tool_name: str, parameters: Dict[str, Any]) -> Any: """ for resource in self.resources.values(): if resource.has_tool(tool_name): - result = resource.execute_tool(tool_name, parameters) registration = self.tool_registry.get_registration(tool_name) + self._validate_tool_input(registration, parameters or {}) + result = resource.execute_tool(tool_name, parameters) if registration.response_schema_ref: namespace, name = registration.response_schema_ref try: @@ -273,6 +283,7 @@ def call_tool( resource = registration.resource try: + self._validate_tool_input(registration, parameters or {}) validated_params = registration.parameters_model.model_validate( parameters or {} ) @@ -324,6 +335,10 @@ def call_tool( "data": existing_sc, "metadata": combined_metadata, } + if not result.get("isError") and "structuredContent" in result: + self._validate_structured_content( + registration, result["structuredContent"] + ) self._emit_audit_event( tool_name=tool_name, status="success", @@ -336,7 +351,7 @@ def call_tool( params=validated_params.model_dump(exclude_none=True), ) return result - except ValidationError as exc: + except (ValidationError, ToolInputValidationError) as exc: self._emit_audit_event( tool_name=tool_name, status="invalid_params", @@ -356,9 +371,8 @@ def call_tool( metadata = self._format_metadata(resource.get_last_metadata()) session_metadata = self._format_session_context(context) error_payload = { - "message": str(exc), + "message": "Upstream CTX request failed.", "status": exc.status, - "detail": exc.detail, "requestId": exc.request_id, "retryAfter": exc.retry_after, } @@ -387,7 +401,7 @@ def call_tool( "content": [ { "type": "text", - "text": f"Tool call failed: {exc}", + "text": "Tool call failed: upstream CTX request failed.", "annotations": {"audience": ["assistant"]}, } ], @@ -407,7 +421,22 @@ def call_tool( params=parameters, error=str(exc), ) - raise + return { + "content": [ + { + "type": "text", + "text": "Tool call failed: internal server error.", + "annotations": {"audience": ["assistant"]}, + } + ], + "structuredContent": self._drop_none_values( + { + "message": "Tool execution failed.", + "correlationId": correlation_id, + } + ), + "isError": True, + } def _emit_audit_event( self, @@ -471,6 +500,43 @@ def _invoke_resource( pass return execute_tool(tool_name, parameters) + @staticmethod + def 
_format_json_schema_error(error: JsonSchemaValidationError) -> str: + location = ".".join(str(item) for item in error.path) + if location: + return f"{location}: {error.message}" + return error.message + + def _validate_tool_input( + self, registration: Any, parameters: Dict[str, Any] + ) -> None: + errors = sorted( + registration.input_validator.iter_errors(parameters or {}), + key=lambda error: list(error.path), + ) + if errors: + message = "; ".join( + self._format_json_schema_error(error) for error in errors[:5] + ) + raise ToolInputValidationError(message) + + def _validate_structured_content( + self, registration: Any, structured_content: Dict[str, Any] + ) -> None: + if registration.output_validator is None: + return + errors = sorted( + registration.output_validator.iter_errors(structured_content), + key=lambda error: list(error.path), + ) + if errors: + message = "; ".join( + self._format_json_schema_error(error) for error in errors[:5] + ) + raise ToolOutputValidationError( + f"Tool '{registration.name}' structuredContent failed schema validation: {message}" + ) + def _find_resource(self, tool_name: str): for resource in self.resources.values(): if resource.has_tool(tool_name): @@ -543,6 +609,7 @@ def _normalise_tool_definition( "description": tool.get("description", ""), "inputSchema": input_schema, "annotations": { + **(tool.get("annotations") or {}), "resource": resource_name, }, } @@ -566,7 +633,7 @@ def register_session( *, client_capabilities: Dict[str, Any], client_info: Optional[Dict[str, Any]] = None, - authentication: Optional[Dict[str, Any]] = None, + auth: Optional[Dict[str, Any]] = None, negotiated_capabilities: Optional[Dict[str, Any]] = None, ) -> None: """Track active session metadata for observability and governance.""" @@ -575,7 +642,7 @@ def register_session( "clientCapabilities": client_capabilities, "negotiatedCapabilities": negotiated_capabilities or {}, "clientInfo": client_info or {}, - "authentication": authentication or {}, + "auth": auth or {}, "lastActivity": datetime.now(tz=timezone.utc).isoformat(), "status": "active", } @@ -690,11 +757,15 @@ def _format_session_context( client_caps = context.get("clientCapabilities") if client_caps: session_view["clientCapabilities"] = client_caps - authentication = context.get("authentication") - if authentication: - session_view["authentication"] = authentication + auth_context = context.get("auth") + if auth_context: + session_view["auth"] = auth_context return session_view or None + @staticmethod + def _drop_none_values(payload: Dict[str, Any]) -> Dict[str, Any]: + return {key: value for key, value in payload.items() if value is not None} + def get_transport_metrics(self) -> Dict[str, Any]: """Summarize negotiated capability flags for observability consumers.""" summary: Dict[str, Any] = { diff --git a/src/epacomp_tox/settings.py b/src/epacomp_tox/settings.py index e302fbc..3e3ce06 100644 --- a/src/epacomp_tox/settings.py +++ b/src/epacomp_tox/settings.py @@ -25,6 +25,30 @@ def is_development(self) -> bool: class SecuritySettings: bypass_auth: bool allowed_origins: List[str] + auth_issuer: Optional[str] + auth_audience: Optional[str] + auth_jwks_url: Optional[str] + auth_required_scopes: List[str] + resource_url: str + + @property + def auth_configured(self) -> bool: + return bool(self.auth_issuer and self.auth_audience and self.auth_jwks_url) + + @property + def auth_requested(self) -> bool: + return bool( + self.auth_issuer + or self.auth_audience + or self.auth_jwks_url + or self.auth_required_scopes + ) + + 
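+# Configuration notes (illustrative examples, not an exhaustive reference):
+#
+# MCP_AUTH_REQUIRED_SCOPES accepts comma- and/or space-separated values, so
+#
+#   MCP_AUTH_REQUIRED_SCOPES="tox:read tox:write"
+#   MCP_AUTH_REQUIRED_SCOPES="tox:read,tox:write"
+#   MCP_AUTH_REQUIRED_SCOPES="tox:read, tox:write"
+#
+# all parse to ["tox:read", "tox:write"] in Settings.security ("tox:write" is
+# a hypothetical scope used only for illustration; "tox:read" is the shipped
+# default).
+#
+# RateLimitSettings below feeds a token bucket: with the defaults of 120
+# requests per minute and a burst of 20, a subject may issue up to 20 tool
+# calls back-to-back, after which tokens refill at 120 / 60 = 2 calls per
+# second; a per-minute value of 0 disables the local limiter.
+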
+@dataclass(frozen=True)
+class RateLimitSettings:
+    requests_per_minute: int
+    burst: int
 
 
 @dataclass(frozen=True)
@@ -45,6 +69,7 @@ class TransportSettings:
 @dataclass(frozen=True)
 class ObservabilitySettings:
     metrics_enabled: bool = True
+    metrics_bypass_auth: bool = False
 
 
 class _RawSettings(BaseSettings):
@@ -61,6 +86,19 @@ class _RawSettings(BaseSettings):
     bypass_auth: bool = Field(default=False, alias="BYPASS_AUTH")
     cors_allow_origins: Optional[str] = Field(default=None, alias="CORS_ALLOW_ORIGINS")
+    mcp_auth_issuer: Optional[str] = Field(default=None, alias="MCP_AUTH_ISSUER")
+    mcp_auth_audience: Optional[str] = Field(default=None, alias="MCP_AUTH_AUDIENCE")
+    mcp_auth_jwks_url: Optional[str] = Field(default=None, alias="MCP_AUTH_JWKS_URL")
+    mcp_auth_required_scopes: Optional[str] = Field(
+        default=None, alias="MCP_AUTH_REQUIRED_SCOPES"
+    )
+    mcp_resource_url: str = Field(
+        default="http://localhost:8000/mcp", alias="MCP_RESOURCE_URL"
+    )
+    rate_limit_requests_per_minute: int = Field(
+        default=120, alias="MCP_RATE_LIMIT_REQUESTS_PER_MINUTE"
+    )
+    rate_limit_burst: int = Field(default=20, alias="MCP_RATE_LIMIT_BURST")
 
     ctx_api_key: Optional[str] = Field(default=None, alias="CTX_API_KEY")
     ctx_api_key_legacy: Optional[str] = Field(default=None, alias="EPA_COMPTOX_API_KEY")
 
@@ -80,6 +118,7 @@ class _RawSettings(BaseSettings):
     )
 
     metrics_enabled: bool = Field(default=True, alias="EPACOMP_MCP_METRICS_ENABLED")
+    metrics_bypass_auth: bool = Field(default=False, alias="MCP_METRICS_BYPASS_AUTH")
 
 
 class Settings(_RawSettings):
@@ -95,7 +134,21 @@ def security(self) -> SecuritySettings:
         origins = [origin.strip() for origin in raw.split(",") if origin.strip()]
         if not origins and self.app.is_development:
             origins = ["*"]
-        return SecuritySettings(bypass_auth=self.bypass_auth, allowed_origins=origins)
+        scopes = [
+            scope.strip()
+            for chunk in (self.mcp_auth_required_scopes or "").split(",")
+            for scope in chunk.split()
+            if scope.strip()
+        ]
+        return SecuritySettings(
+            bypass_auth=bool(self.bypass_auth),
+            allowed_origins=origins,
+            auth_issuer=self.mcp_auth_issuer,
+            auth_audience=self.mcp_auth_audience,
+            auth_jwks_url=self.mcp_auth_jwks_url,
+            auth_required_scopes=scopes,
+            resource_url=self.mcp_resource_url,
+        )
 
     @cached_property
     def ctx(self) -> ContextSettings:
@@ -126,9 +179,20 @@ def transport(self) -> TransportSettings:
             heartbeat_timeout=heartbeat, handshake_timeout=handshake
         )
 
+    @cached_property
+    def rate_limit(self) -> RateLimitSettings:
+        rpm = max(0, int(self.rate_limit_requests_per_minute))
+        burst = int(self.rate_limit_burst)
+        if burst <= 0:
+            burst = rpm
+        return RateLimitSettings(requests_per_minute=rpm, burst=max(0, burst))
+
     @cached_property
     def observability(self) -> ObservabilitySettings:
-        return ObservabilitySettings(metrics_enabled=bool(self.metrics_enabled))
+        return ObservabilitySettings(
+            metrics_enabled=bool(self.metrics_enabled),
+            metrics_bypass_auth=bool(self.metrics_bypass_auth),
+        )
 
 
 @lru_cache(maxsize=1)
diff --git a/src/epacomp_tox/tools/registry.py b/src/epacomp_tox/tools/registry.py
index db8de7e..fef4b7d 100644
--- a/src/epacomp_tox/tools/registry.py
+++ b/src/epacomp_tox/tools/registry.py
@@ -1,9 +1,11 @@
 from __future__ import annotations
 
 import logging
+from copy import deepcopy
 from dataclasses import dataclass
 from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
 
+from jsonschema import Draft202012Validator
 from pydantic import BaseModel
 
 from epacomp_tox.contracts import load_schema
@@ -21,6 +23,8 @@ class ToolRegistration:
output_schema: Optional[Dict[str, Any]] resource: BaseResource parameters_model: Type[BaseModel] + input_validator: Draft202012Validator + output_validator: Optional[Draft202012Validator] annotations: Dict[str, Any] response_schema_ref: Optional[Tuple[str, str]] @@ -46,6 +50,7 @@ def register_resource( input_schema = ( tool.get("inputSchema") or tool.get("parameters") or {"type": "object"} ) + input_schema = _normalise_input_schema(input_schema) output_schema = tool.get("outputSchema") response_schema_ref: Optional[Tuple[str, str]] = None @@ -67,6 +72,8 @@ def register_resource( "properties": {"data": output_schema}, "required": ["data"], } + if output_schema: + output_schema = _normalise_output_schema(output_schema) description = tool.get("description", "") parameters_model = create_model_from_schema(name, input_schema) @@ -75,6 +82,10 @@ def register_resource( combined_annotations.update(tool_annotations) if annotations: combined_annotations.update(annotations) + combined_annotations.setdefault("readOnlyHint", True) + combined_annotations.setdefault("destructiveHint", False) + combined_annotations.setdefault("openWorldHint", True) + combined_annotations.setdefault("idempotentHint", True) self._tools[name] = ToolRegistration( name=name, @@ -83,6 +94,10 @@ def register_resource( output_schema=output_schema, resource=resource, parameters_model=parameters_model, + input_validator=Draft202012Validator(input_schema), + output_validator=( + Draft202012Validator(output_schema) if output_schema else None + ), annotations=combined_annotations, response_schema_ref=response_schema_ref, ) @@ -117,3 +132,37 @@ def list_definitions(self) -> List[Dict[str, Any]]: def __iter__(self) -> Iterable[ToolRegistration]: return iter(self._tools.values()) + + +def _normalise_input_schema(schema: Dict[str, Any]) -> Dict[str, Any]: + normalised = deepcopy(schema or {"type": "object"}) + if normalised.get("type", "object") == "object": + normalised.setdefault("properties", {}) + normalised.setdefault("additionalProperties", False) + return normalised + + +def _normalise_output_schema(schema: Dict[str, Any]) -> Dict[str, Any]: + normalised = deepcopy(schema) + if normalised.get("type") != "object": + return normalised + properties = normalised.setdefault("properties", {}) + metadata_schema = {"type": "object", "additionalProperties": True} + properties.setdefault("metadata", metadata_schema) + properties.setdefault("mcpMetadata", metadata_schema) + properties.setdefault( + "data", + { + "type": [ + "object", + "array", + "string", + "number", + "integer", + "boolean", + "null", + ], + "additionalProperties": True, + }, + ) + return normalised diff --git a/src/epacomp_tox/transport/common.py b/src/epacomp_tox/transport/common.py index 6476791..8c41833 100644 --- a/src/epacomp_tox/transport/common.py +++ b/src/epacomp_tox/transport/common.py @@ -3,6 +3,7 @@ from __future__ import annotations SUPPORTED_PROTOCOL_VERSIONS = [ + "2025-11-25", "2025-06-18", "2025-03-26", "2024-11-05", diff --git a/src/epacomp_tox/transport/http.py b/src/epacomp_tox/transport/http.py index 575a5eb..5161cf1 100644 --- a/src/epacomp_tox/transport/http.py +++ b/src/epacomp_tox/transport/http.py @@ -14,6 +14,7 @@ PRIMARY_PROTOCOL_VERSION, SUPPORTED_PROTOCOL_VERSIONS, ) +from epacomp_tox.transport.security import AuthContext, AuthError, BearerAuthValidator logger = logging.getLogger(__name__) @@ -28,6 +29,7 @@ UNAUTHORIZED = -32000 FORBIDDEN = -32001 TOOL_EXECUTION_ERROR = -32002 +RATE_LIMITED = -32029 # HTTP transport capabilities advertised 
during initialize HTTP_SERVER_CAPABILITIES: Dict[str, Any] = { @@ -40,6 +42,14 @@ router = APIRouter() +class RateLimitExceeded(RuntimeError): + """Raised when a tool-call rate limit is exceeded.""" + + def __init__(self, retry_after_seconds: float): + super().__init__("Rate limit exceeded") + self.retry_after_seconds = retry_after_seconds + + @router.get("/mcp") async def mcp_probe(request: Request) -> Response: """ @@ -47,6 +57,9 @@ async def mcp_probe(request: Request) -> Response: Returns server info and supported protocol versions without requiring a JSON-RPC body. """ server = _get_mcp_server(request) + auth_response = _require_http_auth_or_response(request, request_id=None) + if auth_response is not None: + return auth_response return JSONResponse( status_code=status.HTTP_200_OK, content={ @@ -58,7 +71,7 @@ async def mcp_probe(request: Request) -> Response: ) -# Issue 3: OAuth discovery placeholder endpoints +# OAuth/OIDC discovery and MCP protected-resource metadata endpoints. @router.get("/.well-known/oauth-authorization-server") @router.get("/mcp/.well-known/oauth-authorization-server") @router.get("/.well-known/oauth-authorization-server/mcp") @@ -70,6 +83,17 @@ async def oauth_discovery_placeholder() -> Response: return JSONResponse(status_code=status.HTTP_200_OK, content={}) +@router.get("/.well-known/oauth-protected-resource") +@router.get("/.well-known/oauth-protected-resource/mcp") +@router.get("/mcp/.well-known/oauth-protected-resource") +async def oauth_protected_resource_metadata(request: Request) -> Response: + validator = _get_auth_validator(request) + return JSONResponse( + status_code=status.HTTP_200_OK, + content=validator.protected_resource_metadata(), + ) + + def _jsonrpc_success(result: Any, request_id: Optional[Any]) -> Dict[str, Any]: response: Dict[str, Any] = {"jsonrpc": JSONRPC_VERSION, "result": result} if request_id is not None: @@ -106,6 +130,54 @@ def _get_mcp_server(request: Request) -> MCPServer: return server +def _get_auth_validator(request: Request) -> BearerAuthValidator: + validator = getattr(request.app.state, "auth_validator", None) + if validator is None: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Authentication policy unavailable", + ) + return validator + + +def _remote_addr(request: Request) -> str: + client = getattr(request, "client", None) + if client and client.host: + return client.host + return "unknown" + + +def _require_http_auth_or_response( + request: Request, *, request_id: Optional[Any] +) -> Optional[Response]: + validator = _get_auth_validator(request) + try: + auth_context = validator.authenticate_header( + request.headers.get("authorization"), remote_addr=_remote_addr(request) + ) + except AuthError as exc: + return _auth_error_response(validator, exc, request_id=request_id) + request.state.auth_context = auth_context + return None + + +def _auth_error_response( + validator: BearerAuthValidator, exc: AuthError, *, request_id: Optional[Any] +) -> Response: + code = UNAUTHORIZED if exc.status_code == 401 else FORBIDDEN + payload = _jsonrpc_error( + code=code, + message=exc.description, + request_id=request_id, + data={"error": exc.error}, + ) + return JSONResponse( + status_code=exc.status_code, + content=payload, + headers={"WWW-Authenticate": validator.www_authenticate_header(exc)}, + ) + + def _normalize_tool_parameters(params: Dict[str, Any]) -> Dict[str, Any]: """Handle parameter shapes used by various MCP clients.""" if not isinstance(params, dict): @@ -157,8 +229,11 @@ def 
_build_request_context(request: Request) -> Dict[str, Any]: }, "clientCapabilities": {}, "negotiatedCapabilities": {}, - "transport": {"type": "http"}, + "transport": {"type": "http", "remoteAddress": _remote_addr(request)}, } + auth_context: Optional[AuthContext] = getattr(request.state, "auth_context", None) + if auth_context is not None: + context["auth"] = auth_context.safe_summary() correlation_id = getattr(request.state, "correlation_id", None) if correlation_id: context["correlationId"] = correlation_id @@ -225,6 +300,10 @@ async def mcp_endpoint(request: Request) -> Response: params = payload.get("params") or {} jsonrpc_version = payload.get("jsonrpc") + auth_response = _require_http_auth_or_response(request, request_id=request_id) + if auth_response is not None: + return auth_response + # Compatibility: respond to JSON-RPC initialize with a proper JSON-RPC envelope # while still carrying the "connected" shape Codex/Gemini expect. # if isinstance(method, str) and method.lower() in {"initialize", "mcp/initialize"}: @@ -288,13 +367,26 @@ async def mcp_endpoint(request: Request) -> Response: code=FORBIDDEN, message=str(exc), request_id=request_id ) return JSONResponse(status_code=status.HTTP_403_FORBIDDEN, content=content) + except RateLimitExceeded as exc: + content = _jsonrpc_error( + code=RATE_LIMITED, + message="Rate limit exceeded", + request_id=request_id, + data={"retryAfterSeconds": round(exc.retry_after_seconds, 3)}, + ) + return JSONResponse( + status_code=status.HTTP_429_TOO_MANY_REQUESTS, + content=content, + headers={"Retry-After": str(max(1, int(exc.retry_after_seconds)))}, + ) except Exception as exc: # pylint: disable=broad-except logger.exception("Unhandled MCP error") + correlation_id = getattr(request.state, "correlation_id", None) content = _jsonrpc_error( code=INTERNAL_ERROR, message="Internal server error", request_id=request_id, - data=str(exc), + data={"correlationId": correlation_id} if correlation_id else None, ) return JSONResponse( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, content=content @@ -366,7 +458,13 @@ def _handle_initialize(server: MCPServer, params: Dict[str, Any]) -> Dict[str, A logger.info( "HTTP MCP initialize with capabilities: %s", params.get("capabilities", {}) ) - protocol_version = params.get("protocolVersion") or PRIMARY_PROTOCOL_VERSION + requested_version = params.get("protocolVersion") + if requested_version and requested_version not in SUPPORTED_PROTOCOL_VERSIONS: + raise ValueError( + "Unsupported protocol version. 
Supported versions: "
+            + ", ".join(SUPPORTED_PROTOCOL_VERSIONS)
+        )
+    protocol_version = requested_version or PRIMARY_PROTOCOL_VERSION
 
     # session_id = params.get("sessionId") or str(uuid4())  # Removed as per instructions
     # Return ONLY standard MCP fields
@@ -1064,6 +1162,7 @@ async def _handle_tools_call(
     tool_params = _normalize_tool_parameters(params)
 
     context = _build_request_context(request)
+    _enforce_rate_limit(request, context=context)
 
     try:
         result = server.call_tool(tool_name, tool_params, context=context)
@@ -1122,3 +1221,23 @@ async def _handle_tools_call(
         raise ValueError("Tool result could not be serialized.") from exc
 
     return result
+
+
+def _enforce_rate_limit(request: Request, *, context: Dict[str, Any]) -> None:
+    limiter = getattr(request.app.state, "rate_limiter", None)
+    if limiter is None or not getattr(limiter, "enabled", False):
+        return
+    auth_context: Optional[AuthContext] = getattr(request.state, "auth_context", None)
+    fallback_key = f"ip:{_remote_addr(request)}"
+    key = (
+        auth_context.rate_limit_key(fallback_key)
+        if auth_context is not None
+        else fallback_key
+    )
+    decision = limiter.check(key)
+    if not decision.allowed:
+        context["rateLimit"] = {
+            "limited": True,
+            "retryAfterSeconds": round(decision.retry_after_seconds, 3),
+        }
+        raise RateLimitExceeded(decision.retry_after_seconds)
diff --git a/src/epacomp_tox/transport/security.py b/src/epacomp_tox/transport/security.py
new file mode 100644
index 0000000..a06a968
--- /dev/null
+++ b/src/epacomp_tox/transport/security.py
@@ -0,0 +1,318 @@
+from __future__ import annotations
+
+import hashlib
+import time
+from dataclasses import dataclass
+from threading import Lock
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
+
+from epacomp_tox.settings import AppSettings, RateLimitSettings, SecuritySettings
+
+try:  # pragma: no cover - exercised when JWT validation is configured
+    import jwt
+    from jwt import PyJWKClient
+except ImportError:  # pragma: no cover - optional until auth is configured
+    jwt = None  # type: ignore[assignment]
+    PyJWKClient = None  # type: ignore[assignment]
+
+
+class AuthError(RuntimeError):
+    """Authentication or authorization failure."""
+
+    def __init__(
+        self,
+        *,
+        status_code: int,
+        error: str,
+        description: str,
+        required_scopes: Optional[Sequence[str]] = None,
+    ) -> None:
+        super().__init__(description)
+        self.status_code = status_code
+        self.error = error
+        self.description = description
+        self.required_scopes = list(required_scopes or [])
+
+
+@dataclass(frozen=True)
+class AuthContext:
+    """Safe authentication summary for sessions, audit, and metadata."""
+
+    subject_hash: Optional[str]
+    issuer: Optional[str]
+    audience: Tuple[str, ...]
+    scopes: Tuple[str, ...]
+    expires_at: Optional[int]
+    token_hash: Optional[str]
+    bypassed: bool = False
+
+    def safe_summary(self) -> Dict[str, Any]:
+        summary: Dict[str, Any] = {
+            "authenticated": self.subject_hash is not None and not self.bypassed,
+            "scopes": list(self.scopes),
+        }
+        if self.subject_hash:
+            summary["subjectHash"] = self.subject_hash
+        if self.issuer:
+            summary["issuer"] = self.issuer
+        if self.audience:
+            summary["audience"] = list(self.audience)
+        if self.expires_at is not None:
+            summary["expiresAt"] = self.expires_at
+        if self.token_hash:
+            summary["tokenHash"] = self.token_hash
+        if self.bypassed:
+            summary["bypassed"] = True
+        return summary
+
+    def rate_limit_key(self, fallback: str) -> str:
+        if self.subject_hash:
+            return f"sub:{self.subject_hash}"
+        if self.token_hash:
+            return f"tok:{self.token_hash}"
+        return fallback
+
+
+@dataclass(frozen=True)
+class RateLimitDecision:
+    allowed: bool
+    retry_after_seconds: float = 0.0
+    remaining: int = 0
+
+
+class InMemoryRateLimiter:
+    """Simple process-local token-bucket limiter for MCP tool calls."""
+
+    def __init__(self, settings: RateLimitSettings):
+        self.requests_per_minute = settings.requests_per_minute
+        self.burst = max(1, settings.burst)
+        self._buckets: Dict[str, Tuple[float, float]] = {}
+        self._lock = Lock()
+
+    @property
+    def enabled(self) -> bool:
+        return self.requests_per_minute > 0
+
+    def check(self, key: str) -> RateLimitDecision:
+        if not self.enabled:
+            return RateLimitDecision(allowed=True, remaining=self.burst)
+
+        now = time.monotonic()
+        refill_per_second = self.requests_per_minute / 60.0
+        with self._lock:
+            tokens, last_seen = self._buckets.get(key, (float(self.burst), now))
+            elapsed = max(0.0, now - last_seen)
+            tokens = min(float(self.burst), tokens + elapsed * refill_per_second)
+            if tokens < 1.0:
+                retry_after = (1.0 - tokens) / refill_per_second
+                self._buckets[key] = (tokens, now)
+                return RateLimitDecision(
+                    allowed=False,
+                    retry_after_seconds=retry_after,
+                    remaining=0,
+                )
+            tokens -= 1.0
+            self._buckets[key] = (tokens, now)
+            return RateLimitDecision(
+                allowed=True,
+                remaining=max(0, int(tokens)),
+            )
+
+
+class BearerAuthValidator:
+    """Validate MCP bearer tokens against configured OIDC/JWKS settings."""
+
+    def __init__(
+        self,
+        *,
+        security: SecuritySettings,
+        app: AppSettings,
+        bypass_auth: Optional[bool] = None,
+    ) -> None:
+        self.security = security
+        self.app = app
+        self.bypass_auth = security.bypass_auth if bypass_auth is None else bypass_auth
+        self.enabled = self._resolve_enabled()
+        self.required_scopes = tuple(security.auth_required_scopes)
+        self._jwks_client = None
+        if self.enabled:
+            self._validate_configuration()
+            if PyJWKClient is None:
+                raise RuntimeError(
+                    "PyJWT[crypto] is required when MCP bearer authentication is enabled."
+                )
+            self._jwks_client = PyJWKClient(security.auth_jwks_url)  # type: ignore[arg-type]
+
+    def _resolve_enabled(self) -> bool:
+        if self.bypass_auth:
+            return False
+        if self.security.auth_configured:
+            return True
+        if self.security.auth_requested:
+            return True
+        return not self.app.is_development
+
+    def _validate_configuration(self) -> None:
+        missing = []
+        if not self.security.auth_issuer:
+            missing.append("MCP_AUTH_ISSUER")
+        if not self.security.auth_audience:
+            missing.append("MCP_AUTH_AUDIENCE")
+        if not self.security.auth_jwks_url:
+            missing.append("MCP_AUTH_JWKS_URL")
+        if missing:
+            raise RuntimeError(
+                "MCP auth is enabled but incomplete; set "
+                + ", ".join(missing)
+                + " or use BYPASS_AUTH=1 for local development."
+            )
+
+    def authenticate_header(
+        self, authorization: Optional[str], *, remote_addr: Optional[str] = None
+    ) -> AuthContext:
+        if not self.enabled:
+            return AuthContext(
+                subject_hash=None,
+                issuer=None,
+                audience=(),
+                scopes=(),
+                expires_at=None,
+                token_hash=None,
+                bypassed=self.bypass_auth,
+            )
+
+        scheme, token = _split_authorization(authorization)
+        if scheme.lower() != "bearer" or not token:
+            raise AuthError(
+                status_code=401,
+                error="invalid_token",
+                description="Bearer token is required.",
+                required_scopes=self.required_scopes,
+            )
+
+        claims = self._decode_jwt(token)
+        scopes = tuple(sorted(_extract_scopes(claims)))
+        missing_scopes = sorted(set(self.required_scopes) - set(scopes))
+        if missing_scopes:
+            raise AuthError(
+                status_code=403,
+                error="insufficient_scope",
+                description="Bearer token is missing required MCP scope.",
+                required_scopes=self.required_scopes,
+            )
+
+        subject = str(claims.get("sub") or "")
+        audience = claims.get("aud")
+        return AuthContext(
+            subject_hash=_hash_value(subject) if subject else None,
+            issuer=claims.get("iss"),
+            audience=tuple(str(item) for item in _as_list(audience)),
+            scopes=scopes,
+            expires_at=(
+                claims.get("exp") if isinstance(claims.get("exp"), int) else None
+            ),
+            token_hash=_hash_value(token),
+            bypassed=False,
+        )
+
+    def _decode_jwt(self, token: str) -> Dict[str, Any]:
+        if jwt is None or self._jwks_client is None:
+            raise AuthError(
+                status_code=401,
+                error="invalid_token",
+                description="JWT validation is not available.",
+                required_scopes=self.required_scopes,
+            )
+        try:
+            signing_key = self._jwks_client.get_signing_key_from_jwt(token).key
+            return jwt.decode(
+                token,
+                signing_key,
+                algorithms=[
+                    "RS256",
+                    "RS384",
+                    "RS512",
+                    "ES256",
+                    "ES384",
+                    "ES512",
+                ],
+                audience=_split_config_values(self.security.auth_audience),
+                issuer=self.security.auth_issuer,
+            )
+        except Exception as exc:
+            raise AuthError(
+                status_code=401,
+                error="invalid_token",
+                description="Bearer token is invalid.",
+                required_scopes=self.required_scopes,
+            ) from exc
+
+    def protected_resource_metadata(self) -> Dict[str, Any]:
+        metadata: Dict[str, Any] = {
+            "resource": self.security.resource_url,
+            "bearer_methods_supported": ["header"],
+            "scopes_supported": list(self.required_scopes),
+        }
+        if self.security.auth_issuer:
+            metadata["authorization_servers"] = [self.security.auth_issuer]
+        if self.security.auth_jwks_url:
+            metadata["jwks_uri"] = self.security.auth_jwks_url
+        return metadata
+
+    def www_authenticate_header(self, error: Optional[AuthError] = None) -> str:
+        parts = [
+            "Bearer",
+            f'resource="{self.security.resource_url}"',
+            f'resource_metadata="{self.security.resource_url.rstrip("/")}/.well-known/oauth-protected-resource"',
+        ]
+        scopes = error.required_scopes if error else self.required_scopes
+        if scopes:
+            parts.append(f'scope="{" ".join(scopes)}"')
+        if error is not None:
+            parts.append(f'error="{error.error}"')
+        return ", ".join(parts)
+
+
+def _split_authorization(authorization: Optional[str]) -> Tuple[str, str]:
+    if not authorization:
+        return "", ""
+    parts = authorization.strip().split(None, 1)
+    if len(parts) != 2:
+        return authorization.strip(), ""
+    return parts[0], parts[1].strip()
+
+
+def _split_config_values(value: Optional[str]) -> Any:
+    values = [item.strip() for item in (value or "").split(",") if item.strip()]
+    if not values:
+        return value
+    if len(values) == 1:
+        return values[0]
+    return values
+
+
+def _extract_scopes(claims: Dict[str, Any]) -> List[str]:
+    scopes: List[str] = []
+    scope_value = claims.get("scope")
+    if isinstance(scope_value, str):
+        scopes.extend(scope_value.split())
+    scp_value = claims.get("scp")
+    if isinstance(scp_value, str):
+        scopes.extend(scp_value.split())
+    elif isinstance(scp_value, Iterable):
+        scopes.extend(str(item) for item in scp_value)
+    return [scope for scope in scopes if scope]
+
+
+def _as_list(value: Any) -> List[Any]:
+    if value is None:
+        return []
+    if isinstance(value, list):
+        return value
+    if isinstance(value, tuple):
+        return list(value)
+    return [value]
+
+
+def _hash_value(value: str) -> str:
+    return hashlib.sha256(value.encode("utf-8")).hexdigest()[:24]
diff --git a/src/epacomp_tox/transport/websocket.py b/src/epacomp_tox/transport/websocket.py
index bb38c33..d19ce95 100644
--- a/src/epacomp_tox/transport/websocket.py
+++ b/src/epacomp_tox/transport/websocket.py
@@ -9,8 +9,16 @@
 from functools import partial
 from typing import Any, Dict, List, Optional
 
-from fastapi import FastAPI, HTTPException, Response, WebSocket, WebSocketDisconnect
+from fastapi import (
+    FastAPI,
+    HTTPException,
+    Request,
+    Response,
+    WebSocket,
+    WebSocketDisconnect,
+)
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
 from prometheus_client import (
     CONTENT_TYPE_LATEST,
     CollectorRegistry,
@@ -26,6 +34,12 @@
     SUPPORTED_PROTOCOL_VERSIONS,
 )
 from epacomp_tox.transport.http import router as http_router
+from epacomp_tox.transport.security import (
+    AuthContext,
+    AuthError,
+    BearerAuthValidator,
+    InMemoryRateLimiter,
+)
 from epacomp_tox.validators import to_serializable
 
 logger = logging.getLogger(__name__)
@@ -38,6 +52,7 @@
 
 CANCELLED_ERROR_CODE = -32800
 CAPABILITY_NOT_NEGOTIATED_ERROR_CODE = -32004
+RATE_LIMITED_ERROR_CODE = -32029
 
 
 class AuditMiddleware:
@@ -182,9 +197,18 @@ def __init__(
 class MCPWebSocketSession:
     """Manage a single MCP WebSocket session and JSON-RPC message loop."""
 
-    def __init__(self, websocket: WebSocket, server: MCPServer):
+    def __init__(
+        self,
+        websocket: WebSocket,
+        server: MCPServer,
+        *,
+        auth_context: AuthContext,
+        rate_limiter: Optional[InMemoryRateLimiter] = None,
+    ):
         self.websocket = websocket
         self.server = server
+        self.auth_context = auth_context
+        self.rate_limiter = rate_limiter
         self.initialized = False
         self.protocol_version: Optional[str] = None
         self.session_id = str(uuid.uuid4())
@@ -197,7 +221,6 @@ def __init__(self, websocket: WebSocket, server: MCPServer):
             DEFAULT_SERVER_CAPABILITIES
         )
         self.client_info: Dict[str, Any] = {}
-        self.authentication: Dict[str, Any] = {}
         self._close_reason = "disconnect"
         self.active_requests: Dict[str, Dict[str, Any]] = {}
         self._streams_enabled = True
@@ -341,7 +364,6 @@ async def _handle_initialize(self, message_id: Any, params: Dict[str, Any]) -> N
             self.negotiated_capabilities.get("tools", {}).get("cancel", False)
         )
         self.client_info = params.get("clientInfo") or {}
-        self.authentication = params.get("authentication") or {}
         transport_settings = self.server.get_transport_options()
         heartbeat_override = params.get("heartbeatIntervalMs")
         if isinstance(heartbeat_override, (int, float)) and heartbeat_override > 0:
@@ -354,7 +376,7 @@ async def _handle_initialize(self, message_id: Any, params: Dict[str, Any]) -> N
             self.session_id,
             client_capabilities=self.client_capabilities,
             client_info=self.client_info,
-            authentication=self.authentication,
+            auth=self.auth_context.safe_summary(),
             negotiated_capabilities=self.negotiated_capabilities,
         )
         server_info = self.server.get_server_info()
@@ -549,6 +571,19 @@ async def _handle_tools_call(self, message_id: Any, params: Dict[str, Any]) -> N
                 message_id, code=-32602, message="Tool arguments must be an object"
             )
             return
+        if self.rate_limiter is not None and self.rate_limiter.enabled:
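+            # Key the limiter on the authenticated subject where one exists,
+            # falling back to the per-session id so anonymous or bypassed
+            # sessions are still bounded individually.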
+            fallback_key = f"ws:{self.session_id}"
+            decision = self.rate_limiter.check(
+                self.auth_context.rate_limit_key(fallback_key)
+            )
+            if not decision.allowed:
+                await self._send_error(
+                    message_id,
+                    code=RATE_LIMITED_ERROR_CODE,
+                    message="Rate limit exceeded",
+                    data={"retryAfterSeconds": round(decision.retry_after_seconds, 3)},
+                )
+                return
         request_id = params.get("requestId") or str(uuid.uuid4())
         timeout_ms = params.get("timeoutMs")
         timeout_seconds: Optional[float] = None
@@ -850,7 +885,7 @@ async def _run_tool_call(
             raise ToolExecutionError(
                 code=-32603,
                 message="Tool execution failed",
-                data={"detail": str(exc)},
+                data={"reason": "internal_error"},
             ) from exc
 
     async def _emit_event(self, method: str, params: Dict[str, Any]) -> None:
@@ -869,7 +904,7 @@ def _session_context(self) -> Dict[str, Any]:
             "clientInfo": deepcopy(self.client_info),
             "clientCapabilities": deepcopy(self.client_capabilities),
             "negotiatedCapabilities": deepcopy(self.negotiated_capabilities),
-            "authentication": deepcopy(self.authentication),
+            "auth": deepcopy(self.auth_context.safe_summary()),
         }
 
 
@@ -921,10 +956,43 @@ def _json_default(value: Any) -> Any:
     return converted
 
 
-def create_app(server: Optional[MCPServer] = None) -> FastAPI:
+def _remote_addr_from_request(request: Request) -> str:
+    if request.client and request.client.host:
+        return request.client.host
+    return "unknown"
+
+
+def _remote_addr_from_websocket(websocket: WebSocket) -> str:
+    if websocket.client and websocket.client.host:
+        return websocket.client.host
+    return "unknown"
+
+
+def _metrics_auth_response(
+    validator: BearerAuthValidator, exc: AuthError
+) -> JSONResponse:
+    return JSONResponse(
+        status_code=exc.status_code,
+        content={"detail": exc.description, "error": exc.error},
+        headers={"WWW-Authenticate": validator.www_authenticate_header(exc)},
+    )
+
+
+def create_app(
+    server: Optional[MCPServer] = None,
+    *,
+    auth_bypass: Optional[bool] = None,
+    auth_validator: Optional[BearerAuthValidator] = None,
+) -> FastAPI:
     """Create a FastAPI application exposing the MCP WebSocket transport."""
     app = FastAPI(title="EPA CompTox MCP Server")
+    app.state.auth_validator = auth_validator or BearerAuthValidator(
+        security=settings.security,
+        app=settings.app,
+        bypass_auth=auth_bypass,
+    )
+    app.state.rate_limiter = InMemoryRateLimiter(settings.rate_limit)
 
     allowed_origins = settings.security.allowed_origins
     if not allowed_origins and settings.app.is_development:
@@ -977,7 +1045,18 @@ async def readyz() -> Dict[str, Any]:
         return {"status": "ok", "ctx": health}
 
     @app.get("/metrics", tags=["metrics"])
-    async def metrics() -> Response:
+    async def metrics(request: Request) -> Response:
+        if not settings.observability.metrics_enabled:
+            raise HTTPException(status_code=404, detail="Metrics disabled")
+        if not settings.observability.metrics_bypass_auth:
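+            # /metrics reveals operational detail, so it stays behind the
+            # same bearer check unless a trusted gateway explicitly bypasses
+            # it via the metrics-bypass setting.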
+            validator: BearerAuthValidator = app.state.auth_validator
+            try:
+                validator.authenticate_header(
+                    request.headers.get("authorization"),
+                    remote_addr=_remote_addr_from_request(request),
+                )
+            except AuthError as exc:
+                return _metrics_auth_response(validator, exc)
         server_instance = getattr(app.state, "mcp_server", None)
         payload = _render_prometheus_metrics(server_instance)
         return Response(content=payload, media_type=CONTENT_TYPE_LATEST)
@@ -1009,7 +1088,35 @@ async def websocket_endpoint(websocket: WebSocket) -> None:
             await websocket.close()
             return
 
-        session = MCPWebSocketSession(websocket=websocket, server=server_instance)
+        validator: BearerAuthValidator = app.state.auth_validator
+        try:
+            auth_context = validator.authenticate_header(
+                websocket.headers.get("authorization"),
+                remote_addr=_remote_addr_from_websocket(websocket),
+            )
+        except AuthError as exc:
+            await websocket.accept()
+            await websocket.send_text(
+                json.dumps(
+                    {
+                        "jsonrpc": "2.0",
+                        "error": {
+                            "code": (-32000 if exc.status_code == 401 else -32001),
+                            "message": exc.description,
+                            "data": {"error": exc.error},
+                        },
+                    }
+                )
+            )
+            await websocket.close(code=4401 if exc.status_code == 401 else 4403)
+            return
+
+        session = MCPWebSocketSession(
+            websocket=websocket,
+            server=server_instance,
+            auth_context=auth_context,
+            rate_limiter=app.state.rate_limiter,
+        )
         await session.run()
 
     app.include_router(http_router)
diff --git a/tests/test_audit_hardening.py b/tests/test_audit_hardening.py
index 2fa663c..f22eb73 100644
--- a/tests/test_audit_hardening.py
+++ b/tests/test_audit_hardening.py
@@ -139,3 +139,36 @@ def test_bundle_store_verify_chain_detects_missing_file(tmp_path: Path):
     valid, errors = store.verify_chain()
     assert valid is False
     assert any("bundle file missing" in e for e in errors)
+
+
+def test_bundle_store_rejects_unsafe_workflow_run_id(tmp_path: Path) -> None:
+    store = AuditBundleStore(tmp_path)
+
+    with pytest.raises(ValueError):
+        store.save({"workflowRunId": "../escape", "data": "a"})
+
+
+def test_bundle_store_rejects_attachment_traversal(tmp_path: Path) -> None:
+    store = AuditBundleStore(tmp_path)
+
+    with pytest.raises(ValueError):
+        store.save(
+            {"workflowRunId": "run-1", "data": "a"},
+            attachments={"../escape.txt": "nope"},
+        )
+    assert not (tmp_path.parent / "escape.txt").exists()
+
+
+def test_bundle_store_allows_nested_safe_attachments(tmp_path: Path) -> None:
+    store = AuditBundleStore(tmp_path)
+
+    metadata = store.save(
+        {"workflowRunId": "run-1", "data": "a"},
+        attachments={"interop/aop_linkage_summary.json": "{}"},
+    )
+
+    attachment = (
+        tmp_path / "run-1" / "attachments" / "interop" / "aop_linkage_summary.json"
+    )
+    assert attachment.exists()
+    assert metadata["attachments"][0]["name"] == "interop/aop_linkage_summary.json"
diff --git a/tests/test_http_transport.py b/tests/test_http_transport.py
index 177f9a5..b92f096 100644
--- a/tests/test_http_transport.py
+++ b/tests/test_http_transport.py
@@ -196,6 +196,9 @@ def test_http_transport_initialize_and_list_and_call():
     assert any(tool["name"] == "echo" for tool in tools)
     first_tool = next(tool for tool in tools if tool["name"] == "echo")
     assert first_tool["annotations"]["resource"] == "echo"
+    assert first_tool["annotations"]["readOnlyHint"] is True
+    assert first_tool["annotations"]["destructiveHint"] is False
+    assert first_tool["annotations"]["openWorldHint"] is True
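+    # These MCP annotation hints let clients treat the tool as a read-only,
+    # non-destructive call against an open-world upstream API.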
in {".json", ".md"} + } + + +def test_packaged_runtime_assets_match_source_copies() -> None: + source_roots = { + "contracts/schemas": ROOT / "docs" / "contracts" / "schemas", + "schemas": ROOT / "schemas", + "metadata/model_cards": ROOT / "metadata" / "model_cards", + "metadata/applicability_domains": ROOT / "metadata" / "applicability_domains", + } + package_roots = { + "contracts/schemas": PACKAGE_DATA / "contracts" / "schemas", + "schemas": PACKAGE_DATA / "schemas", + "metadata/model_cards": PACKAGE_DATA / "metadata" / "model_cards", + "metadata/applicability_domains": PACKAGE_DATA + / "metadata" + / "applicability_domains", + } + + for label, source_root in source_roots.items(): + assert _relative_files(package_roots[label]) == _relative_files(source_root) + + +def test_wheel_contains_runtime_assets_and_instantiates_server(tmp_path: Path) -> None: + wheel_dir = tmp_path / "wheelhouse" + wheel_dir.mkdir() + build = subprocess.run( + [ + sys.executable, + "-m", + "pip", + "wheel", + ".", + "--no-deps", + "--wheel-dir", + str(wheel_dir), + ], + cwd=ROOT, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + timeout=120, + ) + assert build.returncode == 0, build.stdout + wheel = next(wheel_dir.glob("*.whl")) + + with zipfile.ZipFile(wheel) as archive: + names = set(archive.namelist()) + assert ( + "epacomp_tox/data/contracts/schemas/metadata/model_cards.response.schema.json" + in names + ) + assert "epacomp_tox/data/schemas/comptoxEvidencePack.v1.json" in names + assert "epacomp_tox/data/metadata/model_cards/genra_read_across.json" in names + + venv_dir = tmp_path / "venv" + venv.EnvBuilder(with_pip=True, system_site_packages=True).create(venv_dir) + bin_dir = "Scripts" if os.name == "nt" else "bin" + pip = venv_dir / bin_dir / "pip" + python = venv_dir / bin_dir / "python" + install = subprocess.run( + [str(pip), "install", "--no-deps", str(wheel)], + cwd=tmp_path, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + timeout=120, + ) + assert install.returncode == 0, install.stdout + + smoke = subprocess.run( + [ + str(python), + "-c", + ( + "from epacomp_tox.server import MCPServer; " + "s=MCPServer(api_key='dummy-key'); " + "names={t['name'] for t in s.get_tools()}; " + "assert 'metadata_get_model_card' in names; " + "assert 'get_contract_manifest' in names; " + "assert s.call_tool('metadata_get_model_card', {}, context={})" + "['structuredContent']['modelCards']; " + "assert s.call_tool('metadata_list_applicability_domain', {}, context={})" + "['structuredContent']['applicabilityDomains']; " + "assert s.call_tool('get_contract_manifest', {}, context={})" + "['structuredContent']['responseSchemas']" + ), + ], + cwd=tmp_path, + env={key: value for key, value in os.environ.items() if key != "PYTHONPATH"}, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + timeout=120, + ) + assert smoke.returncode == 0, smoke.stdout diff --git a/tests/test_security_hardening.py b/tests/test_security_hardening.py new file mode 100644 index 0000000..1cbd718 --- /dev/null +++ b/tests/test_security_hardening.py @@ -0,0 +1,246 @@ +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +import pytest +from fastapi.testclient import TestClient +from starlette.websockets import WebSocketDisconnect + +from epacomp_tox.resources.base import BaseResource +from epacomp_tox.server import MCPServer +from epacomp_tox.settings import RateLimitSettings +from epacomp_tox.transport.security import AuthContext, AuthError, InMemoryRateLimiter 
+
+
+class EchoResource(BaseResource):
+    def __init__(self, api_key: str = "dummy"):
+        super().__init__(api_key)
+
+    @property
+    def name(self) -> str:
+        return "echo"
+
+    @property
+    def description(self) -> str:
+        return "Echo test resource"
+
+    def get_tools(self) -> List[Dict[str, Any]]:
+        return [
+            {
+                "name": "echo",
+                "description": "Echo back provided text",
+                "inputSchema": {
+                    "type": "object",
+                    "properties": {"text": {"type": "string"}},
+                    "required": ["text"],
+                },
+            }
+        ]
+
+    def execute_tool(self, tool_name: str, parameters: Dict[str, Any]) -> Any:
+        if tool_name != "echo":
+            raise ValueError("Unknown tool")
+        self._last_metadata = {"resource": self.name}
+        return {"echo": parameters["text"]}
+
+
+class CrashingResource(EchoResource):
+    def execute_tool(self, tool_name: str, parameters: Dict[str, Any]) -> Any:
+        raise RuntimeError("secret-token-value")
+
+
+class EchoServer(MCPServer):
+    def __init__(self, resource: Optional[BaseResource] = None):
+        self._resource = resource or EchoResource()
+        super().__init__(api_key="dummy-key", validate_health=False)
+
+    def _initialize_resources(self) -> Dict[str, BaseResource]:
+        return {"echo": self._resource}
+
+
+class FakeAuthValidator:
+    enabled = True
+
+    def authenticate_header(
+        self, authorization: Optional[str], *, remote_addr: Optional[str] = None
+    ) -> AuthContext:
+        if authorization == "Bearer valid":
+            return AuthContext(
+                subject_hash="subject-hash",
+                issuer="https://issuer.example",
+                audience=("mcp://test",),
+                scopes=("tox:read",),
+                expires_at=1893456000,
+                token_hash="token-hash",
+            )
+        if authorization == "Bearer noscope":
+            raise AuthError(
+                status_code=403,
+                error="insufficient_scope",
+                description="Bearer token is missing required MCP scope.",
+                required_scopes=["tox:read"],
+            )
+        raise AuthError(
+            status_code=401,
+            error="invalid_token",
+            description="Bearer token is required.",
+            required_scopes=["tox:read"],
+        )
+
+    def protected_resource_metadata(self) -> Dict[str, Any]:
+        return {
+            "resource": "https://mcp.example/mcp",
+            "authorization_servers": ["https://issuer.example"],
+            "scopes_supported": ["tox:read"],
+            "bearer_methods_supported": ["header"],
+        }
+
+    def www_authenticate_header(self, error: Optional[AuthError] = None) -> str:
+        suffix = f', error="{error.error}"' if error else ""
+        return (
+            'Bearer resource="https://mcp.example/mcp", '
+            'scope="tox:read"'
+            f"{suffix}"
+        )
+
+
+def _rpc_call(tool_params: Dict[str, Any]) -> Dict[str, Any]:
+    return {
+        "jsonrpc": "2.0",
+        "id": 1,
+        "method": "tools/call",
+        "params": {"name": "echo", "parameters": tool_params},
+    }
+
+
+def test_http_rejects_missing_bearer_token_with_challenge() -> None:
+    app = create_app(server=EchoServer(), auth_validator=FakeAuthValidator())
+    with TestClient(app) as client:
+        response = client.post("/mcp", json=_rpc_call({"text": "hello"}))
+
+    assert response.status_code == 401
+    assert response.json()["error"]["code"] == -32000
+    assert "WWW-Authenticate" in response.headers
+    assert "Bearer" in response.headers["WWW-Authenticate"]
+
+
+def test_http_rejects_valid_token_missing_scope() -> None:
+    app = create_app(server=EchoServer(), auth_validator=FakeAuthValidator())
+    with TestClient(app) as client:
+        response = client.post(
+            "/mcp",
+            json=_rpc_call({"text": "hello"}),
+            headers={"authorization": "Bearer noscope"},
+        )
+
+    assert response.status_code == 403
+    assert response.json()["error"]["code"] == -32001
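+
+
+# Regression guard: raw Authorization material must never round-trip into a
+# response body; only the hashed, redacted summary may surface.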
+def test_http_does_not_echo_raw_authentication_metadata() -> None:
+    app = create_app(server=EchoServer(), auth_validator=FakeAuthValidator())
+    with TestClient(app) as client:
+        response = client.post(
+            "/mcp",
+            json=_rpc_call({"text": "hello"}),
+            headers={"authorization": "Bearer valid"},
+        )
+
+    assert response.status_code == 200
+    body_text = response.text
+    assert "Bearer valid" not in body_text
+    structured = response.json()["result"]["structuredContent"]
+    assert structured["metadata"]["session"]["auth"]["subjectHash"] == "subject-hash"
+
+
+def test_protected_resource_metadata_endpoint_is_public() -> None:
+    app = create_app(server=EchoServer(), auth_validator=FakeAuthValidator())
+    with TestClient(app) as client:
+        response = client.get("/.well-known/oauth-protected-resource")
+
+    assert response.status_code == 200
+    assert response.json()["resource"] == "https://mcp.example/mcp"
+
+
+def test_invalid_extra_tool_parameter_fails_before_execution() -> None:
+    app = create_app(server=EchoServer(), auth_validator=FakeAuthValidator())
+    with TestClient(app) as client:
+        response = client.post(
+            "/mcp",
+            json=_rpc_call({"text": "hello", "extra": "nope"}),
+            headers={"authorization": "Bearer valid"},
+        )
+
+    assert response.status_code == 400
+    assert "Additional properties" in response.json()["error"]["message"]
+
+
+def test_tool_call_rate_limit_returns_jsonrpc_error() -> None:
+    app = create_app(server=EchoServer(), auth_validator=FakeAuthValidator())
+    app.state.rate_limiter = InMemoryRateLimiter(
+        RateLimitSettings(requests_per_minute=60, burst=1)
+    )
+    with TestClient(app) as client:
+        first = client.post(
+            "/mcp",
+            json=_rpc_call({"text": "first"}),
+            headers={"authorization": "Bearer valid"},
+        )
+        second = client.post(
+            "/mcp",
+            json=_rpc_call({"text": "second"}),
+            headers={"authorization": "Bearer valid"},
+        )
+
+    assert first.status_code == 200
+    assert second.status_code == 429
+    assert second.json()["error"]["code"] == -32029
+
+
+def test_internal_tool_exception_does_not_leak_raw_detail() -> None:
+    app = create_app(
+        server=EchoServer(resource=CrashingResource()),
+        auth_validator=FakeAuthValidator(),
+    )
+    with TestClient(app) as client:
+        response = client.post(
+            "/mcp",
+            json=_rpc_call({"text": "hello"}),
+            headers={"authorization": "Bearer valid"},
+        )
+
+    assert response.status_code == 200
+    assert response.json()["result"]["isError"] is True
+    assert "secret-token-value" not in response.text
+
+
+def test_websocket_rejects_missing_bearer_token() -> None:
+    app = create_app(server=EchoServer(), auth_validator=FakeAuthValidator())
+    with TestClient(app) as client:
+        with client.websocket_connect("/mcp/ws") as websocket:
+            message = websocket.receive_json()
+            assert message["error"]["code"] == -32000
+            with pytest.raises(WebSocketDisconnect):
+                websocket.receive_text()
+
+
+def test_websocket_accepts_valid_bearer_token() -> None:
+    app = create_app(server=EchoServer(), auth_validator=FakeAuthValidator())
+    with TestClient(app) as client:
+        with client.websocket_connect(
+            "/mcp/ws", headers={"authorization": "Bearer valid"}
+        ) as websocket:
+            websocket.send_json(
+                {
+                    "jsonrpc": "2.0",
+                    "id": 1,
+                    "method": "initialize",
+                    "params": {
+                        "protocolVersion": "2025-11-25",
+                        "capabilities": {},
+                        "clientInfo": {"name": "test"},
+                    },
+                }
+            )
+            assert websocket.receive_json()["result"]["protocolVersion"] == "2025-11-25"
diff --git a/tests/test_tool_registry.py b/tests/test_tool_registry.py
index e23b1f8..1549077 100644
--- a/tests/test_tool_registry.py
+++ b/tests/test_tool_registry.py
@@ -43,14 +43,13 @@ def test_registry_wraps_non_object_output_schema_without_warning_log(
     registry.register_resource(resource)
 
     definition = registry.list_definitions()[0]
-    assert definition["outputSchema"] == {
-        "type": "object",
-        "properties": {
-            "data": {
-                "type": "array",
-                "items": {"type": "string"},
-            }
-        },
-        "required": ["data"],
+    output_schema = definition["outputSchema"]
+    assert output_schema["type"] == "object"
+    assert output_schema["required"] == ["data"]
+    assert output_schema["properties"]["data"] == {
+        "type": "array",
+        "items": {"type": "string"},
     }
+    assert output_schema["properties"]["metadata"]["additionalProperties"] is True
+    assert output_schema["properties"]["mcpMetadata"]["additionalProperties"] is True
     assert not caplog.records
diff --git a/triclosan_partition_distribution.png b/triclosan_partition_distribution.png
deleted file mode 100644
index 408c219..0000000
Binary files a/triclosan_partition_distribution.png and /dev/null differ
diff --git a/triclosan_tissue_distribution_httk.png b/triclosan_tissue_distribution_httk.png
deleted file mode 100644
index c0aad87..0000000
Binary files a/triclosan_tissue_distribution_httk.png and /dev/null differ
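
For reviewers who want to poke at the hardened transport by hand, the sketch below shows one way a client call could look once the server is running. It is illustrative only: the "echo" tool name mirrors the test fixture above rather than a real catalog tool, the URL assumes the default local resource URL, and ACCESS_TOKEN is a hypothetical environment variable holding a bearer token minted by whichever issuer is configured.

import os

import httpx  # any HTTP client works; httpx is assumed here for brevity

# Hypothetical: export ACCESS_TOKEN with a token from your configured issuer.
token = os.environ["ACCESS_TOKEN"]

response = httpx.post(
    "http://localhost:8000/mcp",
    json={
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/call",
        "params": {"name": "echo", "parameters": {"text": "hello"}},
    },
    headers={"Authorization": f"Bearer {token}"},
)
if response.status_code in (401, 403):
    # The challenge header points clients at the protected-resource metadata.
    print(response.headers.get("WWW-Authenticate"))
else:
    print(response.json())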