diff --git a/CHANGELOG.md b/CHANGELOG.md index b726219..f12b4a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Separate internal vs external trace schemas with strict Pydantic validation and checked-in JSON Schema artifacts for API, billing, pipeline, inbox, and dispatch events. + ## [0.4.0] - 2026-02-26 ### Changed diff --git a/README.md b/README.md index a6e61e9..2a88f95 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,22 @@ The Claude plugin skill remains at: Both skills cover the same CLI capabilities (`estimate`, `validate`, `calibrate`) but are phrased for their respective ecosystems. +## Trace Schema Foundation + +The repo now includes strict trace schemas for two surfaces: + +- internal operational traces: agent coordination, inbox protocol, dispatch lifecycle +- external customer-visible traces: API request/response, billing events, pipeline stages + +Source models live in `src/agent_estimate/tracing/schema.py`. +Checked-in JSON Schema artifacts live in `schemas/trace/`. 
+ +Regenerate the schema artifacts after model changes: + +```bash +python scripts/render_trace_schemas.py +``` + ## Usage Examples Estimate tasks from a text file: diff --git a/schemas/trace/external_trace.schema.json b/schemas/trace/external_trace.schema.json new file mode 100644 index 0000000..536951f --- /dev/null +++ b/schemas/trace/external_trace.schema.json @@ -0,0 +1,569 @@ +{ + "$defs": { + "ApiRequestPayload": { + "additionalProperties": false, + "description": "Customer-visible API request trace.", + "properties": { + "method": { + "enum": [ + "GET", + "POST", + "PUT", + "PATCH", + "DELETE" + ], + "title": "Method", + "type": "string" + }, + "request_id": { + "minLength": 1, + "title": "Request Id", + "type": "string" + }, + "request_size_bytes": { + "anyOf": [ + { + "minimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Request Size Bytes" + }, + "route": { + "minLength": 1, + "title": "Route", + "type": "string" + } + }, + "required": [ + "request_id", + "method", + "route" + ], + "title": "ApiRequestPayload", + "type": "object" + }, + "ApiRequestTrace": { + "additionalProperties": false, + "description": "External trace for inbound API requests.", + "properties": { + "customer_id": { + "minLength": 1, + "title": "Customer Id", + "type": "string" + }, + "environment": { + "enum": [ + "dev", + "staging", + "prod" + ], + "title": "Environment", + "type": "string" + }, + "event_type": { + "const": "api_request", + "default": "api_request", + "title": "Event Type", + "type": "string" + }, + "extensions": { + "patternProperties": { + "^[a-z][a-z0-9_.-]*$": { + "$ref": "#/$defs/JsonValue" + } + }, + "propertyNames": { + "minLength": 1 + }, + "title": "Extensions", + "type": "object" + }, + "payload": { + "$ref": "#/$defs/ApiRequestPayload" + }, + "producer": { + "minLength": 1, + "title": "Producer", + "type": "string" + }, + "recorded_at": { + "format": "date-time", + "title": "Recorded At", + "type": "string" + 
}, + "schema_version": { + "const": "v1", + "default": "v1", + "title": "Schema Version", + "type": "string" + }, + "surface": { + "const": "external", + "default": "external", + "title": "Surface", + "type": "string" + }, + "trace_id": { + "minLength": 1, + "title": "Trace Id", + "type": "string" + }, + "workspace_id": { + "minLength": 1, + "title": "Workspace Id", + "type": "string" + } + }, + "required": [ + "trace_id", + "recorded_at", + "producer", + "customer_id", + "workspace_id", + "environment", + "payload" + ], + "title": "ApiRequestTrace", + "type": "object" + }, + "ApiResponsePayload": { + "additionalProperties": false, + "description": "Customer-visible API response trace.", + "properties": { + "cache_hit": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Cache Hit" + }, + "latency_ms": { + "minimum": 0, + "title": "Latency Ms", + "type": "integer" + }, + "request_id": { + "minLength": 1, + "title": "Request Id", + "type": "string" + }, + "response_size_bytes": { + "anyOf": [ + { + "minimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Response Size Bytes" + }, + "status_code": { + "maximum": 599, + "minimum": 100, + "title": "Status Code", + "type": "integer" + } + }, + "required": [ + "request_id", + "status_code", + "latency_ms" + ], + "title": "ApiResponsePayload", + "type": "object" + }, + "ApiResponseTrace": { + "additionalProperties": false, + "description": "External trace for API response outcomes.", + "properties": { + "customer_id": { + "minLength": 1, + "title": "Customer Id", + "type": "string" + }, + "environment": { + "enum": [ + "dev", + "staging", + "prod" + ], + "title": "Environment", + "type": "string" + }, + "event_type": { + "const": "api_response", + "default": "api_response", + "title": "Event Type", + "type": "string" + }, + "extensions": { + "patternProperties": { + "^[a-z][a-z0-9_.-]*$": { + "$ref": "#/$defs/JsonValue" + } + 
}, + "propertyNames": { + "minLength": 1 + }, + "title": "Extensions", + "type": "object" + }, + "payload": { + "$ref": "#/$defs/ApiResponsePayload" + }, + "producer": { + "minLength": 1, + "title": "Producer", + "type": "string" + }, + "recorded_at": { + "format": "date-time", + "title": "Recorded At", + "type": "string" + }, + "schema_version": { + "const": "v1", + "default": "v1", + "title": "Schema Version", + "type": "string" + }, + "surface": { + "const": "external", + "default": "external", + "title": "Surface", + "type": "string" + }, + "trace_id": { + "minLength": 1, + "title": "Trace Id", + "type": "string" + }, + "workspace_id": { + "minLength": 1, + "title": "Workspace Id", + "type": "string" + } + }, + "required": [ + "trace_id", + "recorded_at", + "producer", + "customer_id", + "workspace_id", + "environment", + "payload" + ], + "title": "ApiResponseTrace", + "type": "object" + }, + "BillingEventPayload": { + "additionalProperties": false, + "description": "Billing event emitted to customer-visible ledgers.", + "properties": { + "amount_usd": { + "minimum": 0, + "title": "Amount Usd", + "type": "number" + }, + "billable": { + "default": true, + "title": "Billable", + "type": "boolean" + }, + "billing_event_id": { + "minLength": 1, + "title": "Billing Event Id", + "type": "string" + }, + "currency": { + "default": "USD", + "pattern": "^[A-Z]{3}$", + "title": "Currency", + "type": "string" + }, + "meter": { + "minLength": 1, + "title": "Meter", + "type": "string" + }, + "quantity": { + "minimum": 0, + "title": "Quantity", + "type": "number" + }, + "unit": { + "minLength": 1, + "title": "Unit", + "type": "string" + } + }, + "required": [ + "billing_event_id", + "meter", + "quantity", + "unit", + "amount_usd" + ], + "title": "BillingEventPayload", + "type": "object" + }, + "BillingEventTrace": { + "additionalProperties": false, + "description": "External trace for customer billing events.", + "properties": { + "customer_id": { + "minLength": 1, + "title": 
"Customer Id", + "type": "string" + }, + "environment": { + "enum": [ + "dev", + "staging", + "prod" + ], + "title": "Environment", + "type": "string" + }, + "event_type": { + "const": "billing_event", + "default": "billing_event", + "title": "Event Type", + "type": "string" + }, + "extensions": { + "patternProperties": { + "^[a-z][a-z0-9_.-]*$": { + "$ref": "#/$defs/JsonValue" + } + }, + "propertyNames": { + "minLength": 1 + }, + "title": "Extensions", + "type": "object" + }, + "payload": { + "$ref": "#/$defs/BillingEventPayload" + }, + "producer": { + "minLength": 1, + "title": "Producer", + "type": "string" + }, + "recorded_at": { + "format": "date-time", + "title": "Recorded At", + "type": "string" + }, + "schema_version": { + "const": "v1", + "default": "v1", + "title": "Schema Version", + "type": "string" + }, + "surface": { + "const": "external", + "default": "external", + "title": "Surface", + "type": "string" + }, + "trace_id": { + "minLength": 1, + "title": "Trace Id", + "type": "string" + }, + "workspace_id": { + "minLength": 1, + "title": "Workspace Id", + "type": "string" + } + }, + "required": [ + "trace_id", + "recorded_at", + "producer", + "customer_id", + "workspace_id", + "environment", + "payload" + ], + "title": "BillingEventTrace", + "type": "object" + }, + "JsonValue": {}, + "PipelineStagePayload": { + "additionalProperties": false, + "description": "Customer-visible pipeline stage state.", + "properties": { + "duration_ms": { + "anyOf": [ + { + "minimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Duration Ms" + }, + "pipeline_run_id": { + "minLength": 1, + "title": "Pipeline Run Id", + "type": "string" + }, + "stage": { + "minLength": 1, + "title": "Stage", + "type": "string" + }, + "status": { + "enum": [ + "queued", + "running", + "completed", + "failed", + "cancelled" + ], + "title": "Status", + "type": "string" + }, + "visible_label": { + "minLength": 1, + "title": "Visible Label", + "type": 
"string" + } + }, + "required": [ + "pipeline_run_id", + "stage", + "status", + "visible_label" + ], + "title": "PipelineStagePayload", + "type": "object" + }, + "PipelineStageTrace": { + "additionalProperties": false, + "description": "External trace for visible execution pipeline stages.", + "properties": { + "customer_id": { + "minLength": 1, + "title": "Customer Id", + "type": "string" + }, + "environment": { + "enum": [ + "dev", + "staging", + "prod" + ], + "title": "Environment", + "type": "string" + }, + "event_type": { + "const": "pipeline_stage", + "default": "pipeline_stage", + "title": "Event Type", + "type": "string" + }, + "extensions": { + "patternProperties": { + "^[a-z][a-z0-9_.-]*$": { + "$ref": "#/$defs/JsonValue" + } + }, + "propertyNames": { + "minLength": 1 + }, + "title": "Extensions", + "type": "object" + }, + "payload": { + "$ref": "#/$defs/PipelineStagePayload" + }, + "producer": { + "minLength": 1, + "title": "Producer", + "type": "string" + }, + "recorded_at": { + "format": "date-time", + "title": "Recorded At", + "type": "string" + }, + "schema_version": { + "const": "v1", + "default": "v1", + "title": "Schema Version", + "type": "string" + }, + "surface": { + "const": "external", + "default": "external", + "title": "Surface", + "type": "string" + }, + "trace_id": { + "minLength": 1, + "title": "Trace Id", + "type": "string" + }, + "workspace_id": { + "minLength": 1, + "title": "Workspace Id", + "type": "string" + } + }, + "required": [ + "trace_id", + "recorded_at", + "producer", + "customer_id", + "workspace_id", + "environment", + "payload" + ], + "title": "PipelineStageTrace", + "type": "object" + } + }, + "discriminator": { + "mapping": { + "api_request": "#/$defs/ApiRequestTrace", + "api_response": "#/$defs/ApiResponseTrace", + "billing_event": "#/$defs/BillingEventTrace", + "pipeline_stage": "#/$defs/PipelineStageTrace" + }, + "propertyName": "event_type" + }, + "oneOf": [ + { + "$ref": "#/$defs/ApiRequestTrace" + }, + { + "$ref": 
"#/$defs/ApiResponseTrace" + }, + { + "$ref": "#/$defs/BillingEventTrace" + }, + { + "$ref": "#/$defs/PipelineStageTrace" + } + ] +} diff --git a/schemas/trace/internal_trace.schema.json b/schemas/trace/internal_trace.schema.json new file mode 100644 index 0000000..68893bc --- /dev/null +++ b/schemas/trace/internal_trace.schema.json @@ -0,0 +1,462 @@ +{ + "$defs": { + "AgentCoordinationPayload": { + "additionalProperties": false, + "description": "Coordination traces between agents or orchestrators.", + "properties": { + "action": { + "enum": [ + "task_request", + "notification", + "question", + "handoff", + "review_request", + "review_feedback", + "review_lgtm", + "status_update" + ], + "title": "Action", + "type": "string" + }, + "agent": { + "minLength": 1, + "title": "Agent", + "type": "string" + }, + "context_keys": { + "items": { + "minLength": 1, + "type": "string" + }, + "title": "Context Keys", + "type": "array" + }, + "conversation_id": { + "anyOf": [ + { + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Conversation Id" + }, + "coordination_id": { + "minLength": 1, + "title": "Coordination Id", + "type": "string" + }, + "peer_agent": { + "anyOf": [ + { + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Peer Agent" + } + }, + "required": [ + "coordination_id", + "agent", + "action" + ], + "title": "AgentCoordinationPayload", + "type": "object" + }, + "AgentCoordinationTrace": { + "additionalProperties": false, + "description": "Internal trace for agent coordination events.", + "properties": { + "environment": { + "enum": [ + "dev", + "staging", + "prod" + ], + "title": "Environment", + "type": "string" + }, + "event_type": { + "const": "agent_coordination", + "default": "agent_coordination", + "title": "Event Type", + "type": "string" + }, + "extensions": { + "patternProperties": { + "^[a-z][a-z0-9_.-]*$": { + "$ref": "#/$defs/JsonValue" + } + }, + 
"propertyNames": { + "minLength": 1 + }, + "title": "Extensions", + "type": "object" + }, + "payload": { + "$ref": "#/$defs/AgentCoordinationPayload" + }, + "producer": { + "minLength": 1, + "title": "Producer", + "type": "string" + }, + "project": { + "minLength": 1, + "title": "Project", + "type": "string" + }, + "recorded_at": { + "format": "date-time", + "title": "Recorded At", + "type": "string" + }, + "schema_version": { + "const": "v1", + "default": "v1", + "title": "Schema Version", + "type": "string" + }, + "surface": { + "const": "internal", + "default": "internal", + "title": "Surface", + "type": "string" + }, + "trace_id": { + "minLength": 1, + "title": "Trace Id", + "type": "string" + } + }, + "required": [ + "trace_id", + "recorded_at", + "producer", + "project", + "environment", + "payload" + ], + "title": "AgentCoordinationTrace", + "type": "object" + }, + "DispatchLifecyclePayload": { + "additionalProperties": false, + "description": "Dispatch state transition details.", + "properties": { + "actor": { + "minLength": 1, + "title": "Actor", + "type": "string" + }, + "dispatch_id": { + "minLength": 1, + "title": "Dispatch Id", + "type": "string" + }, + "dispatch_state": { + "enum": [ + "queued", + "in_progress", + "blocked", + "awaiting_review", + "completed", + "cancelled" + ], + "title": "Dispatch State", + "type": "string" + }, + "review_round": { + "anyOf": [ + { + "minimum": 0, + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Review Round" + }, + "task_ref": { + "minLength": 1, + "title": "Task Ref", + "type": "string" + } + }, + "required": [ + "dispatch_id", + "dispatch_state", + "actor", + "task_ref" + ], + "title": "DispatchLifecyclePayload", + "type": "object" + }, + "DispatchLifecycleTrace": { + "additionalProperties": false, + "description": "Internal trace for dispatch lifecycle transitions.", + "properties": { + "environment": { + "enum": [ + "dev", + "staging", + "prod" + ], + "title": "Environment", 
+ "type": "string" + }, + "event_type": { + "const": "dispatch_lifecycle", + "default": "dispatch_lifecycle", + "title": "Event Type", + "type": "string" + }, + "extensions": { + "patternProperties": { + "^[a-z][a-z0-9_.-]*$": { + "$ref": "#/$defs/JsonValue" + } + }, + "propertyNames": { + "minLength": 1 + }, + "title": "Extensions", + "type": "object" + }, + "payload": { + "$ref": "#/$defs/DispatchLifecyclePayload" + }, + "producer": { + "minLength": 1, + "title": "Producer", + "type": "string" + }, + "project": { + "minLength": 1, + "title": "Project", + "type": "string" + }, + "recorded_at": { + "format": "date-time", + "title": "Recorded At", + "type": "string" + }, + "schema_version": { + "const": "v1", + "default": "v1", + "title": "Schema Version", + "type": "string" + }, + "surface": { + "const": "internal", + "default": "internal", + "title": "Surface", + "type": "string" + }, + "trace_id": { + "minLength": 1, + "title": "Trace Id", + "type": "string" + } + }, + "required": [ + "trace_id", + "recorded_at", + "producer", + "project", + "environment", + "payload" + ], + "title": "DispatchLifecycleTrace", + "type": "object" + }, + "InboxProtocolPayload": { + "additionalProperties": false, + "description": "Inbox / outbox protocol event details.", + "properties": { + "inbox_path": { + "minLength": 1, + "title": "Inbox Path", + "type": "string" + }, + "message_id": { + "minLength": 1, + "title": "Message Id", + "type": "string" + }, + "message_type": { + "enum": [ + "task_request", + "question", + "notification", + "follow_up", + "handoff", + "handoff_complete", + "review_request", + "review_feedback", + "review_addressed", + "review_lgtm", + "brainstorm_request", + "brainstorm_followup" + ], + "title": "Message Type", + "type": "string" + }, + "outbox_path": { + "anyOf": [ + { + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Outbox Path" + }, + "parent_message_id": { + "anyOf": [ + { + "minLength": 1, + 
"type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Parent Message Id" + }, + "recipient": { + "minLength": 1, + "title": "Recipient", + "type": "string" + }, + "sender": { + "minLength": 1, + "title": "Sender", + "type": "string" + } + }, + "required": [ + "message_id", + "message_type", + "sender", + "recipient", + "inbox_path" + ], + "title": "InboxProtocolPayload", + "type": "object" + }, + "InboxProtocolTrace": { + "additionalProperties": false, + "description": "Internal trace for message protocol lifecycle events.", + "properties": { + "environment": { + "enum": [ + "dev", + "staging", + "prod" + ], + "title": "Environment", + "type": "string" + }, + "event_type": { + "const": "inbox_protocol", + "default": "inbox_protocol", + "title": "Event Type", + "type": "string" + }, + "extensions": { + "patternProperties": { + "^[a-z][a-z0-9_.-]*$": { + "$ref": "#/$defs/JsonValue" + } + }, + "propertyNames": { + "minLength": 1 + }, + "title": "Extensions", + "type": "object" + }, + "payload": { + "$ref": "#/$defs/InboxProtocolPayload" + }, + "producer": { + "minLength": 1, + "title": "Producer", + "type": "string" + }, + "project": { + "minLength": 1, + "title": "Project", + "type": "string" + }, + "recorded_at": { + "format": "date-time", + "title": "Recorded At", + "type": "string" + }, + "schema_version": { + "const": "v1", + "default": "v1", + "title": "Schema Version", + "type": "string" + }, + "surface": { + "const": "internal", + "default": "internal", + "title": "Surface", + "type": "string" + }, + "trace_id": { + "minLength": 1, + "title": "Trace Id", + "type": "string" + } + }, + "required": [ + "trace_id", + "recorded_at", + "producer", + "project", + "environment", + "payload" + ], + "title": "InboxProtocolTrace", + "type": "object" + }, + "JsonValue": {} + }, + "discriminator": { + "mapping": { + "agent_coordination": "#/$defs/AgentCoordinationTrace", + "dispatch_lifecycle": "#/$defs/DispatchLifecycleTrace", + 
"inbox_protocol": "#/$defs/InboxProtocolTrace" + }, + "propertyName": "event_type" + }, + "oneOf": [ + { + "$ref": "#/$defs/AgentCoordinationTrace" + }, + { + "$ref": "#/$defs/InboxProtocolTrace" + }, + { + "$ref": "#/$defs/DispatchLifecycleTrace" + } + ] +} diff --git a/scripts/render_trace_schemas.py b/scripts/render_trace_schemas.py new file mode 100644 index 0000000..7cbdd69 --- /dev/null +++ b/scripts/render_trace_schemas.py @@ -0,0 +1,28 @@ +"""Render checked-in JSON Schema artifacts for trace validation.""" + +from __future__ import annotations + +import sys +import json +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +SRC = ROOT / "src" +if str(SRC) not in sys.path: + sys.path.insert(0, str(SRC)) + +def _write(path: Path, payload: dict) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8") + + +def main() -> None: + from agent_estimate.tracing.schema import external_trace_json_schema, internal_trace_json_schema + + schema_root = ROOT / "schemas" / "trace" + _write(schema_root / "internal_trace.schema.json", internal_trace_json_schema()) + _write(schema_root / "external_trace.schema.json", external_trace_json_schema()) + + +if __name__ == "__main__": + main() diff --git a/src/agent_estimate/tracing/__init__.py b/src/agent_estimate/tracing/__init__.py new file mode 100644 index 0000000..7d44112 --- /dev/null +++ b/src/agent_estimate/tracing/__init__.py @@ -0,0 +1,23 @@ +"""Tracing schema helpers for internal and external event validation.""" + +from agent_estimate.tracing.schema import ( + ExternalTrace, + InternalTrace, + external_trace_json_schema, + internal_trace_json_schema, + validate_external_trace, + validate_external_trace_json, + validate_internal_trace, + validate_internal_trace_json, +) + +__all__ = [ + "ExternalTrace", + "InternalTrace", + "external_trace_json_schema", + "internal_trace_json_schema", + "validate_external_trace", + 
"validate_external_trace_json", + "validate_internal_trace", + "validate_internal_trace_json", +] diff --git a/src/agent_estimate/tracing/schema.py b/src/agent_estimate/tracing/schema.py new file mode 100644 index 0000000..01135bb --- /dev/null +++ b/src/agent_estimate/tracing/schema.py @@ -0,0 +1,309 @@ +"""Strict trace schemas for internal operations vs external customer surfaces.""" + +from __future__ import annotations + +from collections.abc import Mapping +from datetime import datetime +from typing import Annotated, Any, Literal, TypeAlias + +from pydantic import BaseModel, ConfigDict, Field, JsonValue, StringConstraints, TypeAdapter +from pydantic import model_validator + +NonEmptyStr = Annotated[str, StringConstraints(strip_whitespace=True, min_length=1)] +ExtensionKey = Annotated[ + str, + StringConstraints(strip_whitespace=True, min_length=1, pattern=r"^[a-z][a-z0-9_.-]*$"), +] + +MessageType = Literal[ + "task_request", + "question", + "notification", + "follow_up", + "handoff", + "handoff_complete", + "review_request", + "review_feedback", + "review_addressed", + "review_lgtm", + "brainstorm_request", + "brainstorm_followup", +] + +DispatchState = Literal[ + "queued", + "in_progress", + "blocked", + "awaiting_review", + "completed", + "cancelled", +] + +HttpMethod = Literal["GET", "POST", "PUT", "PATCH", "DELETE"] +PipelineStatus = Literal["queued", "running", "completed", "failed", "cancelled"] + +TraceExtensions: TypeAlias = dict[ExtensionKey, JsonValue] + +_INTERNAL_ONLY_KEYS = frozenset( + { + "agent", + "context_keys", + "conversation_id", + "dispatch_id", + "dispatch_state", + "inbox_path", + "message_id", + "message_type", + "outbox_path", + "parent_message_id", + "peer_agent", + "review_round", + "task_ref", + } +) +_INTERNAL_ONLY_PREFIXES = ( + "agent_", + "conversation_", + "dispatch_", + "inbox_", + "outbox_", + "parent_message_", + "review_", +) + + +def _find_internal_key_path(value: JsonValue, path: tuple[str, ...] 
= ()) -> str | None: + """Return the first nested path that uses a reserved internal key.""" + if isinstance(value, Mapping): + for key, nested in value.items(): + current_path = (*path, key) + if key in _INTERNAL_ONLY_KEYS or key.startswith(_INTERNAL_ONLY_PREFIXES): + return ".".join(current_path) + nested_path = _find_internal_key_path(nested, current_path) + if nested_path: + return nested_path + return None + if isinstance(value, list): + for index, item in enumerate(value): + nested_path = _find_internal_key_path(item, (*path, str(index))) + if nested_path: + return nested_path + return None + + +class SchemaModel(BaseModel): + """Base class for strict trace payloads.""" + + model_config = ConfigDict(extra="forbid") + + +class InternalTraceBase(SchemaModel): + """Common envelope for internal operational traces.""" + + schema_version: Literal["v1"] = "v1" + surface: Literal["internal"] = "internal" + trace_id: NonEmptyStr + recorded_at: datetime + producer: NonEmptyStr + project: NonEmptyStr + environment: Literal["dev", "staging", "prod"] + extensions: TraceExtensions = Field(default_factory=dict) + + +class AgentCoordinationPayload(SchemaModel): + """Coordination traces between agents or orchestrators.""" + + coordination_id: NonEmptyStr + agent: NonEmptyStr + peer_agent: NonEmptyStr | None = None + action: Literal[ + "task_request", + "notification", + "question", + "handoff", + "review_request", + "review_feedback", + "review_lgtm", + "status_update", + ] + conversation_id: NonEmptyStr | None = None + context_keys: list[NonEmptyStr] = Field(default_factory=list) + + +class AgentCoordinationTrace(InternalTraceBase): + """Internal trace for agent coordination events.""" + + event_type: Literal["agent_coordination"] = "agent_coordination" + payload: AgentCoordinationPayload + + +class InboxProtocolPayload(SchemaModel): + """Inbox / outbox protocol event details.""" + + message_id: NonEmptyStr + message_type: MessageType + sender: NonEmptyStr + recipient: 
class ExternalTraceBase(SchemaModel):
    """Common envelope for customer-visible trace events."""

    # Fixed markers: every external event is schema "v1" on the "external" surface.
    schema_version: Literal["v1"] = "v1"
    surface: Literal["external"] = "external"
    # Identification of the event and of the component that emitted it.
    trace_id: NonEmptyStr
    recorded_at: datetime
    producer: NonEmptyStr
    # Tenant scoping.
    customer_id: NonEmptyStr
    workspace_id: NonEmptyStr
    environment: Literal["dev", "staging", "prod"]
    # Free-form JSON values keyed by extension name; empty by default.
    extensions: TraceExtensions = Field(default_factory=dict)

    @model_validator(mode="after")
    def reject_internal_fields(self) -> "ExternalTraceBase":
        """Fail validation when ``extensions`` contains a reserved internal key.

        Returns:
            The model itself when no reserved key is found.

        Raises:
            ValueError: If a reserved key is found at any nesting depth; the
                message names the offending path.
        """
        offending_path = _find_internal_key_path(self.extensions)
        if not offending_path:
            return self
        message = (
            "external trace extensions must not contain internal-only fields "
            f"(found {offending_path})"
        )
        raise ValueError(message)
# Discriminated unions: ``event_type`` selects the concrete trace model.
InternalTrace = Annotated[
    AgentCoordinationTrace | InboxProtocolTrace | DispatchLifecycleTrace,
    Field(discriminator="event_type"),
]
ExternalTrace = Annotated[
    ApiRequestTrace | ApiResponseTrace | BillingEventTrace | PipelineStageTrace,
    Field(discriminator="event_type"),
]

# Built once at import time and reused by every helper below.
_INTERNAL_TRACE_ADAPTER = TypeAdapter(InternalTrace)
_EXTERNAL_TRACE_ADAPTER = TypeAdapter(ExternalTrace)


def validate_internal_trace(
    payload: Mapping[str, Any],
) -> AgentCoordinationTrace | InboxProtocolTrace | DispatchLifecycleTrace:
    """Validate a Python mapping as an internal trace event.

    Args:
        payload: Trace envelope, including ``event_type`` and ``payload``.

    Returns:
        The concrete internal trace model selected by ``event_type``.

    Raises:
        pydantic.ValidationError: If *payload* does not match any internal
            trace schema.
    """
    return _INTERNAL_TRACE_ADAPTER.validate_python(payload)


def validate_internal_trace_json(
    payload: str | bytes | bytearray,
) -> AgentCoordinationTrace | InboxProtocolTrace | DispatchLifecycleTrace:
    """Validate a JSON document as an internal trace event.

    Args:
        payload: Serialized JSON text or bytes holding one trace envelope.

    Returns:
        The concrete internal trace model selected by ``event_type``.

    Raises:
        pydantic.ValidationError: If the document is not valid JSON or does not
            match any internal trace schema.
    """
    return _INTERNAL_TRACE_ADAPTER.validate_json(payload)


def validate_external_trace(
    payload: Mapping[str, Any],
) -> ApiRequestTrace | ApiResponseTrace | BillingEventTrace | PipelineStageTrace:
    """Validate a Python mapping as an external trace event.

    Args:
        payload: Trace envelope, including ``event_type`` and ``payload``.

    Returns:
        The concrete external trace model selected by ``event_type``.

    Raises:
        pydantic.ValidationError: If *payload* does not match any external
            trace schema, or its ``extensions`` contain reserved internal keys.
    """
    return _EXTERNAL_TRACE_ADAPTER.validate_python(payload)


def validate_external_trace_json(
    payload: str | bytes | bytearray,
) -> ApiRequestTrace | ApiResponseTrace | BillingEventTrace | PipelineStageTrace:
    """Validate a JSON document as an external trace event.

    Args:
        payload: Serialized JSON text or bytes holding one trace envelope.

    Returns:
        The concrete external trace model selected by ``event_type``.

    Raises:
        pydantic.ValidationError: If the document is not valid JSON, does not
            match any external trace schema, or its ``extensions`` contain
            reserved internal keys.
    """
    return _EXTERNAL_TRACE_ADAPTER.validate_json(payload)


def internal_trace_json_schema() -> dict[str, Any]:
    """Return the JSON Schema artifact for internal traces."""
    return _INTERNAL_TRACE_ADAPTER.json_schema()


def external_trace_json_schema() -> dict[str, Any]:
    """Return the JSON Schema artifact for external traces."""
    return _EXTERNAL_TRACE_ADAPTER.json_schema()
"recorded_at": "2026-03-09T00:00:00Z", + "producer": "agent-hub", + "project": "agent-estimate", + "environment": "prod", + "event_type": "dispatch_lifecycle", + "payload": { + "dispatch_id": "dispatch-123", + "dispatch_state": "in_progress", + "actor": "codex", + "task_ref": "hq#33/P2", + "review_round": 1, + }, + } + payload.update(overrides) + return payload + + +def _external_trace(**overrides: object) -> dict: + payload: dict = { + "schema_version": "v1", + "surface": "external", + "trace_id": "trace-ext-001", + "recorded_at": "2026-03-09T00:00:00Z", + "producer": "agent-estimate-api", + "customer_id": "cust_123", + "workspace_id": "ws_456", + "environment": "prod", + "event_type": "pipeline_stage", + "payload": { + "pipeline_run_id": "run_123", + "stage": "estimate", + "status": "running", + "visible_label": "Estimating issue bundle", + "duration_ms": 12, + }, + } + payload.update(overrides) + return payload + + +class TestTraceValidation: + def test_internal_trace_accepts_dispatch_lifecycle(self) -> None: + trace = validate_internal_trace(_internal_trace()) + assert trace.surface == "internal" + assert trace.payload.dispatch_id == "dispatch-123" + + def test_external_trace_accepts_pipeline_stage(self) -> None: + trace = validate_external_trace(_external_trace()) + assert trace.surface == "external" + assert trace.payload.stage == "estimate" + + def test_external_trace_rejects_internal_keys_in_extensions(self) -> None: + with pytest.raises(ValidationError, match="internal-only fields"): + validate_external_trace( + _external_trace( + extensions={ + "public_context": { + "conversation_id": "conv-001", + } + } + ) + ) + + def test_external_trace_rejects_internal_surface(self) -> None: + with pytest.raises(ValidationError, match="surface"): + validate_external_trace(_external_trace(surface="internal")) + + def test_internal_trace_rejects_external_event_type(self) -> None: + with pytest.raises(ValidationError, match="event_type"): + 
validate_internal_trace(_internal_trace(event_type="api_request")) + + +class TestSchemaArtifacts: + def test_internal_schema_artifact_is_in_sync(self) -> None: + schema_path = _repo_root() / "schemas" / "trace" / "internal_trace.schema.json" + assert json.loads(schema_path.read_text(encoding="utf-8")) == internal_trace_json_schema() + + def test_external_schema_artifact_is_in_sync(self) -> None: + schema_path = _repo_root() / "schemas" / "trace" / "external_trace.schema.json" + assert json.loads(schema_path.read_text(encoding="utf-8")) == external_trace_json_schema()