diff --git a/hindsight-api-slim/hindsight_api/extensions/builtin/bank_scoped_tenant.py b/hindsight-api-slim/hindsight_api/extensions/builtin/bank_scoped_tenant.py
new file mode 100644
index 000000000..dd45101b6
--- /dev/null
+++ b/hindsight-api-slim/hindsight_api/extensions/builtin/bank_scoped_tenant.py
@@ -0,0 +1,266 @@
+"""
+API Key Tenant Extension with Schema Isolation for Hindsight
+
+Maps API keys to isolated PostgreSQL schemas, providing database-level
+memory isolation between tenants. Each key gets its own schema containing
+independent banks, memories, and entities — no application-layer access
+checks required.
+
+Why schema isolation instead of application-layer bank filtering?
+    The primary threat model is **prompt injection against AI agents**.
+    Agents execute tool calls (including Hindsight recall/retain) based on
+    conversation content. A prompt injection delivered via chat message,
+    email, or web search result can trick an agent into querying any bank
+    on the same Hindsight instance.
+
+    Application-layer access control (checking bank_id in a validator
+    extension) is defense-in-depth but not a security boundary — it depends
+    on every code path calling the validator, and a single missed path or
+    engine bug grants cross-tenant access.
+
+    Schema isolation is a security boundary. The API key determines the
+    PostgreSQL schema at authentication time, before any bank lookup or
+    memory query. Even if an agent is fully compromised by injection, its
+    queries are physically scoped to its schema. Banks from other schemas
+    don't exist in its view of the database.
+
+Configuration:
+    HINDSIGHT_API_TENANT_EXTENSION=hindsight_api.extensions.builtin.bank_scoped_tenant:ApiKeySchemaTenantExtension
+
+    # Semicolon-separated entries: api_key:schema_name
+    HINDSIGHT_API_TENANT_KEY_MAP=key1:tenant_alpha;key2:tenant_beta
+
+    # Optional: prefix for schema names (default: none, uses schema name as-is)
+    HINDSIGHT_API_TENANT_SCHEMA_PREFIX=hs
+
+    # Optional: disable auth for MCP endpoints
+    HINDSIGHT_API_TENANT_MCP_AUTH_DISABLED=true
+
+Example:
+    Two AI agent deployments sharing one Hindsight instance:
+
+    HINDSIGHT_API_TENANT_KEY_MAP=abc123:team_alpha;xyz789:team_beta
+
+    - Agent with key "abc123" → schema "team_alpha" (its own banks, memories)
+    - Agent with key "xyz789" → schema "team_beta" (its own banks, memories)
+    - A prompt-injected agent sending recall requests with the wrong bank name
+      gets "bank not found" — the bank doesn't exist in its schema
+    - Schemas are auto-created with full table migrations on first access
+
+License: MIT
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+
+from hindsight_api.config import get_config
+from hindsight_api.extensions.tenant import AuthenticationError, Tenant, TenantContext, TenantExtension
+from hindsight_api.models import RequestContext
+
+logger = logging.getLogger(__name__)
+
+__all__ = ["ApiKeySchemaTenantExtension"]
+
+# Schema names must be valid Postgres identifiers
+_SCHEMA_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")
+
+# PostgreSQL silently truncates identifiers longer than 63 bytes, so two distinct
+# over-long names could collapse into one schema and break tenant isolation.
+_MAX_SCHEMA_NAME_LEN = 63
+
+
+def _parse_key_map(raw: str) -> dict[str, str]:
+    """
+    Parse a key map string into a dict of API key → schema name.
+
+    Format: key1:schema1;key2:schema2
+
+    Returns:
+        Dict mapping API key strings to schema name strings.
+
+    Raises:
+        ValueError: If the format is invalid, an API key is repeated, or a
+            schema name is not a valid Postgres identifier.
+    """
+    result: dict[str, str] = {}
+    if not raw or not raw.strip():
+        return result
+
+    for entry in raw.split(";"):
+        entry = entry.strip()
+        if not entry:
+            continue
+        if ":" not in entry:
+            raise ValueError(
+                f"Invalid key_map entry '{entry}'. "
+                f"Expected format: 'apikey:schema_name'. "
+                f"Full format: 'key1:schema1;key2:schema2'"
+            )
+        key, schema = entry.split(":", 1)
+        key = key.strip()
+        schema = schema.strip()
+        if not key:
+            raise ValueError("Empty API key in key_map")
+        if key in result:
+            raise ValueError("Duplicate API key in key_map")
+        if not schema:
+            raise ValueError("Empty schema name for key in key_map")
+        if not _SCHEMA_RE.match(schema):
+            raise ValueError(
+                f"Invalid schema name '{schema}'. "
+                f"Must be a valid Postgres identifier "
+                f"(letters, digits, underscores, starting with a letter or underscore)."
+            )
+        result[key] = schema
+
+    return result
+
+
+class ApiKeySchemaTenantExtension(TenantExtension):
+    """
+    Tenant extension that maps API keys to isolated PostgreSQL schemas.
+
+    Each API key resolves to a dedicated schema. All database operations
+    (bank creation, memory storage, recall, reflect) are scoped to that
+    schema. Schemas are auto-created with full table migrations on first
+    access.
+
+    This provides database-level isolation — tenants cannot access each
+    other's data regardless of bank names, query parameters, or
+    application-layer bugs.
+
+    Configuration:
+        HINDSIGHT_API_TENANT_EXTENSION=hindsight_api.extensions.builtin.bank_scoped_tenant:ApiKeySchemaTenantExtension
+        HINDSIGHT_API_TENANT_KEY_MAP=key1:schema1;key2:schema2
+        HINDSIGHT_API_TENANT_SCHEMA_PREFIX=hs (optional)
+        HINDSIGHT_API_TENANT_MCP_AUTH_DISABLED=true (optional)
+    """
+
+    def __init__(self, config: dict[str, str]) -> None:
+        """
+        Build the API key → schema mapping from extension config.
+
+        Args:
+            config: Extension settings. Reads "key_map" (required) plus the
+                optional "schema_prefix" and "mcp_auth_disabled" entries.
+
+        Raises:
+            ValueError: If key_map is empty or malformed, schema_prefix is not
+                a valid identifier, or a resulting schema name is too long.
+        """
+        super().__init__(config)
+
+        raw_key_map = config.get("key_map", "")
+        self.schema_prefix = config.get("schema_prefix", "")
+        self.key_map = _parse_key_map(raw_key_map)
+
+        if not self.key_map:
+            raise ValueError(
+                "HINDSIGHT_API_TENANT_KEY_MAP is required. "
+                "Format: key1:schema1;key2:schema2"
+            )
+
+        if self.schema_prefix and not _SCHEMA_RE.match(self.schema_prefix):
+            raise ValueError(
+                f"Invalid schema_prefix '{self.schema_prefix}'. "
+                f"Must be a valid Postgres identifier."
+            )
+
+        self.mcp_auth_disabled = config.get("mcp_auth_disabled", "").lower() in (
+            "true",
+            "1",
+            "yes",
+        )
+
+        # Track initialized schemas to avoid redundant migrations
+        self._initialized_schemas: set[str] = set()
+
+        # Build full schema names (with optional prefix)
+        self._key_to_schema: dict[str, str] = {}
+        for key, schema in self.key_map.items():
+            full_schema = f"{self.schema_prefix}_{schema}" if self.schema_prefix else schema
+            if len(full_schema) > _MAX_SCHEMA_NAME_LEN:
+                raise ValueError(
+                    f"Schema name '{full_schema}' is too long. "
+                    f"Postgres identifiers are limited to {_MAX_SCHEMA_NAME_LEN} characters."
+                )
+            self._key_to_schema[key] = full_schema
+
+        # Log configuration (without revealing full keys). Short keys are fully
+        # redacted: showing even a 4-character prefix would expose most of them.
+        for key, schema in self._key_to_schema.items():
+            masked = f"{key[:4]}...{key[-4:]}" if len(key) > 12 else "****"
+            logger.info("Tenant key %s -> schema '%s'", masked, schema)
+
+    async def authenticate(self, context: RequestContext) -> TenantContext:
+        """
+        Authenticate API key and return tenant context with isolated schema.
+
+        On first access for a schema, runs database migrations to create
+        all required tables.
+
+        Args:
+            context: Request context containing the API key.
+
+        Returns:
+            TenantContext with schema_name for database isolation.
+
+        Raises:
+            AuthenticationError: If the API key is missing or not recognized,
+                or if first-access schema initialization fails.
+        """
+        if not context.api_key:
+            raise AuthenticationError(
+                "Missing API key. Pass via Authorization: Bearer <api_key>"
+            )
+
+        schema_name = self._key_to_schema.get(context.api_key)
+        if schema_name is None:
+            raise AuthenticationError("Invalid API key")
+
+        # Initialize schema on first access (creates tables via migration)
+        if schema_name not in self._initialized_schemas:
+            await self._initialize_schema(schema_name)
+
+        return TenantContext(schema_name=schema_name)
+
+    async def list_tenants(self) -> list[Tenant]:
+        """Return all initialized tenant schemas for worker discovery."""
+        return [Tenant(schema=schema) for schema in self._initialized_schemas]
+
+    async def authenticate_mcp(self, context: RequestContext) -> TenantContext:
+        """
+        Authenticate MCP requests.
+
+        If mcp_auth_disabled is set, falls back to the default schema
+        from HINDSIGHT_API_DATABASE_SCHEMA. Otherwise delegates to
+        authenticate().
+
+        Note: Disabling MCP auth when using schema isolation means MCP
+        requests hit the default schema, not a tenant schema. This is
+        appropriate for admin MCP clients but not for tenant-facing ones.
+        """
+        if self.mcp_auth_disabled:
+            return TenantContext(schema_name=get_config().database_schema)
+        return await self.authenticate(context)
+
+    async def _initialize_schema(self, schema_name: str) -> None:
+        """
+        Run migrations for a new tenant schema and cache the result.
+
+        Raises:
+            AuthenticationError: If the migration fails. The underlying
+                exception is chained as the cause and logged with traceback.
+        """
+        logger.info("Initializing schema: %s", schema_name)
+        try:
+            await self.context.run_migration(schema_name)
+            self._initialized_schemas.add(schema_name)
+            logger.info("Schema ready: %s", schema_name)
+        except Exception as e:
+            logger.exception(
+                "Schema initialization failed for %s", schema_name
+            )
+            raise AuthenticationError(f"Failed to initialize tenant: {e!s}") from e
diff --git a/hindsight-api-slim/tests/test_bank_scoped.py b/hindsight-api-slim/tests/test_bank_scoped.py
new file mode 100644
index 000000000..e7a1c6b9d
--- /dev/null
+++ b/hindsight-api-slim/tests/test_bank_scoped.py
@@ -0,0 +1,183 @@
+"""Tests for API key schema tenant extension."""
+
+import pytest
+
+from hindsight_api.extensions.builtin.bank_scoped_tenant import (
+    ApiKeySchemaTenantExtension,
+    _parse_key_map,
+)
+from hindsight_api.extensions.tenant import AuthenticationError
+from hindsight_api.models import RequestContext
+
+
+# =========================================================================
+# _parse_key_map tests
+# =========================================================================
+
+
+class TestParseKeyMap:
+    """Tests for the key map parser."""
+
+    def test_single_entry(self):
+        result = _parse_key_map("key1:schema_a")
+        assert result == {"key1": "schema_a"}
+
+    def test_multiple_entries(self):
+        result = _parse_key_map("key1:schema_a;key2:schema_b")
+        assert result == {"key1": "schema_a", "key2": "schema_b"}
+
+    def test_whitespace_handling(self):
+        result = _parse_key_map(" key1 : schema_a ; key2 : schema_b ")
+        assert result == {"key1": "schema_a", "key2": "schema_b"}
+
+    def test_invalid_no_colon(self):
+        with pytest.raises(ValueError, match="Expected format"):
+            _parse_key_map("key1-schema_a")
+
+    def test_invalid_empty_key(self):
+        with pytest.raises(ValueError, match="Empty API key"):
+            _parse_key_map(":schema_a")
+
+    def test_invalid_empty_schema(self):
+        with pytest.raises(ValueError, match="Empty schema name"):
+            _parse_key_map("key1:")
+
+    def test_invalid_schema_not_postgres_identifier(self):
+        with pytest.raises(ValueError, match="valid Postgres identifier"):
+            _parse_key_map("key1:bad-schema")
+
+    def test_invalid_duplicate_key(self):
+        with pytest.raises(ValueError, match="Duplicate API key"):
+            _parse_key_map("key1:schema_a;key1:schema_b")
+
+
+# =========================================================================
+# ApiKeySchemaTenantExtension tests
+# =========================================================================
+
+
+class TestApiKeySchemaTenantExtension:
+    """Tests for the schema-isolating tenant extension."""
+
+    def _make_ext(self, key_map: str, **kwargs) -> ApiKeySchemaTenantExtension:
+        config = {"key_map": key_map, **kwargs}
+        return ApiKeySchemaTenantExtension(config)
+
+    def test_init_requires_key_map(self):
+        with pytest.raises(ValueError, match="HINDSIGHT_API_TENANT_KEY_MAP is required"):
+            ApiKeySchemaTenantExtension({})
+
+    def test_init_invalid_schema_prefix(self):
+        with pytest.raises(ValueError, match="Invalid schema_prefix"):
+            ApiKeySchemaTenantExtension({
+                "key_map": "key1:schema1",
+                "schema_prefix": "bad-prefix",
+            })
+
+    def test_init_rejects_overlong_schema_name(self):
+        with pytest.raises(ValueError, match="too long"):
+            self._make_ext("key1:" + "a" * 60, schema_prefix="hs_x")
+
+    def test_schema_names_without_prefix(self):
+        ext = self._make_ext("key1:team_alpha;key2:team_beta")
+        assert ext._key_to_schema["key1"] == "team_alpha"
+        assert ext._key_to_schema["key2"] == "team_beta"
+
+    def test_schema_names_with_prefix(self):
+        ext = self._make_ext("key1:alpha;key2:beta", schema_prefix="hs")
+        assert ext._key_to_schema["key1"] == "hs_alpha"
+        assert ext._key_to_schema["key2"] == "hs_beta"
+
+    @pytest.mark.asyncio
+    async def test_authenticate_valid_key(self):
+        ext = self._make_ext("secret123:tenant_a")
+        ext._initialized_schemas.add("tenant_a")
+        ctx = RequestContext(api_key="secret123")
+        result = await ext.authenticate(ctx)
+        assert result.schema_name == "tenant_a"
+
+    @pytest.mark.asyncio
+    async def test_authenticate_missing_key(self):
+        ext = self._make_ext("secret:tenant_a")
+        with pytest.raises(AuthenticationError, match="Missing API key"):
+            await ext.authenticate(RequestContext(api_key=None))
+
+    @pytest.mark.asyncio
+    async def test_authenticate_wrong_key(self):
+        ext = self._make_ext("secret:tenant_a")
+        with pytest.raises(AuthenticationError, match="Invalid API key"):
+            await ext.authenticate(RequestContext(api_key="wrong-key"))
+
+    @pytest.mark.asyncio
+    async def test_different_keys_different_schemas(self):
+        """Core isolation test: two keys resolve to different schemas."""
+        ext = self._make_ext("key_a:schema_a;key_b:schema_b")
+        ext._initialized_schemas.update(["schema_a", "schema_b"])
+
+        result_a = await ext.authenticate(RequestContext(api_key="key_a"))
+        result_b = await ext.authenticate(RequestContext(api_key="key_b"))
+
+        assert result_a.schema_name == "schema_a"
+        assert result_b.schema_name == "schema_b"
+        assert result_a.schema_name != result_b.schema_name
+
+    @pytest.mark.asyncio
+    async def test_mcp_auth_disabled_falls_back_to_default(self):
+        ext = self._make_ext("secret:tenant_a", mcp_auth_disabled="true")
+        result = await ext.authenticate_mcp(RequestContext(api_key=None))
+        assert result.schema_name is not None
+
+    @pytest.mark.asyncio
+    async def test_mcp_auth_enabled_rejects_missing_key(self):
+        ext = self._make_ext("secret:tenant_a")
+        with pytest.raises(AuthenticationError):
+            await ext.authenticate_mcp(RequestContext(api_key=None))
+
+
+# =========================================================================
+# Prompt injection defense tests
+# =========================================================================
+
+
+class TestPromptInjectionDefense:
+    """
+    Validates the core security property: an API key can only access its
+    own schema, regardless of what the agent requests. This is the defense
+    against prompt injection where a compromised agent tries to access
+    another tenant's memories.
+    """
+
+    @pytest.mark.asyncio
+    async def test_attacker_key_cannot_reach_victim_schema(self):
+        """The schema is determined solely by the API key, not the request."""
+        ext = ApiKeySchemaTenantExtension({
+            "key_map": "victim_key:victim_schema;attacker_key:attacker_schema",
+        })
+        ext._initialized_schemas.update(["victim_schema", "attacker_schema"])
+
+        result = await ext.authenticate(RequestContext(api_key="attacker_key"))
+        assert result.schema_name == "attacker_schema"
+        assert result.schema_name != "victim_schema"
+
+    @pytest.mark.asyncio
+    async def test_unknown_key_rejected_not_defaulted(self):
+        """Unknown keys must be rejected, never mapped to a default schema."""
+        ext = ApiKeySchemaTenantExtension({
+            "key_map": "real_key:real_schema",
+        })
+
+        with pytest.raises(AuthenticationError, match="Invalid API key"):
+            await ext.authenticate(RequestContext(api_key="guessed_key"))
+
+    @pytest.mark.asyncio
+    async def test_empty_key_rejected(self):
+        """Empty and None keys must be rejected."""
+        ext = ApiKeySchemaTenantExtension({
+            "key_map": "real_key:real_schema",
+        })
+
+        with pytest.raises(AuthenticationError, match="Missing API key"):
+            await ext.authenticate(RequestContext(api_key=None))
+
+        with pytest.raises(AuthenticationError, match="Missing API key"):
+            await ext.authenticate(RequestContext(api_key=""))