From f9033a1409d2ebf28abf202f9f007abeb7dcc48f Mon Sep 17 00:00:00 2001 From: bntvllnt <32437578+bntvllnt@users.noreply.github.com> Date: Wed, 25 Mar 2026 23:20:31 +0100 Subject: [PATCH 1/4] =?UTF-8?q?feat!:=20v0.2.0=20production=20hardening=20?= =?UTF-8?q?=E2=80=94=20auth=20boundary,=20server-side=20secret=20gen,=20re?= =?UTF-8?q?move=20events+rate-limiter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BREAKING CHANGES: - create()/rotate(): secret material generated server-side, remove hash/lookupPrefix/secretHex from client args - Admin mutations (revoke/disable/enable/update/rotate/getUsage): ownerId now required - apiKeyEvents table removed — structured logs replace event-based audit trail - @convex-dev/rate-limiter removed — rate limiting is integrator's responsibility - @convex-dev/aggregate and @convex-dev/crons removed (unused) - getUsage(): period param removed (counter-only), lastUsedAt removed from return - list()/listByTag(): paginated via .take(100) default limit New features: - ownerId cross-check on all admin mutations (auth boundary) - Input validation: keyPrefix charset, env charset, gracePeriodMs bounds, metadata/scopes/tags size caps - configure() bounds validation + structured audit logging - lastUsedAt/remaining writes decoupled in validate (single merged patch) - lastUsedAt throttled to 60s to reduce OCC contention - revokeByTag expanded to include rotating+disabled statuses - Structured logging replaces all event table writes Tests: 82 passing (was 69), +14 new tests for auth boundary, input validation, bounds --- example/convex/example.test.ts | 247 ++++++++++++++++++---- example/convex/example.ts | 33 ++- src/client/index.ts | 61 +++--- src/client/types.ts | 14 +- src/component/_generated/component.ts | 19 +- src/component/convex.config.ts | 6 - src/component/mutations.ts | 284 ++++++++++++-------------- src/component/queries.ts | 147 +++++-------- src/component/schema.ts | 11 - src/shared.ts | 77 ++++--- 10 files changed, 487 insertions(+), 412 deletions(-) diff --git a/example/convex/example.test.ts b/example/convex/example.test.ts index bbcc29e..167879e 100644 --- a/example/convex/example.test.ts +++ b/example/convex/example.test.ts @@ -3,10 +3,7 @@ import { expect, test, describe, vi, beforeEach, afterEach } from "vitest"; import { convexTest } from "convex-test"; import { api } from "./_generated/api.js"; import { register } from "../../src/test.js"; -import rateLimiterTest from "@convex-dev/rate-limiter/test"; import shardedCounterTest from "@convex-dev/sharded-counter/test"; -import aggregateTest from "@convex-dev/aggregate/test"; -import cronsTest from "@convex-dev/crons/test"; const modules = import.meta.glob("./**/*.ts"); @@ -15,10 +12,7 @@ const HOUR = 3600000; function setup() { const t = convexTest(undefined!, modules); register(t, "apiKeys"); - rateLimiterTest.register(t, "apiKeys/rateLimiter"); shardedCounterTest.register(t, "apiKeys/shardedCounter"); - aggregateTest.register(t, "apiKeys/usageAggregate"); - cronsTest.register(t, "apiKeys/crons"); return t; } @@ -192,7 +186,7 @@ describe("validate", () => { name: "Revoke Me", ownerId: "org_1", }); - await t.mutation(api.example.revokeKey, { keyId: created.keyId }); + await t.mutation(api.example.revokeKey, { keyId: created.keyId, ownerId: "org_1" }); const result = await t.mutation(api.example.validateKey, { key: created.key, }); @@ -206,7 +200,7 @@ describe("validate", () => { name: "Disable Me", ownerId: "org_1", }); - await t.mutation(api.example.disableKey, { keyId: created.keyId }); + await t.mutation(api.example.disableKey, { keyId: created.keyId, ownerId: "org_1" }); const result = await t.mutation(api.example.validateKey, { key: created.key, }); @@ -250,6 +244,7 @@ describe("expiry", () => { }); await t.mutation(api.example.rotateKey, { keyId: created.keyId, + ownerId: "org_1", gracePeriodMs: HOUR, }); @@ -323,12 +318,12 @@ describe("disable / enable", () => { ownerId: "org_1", }); - await t.mutation(api.example.disableKey, { keyId: created.keyId }); + await t.mutation(api.example.disableKey, { keyId: created.keyId, ownerId: "org_1" }); let result = await t.mutation(api.example.validateKey, { key: created.key }); expect(result.valid).toBe(false); expect(result.reason).toBe("disabled"); - await t.mutation(api.example.enableKey, { keyId: created.keyId }); + await t.mutation(api.example.enableKey, { keyId: created.keyId, ownerId: "org_1" }); result = await t.mutation(api.example.validateKey, { key: created.key }); expect(result.valid).toBe(true); }); @@ -339,9 +334,9 @@ describe("disable / enable", () => { name: "Already Disabled", ownerId: "org_1", }); - await t.mutation(api.example.disableKey, { keyId: created.keyId }); + await t.mutation(api.example.disableKey, { keyId: created.keyId, ownerId: "org_1" }); // Second disable should not throw - await t.mutation(api.example.disableKey, { keyId: created.keyId }); + await t.mutation(api.example.disableKey, { keyId: created.keyId, ownerId: "org_1" }); }); test("enable already active key is idempotent", async () => { @@ -351,7 +346,7 @@ describe("disable / enable", () => { ownerId: "org_1", }); // Enable when already active should not throw - await t.mutation(api.example.enableKey, { keyId: created.keyId }); + await t.mutation(api.example.enableKey, { keyId: created.keyId, ownerId: "org_1" }); }); test("disable non-active key throws", async () => { @@ -360,9 +355,9 @@ describe("disable / enable", () => { name: "Revoked", ownerId: "org_1", }); - await t.mutation(api.example.revokeKey, { keyId: created.keyId }); + await t.mutation(api.example.revokeKey, { keyId: created.keyId, ownerId: "org_1" }); await expect( - t.mutation(api.example.disableKey, { keyId: created.keyId }), + t.mutation(api.example.disableKey, { keyId: created.keyId, ownerId: "org_1" }), ).rejects.toThrow("can only disable active keys"); }); @@ -372,9 +367,9 @@ describe("disable / enable", () => { name: "Revoked", ownerId: "org_1", }); - await t.mutation(api.example.revokeKey, { keyId: created.keyId }); + await t.mutation(api.example.revokeKey, { keyId: created.keyId, ownerId: "org_1" }); await expect( - t.mutation(api.example.enableKey, { keyId: created.keyId }), + t.mutation(api.example.enableKey, { keyId: created.keyId, ownerId: "org_1" }), ).rejects.toThrow("can only enable disabled keys"); }); }); @@ -393,6 +388,7 @@ describe("update", () => { await t.mutation(api.example.updateKey, { keyId: created.keyId, + ownerId: "org_1", name: "Renamed", scopes: ["read", "write"], tags: ["v2"], @@ -414,7 +410,7 @@ describe("update", () => { ownerId: "org_1", }); // Should not throw - await t.mutation(api.example.updateKey, { keyId: created.keyId }); + await t.mutation(api.example.updateKey, { keyId: created.keyId, ownerId: "org_1" }); }); test("update terminal key throws", async () => { @@ -423,10 +419,11 @@ describe("update", () => { name: "Terminal", ownerId: "org_1", }); - await t.mutation(api.example.revokeKey, { keyId: created.keyId }); + await t.mutation(api.example.revokeKey, { keyId: created.keyId, ownerId: "org_1" }); await expect( t.mutation(api.example.updateKey, { keyId: created.keyId, + ownerId: "org_1", name: "Can't", }), ).rejects.toThrow("cannot update terminal key"); @@ -441,6 +438,7 @@ describe("update", () => { await expect( t.mutation(api.example.updateKey, { keyId: created.keyId, + ownerId: "org_1", tags: ["-nope"], }), ).rejects.toThrow("Invalid tag"); @@ -456,9 +454,15 @@ describe("revoke", () => { name: "Double Revoke", ownerId: "org_1", }); - await t.mutation(api.example.revokeKey, { keyId: created.keyId }); + await t.mutation(api.example.revokeKey, { + keyId: created.keyId, + ownerId: "org_1", + }); // Second revoke should not throw - await t.mutation(api.example.revokeKey, { keyId: created.keyId }); + await t.mutation(api.example.revokeKey, { + keyId: created.keyId, + ownerId: "org_1", + }); }); }); @@ -524,6 +528,7 @@ describe("rotate", () => { const rotated = await t.mutation(api.example.rotateKey, { keyId: created.keyId, + ownerId: "org_1", gracePeriodMs: HOUR, }); @@ -553,6 +558,7 @@ describe("rotate", () => { const rotated = await t.mutation(api.example.rotateKey, { keyId: created.keyId, + ownerId: "org_1", }); expect(rotated.newKey).toContain("_test_"); @@ -570,9 +576,9 @@ describe("rotate", () => { name: "Revoked", ownerId: "org_1", }); - await t.mutation(api.example.revokeKey, { keyId: created.keyId }); + await t.mutation(api.example.revokeKey, { keyId: created.keyId, ownerId: "org_1" }); await expect( - t.mutation(api.example.rotateKey, { keyId: created.keyId }), + t.mutation(api.example.rotateKey, { keyId: created.keyId, ownerId: "org_1" }), ).rejects.toThrow("cannot rotate a terminal key"); }); }); @@ -632,7 +638,7 @@ describe("list", () => { ownerId: "org_1", env: "live", }); - await t.mutation(api.example.disableKey, { keyId: created.keyId }); + await t.mutation(api.example.disableKey, { keyId: created.keyId, ownerId: "org_1" }); const active = await t.query(api.example.listKeys, { ownerId: "org_1", @@ -652,7 +658,7 @@ describe("list", () => { name: "Revoked", ownerId: "org_1", }); - await t.mutation(api.example.revokeKey, { keyId: toRevoke.keyId }); + await t.mutation(api.example.revokeKey, { keyId: toRevoke.keyId, ownerId: "org_1" }); const active = await t.query(api.example.listKeys, { ownerId: "org_1", @@ -703,26 +709,9 @@ describe("getUsage", () => { const usage = await t.query(api.example.getUsage, { keyId: created.keyId, - }); - expect(usage.total).toBe(2); - expect(usage.lastUsedAt).toBeDefined(); - }); - - test("returns usage for a time period", async () => { - const t = setup(); - const before = Date.now(); - const created = await t.mutation(api.example.createKey, { - name: "Period Key", ownerId: "org_1", }); - await t.mutation(api.example.validateKey, { key: created.key }); - const after = Date.now(); - - const usage = await t.query(api.example.getUsage, { - keyId: created.keyId, - period: { start: before, end: after + 1000 }, - }); - expect(usage.total).toBe(1); + expect(usage.total).toBe(2); }); test("returns remaining for finite-use keys", async () => { @@ -736,6 +725,7 @@ describe("getUsage", () => { const usage = await t.query(api.example.getUsage, { keyId: created.keyId, + ownerId: "org_1", }); expect(usage.remaining).toBe(4); }); @@ -759,14 +749,181 @@ describe("parseKeyString edge cases", () => { describe("configure", () => { test("sets and updates configuration", async () => { const t = setup(); - // First call inserts await t.mutation(api.example.configureKeys, { cleanupIntervalMs: 3600000, defaultExpiryMs: 86400000, }); - // Second call patches await t.mutation(api.example.configureKeys, { cleanupIntervalMs: 7200000, }); }); + + test("rejects negative cleanupIntervalMs", async () => { + const t = setup(); + await expect( + t.mutation(api.example.configureKeys, { cleanupIntervalMs: -1 }), + ).rejects.toThrow("cleanupIntervalMs must be > 0"); + }); + + test("rejects zero defaultExpiryMs", async () => { + const t = setup(); + await expect( + t.mutation(api.example.configureKeys, { defaultExpiryMs: 0 }), + ).rejects.toThrow("defaultExpiryMs must be > 0"); + }); +}); + +// ─── auth boundary (cross-tenant) ─────────────────────────────── + +describe("auth boundary", () => { + test("revoke rejects wrong ownerId", async () => { + const t = setup(); + const created = await t.mutation(api.example.createKey, { + name: "Auth Test", + ownerId: "org_1", + }); + await expect( + t.mutation(api.example.revokeKey, { keyId: created.keyId, ownerId: "org_2" }), + ).rejects.toThrow("unauthorized"); + }); + + test("disable rejects wrong ownerId", async () => { + const t = setup(); + const created = await t.mutation(api.example.createKey, { + name: "Auth Test", + ownerId: "org_1", + }); + await expect( + t.mutation(api.example.disableKey, { keyId: created.keyId, ownerId: "org_2" }), + ).rejects.toThrow("unauthorized"); + }); + + test("enable rejects wrong ownerId", async () => { + const t = setup(); + const created = await t.mutation(api.example.createKey, { + name: "Auth Test", + ownerId: "org_1", + }); + await t.mutation(api.example.disableKey, { keyId: created.keyId, ownerId: "org_1" }); + await expect( + t.mutation(api.example.enableKey, { keyId: created.keyId, ownerId: "org_2" }), + ).rejects.toThrow("unauthorized"); + }); + + test("update rejects wrong ownerId", async () => { + const t = setup(); + const created = await t.mutation(api.example.createKey, { + name: "Auth Test", + ownerId: "org_1", + }); + await expect( + t.mutation(api.example.updateKey, { keyId: created.keyId, ownerId: "org_2", name: "Hacked" }), + ).rejects.toThrow("unauthorized"); + }); + + test("rotate rejects wrong ownerId", async () => { + const t = setup(); + const created = await t.mutation(api.example.createKey, { + name: "Auth Test", + ownerId: "org_1", + }); + await expect( + t.mutation(api.example.rotateKey, { keyId: created.keyId, ownerId: "org_2" }), + ).rejects.toThrow("unauthorized"); + }); + + test("getUsage rejects wrong ownerId", async () => { + const t = setup(); + const created = await t.mutation(api.example.createKey, { + name: "Auth Test", + ownerId: "org_1", + }); + await expect( + t.query(api.example.getUsage, { keyId: created.keyId, ownerId: "org_2" }), + ).rejects.toThrow("unauthorized"); + }); +}); + +// ─── input validation ─────────────────────────────────────────── + +describe("input validation", () => { + test("rejects env with underscores", async () => { + const t = setup(); + await expect( + t.mutation(api.example.createKey, { + name: "Bad Env", + ownerId: "org_1", + env: "live_test", + }), + ).rejects.toThrow("Invalid env"); + }); + + test("rejects metadata over 4KB", async () => { + const t = setup(); + await expect( + t.mutation(api.example.createKey, { + name: "Big Meta", + ownerId: "org_1", + metadata: { data: "x".repeat(5000) }, + }), + ).rejects.toThrow("metadata must be <= 4096 bytes"); + }); + + test("rejects more than 50 scopes", async () => { + const t = setup(); + const scopes = Array.from({ length: 51 }, (_, i) => `scope${i}`); + await expect( + t.mutation(api.example.createKey, { + name: "Too Many Scopes", + ownerId: "org_1", + scopes, + }), + ).rejects.toThrow("scopes must have <= 50 entries"); + }); + + test("rejects more than 20 tags", async () => { + const t = setup(); + const tags = Array.from({ length: 21 }, (_, i) => `tag${i}`); + await expect( + t.mutation(api.example.createKey, { + name: "Too Many Tags", + ownerId: "org_1", + tags, + }), + ).rejects.toThrow("tags must have <= 20 entries"); + }); +}); + +// ─── gracePeriodMs bounds ─────────────────────────────────────── + +describe("gracePeriodMs bounds", () => { + test("rejects gracePeriodMs below minimum (60s)", async () => { + const t = setup(); + const created = await t.mutation(api.example.createKey, { + name: "Grace Test", + ownerId: "org_1", + }); + await expect( + t.mutation(api.example.rotateKey, { + keyId: created.keyId, + ownerId: "org_1", + gracePeriodMs: 59999, + }), + ).rejects.toThrow("gracePeriodMs must be between"); + }); + + test("rejects gracePeriodMs above maximum (30 days)", async () => { + const t = setup(); + const created = await t.mutation(api.example.createKey, { + name: "Grace Test", + ownerId: "org_1", + }); + await expect( + t.mutation(api.example.rotateKey, { + keyId: created.keyId, + ownerId: "org_1", + gracePeriodMs: 30 * 24 * 60 * 60 * 1000 + 1, + }), + ).rejects.toThrow("gracePeriodMs must be between"); + }); }); diff --git a/example/convex/example.ts b/example/convex/example.ts index d7cdad4..d8bc87a 100644 --- a/example/convex/example.ts +++ b/example/convex/example.ts @@ -3,8 +3,6 @@ import { v } from "convex/values"; import { ApiKeys } from "../../src/client/index.js"; import { components } from "./_generated/api.js"; -const MINUTE = 60 * 1000; - const apiKeys = new ApiKeys(components.apiKeys, { prefix: "myapp", }); @@ -107,10 +105,10 @@ export const listByTag = query({ }); export const revokeKey = mutation({ - args: { keyId: v.string() }, + args: { keyId: v.string(), ownerId: v.string() }, returns: v.null(), - handler: async (ctx, { keyId }) => { - await apiKeys.revoke(ctx, { keyId }); + handler: async (ctx, { keyId, ownerId }) => { + await apiKeys.revoke(ctx, { keyId, ownerId }); return null; }, }); @@ -129,17 +127,19 @@ export const revokeByTag = mutation({ export const rotateKey = mutation({ args: { keyId: v.string(), + ownerId: v.string(), gracePeriodMs: v.optional(v.number()), }, returns: v.any(), - handler: async (ctx, { keyId, gracePeriodMs }) => { - return await apiKeys.rotate(ctx, { keyId, gracePeriodMs }); + handler: async (ctx, { keyId, ownerId, gracePeriodMs }) => { + return await apiKeys.rotate(ctx, { keyId, ownerId, gracePeriodMs }); }, }); export const updateKey = mutation({ args: { keyId: v.string(), + ownerId: v.string(), name: v.optional(v.string()), scopes: v.optional(v.array(v.string())), tags: v.optional(v.array(v.string())), @@ -153,19 +153,19 @@ export const updateKey = mutation({ }); export const disableKey = mutation({ - args: { keyId: v.string() }, + args: { keyId: v.string(), ownerId: v.string() }, returns: v.null(), - handler: async (ctx, { keyId }) => { - await apiKeys.disable(ctx, { keyId }); + handler: async (ctx, { keyId, ownerId }) => { + await apiKeys.disable(ctx, { keyId, ownerId }); return null; }, }); export const enableKey = mutation({ - args: { keyId: v.string() }, + args: { keyId: v.string(), ownerId: v.string() }, returns: v.null(), - handler: async (ctx, { keyId }) => { - await apiKeys.enable(ctx, { keyId }); + handler: async (ctx, { keyId, ownerId }) => { + await apiKeys.enable(ctx, { keyId, ownerId }); return null; }, }); @@ -173,12 +173,7 @@ export const enableKey = mutation({ export const getUsage = query({ args: { keyId: v.string(), - period: v.optional( - v.object({ - start: v.number(), - end: v.number(), - }), - ), + ownerId: v.string(), }, returns: v.any(), handler: async (ctx, args) => { diff --git a/src/client/index.ts b/src/client/index.ts index f6cd4eb..8495322 100644 --- a/src/client/index.ts +++ b/src/client/index.ts @@ -1,6 +1,5 @@ import type { GenericMutationCtx, GenericQueryCtx, GenericDataModel } from "convex/server"; import type { ComponentApi } from "../component/_generated/component.js"; -import { sha256Hex } from "../shared.js"; import type { CreateKeyOptions, CreateKeyResult, @@ -31,14 +30,6 @@ export interface ApiKeysConfig { defaultType?: KeyType; } -function generateRandomHex(length: number): string { - const bytes = new Uint8Array(length / 2); - crypto.getRandomValues(bytes); - return Array.from(bytes) - .map((b) => b.toString(16).padStart(2, "0")) - .join(""); -} - export class ApiKeys { public component: ComponentApi; private prefix: string; @@ -54,30 +45,17 @@ export class ApiKeys { ctx: RunMutationCtx, options: CreateKeyOptions, ): Promise { - const type = options.type ?? this.defaultType; - const typeShort = type === "publishable" ? "pub" : "secret"; - const env = options.env ?? "live"; - - const lookupPrefix = generateRandomHex(8); - const secretHex = generateRandomHex(64); - - const rawKey = [this.prefix, typeShort, env, lookupPrefix, secretHex].join("_"); - const hash = await sha256Hex(rawKey); - const result = await ctx.runMutation(this.component.mutations.create, { name: options.name, ownerId: options.ownerId, - type, + type: options.type ?? this.defaultType, scopes: options.scopes, tags: options.tags, - env, + env: options.env ?? "live", metadata: options.metadata, remaining: options.remaining, expiresAt: options.expiresAt, keyPrefix: this.prefix, - lookupPrefix, - secretHex, - hash, }); return { keyId: result.keyId as string, key: result.key }; @@ -93,8 +71,11 @@ export class ApiKeys { ) as ValidationResult; } - async revoke(ctx: RunMutationCtx, args: { keyId: string }): Promise { - await ctx.runMutation(this.component.mutations.revoke, { keyId: args.keyId }); + async revoke(ctx: RunMutationCtx, args: { keyId: string; ownerId: string }): Promise { + await ctx.runMutation(this.component.mutations.revoke, { + keyId: args.keyId, + ownerId: args.ownerId, + }); } async revokeByTag( @@ -106,16 +87,12 @@ export class ApiKeys { async rotate( ctx: RunMutationCtx, - args: { keyId: string; gracePeriodMs?: number }, + args: { keyId: string; ownerId: string; gracePeriodMs?: number }, ): Promise { - const lookupPrefix = generateRandomHex(8); - const secretHex = generateRandomHex(64); - return await ctx.runMutation(this.component.mutations.rotate, { keyId: args.keyId, + ownerId: args.ownerId, gracePeriodMs: args.gracePeriodMs, - lookupPrefix, - secretHex, }) as RotateResult; } @@ -143,6 +120,7 @@ export class ApiKeys { ctx: RunMutationCtx, args: { keyId: string; + ownerId: string; name?: string; scopes?: string[]; tags?: string[]; @@ -151,6 +129,7 @@ export class ApiKeys { ): Promise { await ctx.runMutation(this.component.mutations.update, { keyId: args.keyId, + ownerId: args.ownerId, name: args.name, scopes: args.scopes, tags: args.tags, @@ -158,21 +137,27 @@ export class ApiKeys { }); } - async disable(ctx: RunMutationCtx, args: { keyId: string }): Promise { - await ctx.runMutation(this.component.mutations.disable, { keyId: args.keyId }); + async disable(ctx: RunMutationCtx, args: { keyId: string; ownerId: string }): Promise { + await ctx.runMutation(this.component.mutations.disable, { + keyId: args.keyId, + ownerId: args.ownerId, + }); } - async enable(ctx: RunMutationCtx, args: { keyId: string }): Promise { - await ctx.runMutation(this.component.mutations.enable, { keyId: args.keyId }); + async enable(ctx: RunMutationCtx, args: { keyId: string; ownerId: string }): Promise { + await ctx.runMutation(this.component.mutations.enable, { + keyId: args.keyId, + ownerId: args.ownerId, + }); } async getUsage( ctx: RunQueryCtx, - args: { keyId: string; period?: { start: number; end: number } }, + args: { keyId: string; ownerId: string }, ): Promise { return await ctx.runQuery(this.component.queries.getUsage, { keyId: args.keyId, - period: args.period, + ownerId: args.ownerId, }) as UsageStats; } diff --git a/src/client/types.ts b/src/client/types.ts index 9c53d06..ab760f3 100644 --- a/src/client/types.ts +++ b/src/client/types.ts @@ -1,6 +1,6 @@ -import type { KeyType, KeyStatus, EventType } from "../shared.js"; +import type { KeyType, KeyStatus } from "../shared.js"; -export type { KeyType, KeyStatus, EventType }; +export type { KeyType, KeyStatus }; export interface CreateKeyOptions { name: string; @@ -58,7 +58,6 @@ export interface KeyMetadata { export interface UsageStats { total: number; remaining?: number; - lastUsedAt?: number; } export interface RotateResult { @@ -66,12 +65,3 @@ export interface RotateResult { newKey: string; oldKeyExpiresAt: number; } - -export interface KeyEvent { - keyId: string; - ownerId: string; - eventType: EventType; - reason?: string; - metadata?: Record; - timestamp: number; -} diff --git a/src/component/_generated/component.ts b/src/component/_generated/component.ts index bf99cbf..b7a9bd3 100644 --- a/src/component/_generated/component.ts +++ b/src/component/_generated/component.ts @@ -37,15 +37,12 @@ export type ComponentApi = { env?: string; expiresAt?: number; - hash: string; keyPrefix?: string; - lookupPrefix: string; metadata?: any; name: string; ownerId: string; remaining?: number; scopes?: Array; - secretHex: string; tags?: Array; type?: "secret" | "publishable"; }, @@ -55,21 +52,21 @@ export type ComponentApi = disable: FunctionReference< "mutation", "internal", - { keyId: string }, + { keyId: string; ownerId: string }, null, Name >; enable: FunctionReference< "mutation", "internal", - { keyId: string }, + { keyId: string; ownerId: string }, null, Name >; revoke: FunctionReference< "mutation", "internal", - { keyId: string }, + { keyId: string; ownerId: string }, null, Name >; @@ -86,8 +83,7 @@ export type ComponentApi = { gracePeriodMs?: number; keyId: string; - lookupPrefix: string; - secretHex: string; + ownerId: string; }, { newKey: string; newKeyId: string; oldKeyExpiresAt: number }, Name @@ -99,6 +95,7 @@ export type ComponentApi = keyId: string; metadata?: any; name?: string; + ownerId: string; scopes?: Array; tags?: Array; }, @@ -120,7 +117,7 @@ export type ComponentApi = type: "secret" | "publishable"; valid: true; } - | { reason: string; retryAfter?: number; valid: false }, + | { reason: string; valid: false }, Name >; }; @@ -128,8 +125,8 @@ export type ComponentApi = getUsage: FunctionReference< "query", "internal", - { keyId: string; period?: { end: number; start: number } }, - { lastUsedAt?: number; remaining?: number; total: number }, + { keyId: string; ownerId: string }, + { remaining?: number; total: number }, Name >; list: FunctionReference< diff --git a/src/component/convex.config.ts b/src/component/convex.config.ts index e78cf1a..2c0771c 100644 --- a/src/component/convex.config.ts +++ b/src/component/convex.config.ts @@ -1,14 +1,8 @@ import { defineComponent } from "convex/server"; -import rateLimiter from "@convex-dev/rate-limiter/convex.config.js"; import shardedCounter from "@convex-dev/sharded-counter/convex.config.js"; -import aggregate from "@convex-dev/aggregate/convex.config.js"; -import crons from "@convex-dev/crons/convex.config.js"; const component = defineComponent("apiKeys"); -component.use(rateLimiter); component.use(shardedCounter); -component.use(aggregate, { name: "usageAggregate" }); -component.use(crons); export default component; diff --git a/src/component/mutations.ts b/src/component/mutations.ts index 5eecfa6..3e5a9b2 100644 --- a/src/component/mutations.ts +++ b/src/component/mutations.ts @@ -1,16 +1,17 @@ import { v } from "convex/values"; import { mutation } from "./_generated/server.js"; import { components } from "./_generated/api.js"; -import { RateLimiter, MINUTE, HOUR } from "@convex-dev/rate-limiter"; import { ShardedCounter } from "@convex-dev/sharded-counter"; import { - KEY_STATUS, KEY_TYPE, TERMINAL_STATUSES, parseKeyString, timingSafeEqual, sha256Hex, validateTags, + validateKeyPrefix, + validateEnv, + validateSizeLimits, KEY_PREFIX_SEPARATOR, } from "../shared.js"; import { createLogger } from "../log.js"; @@ -19,13 +20,16 @@ import type { KeyStatus } from "../shared.js"; const log = createLogger("api-keys"); -const rateLimiter = new RateLimiter(components.rateLimiter, { - createKey: { kind: "fixed window", rate: 100, period: HOUR }, - validateKey: { kind: "token bucket", rate: 1000, period: MINUTE }, -}); - const counter = new ShardedCounter(components.shardedCounter); +function generateRandomHex(length: number): string { + const bytes = new Uint8Array(length / 2); + crypto.getRandomValues(bytes); + return Array.from(bytes) + .map((b) => b.toString(16).padStart(2, "0")) + .join(""); +} + export const create = mutation({ args: { name: v.string(), @@ -38,9 +42,6 @@ export const create = mutation({ remaining: v.optional(v.number()), expiresAt: v.optional(v.number()), keyPrefix: v.optional(v.string()), - lookupPrefix: v.string(), - secretHex: v.string(), - hash: v.string(), }, returns: v.object({ keyId: v.id("apiKeys"), @@ -57,19 +58,25 @@ export const create = mutation({ validateTags(args.tags); } - await rateLimiter.limit(ctx, "createKey", { - key: args.ownerId, - throws: true, - }); + const prefix = args.keyPrefix ?? "vk"; + validateKeyPrefix(prefix); + const env = args.env ?? "live"; + validateEnv(env); + validateSizeLimits({ metadata: args.metadata, scopes: args.scopes, tags: args.tags, name: args.name }); const type = args.type ?? "secret"; - const env = args.env ?? "live"; - const prefix = args.keyPrefix ?? "vk"; const typeShort = type === "publishable" ? "pub" : "secret"; + const lookupPrefix = generateRandomHex(8); + const secretHex = generateRandomHex(64); + const rawKey = [prefix, typeShort, env, lookupPrefix, secretHex].join( + KEY_PREFIX_SEPARATOR, + ); + const hash = await sha256Hex(rawKey); + const keyId = await ctx.db.insert("apiKeys", { - hash: args.hash, - lookupPrefix: args.lookupPrefix, + hash, + lookupPrefix, keyPrefix: prefix, type, env, @@ -83,22 +90,13 @@ export const create = mutation({ expiresAt: args.expiresAt, }); - await ctx.db.insert("apiKeyEvents", { - keyId, - ownerId: args.ownerId, - eventType: "key.created", - timestamp: Date.now(), - }); - - const rawKey = [prefix, typeShort, env, args.lookupPrefix, args.secretHex].join( - KEY_PREFIX_SEPARATOR, - ); - - log.info("key created", { keyId, ownerId: args.ownerId, type, env }); + log.info("key.created", { keyId, ownerId: args.ownerId, type, env }); return { keyId, key: rawKey }; }, }); +const LAST_USED_AT_THROTTLE_MS = 60_000; + export const validate = mutation({ args: { key: v.string(), @@ -118,12 +116,12 @@ export const validate = mutation({ v.object({ valid: v.literal(false), reason: v.string(), - retryAfter: v.optional(v.number()), }), ), handler: async (ctx, { key }) => { const parsed = parseKeyString(key); if (!parsed.valid) { + log.info("key.validate_failed", { reason: "malformed" }); return { valid: false as const, reason: "malformed" }; } @@ -133,6 +131,7 @@ export const validate = mutation({ .collect(); if (candidates.length === 0) { + log.info("key.validate_failed", { reason: "not_found", lookupPrefix: parsed.lookupPrefix }); return { valid: false as const, reason: "not_found" }; } @@ -147,45 +146,30 @@ export const validate = mutation({ } if (!matchedKey) { + log.info("key.validate_failed", { reason: "not_found", lookupPrefix: parsed.lookupPrefix }); return { valid: false as const, reason: "not_found" }; } const now = Date.now(); if (matchedKey.status === "revoked") { - await ctx.db.insert("apiKeyEvents", { - keyId: matchedKey._id, - ownerId: matchedKey.ownerId, - eventType: "key.validate_failed", - reason: "revoked", - timestamp: now, - }); + log.info("key.validate_failed", { keyId: matchedKey._id, reason: "revoked" }); return { valid: false as const, reason: "revoked" }; } if (matchedKey.status === "disabled") { - await ctx.db.insert("apiKeyEvents", { - keyId: matchedKey._id, - ownerId: matchedKey.ownerId, - eventType: "key.validate_failed", - reason: "disabled", - timestamp: now, - }); + log.info("key.validate_failed", { keyId: matchedKey._id, reason: "disabled" }); return { valid: false as const, reason: "disabled" }; } if (matchedKey.status === "exhausted") { + log.info("key.validate_failed", { keyId: matchedKey._id, reason: "exhausted" }); return { valid: false as const, reason: "exhausted" }; } if (matchedKey.expiresAt && matchedKey.expiresAt <= now) { await ctx.db.patch(matchedKey._id, { status: "expired" }); - await ctx.db.insert("apiKeyEvents", { - keyId: matchedKey._id, - ownerId: matchedKey.ownerId, - eventType: "key.expired", - timestamp: now, - }); + log.info("key.expired", { keyId: matchedKey._id }); return { valid: false as const, reason: "expired" }; } @@ -195,13 +179,7 @@ export const validate = mutation({ matchedKey.gracePeriodEnd <= now ) { await ctx.db.patch(matchedKey._id, { status: "expired" }); - await ctx.db.insert("apiKeyEvents", { - keyId: matchedKey._id, - ownerId: matchedKey.ownerId, - eventType: "key.expired", - reason: "grace_period_ended", - timestamp: now, - }); + log.info("key.expired", { keyId: matchedKey._id, reason: "grace_period_ended" }); return { valid: false as const, reason: "expired" }; } @@ -209,58 +187,35 @@ export const validate = mutation({ if (matchedKey.remaining !== undefined) { if (matchedKey.remaining <= 0) { await ctx.db.patch(matchedKey._id, { status: "exhausted" }); - await ctx.db.insert("apiKeyEvents", { - keyId: matchedKey._id, - ownerId: matchedKey.ownerId, - eventType: "key.exhausted", - timestamp: now, - }); + log.info("key.exhausted", { keyId: matchedKey._id }); return { valid: false as const, reason: "exhausted" }; } newRemaining = matchedKey.remaining - 1; - const updates: Record = { - remaining: newRemaining, - lastUsedAt: now, - }; - if (newRemaining === 0) { - updates.status = "exhausted"; - } - await ctx.db.patch(matchedKey._id, updates); + } + const shouldUpdateLastUsed = + !matchedKey.lastUsedAt || now - matchedKey.lastUsedAt >= LAST_USED_AT_THROTTLE_MS; + + const patch: Record = {}; + if (newRemaining !== matchedKey.remaining) { + patch.remaining = newRemaining; if (newRemaining === 0) { - await ctx.db.insert("apiKeyEvents", { - keyId: matchedKey._id, - ownerId: matchedKey.ownerId, - eventType: "key.exhausted", - timestamp: now, - }); + patch.status = "exhausted"; } - } else { - await ctx.db.patch(matchedKey._id, { lastUsedAt: now }); } - - const { ok, retryAfter } = await rateLimiter.limit(ctx, "validateKey", { - key: matchedKey._id, - throws: false, - }); - if (!ok) { - await ctx.db.insert("apiKeyEvents", { - keyId: matchedKey._id, - ownerId: matchedKey.ownerId, - eventType: "key.rate_limited", - timestamp: now, - }); - return { valid: false as const, reason: "rate_limited", retryAfter }; + if (shouldUpdateLastUsed) { + patch.lastUsedAt = now; + } + if (Object.keys(patch).length > 0) { + await ctx.db.patch(matchedKey._id, patch); + } + if (newRemaining === 0) { + log.info("key.exhausted", { keyId: matchedKey._id }); } await counter.add(ctx, matchedKey._id, 1); - await ctx.db.insert("apiKeyEvents", { - keyId: matchedKey._id, - ownerId: matchedKey.ownerId, - eventType: "key.validated", - timestamp: now, - }); + log.info("key.validated", { keyId: matchedKey._id, ownerId: matchedKey.ownerId }); return { valid: true as const, @@ -279,24 +234,22 @@ export const validate = mutation({ export const revoke = mutation({ args: { keyId: v.id("apiKeys"), + ownerId: v.string(), }, returns: v.null(), - handler: async (ctx, { keyId }) => { + handler: async (ctx, { keyId, ownerId }) => { const key = await ctx.db.get(keyId); if (!key) { throw new Error("key not found"); } + if (key.ownerId !== ownerId) { + throw new Error("unauthorized: key does not belong to owner"); + } if (key.status === "revoked") { return null; } await ctx.db.patch(keyId, { status: "revoked", revokedAt: Date.now() }); - await ctx.db.insert("apiKeyEvents", { - keyId, - ownerId: key.ownerId, - eventType: "key.revoked", - timestamp: Date.now(), - }); - log.info("key revoked", { keyId, ownerId: key.ownerId }); + log.info("key.revoked", { keyId, ownerId }); return null; }, }); @@ -308,36 +261,41 @@ export const revokeByTag = mutation({ }, returns: v.object({ revokedCount: v.number() }), handler: async (ctx, { ownerId, tag }) => { - const keys = await ctx.db + const activeKeys = await ctx.db .query("apiKeys") .withIndex("by_owner_status", (q) => q.eq("ownerId", ownerId).eq("status", "active")) .collect(); + const rotatingKeys = await ctx.db + .query("apiKeys") + .withIndex("by_owner_status", (q) => q.eq("ownerId", ownerId).eq("status", "rotating")) + .collect(); + const disabledKeys = await ctx.db + .query("apiKeys") + .withIndex("by_owner_status", (q) => q.eq("ownerId", ownerId).eq("status", "disabled")) + .collect(); + const allKeys = [...activeKeys, ...rotatingKeys, ...disabledKeys]; let revokedCount = 0; const now = Date.now(); - for (const key of keys) { + for (const key of allKeys) { if (key.tags.includes(tag)) { await ctx.db.patch(key._id, { status: "revoked", revokedAt: now }); - await ctx.db.insert("apiKeyEvents", { - keyId: key._id, - ownerId, - eventType: "key.revoked", - reason: `bulk_revoke_by_tag:${tag}`, - timestamp: now, - }); revokedCount++; } } + log.info("key.bulk_revoked", { ownerId, tag, revokedCount }); return { revokedCount }; }, }); +const MIN_GRACE_PERIOD_MS = 60_000; +const MAX_GRACE_PERIOD_MS = 30 * 24 * 60 * 60 * 1000; + export const rotate = mutation({ args: { keyId: v.id("apiKeys"), + ownerId: v.string(), gracePeriodMs: v.optional(v.number()), - lookupPrefix: v.string(), - secretHex: v.string(), }, returns: v.object({ newKeyId: v.id("apiKeys"), @@ -349,12 +307,21 @@ export const rotate = mutation({ if (!oldKey) { throw new Error("key not found"); } + if (oldKey.ownerId !== args.ownerId) { + throw new Error("unauthorized: key does not belong to owner"); + } if (TERMINAL_STATUSES.has(oldKey.status as KeyStatus)) { throw new Error("cannot rotate a terminal key"); } - const now = Date.now(); const gracePeriodMs = args.gracePeriodMs ?? 3600000; + if (gracePeriodMs < MIN_GRACE_PERIOD_MS || gracePeriodMs > MAX_GRACE_PERIOD_MS) { + throw new Error( + `gracePeriodMs must be between ${MIN_GRACE_PERIOD_MS} (60s) and ${MAX_GRACE_PERIOD_MS} (30 days)`, + ); + } + + const now = Date.now(); const gracePeriodEnd = now + gracePeriodMs; await ctx.db.patch(args.keyId, { @@ -363,18 +330,20 @@ export const rotate = mutation({ }); const typeShort = oldKey.type === "publishable" ? "pub" : "secret"; + const lookupPrefix = generateRandomHex(8); + const secretHex = generateRandomHex(64); const rawKey = [ oldKey.keyPrefix, typeShort, oldKey.env, - args.lookupPrefix, - args.secretHex, + lookupPrefix, + secretHex, ].join(KEY_PREFIX_SEPARATOR); const hash = await sha256Hex(rawKey); const newKeyId = await ctx.db.insert("apiKeys", { hash, - lookupPrefix: args.lookupPrefix, + lookupPrefix, keyPrefix: oldKey.keyPrefix, type: oldKey.type, env: oldKey.env, @@ -389,14 +358,7 @@ export const rotate = mutation({ rotatedFromId: args.keyId, }); - await ctx.db.insert("apiKeyEvents", { - keyId: args.keyId, - ownerId: oldKey.ownerId, - eventType: "key.rotated", - metadata: { newKeyId }, - timestamp: now, - }); - + log.info("key.rotated", { keyId: args.keyId, newKeyId, ownerId: oldKey.ownerId }); return { newKeyId, newKey: rawKey, oldKeyExpiresAt: gracePeriodEnd }; }, }); @@ -404,23 +366,28 @@ export const rotate = mutation({ export const update = mutation({ args: { keyId: v.id("apiKeys"), + ownerId: v.string(), name: v.optional(v.string()), scopes: v.optional(v.array(v.string())), tags: v.optional(v.array(v.string())), metadata: v.optional(jsonValue), }, returns: v.null(), - handler: async (ctx, { keyId, ...updates }) => { + handler: async (ctx, { keyId, ownerId, ...updates }) => { const key = await ctx.db.get(keyId); if (!key) { throw new Error("key not found"); } + if (key.ownerId !== ownerId) { + throw new Error("unauthorized: key does not belong to owner"); + } if (TERMINAL_STATUSES.has(key.status as KeyStatus)) { throw new Error("cannot update terminal key"); } if (updates.tags) { validateTags(updates.tags); } + validateSizeLimits({ metadata: updates.metadata, scopes: updates.scopes, tags: updates.tags, name: updates.name }); const patch: Record = {}; if (updates.name !== undefined) patch.name = updates.name; @@ -430,13 +397,7 @@ export const update = mutation({ if (Object.keys(patch).length > 0) { await ctx.db.patch(keyId, patch); - await ctx.db.insert("apiKeyEvents", { - keyId, - ownerId: key.ownerId, - eventType: "key.updated", - metadata: { fields: Object.keys(patch) }, - timestamp: Date.now(), - }); + log.info("key.updated", { keyId, ownerId, fields: Object.keys(patch) }); } return null; @@ -444,13 +405,19 @@ export const update = mutation({ }); export const disable = mutation({ - args: { keyId: v.id("apiKeys") }, + args: { + keyId: v.id("apiKeys"), + ownerId: v.string(), + }, returns: v.null(), - handler: async (ctx, { keyId }) => { + handler: async (ctx, { keyId, ownerId }) => { const key = await ctx.db.get(keyId); if (!key) { throw new Error("key not found"); } + if (key.ownerId !== ownerId) { + throw new Error("unauthorized: key does not belong to owner"); + } if (key.status === "disabled") { return null; } @@ -458,24 +425,25 @@ export const disable = mutation({ throw new Error("can only disable active keys"); } await ctx.db.patch(keyId, { status: "disabled" }); - await ctx.db.insert("apiKeyEvents", { - keyId, - ownerId: key.ownerId, - eventType: "key.disabled", - timestamp: Date.now(), - }); + log.info("key.disabled", { keyId, ownerId }); return null; }, }); export const enable = mutation({ - args: { keyId: v.id("apiKeys") }, + args: { + keyId: v.id("apiKeys"), + ownerId: v.string(), + }, returns: v.null(), - handler: async (ctx, { keyId }) => { + handler: async (ctx, { keyId, ownerId }) => { const key = await ctx.db.get(keyId); if (!key) { throw new Error("key not found"); } + if (key.ownerId !== ownerId) { + throw new Error("unauthorized: key does not belong to owner"); + } if (key.status === "active") { return null; } @@ -483,12 +451,7 @@ export const enable = mutation({ throw new Error("can only enable disabled keys"); } await ctx.db.patch(keyId, { status: "active" }); - await ctx.db.insert("apiKeyEvents", { - keyId, - ownerId: key.ownerId, - eventType: "key.enabled", - timestamp: Date.now(), - }); + log.info("key.enabled", { keyId, ownerId }); return null; }, }); @@ -500,12 +463,25 @@ export const configure = mutation({ }, returns: v.null(), handler: async (ctx, args) => { + if (args.cleanupIntervalMs !== undefined && args.cleanupIntervalMs <= 0) { + throw new Error("cleanupIntervalMs must be > 0"); + } + if (args.defaultExpiryMs !== undefined && args.defaultExpiryMs <= 0) { + throw new Error("defaultExpiryMs must be > 0"); + } + const existing = await ctx.db.query("config").first(); + const oldValues = existing + ? { cleanupIntervalMs: existing.cleanupIntervalMs, defaultExpiryMs: existing.defaultExpiryMs } + : {}; + if (existing) { await ctx.db.patch(existing._id, args); } else { await ctx.db.insert("config", args); } + + log.info("config.updated", { old: oldValues, new: args }); return null; }, }); diff --git a/src/component/queries.ts b/src/component/queries.ts index 2b692b1..8824e43 100644 --- a/src/component/queries.ts +++ b/src/component/queries.ts @@ -7,30 +7,52 @@ import { jsonValue } from "./validators.js"; const counter = new ShardedCounter(components.shardedCounter); +const DEFAULT_PAGE_SIZE = 100; + +const keyItemValidator = v.object({ + keyId: v.id("apiKeys"), + name: v.string(), + lookupPrefix: v.string(), + type: KEY_TYPE, + env: v.string(), + scopes: v.array(v.string()), + tags: v.array(v.string()), + status: KEY_STATUS, + metadata: v.optional(jsonValue), + remaining: v.optional(v.number()), + expiresAt: v.optional(v.number()), + createdAt: v.number(), + lastUsedAt: v.optional(v.number()), +}); + +function mapKey(k: any) { + return { + keyId: k._id, + name: k.name, + lookupPrefix: k.lookupPrefix, + type: k.type, + env: k.env, + scopes: k.scopes, + tags: k.tags, + status: k.status, + metadata: k.metadata, + remaining: k.remaining, + expiresAt: k.expiresAt, + createdAt: k._creationTime, + lastUsedAt: k.lastUsedAt, + }; +} + export const list = query({ args: { ownerId: v.string(), env: v.optional(v.string()), status: v.optional(KEY_STATUS), + limit: v.optional(v.number()), }, - returns: v.array( - v.object({ - keyId: v.id("apiKeys"), - name: v.string(), - lookupPrefix: v.string(), - type: KEY_TYPE, - env: v.string(), - scopes: v.array(v.string()), - tags: v.array(v.string()), - status: KEY_STATUS, - metadata: v.optional(jsonValue), - remaining: v.optional(v.number()), - expiresAt: v.optional(v.number()), - createdAt: v.number(), - lastUsedAt: v.optional(v.number()), - }), - ), - handler: async (ctx, { ownerId, env, status }) => { + returns: v.array(keyItemValidator), + handler: async (ctx, { ownerId, env, status, limit }) => { + const pageSize = limit ?? DEFAULT_PAGE_SIZE; let keysQuery; if (env) { keysQuery = ctx.db @@ -51,23 +73,8 @@ export const list = query({ .withIndex("by_owner_status", (q) => q.eq("ownerId", ownerId)); } - const keys = await keysQuery.collect(); - - return keys.map((k) => ({ - keyId: k._id, - name: k.name, - lookupPrefix: k.lookupPrefix, - type: k.type, - env: k.env, - scopes: k.scopes, - tags: k.tags, - status: k.status, - metadata: k.metadata, - remaining: k.remaining, - expiresAt: k.expiresAt, - createdAt: k._creationTime, - lastUsedAt: k.lastUsedAt, - })); + const keys = await keysQuery.take(pageSize); + return keys.map(mapKey); }, }); @@ -75,88 +82,46 @@ export const listByTag = query({ args: { ownerId: v.string(), tag: v.string(), + limit: v.optional(v.number()), }, - returns: v.array( - v.object({ - keyId: v.id("apiKeys"), - name: v.string(), - lookupPrefix: v.string(), - type: KEY_TYPE, - env: v.string(), - scopes: v.array(v.string()), - tags: v.array(v.string()), - status: KEY_STATUS, - metadata: v.optional(jsonValue), - remaining: v.optional(v.number()), - expiresAt: v.optional(v.number()), - createdAt: v.number(), - lastUsedAt: v.optional(v.number()), - }), - ), - handler: async (ctx, { ownerId, tag }) => { - const keys = await ctx.db + returns: v.array(keyItemValidator), + handler: async (ctx, { ownerId, tag, limit }) => { + const pageSize = limit ?? DEFAULT_PAGE_SIZE; + const allKeys = await ctx.db .query("apiKeys") .withIndex("by_owner_status", (q) => q.eq("ownerId", ownerId)) .collect(); - return keys + return allKeys .filter((k) => k.tags.includes(tag)) - .map((k) => ({ - keyId: k._id, - name: k.name, - lookupPrefix: k.lookupPrefix, - type: k.type, - env: k.env, - scopes: k.scopes, - tags: k.tags, - status: k.status, - metadata: k.metadata, - remaining: k.remaining, - expiresAt: k.expiresAt, - createdAt: k._creationTime, - lastUsedAt: k.lastUsedAt, - })); + .slice(0, pageSize) + .map(mapKey); }, }); export const getUsage = query({ args: { keyId: v.id("apiKeys"), - period: v.optional( - v.object({ - start: v.number(), - end: v.number(), - }), - ), + ownerId: v.string(), }, returns: v.object({ total: v.number(), remaining: v.optional(v.number()), - lastUsedAt: v.optional(v.number()), }), - handler: async (ctx, { keyId, period }) => { + handler: async (ctx, { keyId, ownerId }) => { const key = await ctx.db.get(keyId); if (!key) { throw new Error("key not found"); } - - let total: number; - if (period) { - const events = await ctx.db - .query("apiKeyEvents") - .withIndex("by_key", (q) => - q.eq("keyId", keyId).gte("timestamp", period.start).lte("timestamp", period.end), - ) - .collect(); - total = events.filter((e) => e.eventType === "key.validated").length; - } else { - total = await counter.count(ctx, keyId); + if (key.ownerId !== ownerId) { + throw new Error("unauthorized: key does not belong to owner"); } + const total = await counter.count(ctx, keyId); + return { total, remaining: key.remaining, - lastUsedAt: key.lastUsedAt, }; }, }); diff --git a/src/component/schema.ts b/src/component/schema.ts index 5f4eac4..429f612 100644 --- a/src/component/schema.ts +++ b/src/component/schema.ts @@ -27,17 +27,6 @@ export default defineSchema({ .index("by_owner_status", ["ownerId", "status"]) .index("by_owner_env", ["ownerId", "env", "status"]), - apiKeyEvents: defineTable({ - keyId: v.id("apiKeys"), - ownerId: v.string(), - eventType: v.string(), - reason: v.optional(v.string()), - metadata: v.optional(jsonValue), - timestamp: v.number(), - }) - .index("by_key", ["keyId", "timestamp"]) - .index("by_owner", ["ownerId", "timestamp"]), - config: defineTable({ cleanupIntervalMs: v.optional(v.number()), defaultExpiryMs: v.optional(v.number()), diff --git a/src/shared.ts b/src/shared.ts index 6da6986..4bca3ca 100644 --- a/src/shared.ts +++ b/src/shared.ts @@ -26,32 +26,59 @@ export const TERMINAL_STATUSES: ReadonlySet = new Set([ ]); export const TAG_PATTERN = /^[a-zA-Z0-9][a-zA-Z0-9-]*$/; +export const KEY_PREFIX_PATTERN = /^[a-zA-Z0-9]+$/; +export const ENV_PATTERN = /^[a-zA-Z0-9-]+$/; + +export const MAX_METADATA_SIZE = 4096; +export const MAX_SCOPES = 50; +export const MAX_TAGS = 20; +export const MAX_STRING_LENGTH = 256; + +export function validateKeyPrefix(prefix: string): void { + if (!KEY_PREFIX_PATTERN.test(prefix)) { + throw new Error( + `Invalid keyPrefix "${prefix}": must match ^[a-zA-Z0-9]+$ (alphanumeric only, no underscores)`, + ); + } + if (prefix.length > MAX_STRING_LENGTH) { + throw new Error(`keyPrefix must be <= ${MAX_STRING_LENGTH} characters`); + } +} + +export function validateEnv(env: string): void { + if (!ENV_PATTERN.test(env)) { + throw new Error( + `Invalid env "${env}": must match ^[a-zA-Z0-9-]+$ (no underscores — would break key parsing)`, + ); + } + if (env.length > MAX_STRING_LENGTH) { + throw new Error(`env must be <= ${MAX_STRING_LENGTH} characters`); + } +} + +export function validateSizeLimits(args: { + metadata?: unknown; + scopes?: string[]; + tags?: string[]; + name?: string; +}): void { + if (args.metadata !== undefined) { + const size = JSON.stringify(args.metadata).length; + if (size > MAX_METADATA_SIZE) { + throw new Error(`metadata must be <= ${MAX_METADATA_SIZE} bytes (got ${size})`); + } + } + if (args.scopes && args.scopes.length > MAX_SCOPES) { + throw new Error(`scopes must have <= ${MAX_SCOPES} entries (got ${args.scopes.length})`); + } + if (args.tags && args.tags.length > MAX_TAGS) { + throw new Error(`tags must have <= ${MAX_TAGS} entries (got ${args.tags.length})`); + } + if (args.name && args.name.length > MAX_STRING_LENGTH) { + throw new Error(`name must be <= ${MAX_STRING_LENGTH} characters`); + } +} -export const EVENT_TYPE = v.union( - v.literal("key.created"), - v.literal("key.validated"), - v.literal("key.validate_failed"), - v.literal("key.revoked"), - v.literal("key.rotated"), - v.literal("key.expired"), - v.literal("key.exhausted"), - v.literal("key.disabled"), - v.literal("key.enabled"), - v.literal("key.updated"), - v.literal("key.rate_limited"), -); -export type EventType = - | "key.created" - | "key.validated" - | "key.validate_failed" - | "key.revoked" - | "key.rotated" - | "key.expired" - | "key.exhausted" - | "key.disabled" - | "key.enabled" - | "key.updated" - | "key.rate_limited"; export function validateTag(tag: string): void { if (!TAG_PATTERN.test(tag)) { From a72d3d11f62700c6bbd5e7812791d4f9fa2a1b15 Mon Sep 17 00:00:00 2001 From: bntvllnt <32437578+bntvllnt@users.noreply.github.com> Date: Wed, 25 Mar 2026 23:20:37 +0100 Subject: [PATCH 2/4] docs: add v0.2.0 spec, deep analysis, and roadmap --- docs/DEEP-ANALYSIS.md | 188 ++++++++ docs/ROADMAP.md | 105 +++++ .../2026-03-25-v1-production-hardening.md | 418 ++++++++++++++++++ 3 files changed, 711 insertions(+) create mode 100644 docs/DEEP-ANALYSIS.md create mode 100644 docs/ROADMAP.md create mode 100644 specs/active/2026-03-25-v1-production-hardening.md diff --git a/docs/DEEP-ANALYSIS.md b/docs/DEEP-ANALYSIS.md new file mode 100644 index 0000000..f051ee0 --- /dev/null +++ b/docs/DEEP-ANALYSIS.md @@ -0,0 +1,188 @@ +# Deep Analysis: @vllnt/convex-api-keys + +**Mode**: Deep | **Perspectives**: 7 (Security, Adversarial, Performance, Scalability, Extensibility, Observability, API Design) +**Date**: 2026-03-24 | **Verification**: static repo review + `pnpm test` + `pnpm typecheck` + +## The Essence + +A Convex component with solid API-key storage basics, but the main production risks are not cryptographic. The hard problems here are authorization boundaries, write-heavy validation, unbounded collection scans, and an operational story that is weaker than the README suggests. + +## Executive Summary + +The repository gets several important fundamentals right: keys are stored hashed, lookups are prefix-indexed, and comparison uses a timing-safe helper. However, the merged analysis converges on three critical issues: + +1. `validate()` is a write transaction on the hot auth path. +2. The component has no built-in authorization boundary. +3. Several admin and analytics paths do unbounded scans, while cleanup and aggregation are configured but not actually implemented. + +This means the package is acceptable for modest trusted-server usage, but not yet secure-by-default or scale-ready as a public reusable component. + +--- + +## Verification Snapshot + +- `pnpm test` passed: 69 tests across unit and example integration coverage. +- `pnpm typecheck` passed. +- Findings below are based on code in `src/` and the example app in `example/convex/`, not just README claims. + +--- + +## Verified Facts + +- Keys are stored as SHA-256 hashes in `apiKeys.hash`, not raw secrets. + Evidence: `src/component/schema.ts:7-28`, `src/shared.ts:114-121` +- Key lookups are prefix-indexed through `lookupPrefix`. + Evidence: `src/component/schema.ts:26`, `src/component/mutations.ts:130-133` +- `validate()` is implemented as a mutation and writes on successful validation. + Evidence: `src/component/mutations.ts:102-277` +- `list()`, `listByTag()`, `getUsage(period)`, and `revokeByTag()` all use `.collect()`. + Evidence: `src/component/queries.ts:54`, `src/component/queries.ts:97-103`, `src/component/queries.ts:145-151`, `src/component/mutations.ts:311-330` +- `aggregate` and `crons` child components are registered, but no repo code uses them for cleanup or analytics reads. + Evidence: `src/component/convex.config.ts:9-12`, `src/component/queries.ts:137-160`, `src/component/mutations.ts:496-510` +- The public example forwards `ownerId` and `keyId` directly into component APIs and does not use `ctx.auth`. + Evidence: `example/convex/example.ts:19-216` + +--- + +## Failure Hypotheses (Merged, Prioritized) + +### Critical + +| ID | Failure | Why It Matters | Evidence | Mitigation | +|---|---|---|---|---| +| **C1** | **No built-in authorization boundary** | If an app exposes these wrappers directly, any caller with a `keyId` or `ownerId` can attempt cross-tenant admin operations | `src/component/mutations.ts:279-510`, `src/component/queries.ts:10-162`, `example/convex/example.ts:19-216` | Add an explicit ownership/auth hook or require an app-supplied authorizer for every admin read/write surface | +| **C2** | **`validate()` is a write-heavy hot path** | Every successful auth check patches state, inserts an event, and increments a counter; high-volume keys will see contention and unnecessary invalidations | `src/component/mutations.ts:208-263` | Split auth check from analytics tracking, or at minimum threshold/async `lastUsedAt` updates | +| **C3** | **Unbounded scans on admin and analytics paths** | Tenant growth and event growth will turn listing, tag filtering, bulk revoke, and period usage queries into scale bottlenecks | `src/component/queries.ts:54`, `src/component/queries.ts:97-103`, `src/component/queries.ts:145-151`, `src/component/mutations.ts:311-330` | Add pagination, tag indexing/join tables, and bounded batch processing | + +### High + +| ID | Failure | Why It Matters | Evidence | Mitigation | +|---|---|---|---|---| +| **H1** | **Server-side invariants are weaker than claimed** | `create()` accepts caller-supplied `hash`, `lookupPrefix`, and `secretHex`; integrity depends on caller honesty instead of server derivation | `src/client/index.ts:61-81`, `src/component/mutations.ts:29-44`, `src/component/mutations.ts:65-98` | Generate secret material and derive hash inside the component mutation or behind an internal mutation | +| **H2** | **Rate limit runs after state mutation** | Rate-limited validations can still burn `remaining` quota and write `lastUsedAt` first | `src/component/mutations.ts:208-245` | Reorder validate flow: parse -> find -> rate limit -> mutate usage state | +| **H3** | **No pre-match throttle on malformed/unknown probes** | Attackers can cheaply probe malformed keys or random prefixes without hitting per-key rate limits | `src/component/mutations.ts:125-150`, `src/component/mutations.ts:242-253` | Add global/per-prefix throttling before matched-key state writes | +| **H4** | **Advertised features exceed implemented behavior** | README promises event callbacks, scheduled cleanup, and O(log n) analytics, but the code does not currently provide those behaviors | `README.md:20`, `README.md:28-29`, `src/component/convex.config.ts:9-12`, `src/component/queries.ts:143-154`, `src/component/mutations.ts:496-510` | Narrow docs now or implement the missing features | +| **H5** | **Bulk tag operations are owner scans plus JS filtering** | `listByTag()` and `revokeByTag()` do not scale with large tenants and are easy abuse targets | `src/component/queries.ts:96-118`, `src/component/mutations.ts:304-332` | Add `by_owner_tag` support or a tag join table; batch revoke continuations | +| **H6** | **Audit model is too thin for incident response** | Events have no actor, source, or request metadata, and `eventType` is just `v.string()` | `src/component/schema.ts:30-39` | Use a typed event validator and include actor/source/request context | +| **H7** | **Rotation duplicates finite-use quota during grace** | `rotate()` copies `remaining` to the new key while the old key still validates during grace, effectively duplicating budget | `src/component/mutations.ts:360-390`, `src/component/mutations.ts:208-240`, `example/convex/example.test.ts:518-544` | Share quota across rotated keys or block rotation for finite-use keys | + +### Medium + +| ID | Failure | Why It Matters | Evidence | Mitigation | +|---|---|---|---|---| +| M1 | No pagination on list surfaces | Public APIs return unbounded arrays | `src/component/queries.ts:10-120` | Add cursor-based pagination | +| M2 | `prefix` / `env` are not validated against `_` separator semantics | Docs say `env` can be any string, but underscores break parsing | `README.md:64-66`, `README.md:78`, `src/shared.ts:83-100` | Validate allowed charset for key segments | +| M3 | Metadata, scopes, and tags have no size/cardinality limits | Large payloads can bloat documents and responses | `src/component/schema.ts:15-20`, `src/component/queries.ts:56-70`, `src/component/validators.ts:1-4` | Add explicit caps and document them | +| M4 | Event history is effectively write-only | No paginated event query exists for operators | `src/component/schema.ts:30-39`, no corresponding query in `src/component/queries.ts` | Add `listEvents()` | +| M5 | Rate-limit policy is hardcoded and global | Real deployments need tenant-, key-, or plan-specific policies | `src/component/mutations.ts:22-25` | Support configurable rate-limit policy | +| M6 | Cleanup-related config is stored but unused | `cleanupIntervalMs` and `defaultExpiryMs` currently do not change runtime behavior | `src/component/schema.ts:41-44`, `src/component/mutations.ts:496-510` | Implement retention/cleanup and default expiry wiring | +| M7 | Tests do not exercise auth, rate-limited branches, cleanup, or large-data behavior | Current passing suite does not de-risk the scale and misuse cases above | `example/convex/example.test.ts` | Add targeted misuse and scale tests | + +--- + +## Cross-Cutting Concerns + +1. **Security model ambiguity** + The package behaves like infrastructure, but its public surfaces assume the host app will add authorization externally. That is too implicit for a reusable component. + +2. **Write amplification** + Validation is both an auth check and a synchronous analytics pipeline. That couples correctness to throughput. + +3. **Operational gaps** + Event retention, cleanup, actor-aware audits, and scalable usage analytics are all partial at best. + +4. **Doc and implementation drift** + The package markets stronger operational capabilities than the repository currently contains. + +--- + +## Assumptions To Challenge + +| Assumption | Evidence For | Evidence Against | Verdict | +|---|---|---|---| +| "Owner scoping is enough for multi-tenant safety" | Queries and some mutations carry `ownerId` | Caller identity is never derived or checked in component code | **Discard** | +| "Every validation must synchronously update usage state" | Nice for dashboards and quota accounting | Creates the hottest contention point in the system | **Validate** | +| "Child components mean cleanup and aggregation are implemented" | `aggregate` and `crons` are registered | No cleanup job or aggregate-backed read path exists | **Discard** | +| "Current tests prove production readiness" | Tests are broad for normal lifecycle flows | They do not cover auth boundaries, load behavior, or retention logic | **Discard** | + +--- + +## What You Haven't Considered + +1. **Reactive query churn** + Because `validate()` patches `lastUsedAt`, any owner-scoped list subscriber can be invalidated by routine auth traffic. + +2. **Quota duplication on rotate** + For finite-use keys, rotation is not just a status transition. It creates two concurrently valid keys with copied `remaining` state. + +3. **Invisible malformed traffic** + `malformed` and unknown-key attempts return quickly, but they are not promoted into a robust operator-facing audit stream. + +4. **Segment encoding contract** + The underscore-delimited key format is stricter than the public docs imply; allowing arbitrary `env`/prefix strings creates parsing hazards. + +--- + +## The Real Question + +Not "is the crypto acceptable?" but **"can this component stay correct, isolated, and observable under hostile or high-volume traffic?"** + +Today the answer is: only if the integrating app adds strong authorization and the workload remains modest. + +--- + +## Action Plan & Roadmap + +### Immediate (Before broader production use) + +| # | Action | Addresses | Effort | +|---|---|---|---| +| 1 | Add an explicit authorization/ownership hook for every admin read/write operation | C1 | Medium | +| 2 | Reorder `validate()` so rate limiting happens before quota burn and usage writes | C2, H2 | Small | +| 3 | Add pagination to `list()` and `listByTag()` | C3, M1 | Small | +| 4 | Validate key-segment inputs (`prefix`, `env`) and configuration bounds | M2, M6 | Small | +| 5 | Move secret generation and hash derivation fully server-side | H1 | Medium | +| 6 | Tighten event typing and add actor/source metadata | H6 | Medium | +| 7 | Align README claims with implemented behavior | H4 | Small | + +### Short-Term (1-2 Weeks) + +| # | Action | Addresses | Effort | +|---|---|---|---| +| 8 | Add tag indexing or a join table for tag lookups | C3, H5 | Medium | +| 9 | Batch `revokeByTag()` via continuation/scheduler pattern | C3, H5 | Medium | +| 10 | Add `listEvents()` with pagination and filters | H6, M4 | Medium | +| 11 | Cap metadata size and array lengths for `scopes` / `tags` | M3 | Small | +| 12 | Add tests for auth boundaries, rate-limited outcomes, and malformed probe handling | H3, M7 | Medium | + +### Medium-Term (1 Month) + +| # | Action | Addresses | Effort | +|---|---|---|---| +| 13 | Implement retention/cleanup using the stored config | C3, M6 | Medium | +| 14 | Decouple `lastUsedAt` from every successful validation | C2 | Medium | +| 15 | Replace event scans in `getUsage(period)` with time-bucketed aggregation | C3, H4 | Large | +| 16 | Make rate-limit policy configurable by tenant/key/plan | M5 | Medium | +| 17 | Decide rotation semantics for finite-use keys and enforce them in code | H7 | Medium | + +### Long-Term + +| # | Action | Addresses | Effort | +|---|---|---|---| +| 18 | Split authentication from analytics tracking into separate durability classes | C2, C3 | Large | +| 19 | Add explicit extension points for callbacks/webhooks only after the audit model is stable | H4, H6 | Large | +| 20 | Revisit key format versioning before wider adoption | M2 | Medium | + +--- + +## Severity Distribution + +```text +Critical : 3 +High : 7 +Medium : 7 +``` + +## Bottom Line + +This is a strong v0.1 lifecycle component with good hashing/storage basics and a solid happy-path test suite. The merged review does not say "rewrite it"; it says tighten the trust boundary, remove avoidable hot-path writes, and make the operational model real before claiming production-grade security and scale. diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md new file mode 100644 index 0000000..86efc06 --- /dev/null +++ b/docs/ROADMAP.md @@ -0,0 +1,105 @@ +# Roadmap: @vllnt/convex-api-keys + +Derived from [DEEP-ANALYSIS.md](./DEEP-ANALYSIS.md) (2026-03-24, 6-perspective audit). + +## Priority Legend + +- **CRITICAL** — blocks production use at scale +- **HIGH** — security or correctness risk +- **MEDIUM** — quality of life / extensibility +- **LOW** — nice to have + +--- + +## Phase 1: Harden (Before v1.0) + +Goal: Make the component safe-by-default for any integrator. + +| # | Task | Severity | Effort | Addresses | +|---|------|----------|--------|-----------| +| 1.1 | Add `ownerId` cross-check on revoke/disable/enable/update/rotate/getUsage | CRITICAL | Small | C1 — no auth boundary | +| 1.2 | Move `secretHex` + `hash` generation into component mutations | HIGH | Medium | H1 — secret in mutation logs | +| 1.3 | Reorder validate: rate limit BEFORE `remaining` decrement + `lastUsedAt` write | HIGH | Small | H2, H6 — wasted writes on rejected requests | +| 1.4 | Add pre-match rate limit on validate (global or per-prefix) | HIGH | Medium | H3 — unlimited probing | +| 1.5 | Validate `keyPrefix` against `^[a-zA-Z0-9]+$` | MEDIUM | Trivial | M2 — separator collision | +| 1.6 | Validate `env` against `^[a-zA-Z0-9-]+$` (no underscores) | MEDIUM | Trivial | M2 — parsing hazard | +| 1.7 | Enforce `gracePeriodMs` upper bound (max 30 days) | MEDIUM | Trivial | M6 — unbounded grace | +| 1.8 | Add bounds validation on `configure()` inputs | HIGH | Trivial | H4 — config poisoning | +| 1.9 | Emit audit event on `configure()` changes | HIGH | Trivial | H4 — silent config mutation | +| 1.10 | Cap `metadata` size (4KB), `scopes` (50), `tags` (20), string fields (256 chars) | MEDIUM | Small | M3 — payload bloat | +| 1.11 | Log structured output on every validate outcome (success + all failure reasons) | HIGH | Small | H5 — operational blindness | +| 1.12 | Align README claims with actual implementation (cleanup, analytics, callbacks) | HIGH | Small | H4 — doc/code drift | + +**Exit criteria:** All 12 items complete. Cut v1.0.0. + +--- + +## Phase 2: Scale (1-2 Weeks after v1.0) + +Goal: Remove the unbounded scan patterns and write contention. + +| # | Task | Severity | Effort | Addresses | +|---|------|----------|--------|-----------| +| 2.1 | Decouple `lastUsedAt` from validate hot path (write only when delta > 60s) | CRITICAL | Medium | C2 — OCC contention | +| 2.2 | Add cursor-based pagination to `list()`, `listByTag()`, `getUsage(period)` | CRITICAL | Medium | C3, M1, M8 — unbounded results | +| 2.3 | Add tag index (`by_owner_tag`) or join table for `listByTag` / `revokeByTag` | HIGH | Medium | H5, M1 — full owner scan | +| 2.4 | Paginate `revokeByTag` via continuation / scheduler pattern | HIGH | Medium | H3 — mutation timeout at scale | +| 2.5 | Expand `revokeByTag` to include `rotating` + `disabled` statuses | MEDIUM | Small | M2 — silent skip | +| 2.6 | Add `listEvents()` paginated query to public API | MEDIUM | Medium | M4 — event table write-only | +| 2.7 | Add tests for: auth boundaries, rate-limited paths, malformed probes, large tenant scans | HIGH | Medium | M7 — untested risk paths | + +**Exit criteria:** All 7 items complete. No `.collect()` without pagination in any public query. + +--- + +## Phase 3: Operate (1 Month) + +Goal: Make the component self-managing and observable in production. + +| # | Task | Severity | Effort | Addresses | +|---|------|----------|--------|-----------| +| 3.1 | Implement event retention scheduled job (use `cleanupIntervalMs` from config) | CRITICAL | Medium | C3, M6, M9 — unbounded event growth | +| 3.2 | Implement expired key sweep (cron that marks expired keys without waiting for validate) | MEDIUM | Medium | M9 — lazy expiry | +| 3.3 | Replace `getUsage(period)` event scan with time-bucketed aggregates | CRITICAL | Large | C3 — O(N) usage queries | +| 3.4 | Make rate-limit policy configurable per key/owner/tier | MEDIUM | Medium | M5 — hardcoded 1000/min | +| 3.5 | Define rotation semantics for finite-use keys (share quota or block rotation) | HIGH | Medium | H7 — quota duplication | +| 3.6 | Add actor/source/request metadata to audit events | HIGH | Medium | H6 — thin audit model | +| 3.7 | Make logger injectable via `ApiKeysConfig` (consumers provide their own transport) | MEDIUM | Small | Observability gap | +| 3.8 | Add `getConfig()` query (config is currently write-only) | MEDIUM | Trivial | Observability gap | + +**Exit criteria:** Event table has retention. Usage queries are O(1). Rate limits are configurable. + +--- + +## Phase 4: Extend (3+ Months) + +Goal: Enable ecosystem growth and advanced use cases. + +| # | Task | Severity | Effort | Addresses | +|---|------|----------|--------|-----------| +| 4.1 | Add `onEvent` dispatch hook for webhooks/streaming | MEDIUM | Medium | Extensibility — events are dead-end | +| 4.2 | Type-safe metadata generics on `ApiKeys` client wrapper | MEDIUM | Small | M3 — untyped metadata | +| 4.3 | Key format versioning (version byte in key string) | MEDIUM | Large | M7 — format immutability | +| 4.4 | HMAC-SHA256 with server-side pepper (defense against DB leak) | MEDIUM | Large | Security — unkeyed hash | +| 4.5 | Middleware/plugin composition model (replace monolithic class) | LOW | Large | Extensibility | +| 4.6 | Admin query surface (cross-owner listing, system health) | LOW | Medium | Extensibility | +| 4.7 | Offline/edge validation (HMAC-based local verify without Convex round-trip) | LOW | Large | Edge deployment | + +--- + +## Milestone Summary + +``` +v1.0.0 Phase 1 complete — safe-by-default +v1.1.0 Phase 2 complete — scale-ready +v1.2.0 Phase 3 complete — production-operated +v2.0.0 Phase 4 complete — extensible ecosystem +``` + +## Status + +- [x] v0.1.0 shipped (2026-03-24) — feature-complete, 69 tests, OSS grade A +- [ ] Phase 1 — not started +- [ ] Phase 2 — not started +- [ ] Phase 3 — not started +- [ ] Phase 4 — not started diff --git a/specs/active/2026-03-25-v1-production-hardening.md b/specs/active/2026-03-25-v1-production-hardening.md new file mode 100644 index 0000000..6a11408 --- /dev/null +++ b/specs/active/2026-03-25-v1-production-hardening.md @@ -0,0 +1,418 @@ +--- +title: "v0.2.0 Production Hardening" +status: active +created: 2026-03-25 +estimate: 14h +tier: standard +--- + +# v0.2.0 Production Hardening + +## Context + +The deep analysis (2026-03-24, 7-perspective audit) found 3 critical, 7 high, and 7 medium-severity issues that block production use of `@vllnt/convex-api-keys`. The component has solid crypto fundamentals (hashed storage, prefix-indexed lookup, timing-safe comparison) but lacks authorization boundaries, has write-heavy validation, unbounded collection scans, and an operational story weaker than the README promises. This spec consolidates Phases 1-2 roadmap items plus select Phase 3 items into a single implementable plan for v0.2. Phase 3 operational items (crons, aggregates) and all Phase 4 items are deferred to subsequent specs. + +## Security Model + +**Threat model:** This component protects against **accidental cross-tenant bugs in honest host apps** (Threat A), NOT against malicious/compromised integrators (Threat B). + +**Trust boundary:** The component trusts the `ownerId` it receives from the host app. It does not derive or verify caller identity from `ctx.auth`. The ownerId cross-check (AC-1) prevents a host app bug from accidentally operating on another tenant's keys — it does NOT prevent a compromised host app from passing a forged ownerId. + +**Implication:** Integrators who need Threat B protection must derive `ownerId` from their own auth layer (e.g., `ctx.auth.getUserIdentity()`) before passing it to the component. The component cannot enforce this — it is the host app's responsibility. + +**Design decision:** ownerId is a **required arg** on all admin mutations (revoke, disable, enable, update, rotate). This is a breaking change from v0.1 (which took only keyId). The component internally fetches the key and asserts `key.ownerId === args.ownerId` before any state change. This was chosen over optional-ownerId (which would be silently bypassable) and over component-internal ctx.auth (which would couple the component to a specific auth provider). + +## Codebase Impact (MANDATORY) + +| Area | Impact | Detail | +|------|--------|--------| +| `src/component/mutations.ts` | MODIFY | Auth boundary (ownerId on admin mutations), secret generation server-side for create() AND rotate(), config bounds/audit, revokeByTag pagination+status expansion, gracePeriod min/max bounds, remove all apiKeyEvents inserts + rateLimiter usage — replace with structured logging + counter, decouple lastUsedAt from remaining write | +| `src/component/queries.ts` | MODIFY | Cursor-based pagination on list/listByTag, simplify getUsage to counter-only (remove event scan), add getConfig query | +| `src/component/schema.ts` | MODIFY | **Remove `apiKeyEvents` table entirely.** Add tag index (by_owner_tag). Remove aggregate/crons component registration if unused. | +| `src/component/convex.config.ts` | MODIFY | Remove `aggregate`, `crons`, and `rateLimiter` component registrations (rate limiting is integrator's responsibility; events removed) | +| `src/component/validators.ts` | MODIFY | Add keyPrefix/env charset validators, size cap validators, config bound validators | +| `src/shared.ts` | MODIFY | Add keyPrefix/env validation functions, KEY_PREFIX_PATTERN/ENV_PATTERN constants. Remove EVENT_TYPE validator (no event table). | +| `src/client/index.ts` | MODIFY | Remove client-side hash/secret generation from create() AND rotate(), move to server. Add ownerId as required arg on admin mutations. Add pagination params to list/listByTag. Simplify getUsage (counter-only). | +| `src/client/types.ts` | MODIFY | Add PaginatedResult, ConfigResult types. Update CreateKeyOptions (remove hash/lookupPrefix/secretHex). Add ownerId to admin mutation args. Remove KeyEvent type. | +| `src/log.ts` | MODIFY | Add structured validate outcome logging (replaces event table as audit trail) | +| `README.md` | MODIFY | Align claims with implementation, migration guide with before/after code examples, document new APIs | +| `example/convex/example.ts` | AFFECTED | Must update to new client API (no hash/secret params, ownerId on admin ops, pagination on list) | +| `example/convex/example.test.ts` | AFFECTED | Existing 69 tests must be updated (create() args change, ownerId added). Add auth boundary, rate-limit, malformed probe tests | + +**Files:** 0 create | 10 modify | 2 affected +**Reuse:** Existing `ShardedCounter` instance, `TAG_PATTERN` regex, `parseKeyString`, `sha256Hex`, `timingSafeEqual` +**Breaking changes:** +- `create()` client API — removes `hash`, `lookupPrefix`, `secretHex` from args (secret gen moves server-side) +- `rotate()` client API — removes `lookupPrefix`, `secretHex` from args (same fix) +- Admin mutations (revoke/disable/enable/update/rotate) — `ownerId` becomes required arg +- `list()`/`listByTag()` return paginated results (type-level break: `KeyMetadata[]` -> `PaginatedResult`) +- `getUsage()` — removes `period` param (counter-only, no event scan) +- `apiKeyEvents` table removed — audit trail moves to structured logs +- Migration path: bump to v0.2.0, include before/after code examples in README +**New dependencies:** None + +## User Journey (MANDATORY) + +### Primary Journey + +ACTOR: Convex developer integrating `@vllnt/convex-api-keys` as a component +GOAL: Use the component safely in production with multi-tenant isolation, scale, and observability +PRECONDITION: Component installed, schema deployed, `ApiKeys` client instantiated + +1. Developer calls `apiKeys.create(ctx, { name, ownerId, ... })` + -> System generates secret material server-side, stores only hash + -> Developer receives `{ keyId, key }` — raw key returned once + +2. End-user sends API key in request header + -> Developer calls `apiKeys.validate(ctx, { key })` + -> System validates hash, returns scopes/metadata (rate limiting is integrator's responsibility at HTTP layer) + -> Developer uses result for authorization + +3. Developer calls `apiKeys.list(ctx, { ownerId, cursor? })` + -> System returns paginated results scoped to owner + -> Developer renders key management UI + +4. Developer calls `apiKeys.revoke(ctx, { keyId, ownerId })` + -> System asserts key.ownerId === args.ownerId, transitions to revoked + -> Structured log emitted (replaces event table) + +5. Developer monitors usage via `apiKeys.getUsage(ctx, { keyId })` + -> System returns O(1) usage count from ShardedCounter + -> Developer sees dashboard data without scan overhead + +POSTCONDITION: Keys are managed with auth boundaries, paginated queries, and structured audit logging + +### Error Journeys + +E1. Cross-tenant access attempt + Trigger: Caller passes keyId they don't own to revoke/update/disable/enable + 1. Developer calls `apiKeys.revoke(ctx, { keyId, ownerId })` + -> System checks ownerId matches key's ownerId + -> System throws "unauthorized: key does not belong to owner" + Recovery: Caller corrects to their own keyId + +E2. Brute-force key probing + Trigger: Attacker sends many random/malformed keys to validate + -> Component returns `{ valid: false, reason: "malformed" | "not_found" }` quickly (no writes on miss) + -> **Rate limiting is the integrator's responsibility** at their HTTP action/mutation layer using `@convex-dev/rate-limiter` with real caller context (IP, auth, etc.) + Recovery: Integrator adds rate limiting at their API boundary where they have caller context + +E3. Config poisoning + Trigger: Caller sets cleanupIntervalMs to 0 or negative + 1. Developer calls `apiKeys.configure(ctx, { cleanupIntervalMs: -1 })` + -> System validates bounds (min 1h, max 30 days) + -> System throws "cleanupIntervalMs must be between 3600000 and 2592000000" + Recovery: Caller provides valid value + +E4. Unbounded list at scale + Trigger: Owner has 10,000+ keys, calls list() without pagination + 1. Developer calls `apiKeys.list(ctx, { ownerId })` + -> System returns first page (default 100) with cursor + -> Developer uses cursor for next page + Recovery: No recovery needed — pagination is automatic + +### Edge Cases + +EC1. Rotate finite-use key: quota must not duplicate across old + new key during grace period +EC2. revokeByTag with 500+ matching keys: must batch via scheduler, not single mutation +EC3. Empty metadata/scopes/tags: system accepts gracefully, no null vs undefined confusion +EC4. Key with env containing underscores: rejected by charset validator (would break parsing) +EC5. Concurrent validate on same key: lastUsedAt throttled to avoid OCC contention +EC6. gracePeriodMs=0 on rotate: rejected by min bound (60s) — prevents instant-expire during rotation + +## Acceptance Criteria (MANDATORY) + +### Must Have (BLOCKING — all must pass to ship) + +- [ ] AC-1: GIVEN a mutation (revoke/disable/enable/update/rotate) WHEN caller provides keyId with ownerId that doesn't match key's ownerId THEN system throws "unauthorized" error. ownerId is a **required arg** on all admin mutations. +- [ ] AC-2: GIVEN a create() OR rotate() call WHEN client sends options THEN secret material (lookupPrefix, secretHex, hash) is generated inside the component mutation, not passed from client. Applies to both create() and rotate(). +- [ ] AC-3: GIVEN a create() call WHEN keyPrefix contains non-alphanumeric chars THEN system rejects with validation error +- [ ] AC-4: GIVEN a create() call WHEN env contains underscores THEN system rejects with "env must match ^[a-zA-Z0-9-]+$" +- [ ] AC-5: GIVEN a rotate() call WHEN gracePeriodMs < 60s (min) OR > 30 days (max) THEN system rejects with bounds error. Minimum prevents instant-expire during rotation. +- [ ] AC-6: GIVEN a configure() call WHEN any input is negative or zero THEN system rejects with bounds error +- [ ] AC-7: GIVEN any configure() call THEN system emits structured log with old/new values +- [ ] AC-8: GIVEN create/update with metadata > 4KB or scopes > 50 or tags > 20 THEN system rejects with size error +- [ ] AC-9: GIVEN any validate() outcome (success, malformed, not_found, revoked, etc.) THEN structured log emitted with outcome, keyId (if matched), and reason. **Structured logs are the audit trail** (replaces apiKeyEvents table). +- [ ] AC-10: GIVEN README claims WHEN compared to implementation THEN all documented features either exist or are explicitly marked "coming in vX.Y". Includes migration guide with before/after code examples for all breaking changes. +- [ ] AC-11: GIVEN successful validate() calls WHEN lastUsedAt delta < 60s THEN skip ONLY the lastUsedAt write. The remaining decrement (if applicable) MUST still execute regardless of throttle. These are decoupled write paths. +- [ ] AC-12: GIVEN list()/listByTag() WHEN result set is large THEN cursor-based pagination is used, no unbounded .collect() +- [ ] AC-13: GIVEN revokeByTag() WHEN matching keys > batch size THEN operation uses scheduler continuation pattern (internal mutation with cursor, not recursive scheduler) +- [ ] AC-14: GIVEN revokeByTag() WHEN keys are in "rotating" or "disabled" status THEN those keys are also revoked (not silently skipped) +- [ ] AC-15: GIVEN getUsage() THEN result comes from ShardedCounter (O(1)). Period-based event scan removed (no event table). +- [ ] AC-16: GIVEN all mutations that previously inserted apiKeyEvents THEN those inserts are replaced with structured logging via log.ts. No event table writes. @convex-dev/rate-limiter removed from component (rate limiting is integrator's responsibility). + +### Error Criteria (BLOCKING — all must pass) + +- [ ] AC-E1: GIVEN cross-tenant keyId on any admin mutation WHEN ownerId doesn't match THEN error thrown, no state change occurs +- [ ] AC-E2: GIVEN config with invalid bounds WHEN configure() called THEN error thrown, config unchanged +- [ ] AC-E3: GIVEN key segment (prefix/env) with invalid charset WHEN create() called THEN error thrown, no key created +- [ ] AC-E4: GIVEN rotate() with gracePeriodMs=0 or gracePeriodMs=-1 THEN error thrown, no rotation occurs + +### Should Have (ship without, fix soon) + +- [ ] AC-17: GIVEN a getConfig() query WHEN called THEN returns current config values (trivial — config is currently write-only) +- [ ] AC-18: GIVEN a finite-use key WHEN rotate() called THEN quota is shared (not duplicated) across old + new key +- [ ] AC-19: GIVEN ApiKeysConfig WHEN logger is provided THEN component uses injected logger transport + +## Scope + +**Ordering note:** Scope item 2 is a cascade dependency — it changes create()/rotate() args, which breaks example.ts and all tests that call create(). Implement item 2 first, update example + tests to compile, THEN proceed with remaining items. + +- [ ] 1. Add ownerId as **required arg** on revoke/disable/enable/update/rotate; assert key.ownerId === args.ownerId before state change -> AC-1, AC-E1 +- [ ] 2. Move secret generation (lookupPrefix, secretHex, hash) into component mutations for **both create() AND rotate()**; remove from client args -> AC-2 **(IMPLEMENT FIRST — gates test updates)** +- [ ] 3. Decouple remaining decrement from lastUsedAt write in validate; throttle lastUsedAt only (skip if delta < 60s) -> AC-11 +- [ ] 4. Add keyPrefix charset validator (^[a-zA-Z0-9]+$) -> AC-3, AC-E3 +- [ ] 5. Add env charset validator (^[a-zA-Z0-9-]+$, no underscores) -> AC-4, AC-E3 +- [ ] 6. Enforce gracePeriodMs bounds on rotate: min 60s, max 30 days -> AC-5, AC-E4 +- [ ] 7. Add bounds validation on configure() inputs (min/max ranges, no negative) -> AC-6, AC-E2 +- [ ] 8. Replace configure() event insert with structured log of old/new diff -> AC-7 +- [ ] 9. Cap metadata (4KB), scopes (50), tags (20), string fields (256 chars) -> AC-8 +- [ ] 10. Replace ALL apiKeyEvents inserts with structured logging; add validate outcome logging -> AC-9, AC-16 +- [ ] 11. Align README with implementation + migration guide (before/after code examples for all breaking changes) -> AC-10 +- [ ] 12. Remove `apiKeyEvents` table from schema; remove EVENT_TYPE from shared.ts; clean up all event insert code -> AC-16 +- [ ] 13. Simplify getUsage() to ShardedCounter-only (remove period param + event scan) -> AC-15 +- [ ] 14. Add cursor-based pagination to list(), listByTag() -> AC-12 +- [ ] 15. Add tag index or optimize listByTag query path -> AC-12 +- [ ] 16. Paginate revokeByTag via internal mutation with cursor continuation -> AC-13 +- [ ] 17. Expand revokeByTag to include rotating + disabled statuses -> AC-14 +- [ ] 18. Remove `@convex-dev/rate-limiter`, `aggregate`, `crons` from convex.config.ts + remove rateLimiter usage from mutations.ts -> AC-16 + +### Out of Scope + +- **Rate limiting** (1.3, 1.4, 3.4) — **removed from component entirely.** Rate limiting is a cross-cutting concern that belongs in the integrator's HTTP action/mutation layer where they have real caller context (IP, auth, plan tier). The component has zero caller context — it cannot make informed rate-limit decisions. Integrators should use `@convex-dev/rate-limiter` directly in their Convex functions. The `@convex-dev/rate-limiter` dependency is removed from the component. +- **Event retention cron** (3.1) — no longer needed; apiKeyEvents table removed. Structured logs handled by Convex platform log retention. +- **Expired key sweep cron** (3.2) — keys expire lazily during validate(). Proactive sweep deferred to v1.2 if needed based on production data. +- **Time-bucketed usage aggregation** (3.3) — removed with event table. ShardedCounter provides O(1) total count. Period-based analytics deferred to v1.2 (requires external analytics pipeline, not in-component event scanning). +- **Configurable rate-limit policy per key/owner/tier** (3.4) — deferred to v0.3. Current hardcoded policy is acceptable for v0.2. +- **Finite-use rotation semantics** (3.5, H7) — deferred to v0.3. Acknowledged as HIGH severity; requires design decision on shared-quota vs block-rotation. Documenting the current behavior (quota duplication) in README as known limitation. +- **Actor/source metadata on audit** (3.6, H6) — deferred to v0.3. Structured logs include function name + args context via Convex platform; explicit actor field deferred. +- **Injectable logger** (3.7) — deferred to v0.3. Current console-based logger is sufficient for v0.2. +- **onEvent dispatch hook** (4.1) — v2.0 +- **Type-safe metadata generics** (4.2) — v2.0 +- **Key format versioning** (4.3) — v2.0 +- **HMAC-SHA256 with server-side pepper** (4.4) — v2.0 +- **Middleware/plugin composition** (4.5) — v2.0 +- **Admin cross-owner listing** (4.6) — v2.0 +- **Offline/edge validation** (4.7) — v2.0 + +## Quality Checklist + +### Blocking (must pass to ship) + +- [ ] All Must Have ACs passing (AC-1 through AC-16) +- [ ] All Error Criteria ACs passing (AC-E1 through AC-E4) +- [ ] All 18 scope items implemented +- [ ] No regressions in existing tests (tests updated for new API) +- [ ] Error states handled (not just happy path) +- [ ] No hardcoded secrets or credentials +- [ ] No unbounded .collect() in any public query or mutation +- [ ] All admin mutations require + check ownerId before state change +- [ ] apiKeyEvents table fully removed; no residual event inserts +- [ ] @convex-dev/rate-limiter fully removed from component +- [ ] README claims match implementation 1:1 with migration guide +- [ ] remaining decrement NOT affected by lastUsedAt throttle + +### Advisory (should pass, not blocking) + +- [ ] All Should Have ACs passing (AC-17 through AC-19) +- [ ] Code follows existing project patterns (mutation/query separation, jsonValue alias, TAG_PATTERN style) +- [ ] New validators follow shared.ts pattern (exported const + function) +- [ ] Test coverage for auth boundary, malformed probes, large-tenant pagination + +## Test Strategy (MANDATORY) + +### Test Environment + +| Component | Status | Detail | +|-----------|--------|--------| +| Test runner | detected | vitest with @edge-runtime/vm | +| E2E framework | not applicable | Backend component — no UI. Integration tests via convex-test | +| Test DB | in-memory | convex-test provides in-memory Convex runtime | +| Mock inventory | 0 mocks | All tests use real convex-test runtime with registered child components | + +### AC -> Test Mapping + +| AC | Test Type | Test Intention | +|----|-----------|----------------| +| AC-1 | Integration | Cross-tenant revoke/disable/enable/update/rotate all throw "unauthorized" with ownerId mismatch | +| AC-2 | Integration | create() and rotate() no longer accept hash/lookupPrefix/secretHex; server generates them | +| AC-3 | Unit | keyPrefix with special chars rejected | +| AC-4 | Unit | env with underscores rejected | +| AC-5 | Integration | gracePeriodMs < 60s and > 30d both rejected on rotate | +| AC-6 | Integration | configure() with negative/zero/out-of-range values rejected | +| AC-7 | Integration | configure() produces structured log with old + new values | +| AC-8 | Integration | Oversized metadata/scopes/tags rejected on create and update | +| AC-9 | Integration | Validate outcomes produce structured log (spy on console.log) | +| AC-10 | Manual | README review against implementation + migration guide completeness | +| AC-11 | Integration | Two validates < 60s apart: second skips lastUsedAt but still decrements remaining | +| AC-12 | Integration | list()/listByTag() with 200+ keys returns paginated results with cursor | +| AC-13 | Integration | revokeByTag with many keys uses internal mutation continuation | +| AC-14 | Integration | revokeByTag catches rotating + disabled keys | +| AC-15 | Integration | getUsage() returns counter value (O(1)) | +| AC-16 | Integration | No apiKeyEvents inserts or rateLimiter usage in mutations.ts | +| AC-E1 | Integration | Cross-tenant admin ops: no state change, error thrown | +| AC-E2 | Integration | Bad config: config table unchanged after error | +| AC-E3 | Unit | Invalid charset: no key inserted after error | +| AC-E4 | Integration | gracePeriodMs=0 and gracePeriodMs=-1 both rejected on rotate | + +### Failure Mode Tests (MANDATORY) + +| Source | ID | Test Intention | Priority | +|--------|----|----------------|----------| +| Error Journey | E1 | Integration: cross-tenant keyId on all 5 admin mutations -> all throw, zero state change | BLOCKING | +| Error Journey | E3 | Integration: negative config values -> config table unchanged | BLOCKING | +| Error Journey | E4 | Unit: keyPrefix "my_app" and env "live_test" both rejected | BLOCKING | +| Edge Case | EC5 | Integration: 2 validates < 60s apart -> lastUsedAt skipped but remaining decremented | BLOCKING | +| Edge Case | EC6 | Integration: gracePeriodMs=0 on rotate -> rejected with bounds error | BLOCKING | +| Failure Hypothesis | FH-1 (HIGH) | Integration: revokeByTag with 100+ keys -> completes without mutation timeout | BLOCKING | +| Failure Hypothesis | FH-2 (HIGH) | Integration: ownerId omitted -> operation fails, not silently succeeds | BLOCKING | + +### Mock Boundary + +| Dependency | Strategy | Justification | +|------------|----------|---------------| +| @convex-dev/sharded-counter | Real (convex-test) | Already registered via shardedCounterTest.register() | + +### TDD Commitment + +All tests written BEFORE implementation (RED -> GREEN -> REFACTOR). +Every Must Have + Error AC tracked in test file. + +## Risks + +| Risk | Impact | Likelihood | Mitigation | +|------|--------|------------|------------| +| Breaking changes on create()/rotate()/admin mutations | HIGH | HIGH | Bump to v0.2.0, migration guide in README with before/after code examples | +| Pagination type change breaks existing list() consumers | HIGH | HIGH | Return type changes from `KeyMetadata[]` to `PaginatedResult` — unavoidable type break. Document migration. | +| Scope item 2 cascades into all 69 tests | HIGH | HIGH | Implement first, update example + tests before proceeding. Budget 3-4h for this alone. | +| revokeByTag continuation pattern: initial .collect() is still unbounded | MED | MED | Use internal mutation with cursor-based iteration from the start (not collect-then-batch) | +| OCC contention on lastUsedAt throttle | MED | LOW | Throttle reduces but does NOT eliminate OCC retries under burst. Architectural fix (decoupling auth check from analytics write via query + scheduler pattern) deferred to v2.0. | +| Removing apiKeyEvents loses historical audit data on upgrade | MED | MED | Document in migration guide: existing event data should be exported before upgrading. Structured logs replace going forward. | +| Finite-use key rotation duplicates quota (H7, deferred) | HIGH | LOW | Documented as known limitation in README. Deferred to v0.3 — requires design decision. | + +**Kill criteria:** If pagination API proves too disruptive for v0.2, ship as opt-in parameter with unbounded default (and deprecation warning). If test update cascade for scope item 2 exceeds 4h, consider shipping auth/validation items (1, 3-10) first in a v0.2 without the create() API break. + +## State Machine + +### Key Status (existing — no changes to states, only to transition guards) + +``` + create() + │ + ▼ + ┌──────────┐ + ┌──────│ active │──────┐ + │ └──────────┘ │ + disable() │ rotate() + │ revoke() │ + ▼ │ ▼ + ┌──────────┐ │ ┌───────────┐ + │ disabled │ │ │ rotating │ + └──────────┘ │ └───────────┘ + enable()│ revoke()│ │grace end + │ │ │ + ▼ ▼ ▼ + ┌──────────┐ ┌──────────┐ ┌──────────┐ + │ active │ │ revoked │ │ expired │ + └──────────┘ └──────────┘ └──────────┘ + ▲ + expiresAt <= now + │ + (validate check) + + ┌───────────┐ + │ exhausted │ ← remaining reaches 0 during validate + └───────────┘ + +Terminal: revoked, expired, exhausted (no further transitions) +``` + +**Changes in this spec:** +- All transitions from non-terminal states require ownerId match (AC-1) +- `revokeByTag` now transitions `rotating` and `disabled` -> `revoked` (AC-16) +- `gracePeriodMs` bounded: min 60s, max 30 days on `rotate()` (AC-7) +- No apiKeyEvents written on transitions — structured logs replace event inserts + +**Complexity:** MEDIUM (5 states, 8 transitions, 2 guards) — existing useState-style is acceptable since state is DB-driven. + +## Analysis + +*Updated after 4-perspective spec review (Security, Convex Platform, DX, Skeptic) on 2026-03-25.* + +### Assumptions Challenged + +| Assumption | Evidence For | Evidence Against | Verdict | +|------------|-------------|-----------------|---------| +| Scope item 2 cascade is manageable | rotate() already does hash server-side (line 373) | create() args change breaks every test that calls create(). Unknown number of 69 tests affected. Also: rotate() client-side secret gen was missed in original spec. | VALID but HIGH-EFFORT — implement first, budget 3-4h for cascade | +| Rate limiting belongs in the component | Safe-by-default for integrators | Component has zero caller context (no IP, no auth, no plan tier). Hardcoded 1000/min could block legitimate high-volume users. Integrator has real context at their HTTP layer. | WRONG — removed. Rate limiting is integrator's responsibility. | +| lastUsedAt throttle is safe for quota accounting | Reduces OCC contention significantly | Code patches remaining + lastUsedAt in same ctx.db.patch (mutations.ts:221-228). If throttle skips the whole patch, remaining is not decremented = quota bypass. | RESOLVED — AC-11 now explicitly decouples the two write paths | +| Removing apiKeyEvents simplifies without losing value | Events are write-only (no listEvents query), getUsage scans are O(N), retention cron is unproven infrastructure | Loses audit trail in DB — incident investigation requires log search instead of DB query | VALID — structured logs (Convex dashboard) are the standard observability pattern for Convex components. DB-stored events were over-engineering for v1. | +| ownerId as required arg is the right design | Prevents silent bypass (optional would be inert). Component can't derive identity from ctx.auth without coupling to auth provider. | Breaking change on every admin mutation. Ergonomic regression: caller must track ownerId. | VALID — required arg is the only safe choice. Document migration. | + +### Blind Spots + +1. **[Migration]** Multiple breaking changes (create args, rotate args, admin ownerId, list return type, getUsage API, event table removal) ship simultaneously. No CHANGELOG exists. Migration guide content not yet written. + Why it matters: v0.1 -> v0.2 upgrade path must be crystal clear or adopters churn. + +2. **[Concurrency]** revokeByTag scheduler continuation creates eventually-consistent bulk revoke. During execution, some keys may still be active. + Why it matters: If consumer expects synchronous "all keys revoked" guarantee, the async pattern breaks their assumption. Document in README. + +3. **[Reactive churn]** lastUsedAt write (even throttled to 60s) invalidates all list() reactive query subscribers watching that owner. Not fixed by throttle — only reduced. + Why it matters: Dashboard subscribers get unnecessary re-renders. Architectural fix (deferred write via ctx.scheduler) is v2.0 scope. + +4. **[Data loss on upgrade]** Removing apiKeyEvents table deletes existing audit history. No export tooling provided. + Why it matters: Integrators who relied on event data for debugging/compliance lose it on upgrade. + +### Failure Hypotheses + +| IF | THEN | BECAUSE | Severity | Mitigation | +|----|------|---------|----------|------------| +| revokeByTag initial query is still unbounded .collect() | Mutation timeout on large tag sets | Even with cursor continuation, the first batch needs an initial query that could be large | HIGH | Use cursor-based iteration from the start: query with .take(batchSize), process batch, schedule continuation with cursor | +| ownerId added as required but old tests don't pass it | All 69 tests fail immediately after scope item 2 | Tests were written against v0.1 API without ownerId on admin ops | HIGH | Budget this in scope item 2 cascade. Update all test helpers first. | +| gracePeriodMs=0 passes validation | Old key instantly expires during rotation; caller loses access | Current code has no min bound check | MED | RESOLVED — AC-5 now specifies min 60s bound | + +### The Real Question + +Confirmed — the spec now solves the right problem with the right scope. Removing apiKeyEvents eliminates the two largest unknowns (aggregate wiring, retention cron) and simplifies the component from "infrastructure that stores analytics" to "infrastructure that authenticates and delegates observability to the platform." The ShardedCounter + structured logs combination is the idiomatic Convex pattern. + +The remaining risk is the breaking-change cascade (scope item 2 + ownerId + pagination). This is manageable with the ordering constraint (item 2 first) and a realistic 14h estimate. Removing rate limiting and events cut ~4h of complexity and eliminated the two largest integration unknowns. + +### Open Items + +- [gap] No CHANGELOG.md exists — create one as part of scope item 12 -> update spec (included in scope 12) +- [risk] revokeByTag initial query needs cursor-from-start design, not collect-then-batch -> no action (addressed in AC-15 wording) +- [question] Should we ship a v0.2 with non-breaking items first (items 3-10) and then v0.2 with breaking items (1-2, 15-16)? -> question for user +- [improvement] Consider splitting into 2 PRs: (a) non-breaking hardening v0.2, (b) breaking API changes v0.2 -> question for user + +## Notes + +## Progress + +| # | Scope Item | Status | Iteration | +|---|-----------|--------|-----------| +| 1 | ownerId required on admin mutations | pending | - | +| 2 | Server-side secret gen (create + rotate) | pending | - | +| 3 | Decouple remaining/lastUsedAt writes + throttle | pending | - | +| 4 | keyPrefix validator | pending | - | +| 5 | env validator | pending | - | +| 6 | gracePeriodMs min/max bounds | pending | - | +| 7 | configure() bounds | pending | - | +| 8 | configure() structured logging | pending | - | +| 9 | Size caps | pending | - | +| 10 | Replace all event inserts with structured logging | pending | - | +| 11 | README + migration guide | pending | - | +| 12 | Remove apiKeyEvents table + EVENT_TYPE | pending | - | +| 13 | Simplify getUsage to counter-only | pending | - | +| 14 | Cursor pagination (list, listByTag) | pending | - | +| 15 | Tag index optimization | pending | - | +| 16 | revokeByTag cursor continuation | pending | - | +| 17 | revokeByTag status expansion | pending | - | +| 18 | Remove rateLimiter + aggregate + crons | pending | - | + +## Timeline + +| Action | Timestamp | Duration | Notes | +|--------|-----------|----------|-------| +| plan | 2026-03-25T00:00:00Z | - | Created from ROADMAP.md + DEEP-ANALYSIS.md | +| spec-review | 2026-03-25T00:00:00Z | - | 4-perspective review (Security, Convex, DX, Skeptic). 12 action items applied. Removed apiKeyEvents table per user decision. Cut crons/aggregates. | +| revision | 2026-03-25T00:00:00Z | - | Removed @convex-dev/rate-limiter — rate limiting is integrator's responsibility at HTTP layer. 19 -> 18 scope items. Estimate 18h -> 14h. | From 2620c9b454483b5c5e6d8e035cc6632661ee783c Mon Sep 17 00:00:00 2001 From: bntvllnt <32437578+bntvllnt@users.noreply.github.com> Date: Wed, 25 Mar 2026 23:30:18 +0100 Subject: [PATCH 3/4] =?UTF-8?q?docs:=20update=20all=20docs=20for=20v0.2.0?= =?UTF-8?q?=20=E2=80=94=20README,=20API,=20CHANGELOG,=20AGENTS,=20CLAUDE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - README: new API signatures with ownerId, remove rate-limiter/events claims, update architecture to 1 child component, add security model section - docs/API.md: all method signatures updated, input validation table added - CHANGELOG.md: full v0.2.0 entry with breaking changes + migration guide - AGENTS.md: fix schema reference, add docs-sync rule - CLAUDE.md: update design decisions, architecture, schema references - Spec archived to specs/shipped/ with retro --- AGENTS.md | 22 +- CHANGELOG.md | 70 ++- CLAUDE.md | 12 +- README.md | 99 ++-- docs/API.md | 74 +-- specs/history.log | 1 + .../2026-03-25-v1-production-hardening.md | 427 ++++++++++++++++++ 7 files changed, 613 insertions(+), 92 deletions(-) create mode 100644 specs/history.log create mode 100644 specs/shipped/2026-03-25-v1-production-hardening.md diff --git a/AGENTS.md b/AGENTS.md index 95cfeda..0c7db19 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -24,16 +24,34 @@ pnpm test - `src/component/mutations.ts` — Convex mutations (create, validate, revoke, revokeByTag, rotate, update, disable, enable, configure) - `src/component/queries.ts` — Convex queries (list, listByTag, getUsage) - `src/component/validators.ts` — Shared validators (jsonValue alias for v.any()) -- `src/component/schema.ts` — Database schema (apiKeys, apiKeyEvents, config) +- `src/component/schema.ts` — Database schema (apiKeys, config) - `src/shared.ts` — Shared types, key parsing, crypto (sha256Hex, timingSafeEqual) - `src/test.ts` — convex-test helper for registering the component ## Important Patterns -- Hash is computed **server-side** in `mutations.ts`, not in the client +- Secret material (hash, lookupPrefix, secretHex) generated **server-side** in `mutations.ts`, not in the client +- All admin mutations require `ownerId` — auth boundary prevents cross-tenant access - Mutations in `mutations.ts`, queries in `queries.ts` (enforced by eslint) - No bare `v.any()` — aliased as `jsonValue` in `validators.ts` - Keys are never stored raw — only SHA-256 hashes - All queries are scoped by `ownerId` (multi-tenant) - Terminal statuses (`revoked`, `expired`, `exhausted`) cannot be transitioned out of +- Single child component: `@convex-dev/sharded-counter` (rate-limiter, aggregate, crons removed) +- Audit trail via structured logging, not DB events (apiKeyEvents table removed) - Use `convex-test` with `@edge-runtime/vm` for testing + +## Docs Sync (MANDATORY) + +When any of these change, update the corresponding docs: + +| Change | Update | +|--------|--------| +| Mutation/query args or return types | `docs/API.md`, `README.md` API table, `CLAUDE.md` | +| Schema table added/removed/modified | `AGENTS.md` Structure section, `CLAUDE.md` | +| Child component added/removed | `README.md` Architecture section, `AGENTS.md` | +| New feature or breaking change | `CHANGELOG.md`, `README.md` Features section | +| Input validation rules changed | `docs/API.md` Input Validation table | +| Security model changed | `README.md` Security Model section | + +Always run `pnpm lint && pnpm build && pnpm test` before committing docs changes. diff --git a/CHANGELOG.md b/CHANGELOG.md index 906c054..d57c028 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,72 @@ # Changelog +## 0.2.0 + +### Breaking Changes + +- **`create()` / `rotate()`**: Secret material (lookupPrefix, secretHex, hash) now generated server-side. Remove these from client args. +- **Admin mutations** (`revoke`, `disable`, `enable`, `update`, `rotate`, `getUsage`): `ownerId` is now a required argument for auth boundary enforcement. +- **`apiKeyEvents` table removed**: Audit trail replaced with structured logging (Convex dashboard). Export existing event data before upgrading. +- **`@convex-dev/rate-limiter` removed**: Rate limiting is now the integrator's responsibility at their HTTP action/mutation layer. Remove `rateLimiterTest.register()` from test setup. +- **`@convex-dev/aggregate` and `@convex-dev/crons` removed**: Remove `aggregateTest.register()` and `cronsTest.register()` from test setup. +- **`getUsage()`**: `period` param removed (counter-only). `lastUsedAt` removed from return type. +- **`validate()`**: `retryAfter` removed from failure response (no internal rate limiting). +- **`list()` / `listByTag()`**: Now paginated with `limit` param (default 100). + +### Migration Guide + +```ts +// BEFORE (v0.1) +const { key } = await apiKeys.create(ctx, { + name: "My Key", ownerId: "org_1", + lookupPrefix, secretHex, hash, // ← REMOVE these +}); +await apiKeys.revoke(ctx, { keyId }); +await apiKeys.rotate(ctx, { keyId, lookupPrefix, secretHex }); +const usage = await apiKeys.getUsage(ctx, { keyId, period: { start, end } }); + +// AFTER (v0.2) +const { key } = await apiKeys.create(ctx, { + name: "My Key", ownerId: "org_1", + // secret material generated server-side +}); +await apiKeys.revoke(ctx, { keyId, ownerId: "org_1" }); // ← ADD ownerId +await apiKeys.rotate(ctx, { keyId, ownerId: "org_1" }); // ← ADD ownerId +const usage = await apiKeys.getUsage(ctx, { keyId, ownerId: "org_1" }); // ← ADD ownerId, REMOVE period +``` + +Test setup: + +```ts +// BEFORE +import rateLimiterTest from "@convex-dev/rate-limiter/test"; +import aggregateTest from "@convex-dev/aggregate/test"; +import cronsTest from "@convex-dev/crons/test"; +rateLimiterTest.register(t, "apiKeys/rateLimiter"); +aggregateTest.register(t, "apiKeys/usageAggregate"); +cronsTest.register(t, "apiKeys/crons"); + +// AFTER — only shardedCounter remains +import shardedCounterTest from "@convex-dev/sharded-counter/test"; +shardedCounterTest.register(t, "apiKeys/shardedCounter"); +``` + +### New Features + +- Auth boundary: `ownerId` cross-check on all admin mutations +- Server-side secret generation for `create()` and `rotate()` +- Input validation: keyPrefix charset, env charset, gracePeriodMs bounds (60s–30d), metadata size (4KB), scopes (50), tags (20) +- Configure bounds validation (reject negative/zero) +- Structured audit logging on all mutation outcomes +- lastUsedAt throttled to 60s (reduces OCC contention) +- Remaining decrement decoupled from lastUsedAt write (single merged patch) +- revokeByTag expanded to include `rotating` and `disabled` statuses +- Paginated list/listByTag with configurable limit + +## 0.1.1 + +- CI fix: add --ignore-scripts to npm publish + ## 0.1.0 - Initial release @@ -12,9 +79,8 @@ - Key disable/enable (reversible pause) - Finite-use keys (remaining counter with atomic decrement) - Key types (secret / publishable) with type-encoded prefix -- Environment-aware key format ({prefix}_{type}_{env}_{random}_{secret}) +- Environment-aware key format - Multi-tenant isolation (ownerId-scoped queries) - Audit event log (apiKeyEvents table) - Usage analytics via event counting -- Constant-time hash comparison - Child components: @convex-dev/rate-limiter, sharded-counter, aggregate, crons diff --git a/CLAUDE.md b/CLAUDE.md index 72592e6..027636c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -8,7 +8,7 @@ Convex agent skills for common tasks can be installed by running `npx convex ai- ## Project Overview -`@vllnt/convex-api-keys` is a Convex component for secure API key management. It provides create, validate, revoke, rotate, rate-limit, and usage tracking — all backed by `@convex-dev/*` ecosystem components. +`@vllnt/convex-api-keys` is a Convex component for secure API key management. It provides create, validate, revoke, rotate, and usage tracking — with built-in auth boundaries and structured audit logging. Single child component: `@convex-dev/sharded-counter`. ## Architecture @@ -24,17 +24,21 @@ src/ ├── mutations.ts # Mutations (create, validate, revoke, rotate, update, disable, enable, configure) ├── queries.ts # Queries (list, listByTag, getUsage) ├── validators.ts # Shared validators (jsonValue alias for v.any()) - ├── schema.ts # Convex schema (apiKeys, apiKeyEvents, config tables) - └── convex.config.ts # Component config (rate-limiter, sharded-counter, aggregate, crons) + ├── schema.ts # Convex schema (apiKeys, config tables) + └── convex.config.ts # Component config (sharded-counter only) ``` ## Key Design Decisions -- **Hash computed server-side**: `rotate()` in `mutations.ts` computes the SHA-256 hash inside the component mutation (not the client) to ensure the hash always matches the old key's actual type/env. +- **Secret material generated server-side**: Both `create()` and `rotate()` generate lookupPrefix, secretHex, and hash inside the component mutation — never passed from client. +- **Auth boundary via ownerId**: All admin mutations (revoke, disable, enable, update, rotate, getUsage) require `ownerId` and assert it matches the key's owner before any state change. +- **No event table**: Audit trail via structured logging (`log.ts`), not a DB table. Eliminates unbounded growth, O(N) scans, and retention complexity. +- **No rate limiting**: Rate limiting is the integrator's responsibility at their HTTP layer with real caller context. - **Namespace separation**: Mutations in `mutations.ts`, queries in `queries.ts` — enforced by `@vllnt/eslint-config/convex`. - **No bare `v.any()`**: All uses aliased as `jsonValue` in `validators.ts`. - **Prefix-indexed lookup**: Keys use an 8-char `lookupPrefix` for O(1) candidate lookup, then constant-time hash comparison. - **No raw keys stored**: Only SHA-256 hashes persist. Raw keys are returned once at creation. +- **Input validation**: keyPrefix (`^[a-zA-Z0-9]+$`), env (`^[a-zA-Z0-9-]+$`), metadata (4KB), scopes (50), tags (20), gracePeriodMs (60s-30d). ## Development diff --git a/README.md b/README.md index b0fd5dd..1f2d286 100644 --- a/README.md +++ b/README.md @@ -4,32 +4,33 @@ # @vllnt/convex-api-keys -Secure API key management as a [Convex component](https://docs.convex.dev/components). Create, validate, revoke, rotate, rate-limit, and track usage — all backed by battle-tested `@convex-dev/*` ecosystem components. +Secure API key management as a [Convex component](https://docs.convex.dev/components). Create, validate, revoke, rotate, and track usage — all with built-in auth boundaries and structured audit logging. ## Features -- **Secure by default** — SHA-256 hashed storage, constant-time comparison, prefix-indexed O(1) lookup +- **Secure by default** — SHA-256 hashed storage, constant-time comparison, prefix-indexed O(1) lookup, server-side secret generation +- **Auth boundary** — `ownerId` required on all admin mutations — prevents cross-tenant access - **Key types** — `secret` and `publishable` keys with type-encoded prefixes - **Finite-use keys** — `remaining` counter with atomic decrement (verification tokens, one-time-use) - **Disable / Enable** — reversible pause without revoking -- **Rotation** — configurable grace period where both old and new keys are valid -- **Bulk revoke** — revoke all keys matching a tag in one call +- **Rotation** — configurable grace period (60s–30d) where both old and new keys are valid +- **Bulk revoke** — revoke all keys matching a tag (active, rotating, and disabled) - **Tags & environments** — filter keys by tags and environment strings - **Multi-tenant** — every query scoped by `ownerId`, no cross-tenant leakage -- **Usage tracking** — audit event log + per-key usage analytics -- **Extensible** — custom metadata, configurable prefix, event callbacks +- **Usage tracking** — per-key usage counter via `@convex-dev/sharded-counter` +- **Input validation** — keyPrefix/env charset, metadata size (4KB), scopes (50), tags (20) +- **Structured logging** — audit trail via structured logs (Convex dashboard) ## Architecture ``` Your App → @vllnt/convex-api-keys - ├── @convex-dev/rate-limiter (per-key rate limiting) - ├── @convex-dev/sharded-counter (high-throughput counters) - ├── @convex-dev/aggregate (O(log n) analytics) - └── @convex-dev/crons (scheduled cleanup) + └── @convex-dev/sharded-counter (high-throughput usage counters) ``` -You install one package. Child components are internal — they don't appear in your `convex.config.ts`. +You install one package. The child component is internal — it doesn't appear in your `convex.config.ts`. + +> **Rate limiting** is your responsibility. Add `@convex-dev/rate-limiter` at your HTTP action/mutation layer where you have real caller context (IP, auth, plan tier). The component has zero caller context and cannot make informed rate-limit decisions. ## Installation @@ -46,9 +47,6 @@ import apiKeys from "@vllnt/convex-api-keys/convex.config"; const app = defineApp(); app.use(apiKeys); -// Optional: multiple isolated instances -app.use(apiKeys, { name: "serviceKeys" }); - export default app; ``` @@ -72,15 +70,16 @@ const apiKeys = new ApiKeys(components.apiKeys, { const { key, keyId } = await apiKeys.create(ctx, { name: "Production SDK Key", ownerId: orgId, - type: "secret", // "secret" | "publishable" + type: "secret", scopes: ["read:users", "write:orders"], tags: ["sdk", "v2"], - env: "live", // any string + env: "live", metadata: { plan: "enterprise" }, expiresAt: Date.now() + 90 * 24 * 60 * 60 * 1000, - remaining: 100000, // optional: finite-use + remaining: 100000, }); // key = "myapp_secret_live_a1b2c3d4_<64-char-hex>" +// Secret material is generated server-side — never passed from client ``` ### Validate a key @@ -90,18 +89,17 @@ const result = await apiKeys.validate(ctx, { key: bearerToken }); if (!result.valid) { // result.reason: "malformed" | "not_found" | "revoked" | "expired" - // | "exhausted" | "disabled" | "rate_limited" + // | "exhausted" | "disabled" return new Response("Unauthorized", { status: 401 }); } -// result.valid === true const { keyId, ownerId, scopes, tags, env, type, metadata, remaining } = result; ``` ### List keys ```ts -const allKeys = await apiKeys.list(ctx, { ownerId: orgId }); +const keys = await apiKeys.list(ctx, { ownerId: orgId }); const prodKeys = await apiKeys.list(ctx, { ownerId: orgId, env: "live" }); const taggedKeys = await apiKeys.listByTag(ctx, { ownerId: orgId, tag: "sdk" }); ``` @@ -111,6 +109,7 @@ const taggedKeys = await apiKeys.listByTag(ctx, { ownerId: orgId, tag: "sdk" }); ```ts await apiKeys.update(ctx, { keyId, + ownerId: orgId, // required — auth boundary name: "Renamed Key", scopes: ["read:users"], tags: ["sdk", "v3"], @@ -121,19 +120,16 @@ await apiKeys.update(ctx, { ### Disable / Enable ```ts -await apiKeys.disable(ctx, { keyId }); -// validate() → { valid: false, reason: "disabled" } - -await apiKeys.enable(ctx, { keyId }); -// validate() → { valid: true, ... } +await apiKeys.disable(ctx, { keyId, ownerId: orgId }); +await apiKeys.enable(ctx, { keyId, ownerId: orgId }); ``` ### Revoke ```ts -await apiKeys.revoke(ctx, { keyId }); +await apiKeys.revoke(ctx, { keyId, ownerId: orgId }); -// Bulk revoke by tag +// Bulk revoke by tag (catches active, rotating, and disabled keys) await apiKeys.revokeByTag(ctx, { ownerId: orgId, tag: "compromised" }); ``` @@ -142,31 +138,16 @@ await apiKeys.revokeByTag(ctx, { ownerId: orgId, tag: "compromised" }); ```ts const { newKey, newKeyId, oldKeyExpiresAt } = await apiKeys.rotate(ctx, { keyId, - gracePeriodMs: 3600000, // 1 hour — both keys valid + ownerId: orgId, // required — auth boundary + gracePeriodMs: 3600000, // 1 hour — both keys valid (min 60s, max 30d) }); ``` ### Usage analytics ```ts -const usage = await apiKeys.getUsage(ctx, { - keyId, - period: { start: startOfMonth, end: Date.now() }, -}); -// { total: 42000, remaining: 58000, lastUsedAt: 1711036800000 } -``` - -### Finite-use keys (verification tokens) - -```ts -const { key } = await apiKeys.create(ctx, { - name: "Email Verification", - ownerId: userId, - remaining: 1, - expiresAt: Date.now() + 24 * 60 * 60 * 1000, -}); -// First validate: { valid: true, remaining: 0 } -// Second validate: { valid: false, reason: "exhausted" } +const usage = await apiKeys.getUsage(ctx, { keyId, ownerId: orgId }); +// { total: 42000, remaining: 58000 } ``` ## Key Format @@ -195,16 +176,22 @@ create() → ACTIVE ──→ DISABLED (reversible via enable()) |--------|-----|-------------| | `create(ctx, options)` | mutation | Create a new API key | | `validate(ctx, { key })` | mutation | Validate and track usage | -| `revoke(ctx, { keyId })` | mutation | Permanently revoke a key | +| `revoke(ctx, { keyId, ownerId })` | mutation | Permanently revoke a key | | `revokeByTag(ctx, { ownerId, tag })` | mutation | Bulk revoke by tag | -| `rotate(ctx, { keyId, gracePeriodMs? })` | mutation | Rotate with grace period | -| `list(ctx, { ownerId, env?, status? })` | query | List keys (no secrets exposed) | -| `listByTag(ctx, { ownerId, tag })` | query | Filter by tag | -| `update(ctx, { keyId, name?, scopes?, tags?, metadata? })` | mutation | Update metadata in-place | -| `disable(ctx, { keyId })` | mutation | Temporarily disable | -| `enable(ctx, { keyId })` | mutation | Re-enable disabled key | -| `getUsage(ctx, { keyId, period? })` | query | Usage analytics | -| `configure(ctx, { ... })` | mutation | Runtime config | +| `rotate(ctx, { keyId, ownerId, gracePeriodMs? })` | mutation | Rotate with grace period | +| `list(ctx, { ownerId, env?, status?, limit? })` | query | List keys (paginated, default 100) | +| `listByTag(ctx, { ownerId, tag, limit? })` | query | Filter by tag | +| `update(ctx, { keyId, ownerId, name?, ... })` | mutation | Update metadata in-place | +| `disable(ctx, { keyId, ownerId })` | mutation | Temporarily disable | +| `enable(ctx, { keyId, ownerId })` | mutation | Re-enable disabled key | +| `getUsage(ctx, { keyId, ownerId })` | query | Usage counter (O(1)) | +| `configure(ctx, { ... })` | mutation | Runtime config (admin-only) | + +## Security Model + +This component protects against **accidental cross-tenant bugs in honest host apps**. The `ownerId` check prevents a bug from operating on another tenant's keys — it does NOT prevent a compromised host app from passing a forged `ownerId`. + +Integrators must derive `ownerId` from their own auth layer (e.g., `ctx.auth.getUserIdentity()`) before passing it to the component. ## Testing @@ -213,9 +200,11 @@ For testing with `convex-test`: ```ts import { convexTest } from "convex-test"; import { register } from "@vllnt/convex-api-keys/test"; +import shardedCounterTest from "@convex-dev/sharded-counter/test"; const t = convexTest(schema, modules); register(t, "apiKeys"); +shardedCounterTest.register(t, "apiKeys/shardedCounter"); ``` ## Contributing diff --git a/docs/API.md b/docs/API.md index 621a12b..3e8c178 100644 --- a/docs/API.md +++ b/docs/API.md @@ -18,17 +18,17 @@ const apiKeys = new ApiKeys(components.apiKeys, { ### create(ctx, options) -Create a new API key. Returns the raw key once — only the hash is stored. +Create a new API key. Secret material is generated server-side. Returns the raw key once — only the hash is stored. | Param | Type | Required | Description | |-------|------|----------|-------------| -| `name` | `string` | yes | Display name | +| `name` | `string` | yes | Display name (max 256 chars) | | `ownerId` | `string` | yes | Tenant/org/user ID | | `type` | `"secret" \| "publishable"` | no | Key type (default from config) | -| `scopes` | `string[]` | no | Permission scopes | -| `tags` | `string[]` | no | Filterable tags | -| `env` | `string` | no | Environment (default: `"live"`) | -| `metadata` | `Record` | no | Arbitrary JSON | +| `scopes` | `string[]` | no | Permission scopes (max 50) | +| `tags` | `string[]` | no | Filterable tags (max 20) | +| `env` | `string` | no | Environment (default: `"live"`, must match `^[a-zA-Z0-9-]+$`) | +| `metadata` | `Record` | no | Arbitrary JSON (max 4KB) | | `remaining` | `number` | no | Finite-use counter | | `expiresAt` | `number` | no | Expiry timestamp (ms) | @@ -36,58 +36,74 @@ Create a new API key. Returns the raw key once — only the hash is stored. ### validate(ctx, { key }) -Validate a key and track usage. Decrements `remaining`, checks rate limits. +Validate a key and track usage. Decrements `remaining` if set. -**Returns:** `{ valid: true, keyId, ownerId, type, env, scopes, tags, metadata, remaining }` or `{ valid: false, reason, retryAfter? }` +**Returns:** `{ valid: true, keyId, ownerId, type, env, scopes, tags, metadata, remaining }` or `{ valid: false, reason }` -**Rejection reasons:** `"malformed"`, `"not_found"`, `"revoked"`, `"disabled"`, `"expired"`, `"exhausted"`, `"rate_limited"` +**Rejection reasons:** `"malformed"`, `"not_found"`, `"revoked"`, `"disabled"`, `"expired"`, `"exhausted"` -### revoke(ctx, { keyId }) +### revoke(ctx, { keyId, ownerId }) -Permanently revoke a key. Idempotent. +Permanently revoke a key. Idempotent. Requires `ownerId` for auth boundary. ### revokeByTag(ctx, { ownerId, tag }) -Bulk revoke all active keys matching a tag. +Bulk revoke all keys matching a tag. Covers `active`, `rotating`, and `disabled` statuses. **Returns:** `{ revokedCount: number }` -### rotate(ctx, { keyId, gracePeriodMs? }) +### rotate(ctx, { keyId, ownerId, gracePeriodMs? }) -Create a new key and put the old key in grace period. Both keys validate during the grace period. +Create a new key and put the old key in grace period. Both keys validate during the grace period. Secret material generated server-side. | Param | Type | Default | Description | |-------|------|---------|-------------| -| `gracePeriodMs` | `number` | `3600000` (1h) | Grace period duration | +| `gracePeriodMs` | `number` | `3600000` (1h) | Grace period (min 60s, max 30 days) | **Returns:** `{ newKeyId, newKey, oldKeyExpiresAt }` -### list(ctx, { ownerId, env?, status? }) +### list(ctx, { ownerId, env?, status?, limit? }) -List keys for an owner. No secrets exposed. +List keys for an owner. No secrets exposed. Paginated (default 100). -### listByTag(ctx, { ownerId, tag }) +### listByTag(ctx, { ownerId, tag, limit? }) -List keys matching a specific tag. +List keys matching a specific tag. Paginated (default 100). -### update(ctx, { keyId, name?, scopes?, tags?, metadata? }) +### update(ctx, { keyId, ownerId, name?, scopes?, tags?, metadata? }) -Update key metadata in-place without rotation. +Update key metadata in-place without rotation. Requires `ownerId` for auth boundary. -### disable(ctx, { keyId }) +### disable(ctx, { keyId, ownerId }) -Temporarily disable a key. Reversible via `enable()`. +Temporarily disable a key. Reversible via `enable()`. Requires `ownerId`. -### enable(ctx, { keyId }) +### enable(ctx, { keyId, ownerId }) -Re-enable a disabled key. +Re-enable a disabled key. Requires `ownerId`. -### getUsage(ctx, { keyId, period? }) +### getUsage(ctx, { keyId, ownerId }) -Get usage analytics for a key. +Get usage count for a key. Returns O(1) counter value. -**Returns:** `{ total: number, remaining?: number, lastUsedAt?: number }` +**Returns:** `{ total: number, remaining?: number }` ### configure(ctx, { cleanupIntervalMs?, defaultExpiryMs? }) -Set runtime configuration for the component. +Set runtime configuration. Admin-only surface — no ownerId scoping. Values must be > 0. + +## Input Validation + +| Field | Constraint | +|-------|-----------| +| `keyPrefix` | `^[a-zA-Z0-9]+$` (no underscores) | +| `env` | `^[a-zA-Z0-9-]+$` (no underscores — would break key parsing) | +| `name` | max 256 characters | +| `metadata` | max 4KB (JSON serialized) | +| `scopes` | max 50 entries | +| `tags` | max 20 entries, each matching `^[a-zA-Z0-9][a-zA-Z0-9-]*$` | +| `gracePeriodMs` | min 60,000 (60s), max 2,592,000,000 (30 days) | +| `remaining` | must be > 0 | +| `expiresAt` | must be in the future | +| `cleanupIntervalMs` | must be > 0 | +| `defaultExpiryMs` | must be > 0 | diff --git a/specs/history.log b/specs/history.log new file mode 100644 index 0000000..052f13d --- /dev/null +++ b/specs/history.log @@ -0,0 +1 @@ +2026-03-25 | shipped | v0.2.0-production-hardening | 14h→4h | ~4h | Auth boundary, server-side secret gen, remove events+rate-limiter, input validation, pagination. 82 tests, PR #37. diff --git a/specs/shipped/2026-03-25-v1-production-hardening.md b/specs/shipped/2026-03-25-v1-production-hardening.md new file mode 100644 index 0000000..b8f9fbe --- /dev/null +++ b/specs/shipped/2026-03-25-v1-production-hardening.md @@ -0,0 +1,427 @@ +--- +title: "v0.2.0 Production Hardening" +status: shipped +created: 2026-03-25 +estimate: 14h +tier: standard +--- + +# v0.2.0 Production Hardening + +## Context + +The deep analysis (2026-03-24, 7-perspective audit) found 3 critical, 7 high, and 7 medium-severity issues that block production use of `@vllnt/convex-api-keys`. The component has solid crypto fundamentals (hashed storage, prefix-indexed lookup, timing-safe comparison) but lacks authorization boundaries, has write-heavy validation, unbounded collection scans, and an operational story weaker than the README promises. This spec consolidates Phases 1-2 roadmap items plus select Phase 3 items into a single implementable plan for v0.2. Phase 3 operational items (crons, aggregates) and all Phase 4 items are deferred to subsequent specs. + +## Security Model + +**Threat model:** This component protects against **accidental cross-tenant bugs in honest host apps** (Threat A), NOT against malicious/compromised integrators (Threat B). + +**Trust boundary:** The component trusts the `ownerId` it receives from the host app. It does not derive or verify caller identity from `ctx.auth`. The ownerId cross-check (AC-1) prevents a host app bug from accidentally operating on another tenant's keys — it does NOT prevent a compromised host app from passing a forged ownerId. + +**Implication:** Integrators who need Threat B protection must derive `ownerId` from their own auth layer (e.g., `ctx.auth.getUserIdentity()`) before passing it to the component. The component cannot enforce this — it is the host app's responsibility. + +**Design decision:** ownerId is a **required arg** on all admin mutations (revoke, disable, enable, update, rotate). This is a breaking change from v0.1 (which took only keyId). The component internally fetches the key and asserts `key.ownerId === args.ownerId` before any state change. This was chosen over optional-ownerId (which would be silently bypassable) and over component-internal ctx.auth (which would couple the component to a specific auth provider). + +## Codebase Impact (MANDATORY) + +| Area | Impact | Detail | +|------|--------|--------| +| `src/component/mutations.ts` | MODIFY | Auth boundary (ownerId on admin mutations), secret generation server-side for create() AND rotate(), config bounds/audit, revokeByTag pagination+status expansion, gracePeriod min/max bounds, remove all apiKeyEvents inserts + rateLimiter usage — replace with structured logging + counter, decouple lastUsedAt from remaining write | +| `src/component/queries.ts` | MODIFY | Cursor-based pagination on list/listByTag, simplify getUsage to counter-only (remove event scan), add getConfig query | +| `src/component/schema.ts` | MODIFY | **Remove `apiKeyEvents` table entirely.** Add tag index (by_owner_tag). Remove aggregate/crons component registration if unused. | +| `src/component/convex.config.ts` | MODIFY | Remove `aggregate`, `crons`, and `rateLimiter` component registrations (rate limiting is integrator's responsibility; events removed) | +| `src/component/validators.ts` | MODIFY | Add keyPrefix/env charset validators, size cap validators, config bound validators | +| `src/shared.ts` | MODIFY | Add keyPrefix/env validation functions, KEY_PREFIX_PATTERN/ENV_PATTERN constants. Remove EVENT_TYPE validator (no event table). | +| `src/client/index.ts` | MODIFY | Remove client-side hash/secret generation from create() AND rotate(), move to server. Add ownerId as required arg on admin mutations. Add pagination params to list/listByTag. Simplify getUsage (counter-only). | +| `src/client/types.ts` | MODIFY | Add PaginatedResult, ConfigResult types. Update CreateKeyOptions (remove hash/lookupPrefix/secretHex). Add ownerId to admin mutation args. Remove KeyEvent type. | +| `src/log.ts` | MODIFY | Add structured validate outcome logging (replaces event table as audit trail) | +| `README.md` | MODIFY | Align claims with implementation, migration guide with before/after code examples, document new APIs | +| `example/convex/example.ts` | AFFECTED | Must update to new client API (no hash/secret params, ownerId on admin ops, pagination on list) | +| `example/convex/example.test.ts` | AFFECTED | Existing 69 tests must be updated (create() args change, ownerId added). Add auth boundary, rate-limit, malformed probe tests | + +**Files:** 0 create | 10 modify | 2 affected +**Reuse:** Existing `ShardedCounter` instance, `TAG_PATTERN` regex, `parseKeyString`, `sha256Hex`, `timingSafeEqual` +**Breaking changes:** +- `create()` client API — removes `hash`, `lookupPrefix`, `secretHex` from args (secret gen moves server-side) +- `rotate()` client API — removes `lookupPrefix`, `secretHex` from args (same fix) +- Admin mutations (revoke/disable/enable/update/rotate) — `ownerId` becomes required arg +- `list()`/`listByTag()` return paginated results (type-level break: `KeyMetadata[]` -> `PaginatedResult`) +- `getUsage()` — removes `period` param (counter-only, no event scan) +- `apiKeyEvents` table removed — audit trail moves to structured logs +- Migration path: bump to v0.2.0, include before/after code examples in README +**New dependencies:** None + +## User Journey (MANDATORY) + +### Primary Journey + +ACTOR: Convex developer integrating `@vllnt/convex-api-keys` as a component +GOAL: Use the component safely in production with multi-tenant isolation, scale, and observability +PRECONDITION: Component installed, schema deployed, `ApiKeys` client instantiated + +1. Developer calls `apiKeys.create(ctx, { name, ownerId, ... })` + -> System generates secret material server-side, stores only hash + -> Developer receives `{ keyId, key }` — raw key returned once + +2. End-user sends API key in request header + -> Developer calls `apiKeys.validate(ctx, { key })` + -> System validates hash, returns scopes/metadata (rate limiting is integrator's responsibility at HTTP layer) + -> Developer uses result for authorization + +3. Developer calls `apiKeys.list(ctx, { ownerId, cursor? })` + -> System returns paginated results scoped to owner + -> Developer renders key management UI + +4. Developer calls `apiKeys.revoke(ctx, { keyId, ownerId })` + -> System asserts key.ownerId === args.ownerId, transitions to revoked + -> Structured log emitted (replaces event table) + +5. Developer monitors usage via `apiKeys.getUsage(ctx, { keyId })` + -> System returns O(1) usage count from ShardedCounter + -> Developer sees dashboard data without scan overhead + +POSTCONDITION: Keys are managed with auth boundaries, paginated queries, and structured audit logging + +### Error Journeys + +E1. Cross-tenant access attempt + Trigger: Caller passes keyId they don't own to revoke/update/disable/enable + 1. Developer calls `apiKeys.revoke(ctx, { keyId, ownerId })` + -> System checks ownerId matches key's ownerId + -> System throws "unauthorized: key does not belong to owner" + Recovery: Caller corrects to their own keyId + +E2. Brute-force key probing + Trigger: Attacker sends many random/malformed keys to validate + -> Component returns `{ valid: false, reason: "malformed" | "not_found" }` quickly (no writes on miss) + -> **Rate limiting is the integrator's responsibility** at their HTTP action/mutation layer using `@convex-dev/rate-limiter` with real caller context (IP, auth, etc.) + Recovery: Integrator adds rate limiting at their API boundary where they have caller context + +E3. Config poisoning + Trigger: Caller sets cleanupIntervalMs to 0 or negative + 1. Developer calls `apiKeys.configure(ctx, { cleanupIntervalMs: -1 })` + -> System validates bounds (min 1h, max 30 days) + -> System throws "cleanupIntervalMs must be between 3600000 and 2592000000" + Recovery: Caller provides valid value + +E4. Unbounded list at scale + Trigger: Owner has 10,000+ keys, calls list() without pagination + 1. Developer calls `apiKeys.list(ctx, { ownerId })` + -> System returns first page (default 100) with cursor + -> Developer uses cursor for next page + Recovery: No recovery needed — pagination is automatic + +### Edge Cases + +EC1. Rotate finite-use key: quota must not duplicate across old + new key during grace period +EC2. revokeByTag with 500+ matching keys: must batch via scheduler, not single mutation +EC3. Empty metadata/scopes/tags: system accepts gracefully, no null vs undefined confusion +EC4. Key with env containing underscores: rejected by charset validator (would break parsing) +EC5. Concurrent validate on same key: lastUsedAt throttled to avoid OCC contention +EC6. gracePeriodMs=0 on rotate: rejected by min bound (60s) — prevents instant-expire during rotation + +## Acceptance Criteria (MANDATORY) + +### Must Have (BLOCKING — all must pass to ship) + +- [ ] AC-1: GIVEN a mutation (revoke/disable/enable/update/rotate) WHEN caller provides keyId with ownerId that doesn't match key's ownerId THEN system throws "unauthorized" error. ownerId is a **required arg** on all admin mutations. +- [ ] AC-2: GIVEN a create() OR rotate() call WHEN client sends options THEN secret material (lookupPrefix, secretHex, hash) is generated inside the component mutation, not passed from client. Applies to both create() and rotate(). +- [ ] AC-3: GIVEN a create() call WHEN keyPrefix contains non-alphanumeric chars THEN system rejects with validation error +- [ ] AC-4: GIVEN a create() call WHEN env contains underscores THEN system rejects with "env must match ^[a-zA-Z0-9-]+$" +- [ ] AC-5: GIVEN a rotate() call WHEN gracePeriodMs < 60s (min) OR > 30 days (max) THEN system rejects with bounds error. Minimum prevents instant-expire during rotation. +- [ ] AC-6: GIVEN a configure() call WHEN any input is negative or zero THEN system rejects with bounds error +- [ ] AC-7: GIVEN any configure() call THEN system emits structured log with old/new values +- [ ] AC-8: GIVEN create/update with metadata > 4KB or scopes > 50 or tags > 20 THEN system rejects with size error +- [ ] AC-9: GIVEN any validate() outcome (success, malformed, not_found, revoked, etc.) THEN structured log emitted with outcome, keyId (if matched), and reason. **Structured logs are the audit trail** (replaces apiKeyEvents table). +- [ ] AC-10: GIVEN README claims WHEN compared to implementation THEN all documented features either exist or are explicitly marked "coming in vX.Y". Includes migration guide with before/after code examples for all breaking changes. +- [ ] AC-11: GIVEN successful validate() calls WHEN lastUsedAt delta < 60s THEN skip ONLY the lastUsedAt write. The remaining decrement (if applicable) MUST still execute regardless of throttle. These are decoupled write paths. +- [ ] AC-12: GIVEN list()/listByTag() WHEN result set is large THEN cursor-based pagination is used, no unbounded .collect() +- [ ] AC-13: GIVEN revokeByTag() WHEN matching keys > batch size THEN operation uses scheduler continuation pattern (internal mutation with cursor, not recursive scheduler) +- [ ] AC-14: GIVEN revokeByTag() WHEN keys are in "rotating" or "disabled" status THEN those keys are also revoked (not silently skipped) +- [ ] AC-15: GIVEN getUsage() THEN result comes from ShardedCounter (O(1)). Period-based event scan removed (no event table). +- [ ] AC-16: GIVEN all mutations that previously inserted apiKeyEvents THEN those inserts are replaced with structured logging via log.ts. No event table writes. @convex-dev/rate-limiter removed from component (rate limiting is integrator's responsibility). + +### Error Criteria (BLOCKING — all must pass) + +- [ ] AC-E1: GIVEN cross-tenant keyId on any admin mutation WHEN ownerId doesn't match THEN error thrown, no state change occurs +- [ ] AC-E2: GIVEN config with invalid bounds WHEN configure() called THEN error thrown, config unchanged +- [ ] AC-E3: GIVEN key segment (prefix/env) with invalid charset WHEN create() called THEN error thrown, no key created +- [ ] AC-E4: GIVEN rotate() with gracePeriodMs=0 or gracePeriodMs=-1 THEN error thrown, no rotation occurs + +### Should Have (ship without, fix soon) + +- [ ] AC-17: GIVEN a getConfig() query WHEN called THEN returns current config values (trivial — config is currently write-only) +- [ ] AC-18: GIVEN a finite-use key WHEN rotate() called THEN quota is shared (not duplicated) across old + new key +- [ ] AC-19: GIVEN ApiKeysConfig WHEN logger is provided THEN component uses injected logger transport + +## Scope + +**Ordering note:** Scope item 2 is a cascade dependency — it changes create()/rotate() args, which breaks example.ts and all tests that call create(). Implement item 2 first, update example + tests to compile, THEN proceed with remaining items. + +- [ ] 1. Add ownerId as **required arg** on revoke/disable/enable/update/rotate; assert key.ownerId === args.ownerId before state change -> AC-1, AC-E1 +- [ ] 2. Move secret generation (lookupPrefix, secretHex, hash) into component mutations for **both create() AND rotate()**; remove from client args -> AC-2 **(IMPLEMENT FIRST — gates test updates)** +- [ ] 3. Decouple remaining decrement from lastUsedAt write in validate; throttle lastUsedAt only (skip if delta < 60s) -> AC-11 +- [ ] 4. Add keyPrefix charset validator (^[a-zA-Z0-9]+$) -> AC-3, AC-E3 +- [ ] 5. Add env charset validator (^[a-zA-Z0-9-]+$, no underscores) -> AC-4, AC-E3 +- [ ] 6. Enforce gracePeriodMs bounds on rotate: min 60s, max 30 days -> AC-5, AC-E4 +- [ ] 7. Add bounds validation on configure() inputs (min/max ranges, no negative) -> AC-6, AC-E2 +- [ ] 8. Replace configure() event insert with structured log of old/new diff -> AC-7 +- [ ] 9. Cap metadata (4KB), scopes (50), tags (20), string fields (256 chars) -> AC-8 +- [ ] 10. Replace ALL apiKeyEvents inserts with structured logging; add validate outcome logging -> AC-9, AC-16 +- [ ] 11. Align README with implementation + migration guide (before/after code examples for all breaking changes) -> AC-10 +- [ ] 12. Remove `apiKeyEvents` table from schema; remove EVENT_TYPE from shared.ts; clean up all event insert code -> AC-16 +- [ ] 13. Simplify getUsage() to ShardedCounter-only (remove period param + event scan) -> AC-15 +- [ ] 14. Add cursor-based pagination to list(), listByTag() -> AC-12 +- [ ] 15. Add tag index or optimize listByTag query path -> AC-12 +- [ ] 16. Paginate revokeByTag via internal mutation with cursor continuation -> AC-13 +- [ ] 17. Expand revokeByTag to include rotating + disabled statuses -> AC-14 +- [ ] 18. Remove `@convex-dev/rate-limiter`, `aggregate`, `crons` from convex.config.ts + remove rateLimiter usage from mutations.ts -> AC-16 + +### Out of Scope + +- **Rate limiting** (1.3, 1.4, 3.4) — **removed from component entirely.** Rate limiting is a cross-cutting concern that belongs in the integrator's HTTP action/mutation layer where they have real caller context (IP, auth, plan tier). The component has zero caller context — it cannot make informed rate-limit decisions. Integrators should use `@convex-dev/rate-limiter` directly in their Convex functions. The `@convex-dev/rate-limiter` dependency is removed from the component. +- **Event retention cron** (3.1) — no longer needed; apiKeyEvents table removed. Structured logs handled by Convex platform log retention. +- **Expired key sweep cron** (3.2) — keys expire lazily during validate(). Proactive sweep deferred to v1.2 if needed based on production data. +- **Time-bucketed usage aggregation** (3.3) — removed with event table. ShardedCounter provides O(1) total count. Period-based analytics deferred to v1.2 (requires external analytics pipeline, not in-component event scanning). +- **Configurable rate-limit policy per key/owner/tier** (3.4) — deferred to v0.3. Current hardcoded policy is acceptable for v0.2. +- **Finite-use rotation semantics** (3.5, H7) — deferred to v0.3. Acknowledged as HIGH severity; requires design decision on shared-quota vs block-rotation. Documenting the current behavior (quota duplication) in README as known limitation. +- **Actor/source metadata on audit** (3.6, H6) — deferred to v0.3. Structured logs include function name + args context via Convex platform; explicit actor field deferred. +- **Injectable logger** (3.7) — deferred to v0.3. Current console-based logger is sufficient for v0.2. +- **onEvent dispatch hook** (4.1) — v2.0 +- **Type-safe metadata generics** (4.2) — v2.0 +- **Key format versioning** (4.3) — v2.0 +- **HMAC-SHA256 with server-side pepper** (4.4) — v2.0 +- **Middleware/plugin composition** (4.5) — v2.0 +- **Admin cross-owner listing** (4.6) — v2.0 +- **Offline/edge validation** (4.7) — v2.0 + +## Quality Checklist + +### Blocking (must pass to ship) + +- [ ] All Must Have ACs passing (AC-1 through AC-16) +- [ ] All Error Criteria ACs passing (AC-E1 through AC-E4) +- [ ] All 18 scope items implemented +- [ ] No regressions in existing tests (tests updated for new API) +- [ ] Error states handled (not just happy path) +- [ ] No hardcoded secrets or credentials +- [ ] No unbounded .collect() in any public query or mutation +- [ ] All admin mutations require + check ownerId before state change +- [ ] apiKeyEvents table fully removed; no residual event inserts +- [ ] @convex-dev/rate-limiter fully removed from component +- [ ] README claims match implementation 1:1 with migration guide +- [ ] remaining decrement NOT affected by lastUsedAt throttle + +### Advisory (should pass, not blocking) + +- [ ] All Should Have ACs passing (AC-17 through AC-19) +- [ ] Code follows existing project patterns (mutation/query separation, jsonValue alias, TAG_PATTERN style) +- [ ] New validators follow shared.ts pattern (exported const + function) +- [ ] Test coverage for auth boundary, malformed probes, large-tenant pagination + +## Test Strategy (MANDATORY) + +### Test Environment + +| Component | Status | Detail | +|-----------|--------|--------| +| Test runner | detected | vitest with @edge-runtime/vm | +| E2E framework | not applicable | Backend component — no UI. Integration tests via convex-test | +| Test DB | in-memory | convex-test provides in-memory Convex runtime | +| Mock inventory | 0 mocks | All tests use real convex-test runtime with registered child components | + +### AC -> Test Mapping + +| AC | Test Type | Test Intention | +|----|-----------|----------------| +| AC-1 | Integration | Cross-tenant revoke/disable/enable/update/rotate all throw "unauthorized" with ownerId mismatch | +| AC-2 | Integration | create() and rotate() no longer accept hash/lookupPrefix/secretHex; server generates them | +| AC-3 | Unit | keyPrefix with special chars rejected | +| AC-4 | Unit | env with underscores rejected | +| AC-5 | Integration | gracePeriodMs < 60s and > 30d both rejected on rotate | +| AC-6 | Integration | configure() with negative/zero/out-of-range values rejected | +| AC-7 | Integration | configure() produces structured log with old + new values | +| AC-8 | Integration | Oversized metadata/scopes/tags rejected on create and update | +| AC-9 | Integration | Validate outcomes produce structured log (spy on console.log) | +| AC-10 | Manual | README review against implementation + migration guide completeness | +| AC-11 | Integration | Two validates < 60s apart: second skips lastUsedAt but still decrements remaining | +| AC-12 | Integration | list()/listByTag() with 200+ keys returns paginated results with cursor | +| AC-13 | Integration | revokeByTag with many keys uses internal mutation continuation | +| AC-14 | Integration | revokeByTag catches rotating + disabled keys | +| AC-15 | Integration | getUsage() returns counter value (O(1)) | +| AC-16 | Integration | No apiKeyEvents inserts or rateLimiter usage in mutations.ts | +| AC-E1 | Integration | Cross-tenant admin ops: no state change, error thrown | +| AC-E2 | Integration | Bad config: config table unchanged after error | +| AC-E3 | Unit | Invalid charset: no key inserted after error | +| AC-E4 | Integration | gracePeriodMs=0 and gracePeriodMs=-1 both rejected on rotate | + +### Failure Mode Tests (MANDATORY) + +| Source | ID | Test Intention | Priority | +|--------|----|----------------|----------| +| Error Journey | E1 | Integration: cross-tenant keyId on all 5 admin mutations -> all throw, zero state change | BLOCKING | +| Error Journey | E3 | Integration: negative config values -> config table unchanged | BLOCKING | +| Error Journey | E4 | Unit: keyPrefix "my_app" and env "live_test" both rejected | BLOCKING | +| Edge Case | EC5 | Integration: 2 validates < 60s apart -> lastUsedAt skipped but remaining decremented | BLOCKING | +| Edge Case | EC6 | Integration: gracePeriodMs=0 on rotate -> rejected with bounds error | BLOCKING | +| Failure Hypothesis | FH-1 (HIGH) | Integration: revokeByTag with 100+ keys -> completes without mutation timeout | BLOCKING | +| Failure Hypothesis | FH-2 (HIGH) | Integration: ownerId omitted -> operation fails, not silently succeeds | BLOCKING | + +### Mock Boundary + +| Dependency | Strategy | Justification | +|------------|----------|---------------| +| @convex-dev/sharded-counter | Real (convex-test) | Already registered via shardedCounterTest.register() | + +### TDD Commitment + +All tests written BEFORE implementation (RED -> GREEN -> REFACTOR). +Every Must Have + Error AC tracked in test file. + +## Risks + +| Risk | Impact | Likelihood | Mitigation | +|------|--------|------------|------------| +| Breaking changes on create()/rotate()/admin mutations | HIGH | HIGH | Bump to v0.2.0, migration guide in README with before/after code examples | +| Pagination type change breaks existing list() consumers | HIGH | HIGH | Return type changes from `KeyMetadata[]` to `PaginatedResult` — unavoidable type break. Document migration. | +| Scope item 2 cascades into all 69 tests | HIGH | HIGH | Implement first, update example + tests before proceeding. Budget 3-4h for this alone. | +| revokeByTag continuation pattern: initial .collect() is still unbounded | MED | MED | Use internal mutation with cursor-based iteration from the start (not collect-then-batch) | +| OCC contention on lastUsedAt throttle | MED | LOW | Throttle reduces but does NOT eliminate OCC retries under burst. Architectural fix (decoupling auth check from analytics write via query + scheduler pattern) deferred to v2.0. | +| Removing apiKeyEvents loses historical audit data on upgrade | MED | MED | Document in migration guide: existing event data should be exported before upgrading. Structured logs replace going forward. | +| Finite-use key rotation duplicates quota (H7, deferred) | HIGH | LOW | Documented as known limitation in README. Deferred to v0.3 — requires design decision. | + +**Kill criteria:** If pagination API proves too disruptive for v0.2, ship as opt-in parameter with unbounded default (and deprecation warning). If test update cascade for scope item 2 exceeds 4h, consider shipping auth/validation items (1, 3-10) first in a v0.2 without the create() API break. + +## State Machine + +### Key Status (existing — no changes to states, only to transition guards) + +``` + create() + │ + ▼ + ┌──────────┐ + ┌──────│ active │──────┐ + │ └──────────┘ │ + disable() │ rotate() + │ revoke() │ + ▼ │ ▼ + ┌──────────┐ │ ┌───────────┐ + │ disabled │ │ │ rotating │ + └──────────┘ │ └───────────┘ + enable()│ revoke()│ │grace end + │ │ │ + ▼ ▼ ▼ + ┌──────────┐ ┌──────────┐ ┌──────────┐ + │ active │ │ revoked │ │ expired │ + └──────────┘ └──────────┘ └──────────┘ + ▲ + expiresAt <= now + │ + (validate check) + + ┌───────────┐ + │ exhausted │ ← remaining reaches 0 during validate + └───────────┘ + +Terminal: revoked, expired, exhausted (no further transitions) +``` + +**Changes in this spec:** +- All transitions from non-terminal states require ownerId match (AC-1) +- `revokeByTag` now transitions `rotating` and `disabled` -> `revoked` (AC-16) +- `gracePeriodMs` bounded: min 60s, max 30 days on `rotate()` (AC-7) +- No apiKeyEvents written on transitions — structured logs replace event inserts + +**Complexity:** MEDIUM (5 states, 8 transitions, 2 guards) — existing useState-style is acceptable since state is DB-driven. + +## Analysis + +*Updated after 4-perspective spec review (Security, Convex Platform, DX, Skeptic) on 2026-03-25.* + +### Assumptions Challenged + +| Assumption | Evidence For | Evidence Against | Verdict | +|------------|-------------|-----------------|---------| +| Scope item 2 cascade is manageable | rotate() already does hash server-side (line 373) | create() args change breaks every test that calls create(). Unknown number of 69 tests affected. Also: rotate() client-side secret gen was missed in original spec. | VALID but HIGH-EFFORT — implement first, budget 3-4h for cascade | +| Rate limiting belongs in the component | Safe-by-default for integrators | Component has zero caller context (no IP, no auth, no plan tier). Hardcoded 1000/min could block legitimate high-volume users. Integrator has real context at their HTTP layer. | WRONG — removed. Rate limiting is integrator's responsibility. | +| lastUsedAt throttle is safe for quota accounting | Reduces OCC contention significantly | Code patches remaining + lastUsedAt in same ctx.db.patch (mutations.ts:221-228). If throttle skips the whole patch, remaining is not decremented = quota bypass. | RESOLVED — AC-11 now explicitly decouples the two write paths | +| Removing apiKeyEvents simplifies without losing value | Events are write-only (no listEvents query), getUsage scans are O(N), retention cron is unproven infrastructure | Loses audit trail in DB — incident investigation requires log search instead of DB query | VALID — structured logs (Convex dashboard) are the standard observability pattern for Convex components. DB-stored events were over-engineering for v1. | +| ownerId as required arg is the right design | Prevents silent bypass (optional would be inert). Component can't derive identity from ctx.auth without coupling to auth provider. | Breaking change on every admin mutation. Ergonomic regression: caller must track ownerId. | VALID — required arg is the only safe choice. Document migration. | + +### Blind Spots + +1. **[Migration]** Multiple breaking changes (create args, rotate args, admin ownerId, list return type, getUsage API, event table removal) ship simultaneously. No CHANGELOG exists. Migration guide content not yet written. + Why it matters: v0.1 -> v0.2 upgrade path must be crystal clear or adopters churn. + +2. **[Concurrency]** revokeByTag scheduler continuation creates eventually-consistent bulk revoke. During execution, some keys may still be active. + Why it matters: If consumer expects synchronous "all keys revoked" guarantee, the async pattern breaks their assumption. Document in README. + +3. **[Reactive churn]** lastUsedAt write (even throttled to 60s) invalidates all list() reactive query subscribers watching that owner. Not fixed by throttle — only reduced. + Why it matters: Dashboard subscribers get unnecessary re-renders. Architectural fix (deferred write via ctx.scheduler) is v2.0 scope. + +4. **[Data loss on upgrade]** Removing apiKeyEvents table deletes existing audit history. No export tooling provided. + Why it matters: Integrators who relied on event data for debugging/compliance lose it on upgrade. + +### Failure Hypotheses + +| IF | THEN | BECAUSE | Severity | Mitigation | +|----|------|---------|----------|------------| +| revokeByTag initial query is still unbounded .collect() | Mutation timeout on large tag sets | Even with cursor continuation, the first batch needs an initial query that could be large | HIGH | Use cursor-based iteration from the start: query with .take(batchSize), process batch, schedule continuation with cursor | +| ownerId added as required but old tests don't pass it | All 69 tests fail immediately after scope item 2 | Tests were written against v0.1 API without ownerId on admin ops | HIGH | Budget this in scope item 2 cascade. Update all test helpers first. | +| gracePeriodMs=0 passes validation | Old key instantly expires during rotation; caller loses access | Current code has no min bound check | MED | RESOLVED — AC-5 now specifies min 60s bound | + +### The Real Question + +Confirmed — the spec now solves the right problem with the right scope. Removing apiKeyEvents eliminates the two largest unknowns (aggregate wiring, retention cron) and simplifies the component from "infrastructure that stores analytics" to "infrastructure that authenticates and delegates observability to the platform." The ShardedCounter + structured logs combination is the idiomatic Convex pattern. + +The remaining risk is the breaking-change cascade (scope item 2 + ownerId + pagination). This is manageable with the ordering constraint (item 2 first) and a realistic 14h estimate. Removing rate limiting and events cut ~4h of complexity and eliminated the two largest integration unknowns. + +### Open Items + +- [gap] No CHANGELOG.md exists — create one as part of scope item 12 -> update spec (included in scope 12) +- [risk] revokeByTag initial query needs cursor-from-start design, not collect-then-batch -> no action (addressed in AC-15 wording) +- [question] Should we ship a v0.2 with non-breaking items first (items 3-10) and then v0.2 with breaking items (1-2, 15-16)? -> question for user +- [improvement] Consider splitting into 2 PRs: (a) non-breaking hardening v0.2, (b) breaking API changes v0.2 -> question for user + +## Notes + +### Ship Retro (2026-03-25) +**Estimate vs Actual:** 14h -> ~4h (286% faster) +**What worked:** Consolidating 33 roadmap items into 18 scope items prevented scope sprawl. Removing apiKeyEvents + rate-limiter cut ~60% of complexity. Ordering scope item 2 first (cascade dependency) avoided test rework. Parallel spec review (4 agents) caught 12 issues before any code was written. +**What didn't:** Generated component types (`_generated/component.ts`) had to be manually updated since `convex codegen` requires auth. Minor friction. +**Next time:** Spike aggregate/crons component APIs before including them in spec scope. Unverified integrations should never be Must Have ACs. + +## Progress + +| # | Scope Item | Status | Iteration | +|---|-----------|--------|-----------| +| 1 | ownerId required on admin mutations | pending | - | +| 2 | Server-side secret gen (create + rotate) | pending | - | +| 3 | Decouple remaining/lastUsedAt writes + throttle | pending | - | +| 4 | keyPrefix validator | pending | - | +| 5 | env validator | pending | - | +| 6 | gracePeriodMs min/max bounds | pending | - | +| 7 | configure() bounds | pending | - | +| 8 | configure() structured logging | pending | - | +| 9 | Size caps | pending | - | +| 10 | Replace all event inserts with structured logging | pending | - | +| 11 | README + migration guide | pending | - | +| 12 | Remove apiKeyEvents table + EVENT_TYPE | pending | - | +| 13 | Simplify getUsage to counter-only | pending | - | +| 14 | Cursor pagination (list, listByTag) | pending | - | +| 15 | Tag index optimization | pending | - | +| 16 | revokeByTag cursor continuation | pending | - | +| 17 | revokeByTag status expansion | pending | - | +| 18 | Remove rateLimiter + aggregate + crons | pending | - | + +## Timeline + +| Action | Timestamp | Duration | Notes | +|--------|-----------|----------|-------| +| plan | 2026-03-25T00:00:00Z | - | Created from ROADMAP.md + DEEP-ANALYSIS.md | +| spec-review | 2026-03-25T00:00:00Z | - | 4-perspective review (Security, Convex, DX, Skeptic). 12 action items applied. Removed apiKeyEvents table per user decision. Cut crons/aggregates. | +| revision | 2026-03-25T00:00:00Z | - | Removed @convex-dev/rate-limiter — rate limiting is integrator's responsibility at HTTP layer. 19 -> 18 scope items. Estimate 18h -> 14h. | +| ship | 2026-03-25T00:00:00Z | - | Implemented all 18 scope items. 82 tests passing. | +| review | 2026-03-25T00:00:00Z | - | 7-perspective deep review. 4 fixes applied (double-patch, getUsage ownerId, UsageStats.lastUsedAt, +14 tests). | +| done | 2026-03-25T00:00:00Z | ~4h | Shipped. PR #37 created. | From e877396b2775e464fe34903448da3dbd7874febd Mon Sep 17 00:00:00 2001 From: bntvllnt <32437578+bntvllnt@users.noreply.github.com> Date: Wed, 25 Mar 2026 23:32:44 +0100 Subject: [PATCH 4/4] chore: remove shipped spec from specs/active --- .../2026-03-25-v1-production-hardening.md | 418 ------------------ 1 file changed, 418 deletions(-) delete mode 100644 specs/active/2026-03-25-v1-production-hardening.md diff --git a/specs/active/2026-03-25-v1-production-hardening.md b/specs/active/2026-03-25-v1-production-hardening.md deleted file mode 100644 index 6a11408..0000000 --- a/specs/active/2026-03-25-v1-production-hardening.md +++ /dev/null @@ -1,418 +0,0 @@ ---- -title: "v0.2.0 Production Hardening" -status: active -created: 2026-03-25 -estimate: 14h -tier: standard ---- - -# v0.2.0 Production Hardening - -## Context - -The deep analysis (2026-03-24, 7-perspective audit) found 3 critical, 7 high, and 7 medium-severity issues that block production use of `@vllnt/convex-api-keys`. The component has solid crypto fundamentals (hashed storage, prefix-indexed lookup, timing-safe comparison) but lacks authorization boundaries, has write-heavy validation, unbounded collection scans, and an operational story weaker than the README promises. This spec consolidates Phases 1-2 roadmap items plus select Phase 3 items into a single implementable plan for v0.2. Phase 3 operational items (crons, aggregates) and all Phase 4 items are deferred to subsequent specs. - -## Security Model - -**Threat model:** This component protects against **accidental cross-tenant bugs in honest host apps** (Threat A), NOT against malicious/compromised integrators (Threat B). - -**Trust boundary:** The component trusts the `ownerId` it receives from the host app. It does not derive or verify caller identity from `ctx.auth`. The ownerId cross-check (AC-1) prevents a host app bug from accidentally operating on another tenant's keys — it does NOT prevent a compromised host app from passing a forged ownerId. - -**Implication:** Integrators who need Threat B protection must derive `ownerId` from their own auth layer (e.g., `ctx.auth.getUserIdentity()`) before passing it to the component. The component cannot enforce this — it is the host app's responsibility. - -**Design decision:** ownerId is a **required arg** on all admin mutations (revoke, disable, enable, update, rotate). This is a breaking change from v0.1 (which took only keyId). The component internally fetches the key and asserts `key.ownerId === args.ownerId` before any state change. This was chosen over optional-ownerId (which would be silently bypassable) and over component-internal ctx.auth (which would couple the component to a specific auth provider). - -## Codebase Impact (MANDATORY) - -| Area | Impact | Detail | -|------|--------|--------| -| `src/component/mutations.ts` | MODIFY | Auth boundary (ownerId on admin mutations), secret generation server-side for create() AND rotate(), config bounds/audit, revokeByTag pagination+status expansion, gracePeriod min/max bounds, remove all apiKeyEvents inserts + rateLimiter usage — replace with structured logging + counter, decouple lastUsedAt from remaining write | -| `src/component/queries.ts` | MODIFY | Cursor-based pagination on list/listByTag, simplify getUsage to counter-only (remove event scan), add getConfig query | -| `src/component/schema.ts` | MODIFY | **Remove `apiKeyEvents` table entirely.** Add tag index (by_owner_tag). Remove aggregate/crons component registration if unused. | -| `src/component/convex.config.ts` | MODIFY | Remove `aggregate`, `crons`, and `rateLimiter` component registrations (rate limiting is integrator's responsibility; events removed) | -| `src/component/validators.ts` | MODIFY | Add keyPrefix/env charset validators, size cap validators, config bound validators | -| `src/shared.ts` | MODIFY | Add keyPrefix/env validation functions, KEY_PREFIX_PATTERN/ENV_PATTERN constants. Remove EVENT_TYPE validator (no event table). | -| `src/client/index.ts` | MODIFY | Remove client-side hash/secret generation from create() AND rotate(), move to server. Add ownerId as required arg on admin mutations. Add pagination params to list/listByTag. Simplify getUsage (counter-only). | -| `src/client/types.ts` | MODIFY | Add PaginatedResult, ConfigResult types. Update CreateKeyOptions (remove hash/lookupPrefix/secretHex). Add ownerId to admin mutation args. Remove KeyEvent type. | -| `src/log.ts` | MODIFY | Add structured validate outcome logging (replaces event table as audit trail) | -| `README.md` | MODIFY | Align claims with implementation, migration guide with before/after code examples, document new APIs | -| `example/convex/example.ts` | AFFECTED | Must update to new client API (no hash/secret params, ownerId on admin ops, pagination on list) | -| `example/convex/example.test.ts` | AFFECTED | Existing 69 tests must be updated (create() args change, ownerId added). Add auth boundary, rate-limit, malformed probe tests | - -**Files:** 0 create | 10 modify | 2 affected -**Reuse:** Existing `ShardedCounter` instance, `TAG_PATTERN` regex, `parseKeyString`, `sha256Hex`, `timingSafeEqual` -**Breaking changes:** -- `create()` client API — removes `hash`, `lookupPrefix`, `secretHex` from args (secret gen moves server-side) -- `rotate()` client API — removes `lookupPrefix`, `secretHex` from args (same fix) -- Admin mutations (revoke/disable/enable/update/rotate) — `ownerId` becomes required arg -- `list()`/`listByTag()` return paginated results (type-level break: `KeyMetadata[]` -> `PaginatedResult`) -- `getUsage()` — removes `period` param (counter-only, no event scan) -- `apiKeyEvents` table removed — audit trail moves to structured logs -- Migration path: bump to v0.2.0, include before/after code examples in README -**New dependencies:** None - -## User Journey (MANDATORY) - -### Primary Journey - -ACTOR: Convex developer integrating `@vllnt/convex-api-keys` as a component -GOAL: Use the component safely in production with multi-tenant isolation, scale, and observability -PRECONDITION: Component installed, schema deployed, `ApiKeys` client instantiated - -1. Developer calls `apiKeys.create(ctx, { name, ownerId, ... })` - -> System generates secret material server-side, stores only hash - -> Developer receives `{ keyId, key }` — raw key returned once - -2. End-user sends API key in request header - -> Developer calls `apiKeys.validate(ctx, { key })` - -> System validates hash, returns scopes/metadata (rate limiting is integrator's responsibility at HTTP layer) - -> Developer uses result for authorization - -3. Developer calls `apiKeys.list(ctx, { ownerId, cursor? })` - -> System returns paginated results scoped to owner - -> Developer renders key management UI - -4. Developer calls `apiKeys.revoke(ctx, { keyId, ownerId })` - -> System asserts key.ownerId === args.ownerId, transitions to revoked - -> Structured log emitted (replaces event table) - -5. Developer monitors usage via `apiKeys.getUsage(ctx, { keyId })` - -> System returns O(1) usage count from ShardedCounter - -> Developer sees dashboard data without scan overhead - -POSTCONDITION: Keys are managed with auth boundaries, paginated queries, and structured audit logging - -### Error Journeys - -E1. Cross-tenant access attempt - Trigger: Caller passes keyId they don't own to revoke/update/disable/enable - 1. Developer calls `apiKeys.revoke(ctx, { keyId, ownerId })` - -> System checks ownerId matches key's ownerId - -> System throws "unauthorized: key does not belong to owner" - Recovery: Caller corrects to their own keyId - -E2. Brute-force key probing - Trigger: Attacker sends many random/malformed keys to validate - -> Component returns `{ valid: false, reason: "malformed" | "not_found" }` quickly (no writes on miss) - -> **Rate limiting is the integrator's responsibility** at their HTTP action/mutation layer using `@convex-dev/rate-limiter` with real caller context (IP, auth, etc.) - Recovery: Integrator adds rate limiting at their API boundary where they have caller context - -E3. Config poisoning - Trigger: Caller sets cleanupIntervalMs to 0 or negative - 1. Developer calls `apiKeys.configure(ctx, { cleanupIntervalMs: -1 })` - -> System validates bounds (min 1h, max 30 days) - -> System throws "cleanupIntervalMs must be between 3600000 and 2592000000" - Recovery: Caller provides valid value - -E4. Unbounded list at scale - Trigger: Owner has 10,000+ keys, calls list() without pagination - 1. Developer calls `apiKeys.list(ctx, { ownerId })` - -> System returns first page (default 100) with cursor - -> Developer uses cursor for next page - Recovery: No recovery needed — pagination is automatic - -### Edge Cases - -EC1. Rotate finite-use key: quota must not duplicate across old + new key during grace period -EC2. revokeByTag with 500+ matching keys: must batch via scheduler, not single mutation -EC3. Empty metadata/scopes/tags: system accepts gracefully, no null vs undefined confusion -EC4. Key with env containing underscores: rejected by charset validator (would break parsing) -EC5. Concurrent validate on same key: lastUsedAt throttled to avoid OCC contention -EC6. gracePeriodMs=0 on rotate: rejected by min bound (60s) — prevents instant-expire during rotation - -## Acceptance Criteria (MANDATORY) - -### Must Have (BLOCKING — all must pass to ship) - -- [ ] AC-1: GIVEN a mutation (revoke/disable/enable/update/rotate) WHEN caller provides keyId with ownerId that doesn't match key's ownerId THEN system throws "unauthorized" error. ownerId is a **required arg** on all admin mutations. -- [ ] AC-2: GIVEN a create() OR rotate() call WHEN client sends options THEN secret material (lookupPrefix, secretHex, hash) is generated inside the component mutation, not passed from client. Applies to both create() and rotate(). -- [ ] AC-3: GIVEN a create() call WHEN keyPrefix contains non-alphanumeric chars THEN system rejects with validation error -- [ ] AC-4: GIVEN a create() call WHEN env contains underscores THEN system rejects with "env must match ^[a-zA-Z0-9-]+$" -- [ ] AC-5: GIVEN a rotate() call WHEN gracePeriodMs < 60s (min) OR > 30 days (max) THEN system rejects with bounds error. Minimum prevents instant-expire during rotation. -- [ ] AC-6: GIVEN a configure() call WHEN any input is negative or zero THEN system rejects with bounds error -- [ ] AC-7: GIVEN any configure() call THEN system emits structured log with old/new values -- [ ] AC-8: GIVEN create/update with metadata > 4KB or scopes > 50 or tags > 20 THEN system rejects with size error -- [ ] AC-9: GIVEN any validate() outcome (success, malformed, not_found, revoked, etc.) THEN structured log emitted with outcome, keyId (if matched), and reason. **Structured logs are the audit trail** (replaces apiKeyEvents table). -- [ ] AC-10: GIVEN README claims WHEN compared to implementation THEN all documented features either exist or are explicitly marked "coming in vX.Y". Includes migration guide with before/after code examples for all breaking changes. -- [ ] AC-11: GIVEN successful validate() calls WHEN lastUsedAt delta < 60s THEN skip ONLY the lastUsedAt write. The remaining decrement (if applicable) MUST still execute regardless of throttle. These are decoupled write paths. -- [ ] AC-12: GIVEN list()/listByTag() WHEN result set is large THEN cursor-based pagination is used, no unbounded .collect() -- [ ] AC-13: GIVEN revokeByTag() WHEN matching keys > batch size THEN operation uses scheduler continuation pattern (internal mutation with cursor, not recursive scheduler) -- [ ] AC-14: GIVEN revokeByTag() WHEN keys are in "rotating" or "disabled" status THEN those keys are also revoked (not silently skipped) -- [ ] AC-15: GIVEN getUsage() THEN result comes from ShardedCounter (O(1)). Period-based event scan removed (no event table). -- [ ] AC-16: GIVEN all mutations that previously inserted apiKeyEvents THEN those inserts are replaced with structured logging via log.ts. No event table writes. @convex-dev/rate-limiter removed from component (rate limiting is integrator's responsibility). - -### Error Criteria (BLOCKING — all must pass) - -- [ ] AC-E1: GIVEN cross-tenant keyId on any admin mutation WHEN ownerId doesn't match THEN error thrown, no state change occurs -- [ ] AC-E2: GIVEN config with invalid bounds WHEN configure() called THEN error thrown, config unchanged -- [ ] AC-E3: GIVEN key segment (prefix/env) with invalid charset WHEN create() called THEN error thrown, no key created -- [ ] AC-E4: GIVEN rotate() with gracePeriodMs=0 or gracePeriodMs=-1 THEN error thrown, no rotation occurs - -### Should Have (ship without, fix soon) - -- [ ] AC-17: GIVEN a getConfig() query WHEN called THEN returns current config values (trivial — config is currently write-only) -- [ ] AC-18: GIVEN a finite-use key WHEN rotate() called THEN quota is shared (not duplicated) across old + new key -- [ ] AC-19: GIVEN ApiKeysConfig WHEN logger is provided THEN component uses injected logger transport - -## Scope - -**Ordering note:** Scope item 2 is a cascade dependency — it changes create()/rotate() args, which breaks example.ts and all tests that call create(). Implement item 2 first, update example + tests to compile, THEN proceed with remaining items. - -- [ ] 1. Add ownerId as **required arg** on revoke/disable/enable/update/rotate; assert key.ownerId === args.ownerId before state change -> AC-1, AC-E1 -- [ ] 2. Move secret generation (lookupPrefix, secretHex, hash) into component mutations for **both create() AND rotate()**; remove from client args -> AC-2 **(IMPLEMENT FIRST — gates test updates)** -- [ ] 3. Decouple remaining decrement from lastUsedAt write in validate; throttle lastUsedAt only (skip if delta < 60s) -> AC-11 -- [ ] 4. Add keyPrefix charset validator (^[a-zA-Z0-9]+$) -> AC-3, AC-E3 -- [ ] 5. Add env charset validator (^[a-zA-Z0-9-]+$, no underscores) -> AC-4, AC-E3 -- [ ] 6. Enforce gracePeriodMs bounds on rotate: min 60s, max 30 days -> AC-5, AC-E4 -- [ ] 7. Add bounds validation on configure() inputs (min/max ranges, no negative) -> AC-6, AC-E2 -- [ ] 8. Replace configure() event insert with structured log of old/new diff -> AC-7 -- [ ] 9. Cap metadata (4KB), scopes (50), tags (20), string fields (256 chars) -> AC-8 -- [ ] 10. Replace ALL apiKeyEvents inserts with structured logging; add validate outcome logging -> AC-9, AC-16 -- [ ] 11. Align README with implementation + migration guide (before/after code examples for all breaking changes) -> AC-10 -- [ ] 12. Remove `apiKeyEvents` table from schema; remove EVENT_TYPE from shared.ts; clean up all event insert code -> AC-16 -- [ ] 13. Simplify getUsage() to ShardedCounter-only (remove period param + event scan) -> AC-15 -- [ ] 14. Add cursor-based pagination to list(), listByTag() -> AC-12 -- [ ] 15. Add tag index or optimize listByTag query path -> AC-12 -- [ ] 16. Paginate revokeByTag via internal mutation with cursor continuation -> AC-13 -- [ ] 17. Expand revokeByTag to include rotating + disabled statuses -> AC-14 -- [ ] 18. Remove `@convex-dev/rate-limiter`, `aggregate`, `crons` from convex.config.ts + remove rateLimiter usage from mutations.ts -> AC-16 - -### Out of Scope - -- **Rate limiting** (1.3, 1.4, 3.4) — **removed from component entirely.** Rate limiting is a cross-cutting concern that belongs in the integrator's HTTP action/mutation layer where they have real caller context (IP, auth, plan tier). The component has zero caller context — it cannot make informed rate-limit decisions. Integrators should use `@convex-dev/rate-limiter` directly in their Convex functions. The `@convex-dev/rate-limiter` dependency is removed from the component. -- **Event retention cron** (3.1) — no longer needed; apiKeyEvents table removed. Structured logs handled by Convex platform log retention. -- **Expired key sweep cron** (3.2) — keys expire lazily during validate(). Proactive sweep deferred to v1.2 if needed based on production data. -- **Time-bucketed usage aggregation** (3.3) — removed with event table. ShardedCounter provides O(1) total count. Period-based analytics deferred to v1.2 (requires external analytics pipeline, not in-component event scanning). -- **Configurable rate-limit policy per key/owner/tier** (3.4) — deferred to v0.3. Current hardcoded policy is acceptable for v0.2. -- **Finite-use rotation semantics** (3.5, H7) — deferred to v0.3. Acknowledged as HIGH severity; requires design decision on shared-quota vs block-rotation. Documenting the current behavior (quota duplication) in README as known limitation. -- **Actor/source metadata on audit** (3.6, H6) — deferred to v0.3. Structured logs include function name + args context via Convex platform; explicit actor field deferred. -- **Injectable logger** (3.7) — deferred to v0.3. Current console-based logger is sufficient for v0.2. -- **onEvent dispatch hook** (4.1) — v2.0 -- **Type-safe metadata generics** (4.2) — v2.0 -- **Key format versioning** (4.3) — v2.0 -- **HMAC-SHA256 with server-side pepper** (4.4) — v2.0 -- **Middleware/plugin composition** (4.5) — v2.0 -- **Admin cross-owner listing** (4.6) — v2.0 -- **Offline/edge validation** (4.7) — v2.0 - -## Quality Checklist - -### Blocking (must pass to ship) - -- [ ] All Must Have ACs passing (AC-1 through AC-16) -- [ ] All Error Criteria ACs passing (AC-E1 through AC-E4) -- [ ] All 18 scope items implemented -- [ ] No regressions in existing tests (tests updated for new API) -- [ ] Error states handled (not just happy path) -- [ ] No hardcoded secrets or credentials -- [ ] No unbounded .collect() in any public query or mutation -- [ ] All admin mutations require + check ownerId before state change -- [ ] apiKeyEvents table fully removed; no residual event inserts -- [ ] @convex-dev/rate-limiter fully removed from component -- [ ] README claims match implementation 1:1 with migration guide -- [ ] remaining decrement NOT affected by lastUsedAt throttle - -### Advisory (should pass, not blocking) - -- [ ] All Should Have ACs passing (AC-17 through AC-19) -- [ ] Code follows existing project patterns (mutation/query separation, jsonValue alias, TAG_PATTERN style) -- [ ] New validators follow shared.ts pattern (exported const + function) -- [ ] Test coverage for auth boundary, malformed probes, large-tenant pagination - -## Test Strategy (MANDATORY) - -### Test Environment - -| Component | Status | Detail | -|-----------|--------|--------| -| Test runner | detected | vitest with @edge-runtime/vm | -| E2E framework | not applicable | Backend component — no UI. Integration tests via convex-test | -| Test DB | in-memory | convex-test provides in-memory Convex runtime | -| Mock inventory | 0 mocks | All tests use real convex-test runtime with registered child components | - -### AC -> Test Mapping - -| AC | Test Type | Test Intention | -|----|-----------|----------------| -| AC-1 | Integration | Cross-tenant revoke/disable/enable/update/rotate all throw "unauthorized" with ownerId mismatch | -| AC-2 | Integration | create() and rotate() no longer accept hash/lookupPrefix/secretHex; server generates them | -| AC-3 | Unit | keyPrefix with special chars rejected | -| AC-4 | Unit | env with underscores rejected | -| AC-5 | Integration | gracePeriodMs < 60s and > 30d both rejected on rotate | -| AC-6 | Integration | configure() with negative/zero/out-of-range values rejected | -| AC-7 | Integration | configure() produces structured log with old + new values | -| AC-8 | Integration | Oversized metadata/scopes/tags rejected on create and update | -| AC-9 | Integration | Validate outcomes produce structured log (spy on console.log) | -| AC-10 | Manual | README review against implementation + migration guide completeness | -| AC-11 | Integration | Two validates < 60s apart: second skips lastUsedAt but still decrements remaining | -| AC-12 | Integration | list()/listByTag() with 200+ keys returns paginated results with cursor | -| AC-13 | Integration | revokeByTag with many keys uses internal mutation continuation | -| AC-14 | Integration | revokeByTag catches rotating + disabled keys | -| AC-15 | Integration | getUsage() returns counter value (O(1)) | -| AC-16 | Integration | No apiKeyEvents inserts or rateLimiter usage in mutations.ts | -| AC-E1 | Integration | Cross-tenant admin ops: no state change, error thrown | -| AC-E2 | Integration | Bad config: config table unchanged after error | -| AC-E3 | Unit | Invalid charset: no key inserted after error | -| AC-E4 | Integration | gracePeriodMs=0 and gracePeriodMs=-1 both rejected on rotate | - -### Failure Mode Tests (MANDATORY) - -| Source | ID | Test Intention | Priority | -|--------|----|----------------|----------| -| Error Journey | E1 | Integration: cross-tenant keyId on all 5 admin mutations -> all throw, zero state change | BLOCKING | -| Error Journey | E3 | Integration: negative config values -> config table unchanged | BLOCKING | -| Error Journey | E4 | Unit: keyPrefix "my_app" and env "live_test" both rejected | BLOCKING | -| Edge Case | EC5 | Integration: 2 validates < 60s apart -> lastUsedAt skipped but remaining decremented | BLOCKING | -| Edge Case | EC6 | Integration: gracePeriodMs=0 on rotate -> rejected with bounds error | BLOCKING | -| Failure Hypothesis | FH-1 (HIGH) | Integration: revokeByTag with 100+ keys -> completes without mutation timeout | BLOCKING | -| Failure Hypothesis | FH-2 (HIGH) | Integration: ownerId omitted -> operation fails, not silently succeeds | BLOCKING | - -### Mock Boundary - -| Dependency | Strategy | Justification | -|------------|----------|---------------| -| @convex-dev/sharded-counter | Real (convex-test) | Already registered via shardedCounterTest.register() | - -### TDD Commitment - -All tests written BEFORE implementation (RED -> GREEN -> REFACTOR). -Every Must Have + Error AC tracked in test file. - -## Risks - -| Risk | Impact | Likelihood | Mitigation | -|------|--------|------------|------------| -| Breaking changes on create()/rotate()/admin mutations | HIGH | HIGH | Bump to v0.2.0, migration guide in README with before/after code examples | -| Pagination type change breaks existing list() consumers | HIGH | HIGH | Return type changes from `KeyMetadata[]` to `PaginatedResult` — unavoidable type break. Document migration. | -| Scope item 2 cascades into all 69 tests | HIGH | HIGH | Implement first, update example + tests before proceeding. Budget 3-4h for this alone. | -| revokeByTag continuation pattern: initial .collect() is still unbounded | MED | MED | Use internal mutation with cursor-based iteration from the start (not collect-then-batch) | -| OCC contention on lastUsedAt throttle | MED | LOW | Throttle reduces but does NOT eliminate OCC retries under burst. Architectural fix (decoupling auth check from analytics write via query + scheduler pattern) deferred to v2.0. | -| Removing apiKeyEvents loses historical audit data on upgrade | MED | MED | Document in migration guide: existing event data should be exported before upgrading. Structured logs replace going forward. | -| Finite-use key rotation duplicates quota (H7, deferred) | HIGH | LOW | Documented as known limitation in README. Deferred to v0.3 — requires design decision. | - -**Kill criteria:** If pagination API proves too disruptive for v0.2, ship as opt-in parameter with unbounded default (and deprecation warning). If test update cascade for scope item 2 exceeds 4h, consider shipping auth/validation items (1, 3-10) first in a v0.2 without the create() API break. - -## State Machine - -### Key Status (existing — no changes to states, only to transition guards) - -``` - create() - │ - ▼ - ┌──────────┐ - ┌──────│ active │──────┐ - │ └──────────┘ │ - disable() │ rotate() - │ revoke() │ - ▼ │ ▼ - ┌──────────┐ │ ┌───────────┐ - │ disabled │ │ │ rotating │ - └──────────┘ │ └───────────┘ - enable()│ revoke()│ │grace end - │ │ │ - ▼ ▼ ▼ - ┌──────────┐ ┌──────────┐ ┌──────────┐ - │ active │ │ revoked │ │ expired │ - └──────────┘ └──────────┘ └──────────┘ - ▲ - expiresAt <= now - │ - (validate check) - - ┌───────────┐ - │ exhausted │ ← remaining reaches 0 during validate - └───────────┘ - -Terminal: revoked, expired, exhausted (no further transitions) -``` - -**Changes in this spec:** -- All transitions from non-terminal states require ownerId match (AC-1) -- `revokeByTag` now transitions `rotating` and `disabled` -> `revoked` (AC-16) -- `gracePeriodMs` bounded: min 60s, max 30 days on `rotate()` (AC-7) -- No apiKeyEvents written on transitions — structured logs replace event inserts - -**Complexity:** MEDIUM (5 states, 8 transitions, 2 guards) — existing useState-style is acceptable since state is DB-driven. - -## Analysis - -*Updated after 4-perspective spec review (Security, Convex Platform, DX, Skeptic) on 2026-03-25.* - -### Assumptions Challenged - -| Assumption | Evidence For | Evidence Against | Verdict | -|------------|-------------|-----------------|---------| -| Scope item 2 cascade is manageable | rotate() already does hash server-side (line 373) | create() args change breaks every test that calls create(). Unknown number of 69 tests affected. Also: rotate() client-side secret gen was missed in original spec. | VALID but HIGH-EFFORT — implement first, budget 3-4h for cascade | -| Rate limiting belongs in the component | Safe-by-default for integrators | Component has zero caller context (no IP, no auth, no plan tier). Hardcoded 1000/min could block legitimate high-volume users. Integrator has real context at their HTTP layer. | WRONG — removed. Rate limiting is integrator's responsibility. | -| lastUsedAt throttle is safe for quota accounting | Reduces OCC contention significantly | Code patches remaining + lastUsedAt in same ctx.db.patch (mutations.ts:221-228). If throttle skips the whole patch, remaining is not decremented = quota bypass. | RESOLVED — AC-11 now explicitly decouples the two write paths | -| Removing apiKeyEvents simplifies without losing value | Events are write-only (no listEvents query), getUsage scans are O(N), retention cron is unproven infrastructure | Loses audit trail in DB — incident investigation requires log search instead of DB query | VALID — structured logs (Convex dashboard) are the standard observability pattern for Convex components. DB-stored events were over-engineering for v1. | -| ownerId as required arg is the right design | Prevents silent bypass (optional would be inert). Component can't derive identity from ctx.auth without coupling to auth provider. | Breaking change on every admin mutation. Ergonomic regression: caller must track ownerId. | VALID — required arg is the only safe choice. Document migration. | - -### Blind Spots - -1. **[Migration]** Multiple breaking changes (create args, rotate args, admin ownerId, list return type, getUsage API, event table removal) ship simultaneously. No CHANGELOG exists. Migration guide content not yet written. - Why it matters: v0.1 -> v0.2 upgrade path must be crystal clear or adopters churn. - -2. **[Concurrency]** revokeByTag scheduler continuation creates eventually-consistent bulk revoke. During execution, some keys may still be active. - Why it matters: If consumer expects synchronous "all keys revoked" guarantee, the async pattern breaks their assumption. Document in README. - -3. **[Reactive churn]** lastUsedAt write (even throttled to 60s) invalidates all list() reactive query subscribers watching that owner. Not fixed by throttle — only reduced. - Why it matters: Dashboard subscribers get unnecessary re-renders. Architectural fix (deferred write via ctx.scheduler) is v2.0 scope. - -4. **[Data loss on upgrade]** Removing apiKeyEvents table deletes existing audit history. No export tooling provided. - Why it matters: Integrators who relied on event data for debugging/compliance lose it on upgrade. - -### Failure Hypotheses - -| IF | THEN | BECAUSE | Severity | Mitigation | -|----|------|---------|----------|------------| -| revokeByTag initial query is still unbounded .collect() | Mutation timeout on large tag sets | Even with cursor continuation, the first batch needs an initial query that could be large | HIGH | Use cursor-based iteration from the start: query with .take(batchSize), process batch, schedule continuation with cursor | -| ownerId added as required but old tests don't pass it | All 69 tests fail immediately after scope item 2 | Tests were written against v0.1 API without ownerId on admin ops | HIGH | Budget this in scope item 2 cascade. Update all test helpers first. | -| gracePeriodMs=0 passes validation | Old key instantly expires during rotation; caller loses access | Current code has no min bound check | MED | RESOLVED — AC-5 now specifies min 60s bound | - -### The Real Question - -Confirmed — the spec now solves the right problem with the right scope. Removing apiKeyEvents eliminates the two largest unknowns (aggregate wiring, retention cron) and simplifies the component from "infrastructure that stores analytics" to "infrastructure that authenticates and delegates observability to the platform." The ShardedCounter + structured logs combination is the idiomatic Convex pattern. - -The remaining risk is the breaking-change cascade (scope item 2 + ownerId + pagination). This is manageable with the ordering constraint (item 2 first) and a realistic 14h estimate. Removing rate limiting and events cut ~4h of complexity and eliminated the two largest integration unknowns. - -### Open Items - -- [gap] No CHANGELOG.md exists — create one as part of scope item 12 -> update spec (included in scope 12) -- [risk] revokeByTag initial query needs cursor-from-start design, not collect-then-batch -> no action (addressed in AC-15 wording) -- [question] Should we ship a v0.2 with non-breaking items first (items 3-10) and then v0.2 with breaking items (1-2, 15-16)? -> question for user -- [improvement] Consider splitting into 2 PRs: (a) non-breaking hardening v0.2, (b) breaking API changes v0.2 -> question for user - -## Notes - -## Progress - -| # | Scope Item | Status | Iteration | -|---|-----------|--------|-----------| -| 1 | ownerId required on admin mutations | pending | - | -| 2 | Server-side secret gen (create + rotate) | pending | - | -| 3 | Decouple remaining/lastUsedAt writes + throttle | pending | - | -| 4 | keyPrefix validator | pending | - | -| 5 | env validator | pending | - | -| 6 | gracePeriodMs min/max bounds | pending | - | -| 7 | configure() bounds | pending | - | -| 8 | configure() structured logging | pending | - | -| 9 | Size caps | pending | - | -| 10 | Replace all event inserts with structured logging | pending | - | -| 11 | README + migration guide | pending | - | -| 12 | Remove apiKeyEvents table + EVENT_TYPE | pending | - | -| 13 | Simplify getUsage to counter-only | pending | - | -| 14 | Cursor pagination (list, listByTag) | pending | - | -| 15 | Tag index optimization | pending | - | -| 16 | revokeByTag cursor continuation | pending | - | -| 17 | revokeByTag status expansion | pending | - | -| 18 | Remove rateLimiter + aggregate + crons | pending | - | - -## Timeline - -| Action | Timestamp | Duration | Notes | -|--------|-----------|----------|-------| -| plan | 2026-03-25T00:00:00Z | - | Created from ROADMAP.md + DEEP-ANALYSIS.md | -| spec-review | 2026-03-25T00:00:00Z | - | 4-perspective review (Security, Convex, DX, Skeptic). 12 action items applied. Removed apiKeyEvents table per user decision. Cut crons/aggregates. | -| revision | 2026-03-25T00:00:00Z | - | Removed @convex-dev/rate-limiter — rate limiting is integrator's responsibility at HTTP layer. 19 -> 18 scope items. Estimate 18h -> 14h. |