diff --git a/.github/workflows/vercel-deploy-hook-backup.yml b/.github/workflows/vercel-deploy-hook-backup.yml index 00e1694d6..7172691bf 100644 --- a/.github/workflows/vercel-deploy-hook-backup.yml +++ b/.github/workflows/vercel-deploy-hook-backup.yml @@ -40,7 +40,7 @@ jobs: ping-vercel-deploy-hook: name: Ping Vercel Deploy Hook runs-on: ubuntu-latest - timeout-minutes: 5 + timeout-minutes: 15 steps: - name: Sanity-check secret presence run: | @@ -50,6 +50,22 @@ jobs: fi echo "secret-present=true" + # Why this wait: GitHub's main branch HEAD takes a few seconds to + # propagate to Vercel's git-mirror. If we fire the deploy hook + # immediately after a merge, Vercel can clone an OLDER commit and + # ship a build that doesn't contain the just-merged code — exactly + # what bit us on commit 5a4d690 (PR #204): a deploy completed at + # 08:13:43 but built without the merged source, causing 15 minutes + # of stale serving until a manual re-trigger. Native GitHub→Vercel + # webhook integration usually handles this race because it passes + # the commit SHA explicitly; deploy hook URLs do not, so we wait. + - name: Wait for git mirror propagation (race fix) + if: github.event_name == 'push' + run: | + echo "Waiting 60s before firing deploy hook so Vercel's git mirror catches up to GitHub HEAD..." + sleep 60 + echo "Done waiting." + - name: Trigger Vercel deploy hook env: DEPLOY_HOOK_URL: ${{ secrets.VERCEL_DEPLOY_HOOK_URL }} @@ -67,11 +83,36 @@ jobs: exit 1 fi + # Tier-A live verification per .claude/rules/live_dom_verification.md: + # poll the production URL until the entry-bundle hash changes from + # whatever was previously served. Times out at ~7 minutes + # (35 polls × 12s). Non-blocking — if the bundle never rotates, the step + # only emits a ::warning:: annotation and still exits 0. + - name: Verify new bundle is live (Tier A) + if: github.event_name == 'push' + env: + PROD_URL: https://www.nodebenchai.com + run: | + echo "Capturing baseline bundle hash before deploy completes..." 
+ baseline=$(curl -fsS "$PROD_URL/?nc=$RANDOM-$(date +%s)" 2>/dev/null | grep -oE 'assets/index-[A-Za-z0-9_-]+\.js' | head -1 || true); baseline=${baseline:-none} # empty capture (curl or grep miss) counts as none, with or without pipefail + echo "Baseline: $baseline" + for i in $(seq 1 35); do + sleep 12 + current=$(curl -fsS "$PROD_URL/?nc=$RANDOM-$(date +%s)" 2>/dev/null | grep -oE 'assets/index-[A-Za-z0-9_-]+\.js' | head -1 || true); current=${current:-none} # a failed poll must never look like a rotated bundle + if [ "$current" != "$baseline" ] && [ "$current" != "none" ]; then + echo "::notice::Live bundle rotated to $current after ~$((i*12))s" + echo "live-verified=true" >> "$GITHUB_OUTPUT" 2>/dev/null || true + exit 0 + fi + echo "Poll $i/35: still $current" + done + echo "::warning::Live bundle did not rotate within 7 minutes. Vercel deploy may still be in progress, or edge cache is stuck. Check https://vercel.com/hshum2018-gmailcoms-projects/nodebench-ai" + - name: Summary if: success() run: | echo "## Vercel deploy hook fired" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - echo "Triggered for commit \`${{ github.sha }}\` on \`main\`." >> $GITHUB_STEP_SUMMARY + echo "Triggered for commit \`${{ github.sha }}\` on \`main\` after 60s mirror-propagation wait." >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "Vercel deduplicates by commit SHA, so this is a no-op if the native push integration also fired. If the native integration is broken, this is the only thing that ships your code." 
>> $GITHUB_STEP_SUMMARY diff --git a/convex/_generated/api.d.ts b/convex/_generated/api.d.ts index c5f0aa67e..8547f6af2 100644 --- a/convex/_generated/api.d.ts +++ b/convex/_generated/api.d.ts @@ -547,8 +547,13 @@ import type * as domains_evaluation_validators from "../domains/evaluation/valid import type * as domains_evaluation_workbenchQueries from "../domains/evaluation/workbenchQueries.js"; import type * as domains_financialOperator_attFixture from "../domains/financialOperator/attFixture.js"; import type * as domains_financialOperator_extractors from "../domains/financialOperator/extractors.js"; +import type * as domains_financialOperator_fixtures_covenantFixture from "../domains/financialOperator/fixtures/covenantFixture.js"; +import type * as domains_financialOperator_fixtures_crmFixture from "../domains/financialOperator/fixtures/crmFixture.js"; +import type * as domains_financialOperator_fixtures_varianceFixture from "../domains/financialOperator/fixtures/varianceFixture.js"; import type * as domains_financialOperator_index from "../domains/financialOperator/index.js"; import type * as domains_financialOperator_orchestrator from "../domains/financialOperator/orchestrator.js"; +import type * as domains_financialOperator_orchestratorExamples from "../domains/financialOperator/orchestratorExamples.js"; +import type * as domains_financialOperator_realExtractors from "../domains/financialOperator/realExtractors.js"; import type * as domains_financialOperator_runOps from "../domains/financialOperator/runOps.js"; import type * as domains_financialOperator_sandbox from "../domains/financialOperator/sandbox.js"; import type * as domains_financialOperator_types from "../domains/financialOperator/types.js"; @@ -1994,8 +1999,13 @@ declare const fullApi: ApiFromModules<{ "domains/evaluation/workbenchQueries": typeof domains_evaluation_workbenchQueries; "domains/financialOperator/attFixture": typeof domains_financialOperator_attFixture; 
"domains/financialOperator/extractors": typeof domains_financialOperator_extractors; + "domains/financialOperator/fixtures/covenantFixture": typeof domains_financialOperator_fixtures_covenantFixture; + "domains/financialOperator/fixtures/crmFixture": typeof domains_financialOperator_fixtures_crmFixture; + "domains/financialOperator/fixtures/varianceFixture": typeof domains_financialOperator_fixtures_varianceFixture; "domains/financialOperator/index": typeof domains_financialOperator_index; "domains/financialOperator/orchestrator": typeof domains_financialOperator_orchestrator; + "domains/financialOperator/orchestratorExamples": typeof domains_financialOperator_orchestratorExamples; + "domains/financialOperator/realExtractors": typeof domains_financialOperator_realExtractors; "domains/financialOperator/runOps": typeof domains_financialOperator_runOps; "domains/financialOperator/sandbox": typeof domains_financialOperator_sandbox; "domains/financialOperator/types": typeof domains_financialOperator_types; diff --git a/convex/domains/financialOperator/fixtures/covenantFixture.ts b/convex/domains/financialOperator/fixtures/covenantFixture.ts new file mode 100644 index 000000000..eec33ecc4 --- /dev/null +++ b/convex/domains/financialOperator/fixtures/covenantFixture.ts @@ -0,0 +1,74 @@ +/** + * Credit-agreement covenant compliance fixture — Example C. + * + * Demo numbers chosen to land inside the leverage covenant (3.55x vs + * 4.25x cap) so the deterministic sandbox produces a clean "compliant" + * verdict. Real implementations should hit a covenant-extractor backed + * by an LLM section reader. 
+ */ + +export const COVENANT_FIXTURE = { + meta: { + borrower: "Demo Borrower Inc.", + creditAgreementFile: "credit_agreement.pdf", + financialsFile: "q4_financials.xlsx", + }, + + sections: [ + { + page: 42, + label: "Financial Covenants", + match: "Maximum Total Net Leverage Ratio", + }, + { + page: 87, + label: "Definitions — Consolidated EBITDA", + match: "Consolidated EBITDA", + }, + { + page: 91, + label: "Definitions — Total Net Debt", + match: "Total Net Debt", + }, + ], + + covenant: { + name: "Maximum Total Net Leverage Ratio", + threshold: 4.25, + ratioType: "Total Net Debt / Consolidated EBITDA", + numeratorDefinition: "Total Net Debt = total debt - unrestricted cash", + denominatorDefinition: "Consolidated EBITDA (with permitted add-backs)", + }, + + // Inputs in dollars (full units, not millions, so the formatter shows $X.XM, e.g. $210.0M); (840M - 95M) / 210M ≈ 3.55x net leverage + inputs: { + totalDebt: { + value: 840_000_000, + sourceRef: "Q4 Financials — Debt Schedule!B14", + confidence: 0.96, + }, + cash: { + value: 95_000_000, + sourceRef: "Q4 Financials — Balance Sheet!B21", + confidence: 0.98, + }, + adjustedEBITDA: { + value: 210_000_000, + sourceRef: "Q4 Financials — Adjusted EBITDA!B37", + confidence: 0.88, + }, + }, + + excerpts: [ + { + sourceRef: "Credit Agreement p.42", + excerpt: + "Borrower shall not permit the Total Net Leverage Ratio at the end of any fiscal quarter to exceed 4.25 to 1.00.", + }, + { + sourceRef: "Credit Agreement p.87", + excerpt: + "\"Consolidated EBITDA\" means, for any period, the sum of net income plus interest expense, taxes, depreciation, amortization, and certain permitted non-recurring add-backs.", + }, + ], +} as const; diff --git a/convex/domains/financialOperator/fixtures/crmFixture.ts b/convex/domains/financialOperator/fixtures/crmFixture.ts new file mode 100644 index 000000000..c9aa5842f --- /dev/null +++ b/convex/domains/financialOperator/fixtures/crmFixture.ts @@ -0,0 +1,65 @@ +/** + * CRM cleanup fixture — Example B in the operator-console spec. 
+ * + * Stand-in for a real spreadsheet+PDF dedup pipeline. Stable, citable, + * deterministic. Replace `extractCrmInputs` with a real reader once the + * spreadsheet/PDF parsing surface is wired. + */ + +export const CRM_FIXTURE = { + meta: { + files: [ + { name: "prospects.xlsx", kind: "xlsx" }, + { name: "investor_packet_1.pdf", kind: "pdf" }, + { name: "investor_packet_2.pdf", kind: "pdf" }, + { name: "investor_packet_3.pdf", kind: "pdf" }, + ], + spreadsheetSheets: ["Raw Leads", "Notes", "Duplicates"], + pdfPages: 86, + }, + + profile: { + columns: ["Company", "Website", "Notes", "Partner", "Status"], + rowCount: 387, // same value as dedup.originalRows + missingFields: ["Sector", "HQ", "Last Round", "Source"], + }, + + entityExtraction: { + companiesFound: 142, + fundingEventsFound: 37, + locationsFound: 91, + }, + + dedup: { + originalRows: 387, + dedupedRows: 312, // reconciles with enrichment (241, 31 and 40 sum to 312) and with csvValidation (295 and 17 sum to 312) + mergedExamples: [ + { + canonical: "Acme Bio", + merged: ["AcmeBio", "Acme Bio Inc.", "acmebio.com"], + }, + { + canonical: "Northstar Robotics", + merged: ["Northstar Robotics LLC", "northstar-robotics.io"], + }, + ], + }, + + enrichment: { + recordsUpdated: 241, + lowConfidenceRecords: 31, + unresolvedRecords: 40, + sampleEnriched: [ + { company: "Acme Bio", sector: "Biotech", hq: "Boston, MA", lastRound: "Series B" }, + { company: "Northstar Robotics", sector: "Industrial AI", hq: "Pittsburgh, PA", lastRound: "Series A" }, + { company: "HelioGrid", sector: "Energy", hq: "Austin, TX", lastRound: "Seed" }, + ], + }, + + csvValidation: { + schema: ["Company", "Website", "Sector", "HQ", "Last Round", "Source", "Confidence", "Owner"], + validRows: 295, + warningRows: 17, // NOTE(review): differs from enrichment.lowConfidenceRecords (31) although the orchestrator words these warnings as low-confidence enrichment — confirm intended + failedRows: 0, + }, +} as const; diff --git a/convex/domains/financialOperator/fixtures/varianceFixture.ts b/convex/domains/financialOperator/fixtures/varianceFixture.ts new file mode 100644 index 000000000..e00fb07ab --- /dev/null +++ b/convex/domains/financialOperator/fixtures/varianceFixture.ts @@ -0,0 +1,72 @@ +/** + * Monthly variance-analysis fixture — 
Example D. + * + * Includes a diverse top/bottom mix so the variance sandbox renders + * both signed-positive and signed-negative formatting paths. + */ + +export interface VarianceLine { + account: string; // account label, e.g. "Subscription Revenue" + category: "revenue" | "cost_of_revenue" | "opex" | "infrastructure" | "other"; // the fixture below only uses revenue, infrastructure and opex + actual: number; // dollars + budget: number; // dollars + driverNote?: string; // optional free-text explanation of the variance +} + +export const VARIANCE_FIXTURE = { + period: "March 2026", + files: [ + { name: "march_actuals.xlsx", kind: "xlsx" }, + { name: "fy_budget.xlsx", kind: "xlsx" }, + ], + alignment: { + matchedAccounts: 124, + unmatchedActuals: 3, + unmatchedBudget: 5, + }, + // Lines in dollars; formatter divides by 1M for display. + lines: [ + { + account: "Subscription Revenue", + category: "revenue", + actual: 4_200_000, + budget: 3_700_000, + driverNote: "Enterprise renewals closed earlier than budgeted.", + }, + { + account: "Services Revenue", + category: "revenue", + actual: 850_000, + budget: 900_000, + driverNote: "One implementation slipped to April.", + }, + { + account: "Cloud Infrastructure", + category: "infrastructure", + actual: 980_000, + budget: 720_000, + driverNote: "GPU inference costs rose with higher agent run volume.", + }, + { + account: "Sales Headcount", + category: "opex", + actual: 1_100_000, + budget: 1_150_000, + driverNote: "Two requisitions delayed by one month.", + }, + { + account: "Engineering Headcount", + category: "opex", + actual: 1_400_000, + budget: 1_350_000, + driverNote: "On plan; small-bonus accrual.", + }, + { + account: "Marketing", + category: "opex", + actual: 320_000, + budget: 400_000, + driverNote: "Demand-gen campaign delayed.", + }, + ] satisfies VarianceLine[], +} as const; diff --git a/convex/domains/financialOperator/index.ts b/convex/domains/financialOperator/index.ts index 3a6ee2153..926f202ed 100644 --- a/convex/domains/financialOperator/index.ts +++ b/convex/domains/financialOperator/index.ts @@ -10,3 +10,5 @@ export * as runOps from "./runOps"; export * as orchestrator from 
"./orchestrator"; +export * as orchestratorExamples from "./orchestratorExamples"; +export * as realExtractors from "./realExtractors"; diff --git a/convex/domains/financialOperator/orchestratorExamples.ts b/convex/domains/financialOperator/orchestratorExamples.ts new file mode 100644 index 000000000..cca4c84d2 --- /dev/null +++ b/convex/domains/financialOperator/orchestratorExamples.ts @@ -0,0 +1,871 @@ +/** + * Orchestrator actions for Examples B, C, D from the operator-console spec: + * + * B — CRM cleanup (financial_data_cleanup) + * C — Covenant compliance (covenant_compliance) + * D — Variance analysis (variance_analysis) + * + * All three use the same shared backbone (runOps + sandbox + validators) + * established in orchestrator.ts and types.ts. Math runs in JS sandbox, + * sources ride along on every extracted field, validations surface + * findings verbatim — same invariants as the AT&T example. + */ + +import { v } from "convex/values"; +import { action } from "../../_generated/server"; +import { api } from "../../_generated/api"; +import type { Id } from "../../_generated/dataModel"; + +import { + computeLeverageRatio, + checkCompliance, + computeVariance, +} from "./sandbox"; +import { validateExtraction, type FieldSpec } from "./validators"; +import { CRM_FIXTURE } from "./fixtures/crmFixture"; +import { COVENANT_FIXTURE } from "./fixtures/covenantFixture"; +import { VARIANCE_FIXTURE } from "./fixtures/varianceFixture"; +import type { + ApprovalRequestPayload, + ArtifactPayload, + CalculationPayload, + EvidencePayload, + ExtractedField, + ExtractionPayload, + ResultPayload, + RunBriefPayload, + ToolCallPayload, + ValidationPayload, +} from "./types"; + +const STEP_PACING_MS = 350; // milliseconds; the demo handlers await sleep(STEP_PACING_MS) before each step appended after the plan +const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); // promise-based delay; resolves after ms milliseconds + +function classifyStatus( + confidence: number, +): ExtractedField["status"] { + if (confidence >= 0.9) return "verified"; // both thresholds are inclusive + if (confidence >= 0.5) return "needs_review"; + return "unresolved"; // also reached for NaN, since both comparisons above are false +} + +/* 
================================================================== */ +/* EXAMPLE B — CRM CLEANUP */ +/* ================================================================== */ + +export const runCrmCleanupDemo = action({ // Example B demo: replays the CRM cleanup run from CRM_FIXTURE, appending one step per stage and returning the new runId + args: { + userId: v.optional(v.id("users")), + threadId: v.optional(v.string()), + }, + handler: async (ctx, args): Promise<{ runId: Id<"financialOperatorRuns"> }> => { + const goal = + "Clean and dedupe a 387-row prospect list, enrich with sector/HQ/last round, and export CRM-ready CSV."; + + const runId: Id<"financialOperatorRuns"> = await ctx.runMutation( + api.domains.financialOperator.runOps.createRun, + { + userId: args.userId, + threadId: args.threadId, + taskType: "financial_data_cleanup", + goal, + files: CRM_FIXTURE.meta.files.map((f) => ({ name: f.name, kind: f.kind })), + totalSteps: 10, // one per appendStep call in this handler (steps 1 through 10, plan to result); was 9 + }, + ); + + // 1. Plan + await ctx.runMutation(api.domains.financialOperator.runOps.updateRunStatus, { + runId, + status: "planning", + }); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "run_brief", + status: "complete", + title: "Plan", + payload: { + goal, + numberedSteps: [ + "Inspect files (1 spreadsheet + 3 PDFs)", + "Profile spreadsheet schema", + "Extract company mentions from PDFs", + "Resolve duplicates", + "Enrich missing fields", + "Validate CRM CSV schema", + "Export CSV", + ], + estimatedDurationMs: 5000, + outputFormat: "CRM-ready CSV + low-confidence review queue", + } satisfies RunBriefPayload, + }); + + await ctx.runMutation(api.domains.financialOperator.runOps.updateRunStatus, { + runId, + status: "running", + }); + + // 2. 
Inspect files + await sleep(STEP_PACING_MS); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "tool_call", + status: "complete", + title: "Inspect uploaded files", + payload: { + toolName: "files.inspect", + inputSummary: "1 spreadsheet (3 sheets) + 3 investor PDFs (86 pages)", + outputSummary: `Sheets: ${CRM_FIXTURE.meta.spreadsheetSheets.join(", ")} · ${CRM_FIXTURE.entityExtraction.companiesFound} company mentions detected`, + } satisfies ToolCallPayload, + }); + + // 3. Profile spreadsheet + await sleep(STEP_PACING_MS); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "tool_call", + status: "complete", + title: "Profile spreadsheet schema", + payload: { + toolName: "spreadsheet.profile", + inputSummary: "prospects.xlsx → Raw Leads sheet", + outputSummary: `${CRM_FIXTURE.profile.rowCount} rows; missing fields: ${CRM_FIXTURE.profile.missingFields.join(", ")}`, + } satisfies ToolCallPayload, + }); + + // 4. Extract entities from PDFs + await sleep(STEP_PACING_MS); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "tool_call", + status: "complete", + title: "Extract company mentions from PDFs", + payload: { + toolName: "document.extract_entities", + inputSummary: "3 investor packets (86 pages)", + outputSummary: `${CRM_FIXTURE.entityExtraction.companiesFound} companies, ${CRM_FIXTURE.entityExtraction.fundingEventsFound} funding events, ${CRM_FIXTURE.entityExtraction.locationsFound} locations`, + } satisfies ToolCallPayload, + }); + + // 5. 
Dedup → extraction card showing merge groups + await sleep(STEP_PACING_MS); + const dedupFields: ExtractedField[] = CRM_FIXTURE.dedup.mergedExamples.map( + (m) => ({ + fieldName: m.canonical, + value: `${m.merged.length} variants merged`, + unit: "rows", + sourceRef: `dedup pass — ${m.merged.join(" / ")}`, + confidence: 0.93, + status: classifyStatus(0.93), + }), + ); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "extraction", + status: "complete", + title: "Dedup merge groups", + payload: { + schemaName: "company_dedup_groups", + fields: dedupFields, + totalFound: CRM_FIXTURE.dedup.mergedExamples.length, + needsReviewCount: 0, + } satisfies ExtractionPayload, + }); + + // 6. Calculation: dedup ratio + await sleep(STEP_PACING_MS); + const dedupRatio = + (CRM_FIXTURE.dedup.originalRows - CRM_FIXTURE.dedup.dedupedRows) / + CRM_FIXTURE.dedup.originalRows; + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "calculation", + status: "complete", + title: "Dedup ratio", + payload: { + formulaLabel: "Duplicate-row reduction", + formulaText: + "dedup_ratio = (original_rows - deduped_rows) / original_rows", + inputs: { + originalRows: CRM_FIXTURE.dedup.originalRows, + dedupedRows: CRM_FIXTURE.dedup.dedupedRows, + }, + outputs: { dedupRatio, mergedRows: CRM_FIXTURE.dedup.originalRows - CRM_FIXTURE.dedup.dedupedRows }, + formattedOutputs: { + dedupRatio: `${(dedupRatio * 100).toFixed(1)}%`, + mergedRows: `${CRM_FIXTURE.dedup.originalRows - CRM_FIXTURE.dedup.dedupedRows} rows`, + }, + sandboxKind: "js_pure", + computedAt: Date.now(), + } satisfies CalculationPayload, + }); + + // 7. 
Enrichment → extraction card + await sleep(STEP_PACING_MS); + const enrichmentFields: ExtractedField[] = + CRM_FIXTURE.enrichment.sampleEnriched.map((e) => ({ + fieldName: e.company, + value: `${e.sector} · ${e.hq} · ${e.lastRound}`, + unit: "company_profile", + sourceRef: "company.enrich_profile", + confidence: 0.91, + status: classifyStatus(0.91), + })); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "extraction", + status: "needs_review", + title: "Enriched company profiles", + payload: { + schemaName: "crm_enrichment", + fields: enrichmentFields, + totalFound: CRM_FIXTURE.enrichment.recordsUpdated, + needsReviewCount: + CRM_FIXTURE.enrichment.lowConfidenceRecords + + CRM_FIXTURE.enrichment.unresolvedRecords, + } satisfies ExtractionPayload, + }); + + // 8. Validation + await sleep(STEP_PACING_MS); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "validation", + status: "complete", + title: "CRM CSV schema", + payload: { + schemaPassed: CRM_FIXTURE.csvValidation.failedRows === 0, + unitsNormalized: true, + findings: [ + { + level: "info", + message: `${CRM_FIXTURE.csvValidation.validRows} rows validated cleanly`, + }, + { + level: "warning", + message: `${CRM_FIXTURE.csvValidation.warningRows} rows exportable with caveats (low-confidence enrichment)`, + }, + ], + checksRun: + CRM_FIXTURE.csvValidation.validRows + + CRM_FIXTURE.csvValidation.warningRows + + CRM_FIXTURE.csvValidation.failedRows, + checksPassed: + CRM_FIXTURE.csvValidation.validRows + + CRM_FIXTURE.csvValidation.warningRows, + } satisfies ValidationPayload, + }); + + // 9. 
Artifact (CSV) + await sleep(STEP_PACING_MS); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "artifact", + status: "complete", + title: "CRM-ready CSV", + payload: { + kind: "csv", + label: "crm_ready_company_list.csv", + description: `${CRM_FIXTURE.dedup.dedupedRows} unique rows after dedup; ${CRM_FIXTURE.enrichment.recordsUpdated} enriched.`, + diffSummary: [ + `Reduced from ${CRM_FIXTURE.dedup.originalRows} → ${CRM_FIXTURE.dedup.dedupedRows} rows`, + `Enriched: ${CRM_FIXTURE.enrichment.recordsUpdated}`, + `Needs review: ${CRM_FIXTURE.enrichment.lowConfidenceRecords + CRM_FIXTURE.enrichment.unresolvedRecords}`, + ], + } satisfies ArtifactPayload, + }); + + // 10. Result with low-confidence escape hatch + await sleep(STEP_PACING_MS); + const result: ResultPayload = { + headline: `${CRM_FIXTURE.dedup.dedupedRows} CRM-ready rows; ${CRM_FIXTURE.enrichment.lowConfidenceRecords + CRM_FIXTURE.enrichment.unresolvedRecords} flagged for review.`, + prose: + "Dedup merged duplicate company variants by domain + name similarity. Enrichment added sector, HQ, and last funding round per record. 
The low-confidence subset is exportable but should be sampled before being trusted in outreach.", + metrics: { + "Final rows": String(CRM_FIXTURE.dedup.dedupedRows), + "Dedup reduction": `${(dedupRatio * 100).toFixed(1)}%`, + "Needs review": String( + CRM_FIXTURE.enrichment.lowConfidenceRecords + + CRM_FIXTURE.enrichment.unresolvedRecords, + ), + }, + openIssues: [ + `${CRM_FIXTURE.enrichment.unresolvedRecords} companies could not be enriched and need manual investigation.`, + ], + nextActions: [ + { id: "download_csv", label: "Download CSV", kind: "export" }, + { id: "review_low_conf", label: "Review low-confidence rows", kind: "open" }, + { id: "save_to_crm", label: "Save to CRM", kind: "approve" }, + ], + }; + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "result", + status: "complete", + title: "Result", + payload: result, + }); + await ctx.runMutation(api.domains.financialOperator.runOps.updateRunStatus, { + runId, + status: "completed", + finalSummary: result.headline, + }); + + return { runId }; + }, +}); + +/* ================================================================== */ +/* EXAMPLE C — COVENANT COMPLIANCE */ +/* ================================================================== */ + +const COVENANT_INPUT_SPEC: FieldSpec[] = [ + { fieldName: "Total Debt", expectedUnit: "USD", required: true, sanityRange: { min: 0, max: 1e12 } }, + { fieldName: "Cash", expectedUnit: "USD", required: true, sanityRange: { min: 0, max: 1e12 } }, + { fieldName: "Adjusted EBITDA", expectedUnit: "USD", required: true, sanityRange: { min: 1, max: 1e12 } }, +]; + +export const runCovenantComplianceDemo = action({ + args: { + userId: v.optional(v.id("users")), + threadId: v.optional(v.string()), + }, + handler: async (ctx, args): Promise<{ runId: Id<"financialOperatorRuns"> }> => { + const goal = `Check ${COVENANT_FIXTURE.meta.borrower} against the leverage covenant in the credit agreement.`; + + const runId: 
Id<"financialOperatorRuns"> = await ctx.runMutation( + api.domains.financialOperator.runOps.createRun, + { + userId: args.userId, + threadId: args.threadId, + taskType: "covenant_compliance", + goal, + files: [ + { name: COVENANT_FIXTURE.meta.creditAgreementFile, kind: "pdf" }, + { name: COVENANT_FIXTURE.meta.financialsFile, kind: "xlsx" }, + ], + totalSteps: 9, + }, + ); + + // 1. Plan + await ctx.runMutation(api.domains.financialOperator.runOps.updateRunStatus, { + runId, + status: "planning", + }); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "run_brief", + status: "complete", + title: "Plan", + payload: { + goal, + numberedSteps: [ + "Locate covenant + definition sections in credit agreement", + "Extract covenant threshold and ratio definition", + "Extract financial inputs from Q4 financials", + "Validate inputs against covenant definitions", + "Compute leverage ratio in sandbox", + "Check compliance against threshold", + "Produce reviewer-ready compliance memo", + ], + estimatedDurationMs: 5500, + outputFormat: "Compliance memo with verdict + reviewer notes", + } satisfies RunBriefPayload, + }); + + await ctx.runMutation(api.domains.financialOperator.runOps.updateRunStatus, { + runId, + status: "running", + }); + + // 2. Locate sections + await sleep(STEP_PACING_MS); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "tool_call", + status: "complete", + title: "Locate covenant sections", + payload: { + toolName: "document.locate_sections", + inputSummary: `${COVENANT_FIXTURE.meta.creditAgreementFile} → leverage covenant + EBITDA + debt definitions`, + outputSummary: COVENANT_FIXTURE.sections + .map((s) => `${s.label} (p.${s.page})`) + .join("; "), + } satisfies ToolCallPayload, + }); + + // 3. 
Extract covenant terms + await sleep(STEP_PACING_MS); + const covenantTermsFields: ExtractedField[] = [ + { + fieldName: "Covenant name", + value: COVENANT_FIXTURE.covenant.name, + unit: "string", + sourceRef: "Credit Agreement p.42", + confidence: 0.97, + status: "verified", + }, + { + fieldName: "Threshold", + value: COVENANT_FIXTURE.covenant.threshold, + unit: "ratio_x", + sourceRef: "Credit Agreement p.42", + confidence: 0.97, + status: "verified", + }, + { + fieldName: "Ratio definition", + value: COVENANT_FIXTURE.covenant.ratioType, + unit: "string", + sourceRef: "Credit Agreement p.87 + p.91", + confidence: 0.93, + status: "verified", + }, + ]; + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "extraction", + status: "complete", + title: "Covenant terms", + payload: { + schemaName: "covenant_terms", + fields: covenantTermsFields, + totalFound: covenantTermsFields.length, + needsReviewCount: 0, + } satisfies ExtractionPayload, + }); + + // 4. 
Extract financial inputs + await sleep(STEP_PACING_MS); + const inputFields: ExtractedField[] = [ + { + fieldName: "Total Debt", + value: COVENANT_FIXTURE.inputs.totalDebt.value, + unit: "USD", + sourceRef: COVENANT_FIXTURE.inputs.totalDebt.sourceRef, + confidence: COVENANT_FIXTURE.inputs.totalDebt.confidence, + status: classifyStatus(COVENANT_FIXTURE.inputs.totalDebt.confidence), + }, + { + fieldName: "Cash", + value: COVENANT_FIXTURE.inputs.cash.value, + unit: "USD", + sourceRef: COVENANT_FIXTURE.inputs.cash.sourceRef, + confidence: COVENANT_FIXTURE.inputs.cash.confidence, + status: classifyStatus(COVENANT_FIXTURE.inputs.cash.confidence), + }, + { + fieldName: "Adjusted EBITDA", + value: COVENANT_FIXTURE.inputs.adjustedEBITDA.value, + unit: "USD", + sourceRef: COVENANT_FIXTURE.inputs.adjustedEBITDA.sourceRef, + confidence: COVENANT_FIXTURE.inputs.adjustedEBITDA.confidence, + status: classifyStatus(COVENANT_FIXTURE.inputs.adjustedEBITDA.confidence), + reviewNote: + "EBITDA add-backs should be reviewed against the credit agreement's permitted-add-back schedule.", + }, + ]; + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "extraction", + status: "needs_review", // hard-coded; not derived from needsReviewCount computed below + title: "Financial inputs", + payload: { + schemaName: "covenant_inputs", + fields: inputFields, + totalFound: inputFields.length, + needsReviewCount: inputFields.filter((f) => f.status === "needs_review").length, + } satisfies ExtractionPayload, + }); + + // 5. Validation + await sleep(STEP_PACING_MS); + const validation = validateExtraction({ + fields: inputFields, + spec: COVENANT_INPUT_SPEC, + }); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "validation", + status: validation.schemaPassed ? "complete" : "error", // NOTE(review): a failed validation is only recorded on this step — nothing in this handler stops the later compute, artifact and result steps + title: "Validate covenant inputs", + payload: validation satisfies ValidationPayload, + }); + + // 6. 
Compute leverage + compliance check + await sleep(STEP_PACING_MS); + const lev = computeLeverageRatio({ + totalDebt: COVENANT_FIXTURE.inputs.totalDebt.value, + cash: COVENANT_FIXTURE.inputs.cash.value, + ebitda: COVENANT_FIXTURE.inputs.adjustedEBITDA.value, + }); + const compliance = checkCompliance({ + observedRatio: lev.outputs.ratio, + threshold: COVENANT_FIXTURE.covenant.threshold, + ratioName: "net_leverage", + }); + const calcPayload: CalculationPayload = { + formulaLabel: "Net leverage ratio + compliance gate", + formulaText: [lev.formulaText, compliance.formulaText].join("\n"), + inputs: { + totalDebt: COVENANT_FIXTURE.inputs.totalDebt.value, + cash: COVENANT_FIXTURE.inputs.cash.value, + ebitda: COVENANT_FIXTURE.inputs.adjustedEBITDA.value, + threshold: COVENANT_FIXTURE.covenant.threshold, + }, + outputs: { + netDebt: lev.outputs.netDebt, + ratio: lev.outputs.ratio, + threshold: COVENANT_FIXTURE.covenant.threshold, + compliant: compliance.outputs.compliant, + headroom: compliance.outputs.headroom, + }, + formattedOutputs: { + netDebt: lev.formattedOutputs.netDebt, + ratio: lev.formattedOutputs.ratio, + threshold: `${COVENANT_FIXTURE.covenant.threshold.toFixed(2)}x`, + compliant: compliance.formattedOutputs.compliant, + headroom: compliance.formattedOutputs.headroom, + }, + sandboxKind: "js_pure", + computedAt: Date.now(), + }; + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "calculation", + status: "complete", + title: "Sandbox compute + compliance gate", + payload: calcPayload, + }); + + // 7. Evidence + await sleep(STEP_PACING_MS); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "evidence", + status: "complete", + title: "Source anchors", + payload: { + anchors: COVENANT_FIXTURE.excerpts.map((e) => ({ + label: COVENANT_FIXTURE.sections.find((s) => + e.sourceRef.includes(`p.${s.page}`), + )?.label ?? 
"Source", + sourceRef: e.sourceRef, + excerpt: e.excerpt, + })), + totalSources: COVENANT_FIXTURE.excerpts.length, + } satisfies EvidencePayload, + }); + + // 8. Artifact + await sleep(STEP_PACING_MS); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "artifact", + status: "complete", + title: "Compliance memo", + payload: { + kind: "memo", + label: `${COVENANT_FIXTURE.meta.borrower} — Q4 Leverage Covenant Review`, + description: + "Memo summarizes compliance verdict, computed ratio, threshold, and review items.", + diffSummary: [ + `Verdict: ${compliance.outputs.compliant === 1 ? "Compliant" : "Breach"}`, + `Observed ratio: ${lev.formattedOutputs.ratio} vs ${COVENANT_FIXTURE.covenant.threshold.toFixed(2)}x cap`, + "Reviewer note: confirm EBITDA add-backs", + ], + } satisfies ArtifactPayload, + }); + + // 9. Result OR approval + await sleep(STEP_PACING_MS); + const isCompliant = compliance.outputs.compliant === 1; + if (isCompliant) { + const result: ResultPayload = { + headline: `${COVENANT_FIXTURE.meta.borrower} compliant: ${lev.formattedOutputs.ratio} vs ${COVENANT_FIXTURE.covenant.threshold.toFixed(2)}x cap (${compliance.formattedOutputs.headroom} headroom).`, + prose: + "Net leverage was computed deterministically from total debt minus unrestricted cash, divided by adjusted EBITDA. 
EBITDA add-backs require human confirmation before this verdict is locked into the lender package.", + metrics: { + "Net debt": lev.formattedOutputs.netDebt, + "Adjusted EBITDA": `$${(COVENANT_FIXTURE.inputs.adjustedEBITDA.value / 1_000_000).toFixed(1)}M`, + "Leverage ratio": lev.formattedOutputs.ratio, + "Covenant threshold": `${COVENANT_FIXTURE.covenant.threshold.toFixed(2)}x`, + "Verdict": "Compliant", + }, + openIssues: ["EBITDA add-backs require human confirmation before lender sign-off."], + nextActions: [ + { id: "open_memo", label: "Open memo", kind: "open" }, + { id: "review_addbacks", label: "Review add-backs", kind: "open" }, + { id: "export_lender_pack", label: "Export lender summary", kind: "export" }, + ], + }; + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "result", + status: "complete", + title: "Result — compliant", + payload: result, + }); + await ctx.runMutation(api.domains.financialOperator.runOps.updateRunStatus, { + runId, + status: "completed", + finalSummary: result.headline, + }); + } else { + // Breach → ask for explicit reviewer approval before sending notice. + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "approval_request", + status: "pending", + title: "Reviewer approval — covenant breach", + payload: { + question: "Approve formal breach notice to lender?", + context: `Net leverage ${lev.formattedOutputs.ratio} exceeds ${COVENANT_FIXTURE.covenant.threshold.toFixed(2)}x cap.`, + options: [ + { id: "approve", label: "Approve breach notice", description: "Generate lender notification." }, + { id: "narrow", label: "Re-extract EBITDA add-backs", description: "Tighter pass over add-back schedule." }, + { id: "reject", label: "Hold", description: "Do not send notice; leave run as needs_review." 
}, + ], + consequences: { + approve: "Generates lender notification + flags portfolio.", + narrow: "Re-runs EBITDA extraction; recompute may bring ratio under cap.", + reject: "Run held in awaiting-approval state.", + }, + } satisfies ApprovalRequestPayload, + }); + await ctx.runMutation(api.domains.financialOperator.runOps.updateRunStatus, { + runId, + status: "awaiting_approval", + }); + } + + return { runId }; + }, +}); + +/* ================================================================== */ +/* EXAMPLE D — VARIANCE ANALYSIS */ +/* ================================================================== */ + +export const runVarianceAnalysisDemo = action({ + args: { + userId: v.optional(v.id("users")), + threadId: v.optional(v.string()), + }, + handler: async (ctx, args): Promise<{ runId: Id<"financialOperatorRuns"> }> => { + const goal = `Compare ${VARIANCE_FIXTURE.period} actuals vs budget; surface top variances; draft a CFO summary.`; + + const runId: Id<"financialOperatorRuns"> = await ctx.runMutation( + api.domains.financialOperator.runOps.createRun, + { + userId: args.userId, + threadId: args.threadId, + taskType: "variance_analysis", + goal, + files: VARIANCE_FIXTURE.files.map((f) => ({ name: f.name, kind: f.kind })), + totalSteps: 8, + }, + ); + + // 1. 
Plan + await ctx.runMutation(api.domains.financialOperator.runOps.updateRunStatus, { + runId, + status: "planning", + }); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "run_brief", + status: "complete", + title: "Plan", + payload: { + goal, + numberedSteps: [ + "Inspect actuals + budget files", + "Align chart of accounts", + "Compute variance per account in sandbox", + "Surface top favorable and unfavorable lines", + "Pull qualitative driver context", + "Draft CFO summary", + ], + estimatedDurationMs: 4500, + outputFormat: "CFO-style variance memo", + } satisfies RunBriefPayload, + }); + + await ctx.runMutation(api.domains.financialOperator.runOps.updateRunStatus, { + runId, + status: "running", + }); + + // 2. Inspect + await sleep(STEP_PACING_MS); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "tool_call", + status: "complete", + title: "Inspect spreadsheets", + payload: { + toolName: "spreadsheet.inspect", + inputSummary: VARIANCE_FIXTURE.files.map((f) => f.name).join(", "), + outputSummary: `Periods: Jan/Feb/${VARIANCE_FIXTURE.period}; currency USD`, + } satisfies ToolCallPayload, + }); + + // 3. Align accounts + await sleep(STEP_PACING_MS); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "tool_call", + status: "complete", + title: "Align chart of accounts", + payload: { + toolName: "finance.align_accounts", + inputSummary: "Match by accountName + accountCode", + outputSummary: `Matched ${VARIANCE_FIXTURE.alignment.matchedAccounts}; ${VARIANCE_FIXTURE.alignment.unmatchedActuals + VARIANCE_FIXTURE.alignment.unmatchedBudget} need mapping review`, + } satisfies ToolCallPayload, + }); + + // 4. 
Compute variance per line in sandbox + await sleep(STEP_PACING_MS); + const variances = VARIANCE_FIXTURE.lines.map((line) => { + const r = computeVariance({ actual: line.actual, budget: line.budget }); + return { line, variance: r }; + }); + const topFavorable = variances + .filter((v) => v.variance.outputs.variance > 0 && v.line.category === "revenue") + .sort((a, b) => b.variance.outputs.variance - a.variance.outputs.variance)[0]; + const topUnfavorable = variances + .filter((v) => v.variance.outputs.variance > 0 && v.line.category !== "revenue") + .sort((a, b) => b.variance.outputs.variance - a.variance.outputs.variance)[0]; + + const calcPayload: CalculationPayload = { + formulaLabel: "Per-account variance", + formulaText: + "for each account: variance = actual - budget; variance_pct = variance / budget", + inputs: { lineCount: VARIANCE_FIXTURE.lines.length }, + outputs: { + topFavorableAmount: topFavorable?.variance.outputs.variance ?? 0, + topUnfavorableAmount: topUnfavorable?.variance.outputs.variance ?? 0, + }, + formattedOutputs: { + topFavorableAmount: topFavorable?.variance.formattedOutputs.variance ?? "$0", + topUnfavorableAmount: topUnfavorable?.variance.formattedOutputs.variance ?? "$0", + }, + sandboxKind: "js_pure", + computedAt: Date.now(), + }; + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "calculation", + status: "complete", + title: "Variance computation", + payload: calcPayload, + }); + + // 5. 
Top-variances extraction + await sleep(STEP_PACING_MS); + const topLines = [...variances] + .sort( + (a, b) => + Math.abs(b.variance.outputs.variance) - Math.abs(a.variance.outputs.variance), + ) + .slice(0, 4); + const varianceFields: ExtractedField[] = topLines.map((entry) => ({ + fieldName: entry.line.account, + value: entry.variance.formattedOutputs.variance, + unit: "USD_signed", + sourceRef: `march_actuals.xlsx + fy_budget.xlsx (${entry.line.category})`, + confidence: 0.97, + status: "verified", + reviewNote: entry.line.driverNote, + })); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "extraction", + status: "complete", + title: "Top variance lines", + payload: { + schemaName: "top_variance_lines", + fields: varianceFields, + totalFound: varianceFields.length, + needsReviewCount: 0, + } satisfies ExtractionPayload, + }); + + // 6. Driver search (tool_call) + await sleep(STEP_PACING_MS); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "tool_call", + status: "complete", + title: "Pull driver context from notes", + payload: { + toolName: "notes.search_context", + inputSummary: "Search board_notes + monthly_close_notes for top variance accounts", + outputSummary: topLines + .map((l) => `${l.line.account}: ${l.line.driverNote}`) + .join(" · "), + } satisfies ToolCallPayload, + }); + + // 7. Artifact (CFO summary) + await sleep(STEP_PACING_MS); + const summaryProse = topFavorable && topUnfavorable + ? `${VARIANCE_FIXTURE.period} revenue finished ahead of budget, primarily driven by ${topFavorable.line.driverNote?.toLowerCase() ?? "favorable revenue mix"}. The main expense pressure came from ${topUnfavorable.line.account.toLowerCase()}, where ${topUnfavorable.line.driverNote?.toLowerCase() ?? "spend exceeded plan"}. 
Net impact remains favorable; infrastructure usage should be monitored before scaling additional workflows.` + : "Variance memo drafted from current actuals."; + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "artifact", + status: "complete", + title: "CFO variance memo", + payload: { + kind: "memo", + label: `${VARIANCE_FIXTURE.period} Variance Memo`, + description: summaryProse, + diffSummary: [ + `Top favorable: ${topFavorable?.line.account ?? "—"} ${topFavorable?.variance.formattedOutputs.variance ?? ""}`, + `Top unfavorable: ${topUnfavorable?.line.account ?? "—"} ${topUnfavorable?.variance.formattedOutputs.variance ?? ""}`, + `Unmatched accounts to map: ${VARIANCE_FIXTURE.alignment.unmatchedActuals + VARIANCE_FIXTURE.alignment.unmatchedBudget}`, + ], + } satisfies ArtifactPayload, + }); + + // 8. Result + await sleep(STEP_PACING_MS); + const result: ResultPayload = { + headline: `${VARIANCE_FIXTURE.period} variance: revenue ${topFavorable?.variance.formattedOutputs.variance ?? "+$0"}; biggest cost overrun ${topUnfavorable?.line.account ?? "n/a"} (${topUnfavorable?.variance.formattedOutputs.variance ?? ""}).`, + prose: summaryProse, + metrics: { + "Top favorable": topFavorable?.variance.formattedOutputs.variance ?? "$0", + "Top favorable %": topFavorable?.variance.formattedOutputs.variancePct ?? "0%", + "Top unfavorable": topUnfavorable?.variance.formattedOutputs.variance ?? "$0", + "Top unfavorable %": topUnfavorable?.variance.formattedOutputs.variancePct ?? "0%", + }, + openIssues: + VARIANCE_FIXTURE.alignment.unmatchedActuals + VARIANCE_FIXTURE.alignment.unmatchedBudget > 0 + ? 
[`${VARIANCE_FIXTURE.alignment.unmatchedActuals + VARIANCE_FIXTURE.alignment.unmatchedBudget} unmatched accounts need mapping review before this memo is finalized.`] + : [], + nextActions: [ + { id: "open_memo", label: "Open variance memo", kind: "open" }, + { id: "review_unmatched", label: "Review unmatched accounts", kind: "open" }, + { id: "export_table", label: "Export variance table", kind: "export" }, + { id: "create_slide", label: "Create board slide", kind: "follow_up" }, + ], + }; + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "result", + status: "complete", + title: "Result", + payload: result, + }); + await ctx.runMutation(api.domains.financialOperator.runOps.updateRunStatus, { + runId, + status: "completed", + finalSummary: result.headline, + }); + + return { runId }; + }, +}); diff --git a/convex/domains/financialOperator/realExtractors.ts b/convex/domains/financialOperator/realExtractors.ts new file mode 100644 index 000000000..e603fa534 --- /dev/null +++ b/convex/domains/financialOperator/realExtractors.ts @@ -0,0 +1,519 @@ +/** + * Real PDF extractors — production-grade replacements for the demo fixtures. + * + * Pattern: Claude / Gemini accept PDFs as document input directly, so we + * skip the parse-text-then-prompt pipeline entirely. The PDF goes in, + * structured JSON comes out. The model is constrained by an explicit + * field schema and instructed never to fabricate values — if it can't + * find a number, it returns null with confidence 0. + * + * Math runs in JS sandbox after extraction (sandbox.ts), per the + * scratchpad-first invariant in .claude/rules/scratchpad_first.md. + * + * IMPORTANT: this is the production path. Fixtures (attFixture.ts etc) + * remain as the deterministic demo path. The orchestrator can call + * either one without changing its step-emission pattern. 
+ */ + +import { v } from "convex/values"; +import { action } from "../../_generated/server"; +import { api } from "../../_generated/api"; +import type { Id } from "../../_generated/dataModel"; +import Anthropic from "@anthropic-ai/sdk"; + +import { + computeAfterTaxCostOfDebt, + computeETR, +} from "./sandbox"; +import { validateExtraction, type FieldSpec } from "./validators"; +import type { + ApprovalRequestPayload, + ArtifactPayload, + CalculationPayload, + EvidencePayload, + ExtractedField, + ExtractionPayload, + ResultPayload, + RunBriefPayload, + ToolCallPayload, + ValidationPayload, +} from "./types"; + +const STEP_PACING_MS = 250; +const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); + +const MAX_PDF_BYTES = 20 * 1024 * 1024; // BOUND_READ — 20MB cap + +const TAX_AND_DEBT_SPEC: FieldSpec[] = [ + { + fieldName: "Income before income taxes", + expectedUnit: "USD_millions", + required: true, + sanityRange: { min: 0, max: 5_000_000 }, + }, + { + fieldName: "Income tax expense", + expectedUnit: "USD_millions", + required: true, + sanityRange: { min: 0, max: 1_000_000 }, + }, + { + fieldName: "Weighted average debt rate", + expectedUnit: "decimal", + required: true, + sanityRange: { min: 0, max: 0.5 }, + }, +]; + +interface ClaudeExtractedField { + fieldName: string; + value: number | null; + unit: string; + sourceRef: string; // "10-K p.72" or similar + excerpt: string; // verbatim quote anchoring the value + confidence: number; // 0..1, model-reported + notes?: string; +} + +interface ClaudeExtractionResponse { + fields: ClaudeExtractedField[]; + unresolvedFields: string[]; + modelConfidenceOverall: number; +} + +const SYSTEM_PROMPT = `You extract financial values from SEC filings (10-K, 10-Q) for a deterministic operator console. + +RULES: +1. NEVER fabricate or guess. If a value is not explicitly in the document, return null with confidence 0 and add it to unresolvedFields. +2. NEVER do math. Return raw values only. 
Effective tax rates and cost of debt are computed downstream in a JS sandbox. +3. Always cite the page number and a short verbatim excerpt for every value. +4. Confidence reflects how clearly the value is stated. 0.95+ = exact match in a labeled row. 0.70-0.94 = present but ambiguous unit/period. <0.70 = uncertain. +5. Units: report income statement values in USD_millions (e.g. 22450 means $22.45B). Report rates as decimal (e.g. 0.0542 for 5.42%). +6. Output JSON ONLY. No prose.`; + +const FIELD_SCHEMA = `{ + "fields": [ + { + "fieldName": string, // Must match one of: ${TAX_AND_DEBT_SPEC.map((s) => JSON.stringify(s.fieldName)).join(", ")} + "value": number | null, + "unit": "USD_millions" | "decimal", + "sourceRef": string, // e.g. "10-K p.72" + "excerpt": string, // verbatim quote, max 200 chars + "confidence": number, // 0..1 + "notes": string | null + } + ], + "unresolvedFields": string[], + "modelConfidenceOverall": number +}`; + +function getAnthropicKey(): string { + const key = process.env.ANTHROPIC_API_KEY; + if (!key) { + throw new Error( + "ANTHROPIC_API_KEY not configured for the Convex deployment. Set it via `npx convex env set ANTHROPIC_API_KEY ...`.", + ); + } + return key; +} + +/** + * Extract tax-and-debt inputs from a PDF stored in Convex `_storage`. + * + * Returns ExtractedField[] in the SAME shape the fixture extractor + * returns, so the orchestrator can call either path without branching. 
+ */ +async function extractTaxAndDebtFromPdf( + pdfBase64: string, +): Promise<{ + fields: ExtractedField[]; + modelConfidenceOverall: number; + unresolvedFields: string[]; +}> { + const client = new Anthropic({ apiKey: getAnthropicKey() }); + + const response = await client.messages.create({ + model: "claude-opus-4-7", + max_tokens: 2000, + system: SYSTEM_PROMPT, + messages: [ + { + role: "user", + content: [ + { + type: "document", + source: { + type: "base64", + media_type: "application/pdf", + data: pdfBase64, + }, + }, + { + type: "text", + text: `Extract these fields and return JSON in exactly this shape:\n${FIELD_SCHEMA}\n\nFields to extract:\n${TAX_AND_DEBT_SPEC.map((s) => `- ${s.fieldName} (${s.expectedUnit})`).join("\n")}`, + }, + ], + }, + ], + }); + + // Parse JSON from the first text block. Claude with system instruction + // "Output JSON ONLY" returns a single text block with the JSON. + const block = response.content.find((b) => b.type === "text"); + if (!block || block.type !== "text") { + throw new Error("Claude response had no text block"); + } + const jsonText = block.text.trim(); + // Defensive: strip code fences if present. + const cleaned = jsonText + .replace(/^```json\s*/i, "") + .replace(/^```\s*/i, "") + .replace(/```\s*$/i, "") + .trim(); + + let parsed: ClaudeExtractionResponse; + try { + parsed = JSON.parse(cleaned) as ClaudeExtractionResponse; + } catch (e) { + throw new Error( + `Claude returned non-JSON (HONEST_STATUS — better to fail than fabricate). First 200 chars: ${cleaned.slice(0, 200)}`, + ); + } + + // Map Claude's response to our ExtractedField shape, classifying status. + const fields: ExtractedField[] = parsed.fields.map((f) => ({ + fieldName: f.fieldName, + value: f.value, + unit: f.unit, + sourceRef: f.sourceRef, + confidence: typeof f.confidence === "number" ? f.confidence : 0, + status: + f.value === null + ? "unresolved" + : f.confidence >= 0.9 + ? "verified" + : f.confidence >= 0.5 + ? 
"needs_review" + : "unresolved", + reviewNote: f.notes ?? undefined, + })); + + return { + fields, + modelConfidenceOverall: parsed.modelConfidenceOverall ?? 0, + unresolvedFields: parsed.unresolvedFields ?? [], + }; +} + +/** + * End-to-end real run: takes a PDF storageId, runs Claude extraction, + * validates, computes in sandbox, emits the same typed step stream as + * the fixture demo. This is the production path. + * + * Caller flow (typical): user uploads PDF → file goes to Convex + * `_storage` → caller passes the resulting `Id<"_storage">` here. + */ +export const runRealCostOfDebtFromPdf = action({ + args: { + userId: v.optional(v.id("users")), + threadId: v.optional(v.string()), + pdfStorageId: v.id("_storage"), + pdfFileName: v.optional(v.string()), + }, + handler: async (ctx, args): Promise<{ runId: Id<"financialOperatorRuns"> }> => { + const fileName = args.pdfFileName ?? "uploaded.pdf"; + const goal = `Extract ETR + after-tax cost of debt from ${fileName} (real PDF, Claude extraction, sandbox compute).`; + + // 1. Create run + const runId: Id<"financialOperatorRuns"> = await ctx.runMutation( + api.domains.financialOperator.runOps.createRun, + { + userId: args.userId, + threadId: args.threadId, + taskType: "financial_metric_extraction", + goal, + files: [{ name: fileName, kind: "pdf" }], + totalSteps: 7, + }, + ); + + // 2. 
Plan + await ctx.runMutation(api.domains.financialOperator.runOps.updateRunStatus, { + runId, + status: "planning", + }); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "run_brief", + status: "complete", + title: "Plan", + payload: { + goal, + numberedSteps: [ + "Fetch uploaded PDF from storage", + "Send PDF to Claude with structured extraction schema", + "Validate extracted fields (schema + units + range)", + "Compute ETR + after-tax cost of debt in JS sandbox", + "Surface source excerpts as evidence", + "Emit a notebook artifact + reviewer summary", + ], + outputFormat: "Reviewable notebook + sandbox-locked calculation", + } satisfies RunBriefPayload, + }); + + await ctx.runMutation(api.domains.financialOperator.runOps.updateRunStatus, { + runId, + status: "running", + }); + + try { + // 3. Fetch PDF from storage + await sleep(STEP_PACING_MS); + const fetchStart = Date.now(); + const pdfBlob = await ctx.storage.get(args.pdfStorageId); + if (!pdfBlob) { + throw new Error(`PDF not found in storage: ${args.pdfStorageId}`); + } + // BOUND_READ — refuse oversized PDFs to protect the LLM call + memory. + // Blob.size is checked before arrayBuffer() so an oversized upload is + // rejected without first being copied into an ArrayBuffer. + if (pdfBlob.size > MAX_PDF_BYTES) { + throw new Error( + `PDF too large: ${pdfBlob.size} bytes (max ${MAX_PDF_BYTES}). Crop to the relevant sections first.`, + ); + } + const pdfBuffer = await pdfBlob.arrayBuffer(); + const pdfBase64 = Buffer.from(pdfBuffer).toString("base64"); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "tool_call", + status: "complete", + title: "Fetch PDF from storage", + payload: { + toolName: "convex.storage.get", + inputSummary: `storageId=${args.pdfStorageId}`, + outputSummary: `${pdfBuffer.byteLength.toLocaleString()} bytes`, + } satisfies ToolCallPayload, + durationMs: Date.now() - fetchStart, + }); + + // 4. 
Extraction via Claude + await sleep(STEP_PACING_MS); + const extractStart = Date.now(); + const { fields, modelConfidenceOverall, unresolvedFields } = + await extractTaxAndDebtFromPdf(pdfBase64); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "tool_call", + status: "complete", + title: "Claude PDF extraction", + payload: { + toolName: "anthropic.messages.create (PDF input)", + inputSummary: `${TAX_AND_DEBT_SPEC.length} target fields, structured-JSON schema`, + outputSummary: `${fields.length} fields returned; overall confidence ${modelConfidenceOverall.toFixed(2)}; ${unresolvedFields.length} unresolved`, + } satisfies ToolCallPayload, + durationMs: Date.now() - extractStart, + }); + + const extractionPayload: ExtractionPayload = { + schemaName: "tax_and_debt_inputs", + fields, + totalFound: fields.length, + needsReviewCount: fields.filter((f) => f.status === "needs_review").length, + }; + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "extraction", + status: + extractionPayload.needsReviewCount > 0 + ? "needs_review" + : fields.some((f) => f.status === "unresolved") + ? "error" + : "complete", + title: "Extracted values", + payload: extractionPayload, + }); + + // 5. Validation + await sleep(STEP_PACING_MS); + const validation = validateExtraction({ + fields, + spec: TAX_AND_DEBT_SPEC, + }); + const validationPayload: ValidationPayload = { + schemaPassed: validation.schemaPassed, + unitsNormalized: validation.unitsNormalized, + findings: validation.findings, + checksRun: validation.checksRun, + checksPassed: validation.checksPassed, + }; + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "validation", + status: validation.schemaPassed ? 
"complete" : "error", + title: "Validate extraction", + payload: validationPayload, + }); + if (!validation.schemaPassed) { + await ctx.runMutation( + api.domains.financialOperator.runOps.updateRunStatus, + { + runId, + status: "awaiting_approval", + }, + ); + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "approval_request", + status: "pending", + title: "Required fields missing — operator review", + payload: { + question: "Required fields could not be extracted. How should we proceed?", + context: `Unresolved: ${unresolvedFields.join(", ") || "—"}. Sandbox cannot compute without these inputs.`, + options: [ + { id: "narrow", label: "Re-extract with narrower section hints", description: "Re-prompt Claude with explicit page hints." }, + { id: "override", label: "Manual entry", description: "Operator types the missing values; sandbox compute proceeds." }, + { id: "reject", label: "Mark run failed", description: "No artifact saved." }, + ], + } satisfies ApprovalRequestPayload, + }); + return { runId }; + } + + // 6. Sandbox compute + await sleep(STEP_PACING_MS); + const ibt = (fields.find((f) => f.fieldName === "Income before income taxes") + ?.value ?? 0) as number; + const ite = (fields.find((f) => f.fieldName === "Income tax expense") + ?.value ?? 0) as number; + const debtRate = (fields.find((f) => f.fieldName === "Weighted average debt rate") + ?.value ?? 
0) as number; + const etrR = computeETR({ incomeBeforeTaxes: ibt, incomeTaxExpense: ite }); + const atR = computeAfterTaxCostOfDebt({ + preTaxDebtRate: debtRate, + effectiveTaxRate: etrR.outputs.etr, + }); + const calcPayload: CalculationPayload = { + formulaLabel: "Effective tax rate + after-tax cost of debt", + formulaText: [etrR.formulaText, atR.formulaText].join("\n"), + inputs: { + incomeBeforeTaxes: ibt, + incomeTaxExpense: ite, + preTaxDebtRate: debtRate, + }, + outputs: { + effectiveTaxRate: etrR.outputs.etr, + afterTaxCostOfDebt: atR.outputs.afterTaxCostOfDebt, + }, + formattedOutputs: { + effectiveTaxRate: etrR.formattedOutputs.etr, + afterTaxCostOfDebt: atR.formattedOutputs.afterTaxCostOfDebt, + }, + sandboxKind: "js_pure", + computedAt: Date.now(), + }; + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "calculation", + status: "complete", + title: "Sandbox calculation", + payload: calcPayload, + }); + + // 7. Evidence — one anchor per field with a non-null value, built from its sourceRef and reviewNote. + // NOTE(review): extractTaxAndDebtFromPdf fills reviewNote from the model's free-form "notes", + // not from its verbatim "excerpt", so anchors may lack the quoted source text — confirm intent. + await sleep(STEP_PACING_MS); + const anchors = fields + .filter((f) => f.value !== null) + .map((f) => ({ + label: f.fieldName, + sourceRef: f.sourceRef, + excerpt: f.reviewNote, + })); + const evidencePayload: EvidencePayload = { + anchors, + totalSources: anchors.length, + }; + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "evidence", + status: "complete", + title: "Source anchors", + payload: evidencePayload, + }); + + // 8. 
Artifact + result + await sleep(STEP_PACING_MS); + const artifactPayload: ArtifactPayload = { + kind: "notebook", + label: `${fileName.replace(/\.pdf$/i, "")} — After-Tax Cost of Debt`, + description: + "Notebook with Claude-extracted inputs, sandbox-locked calculation, source excerpts, and reviewer notes.", + diffSummary: [ + `Source: ${fileName}`, + `Extraction: Claude (${fields.length} fields, ${unresolvedFields.length} unresolved)`, + `Sandbox: deterministic JS (no LLM math)`, + ], + }; + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "artifact", + status: "complete", + title: "Notebook artifact", + payload: artifactPayload, + }); + + const result: ResultPayload = { + headline: `${fileName}: ETR ${etrR.formattedOutputs.etr}; after-tax cost of debt ${atR.formattedOutputs.afterTaxCostOfDebt}.`, + prose: + "Values were extracted by Claude directly from the uploaded PDF (no intermediate parse). Math ran deterministically in JS sandbox. All values cite the source page; reviewer should verify excerpts before sign-off.", + metrics: { + "Effective tax rate": etrR.formattedOutputs.etr, + "After-tax cost of debt": atR.formattedOutputs.afterTaxCostOfDebt, + "Model confidence": modelConfidenceOverall.toFixed(2), + }, + openIssues: + unresolvedFields.length > 0 + ? 
[`${unresolvedFields.length} field(s) unresolved by extractor: ${unresolvedFields.join(", ")}`] + : [], + nextActions: [ + { id: "open_notebook", label: "Open notebook", kind: "open" }, + { id: "view_sources", label: "View sources", kind: "open" }, + { id: "ask_followup", label: "Ask follow-up", kind: "follow_up" }, + ], + }; + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "result", + status: "complete", + title: "Result", + payload: result, + }); + await ctx.runMutation(api.domains.financialOperator.runOps.updateRunStatus, { + runId, + status: "completed", + finalSummary: result.headline, + }); + + return { runId }; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + // HONEST_STATUS — surface the failure verbatim, never silently mark complete. + await ctx.runMutation(api.domains.financialOperator.runOps.appendStep, { + runId, + kind: "tool_call", + status: "error", + title: "Extraction failed", + payload: { + toolName: "anthropic.messages.create (PDF input)", + inputSummary: "PDF + structured-JSON schema", + outputSummary: "—", + } satisfies ToolCallPayload, + errorMessage: msg, + }); + await ctx.runMutation(api.domains.financialOperator.runOps.updateRunStatus, { + runId, + status: "error", + errorMessage: msg, + }); + return { runId }; + } + }, +}); diff --git a/docs/architecture/FINANCIAL_OPERATOR_DESIGN_ALIGNMENT.md b/docs/architecture/FINANCIAL_OPERATOR_DESIGN_ALIGNMENT.md new file mode 100644 index 000000000..db62192f9 --- /dev/null +++ b/docs/architecture/FINANCIAL_OPERATOR_DESIGN_ALIGNMENT.md @@ -0,0 +1,157 @@ +# Financial Operator Console — Design Alignment + +How the new `/finance-demo` route, typed cards, and `FinancialOperatorOverlay` build on top of the existing NodeBench UI kit per surface (web, mobile, workspace, CLI/MCP). + +The shared discipline: **same tokens, same primitives, surface-specific entry**. 
No new design language; only new content types (typed cards) that compose existing primitives. + +--- + +## 1. Web (desktop, `nodebenchai.com`) + +**Existing kit baseline** +- Glass-DNA card: `border border-edge bg-surface/50` (utility class `.nb-card`) +- Card row: `.nb-card-row` +- Section header: `text-[11px] font-semibold uppercase tracking-[0.18em] text-content-muted` (`.nb-section-title`) +- Pill: `.nb-badge` (border + bg/5 fill) +- Color tokens: `border-edge`, `bg-surface`, `bg-surface-hover`, `text-content`, `text-content-secondary`, `text-content-muted` +- Accent: terracotta `#d97757` for selected/CTA states +- Type: Manrope (UI) + JetBrains Mono (data, formulas, tool names) +- Background: `--bg-primary: #151413` + +**What the financial cards reuse (verbatim)** +- Every card body uses `.nb-card` chrome with a colored left-border accent stripe per `kind` (`border-l-emerald-400` for calculation, `border-l-[#d97757]` for approval, etc.). The accent stripe is a 1-line addition; everything else is the existing card class. +- Badges use `.nb-badge` shape (rounded-full + border + small text). +- Section headers (`Plan`, `Tool`, `Extraction`, …) follow the existing `text-[11px] uppercase tracking-[0.2em]` pattern. +- Mono-font for: tool names (`document.locate_sections`), formula text, sequence numbers, source refs. +- Standard `max-w-3xl mx-auto p-6` layout matches `/developers`, `/changelog`, etc. + +**What's new (purely additive)** +- 9 typed step kinds (`run_brief`, `tool_call`, `extraction`, `validation`, `calculation`, `evidence`, `artifact`, `approval_request`, `result`) — each is a new content shape, but all rendered through `.nb-card` chrome. +- Status pills in 7 states (`pending`, `running`, `complete`, `error`, `needs_review`, `approved`, `rejected`) — color-coded but use existing badge chrome. +- 1 sandbox-disclosure note ("Math executed in JS sandbox, not by the language model.") — emerald accent, single instance per calculation card. 
+ +**Where it lives on web** +- Standalone view: `/finance-demo` (aliases `/financial-operator`, `/finops`) — full-width 4-tile picker + live timeline. +- Global overlay: `<FinancialOperatorOverlay />` mounted in `App.tsx`. Reads `?finRun=<runId>` from URL → renders timeline as a right-side drawer alongside whatever surface the user is on. Collapsible to a corner pill. +- No changes to existing routes' visual language; the overlay docks beside them. + +--- + +## 2. Mobile (375 px viewport) + +**Existing kit baseline** +- Bottom nav (Home/Reports/Chat/Inbox/Me) — `xl:hidden`, fixed bottom +- Agent panel: full-screen slide-over on `xl:hidden` +- `QuickCommandChips`: mobile-only chip strip above input, dispatches `cmd.query` on tap +- Capture FAB +- All grids degrade to `grid-cols-1` below `sm:` + +**What financial cards do at this width** +- All cards stack vertically (cards are `space-y-3` lists by default). +- Approval card grid: `sm:grid-cols-2` → falls back to single column on phones, so 4 options stack. +- Calculation inputs/outputs: `sm:grid-cols-2` → single column on small. Long mono formulas use `whitespace-pre-wrap` so they wrap rather than horizontal-scroll. +- The overlay drawer is `max-w-md` on phones (~448 px) and full-screen-width-with-margin on truly small viewports — matches the existing agent slide-over behavior. + +**Mobile-only entry point** +- Added to `QuickCommandChips`: `{ id: "fin-att-demo", label: "AT&T cost of debt", icon: Calculator, navigate: "/finance-demo" }`. +- The chip type was extended with an optional `navigate?: string` field (single-line schema change). When set, click navigates instead of dispatching the chat query — same fork the existing chip flow already understood, just a new branch. +- Chip placement is identical to all existing mobile chips (above input, scroll-x). + +**What's intentionally NOT mobile** +- The picker grid on `/finance-demo` collapses fine, but reading 9 cards on a phone is dense. 
Mobile users should generally trigger from the chip → see the run in the right-drawer overlay, not the standalone picker. + +--- + +## 3. Workspace (`workspace.nodebenchai.com`) + +**Existing kit baseline** +- Workspace is a separate deployed surface (its own subdomain alias) with its own layout: ExactChatSurface, ProductTopNav, document-as-canvas pattern. +- Reports/notebooks live as documents in `convex/schema.ts:documents`. +- Workspace tabs (`brief`, `cards`, `notebook`, `sources`, `chat`, `map`) are typed in `productEventWorkspaces.defaultTabs`. + +**Current state of the financial operator on Workspace** +- The `FinancialOperatorOverlay` is mounted in `App.tsx` and is surface-agnostic — it works on workspace.nodebenchai.com just by appending `?finRun=<runId>` to any URL. +- The artifact card is shaped to drop a notebook entry — but the `artifactRef` field is currently unused. The wire-up to actually create a `documents` row + back-link the run lives in a follow-up. + +**Planned (next PR)** +- `Artifact.kind === "notebook" | "memo"` will create a `documents` row tagged `kind: "financial_run"` and store the runId on it. Workspace's existing notebook tab can then surface the financial runs as document entries. +- A new workspace tab `financial` would render `FinancialOperatorTimeline` for runs scoped to a given event workspace — same component, new mount point. + +**Token-level fit** +- Workspace already uses the same `nb-card`/`border-edge`/etc tokens, so the cards drop in without restyling. +- The overlay's `bg-[#151413]/95 backdrop-blur-md` matches the workspace dark stage. + +--- + +## 4. CLI / MCP (`packages/mcp-local`, `nodebench-mcp`) + +**Existing kit baseline** +- ~304 MCP tools, gated by preset (`starter`, `founder`, `banker`, `operator`, `researcher`, `web_dev`, `data`, `full`). +- Tool schema: `{ name, description, inputSchema, handler }` (the `McpTool` type). +- CLI subcommands: `discover`, `setup`, `workflow`, `quickref`, `call`, `demo`. 
+- Progressive discovery: each tool entry has `nextTools` + `relatedTools` for one-hop navigation. + +**Status of the financial operator on CLI** +- Currently the financial actions exist server-side as Convex actions (`runAttCostOfDebtDemo`, `runCrmCleanupDemo`, `runCovenantComplianceDemo`, `runVarianceAnalysisDemo`, `runRealCostOfDebtFromPdf`). +- They are NOT yet exposed as MCP tools — agents (Claude Code, Cursor, Windsurf) cannot drive runs from outside the browser today. + +**Planned exposure (next PR — sketched here for design parity)** + +Surface as 4 demo tools + 1 production tool + 4 helpers: + +| Tool name | Purpose | Returns | +|---|---|---| +| `finance_start_att_demo` | Trigger Example A run | `{ runId }` | +| `finance_start_crm_cleanup` | Trigger Example B run | `{ runId }` | +| `finance_start_covenant_compliance` | Trigger Example C run | `{ runId }` | +| `finance_start_variance_analysis` | Trigger Example D run | `{ runId }` | +| `finance_extract_from_pdf` | Production: take a PDF storageId, return runId | `{ runId }` | +| `finance_get_run` | Inspect run header | `{ runId, status, finalSummary, … }` | +| `finance_list_steps` | Get the typed step stream | `Step[]` | +| `finance_record_decision` | Approve/reject/override an approval gate | `{ stepId, status }` | +| `finance_open_in_chat` | Return a deep-link URL `/?surface=ask&finRun=<runId>` | `{ url }` | + +These would live in `packages/mcp-local/src/tools/financialOperatorTools.ts`, register under domain key `finance_ops`, and slot into the `banker` and `operator` presets by default. Each tool lists `relatedTools` pointing to the other tools in the same chain (e.g. `finance_start_att_demo.relatedTools = ["finance_get_run", "finance_list_steps", "finance_record_decision"]`). + +**Token-level fit** +- CLI doesn't have visual tokens — it's text. The "design alignment" for CLI is the **schema language**: same `kind` enum, same status enum, same field names as the frontend cards. 
So an MCP client can render its own CLI representation of a step stream and the text layout maps 1:1 to what the web cards render. +- Output formatting in `finance_list_steps` should reuse the same labels (`Plan`, `Tool`, `Extraction`, …) so a Claude Code session reads the same vocabulary the web user sees. + +--- + +## Composition matrix + +| Surface | Existing primitive reused | New primitive added | Entry point | +|---|---|---|---| +| Web (desktop) | `.nb-card`, `.nb-section-title`, `.nb-badge`, terracotta accent, Manrope/JetBrains Mono | 9 typed step shapes; sandbox disclosure; overlay drawer | `/finance-demo` route + global overlay | +| Mobile | `QuickCommandChips`, slide-over drawer, `xl:hidden` bottom nav | Chip `navigate` field; overlay max-w-md | Mobile chip → overlay | +| Workspace | Document model, `productEventWorkspaces` tabs, ExactChatSurface | (Pending) `Artifact.kind === "notebook"` writes a documents row; new `financial` workspace tab | Same overlay; future workspace tab | +| CLI / MCP | `McpTool` schema, presets, progressive discovery | (Pending) `finance_*` tool family in `finance_ops` domain | `nodebench-mcp call finance_start_att_demo` | + +The shared invariants across all four surfaces: + +1. **Same step kinds** — `run_brief / tool_call / extraction / validation / calculation / evidence / artifact / approval_request / result` everywhere. +2. **Same status enum** — `pending / running / complete / error / needs_review / approved / rejected`. +3. **Same source-attribution rule** — every value carries `sourceRef` + `confidence`. +4. **Same sandbox guarantee** — math runs in `convex/domains/financialOperator/sandbox.ts`, never in the LLM, regardless of surface. + +The result: a financial workflow that started in Claude Code via MCP can be picked up in the web overlay via deep-link, viewed mid-run on mobile, and ultimately archived as a workspace notebook — without any rendering surface needing to know the others exist. 
The runId + step stream is the contract. + +--- + +## Anti-patterns this design avoids + +- **Surface-specific styling** — no per-surface tweaks to the cards. If a card needs to change, it changes everywhere. +- **Parallel chat protocols** — the overlay reads URL params, which any surface already supports. No new postMessage bus, no new context provider, no new agent-panel internals. +- **Tokens duplicated as inline values** — every color/spacing reads from the existing token alias (`border-edge`, `text-content-muted`, etc). A future theme change ripples through the new cards automatically. +- **Hidden math** — the calculation card forces the sandbox disclosure on screen; agents reading the same step record see the same `sandboxKind: "js_pure"` guarantee. No surface can lie about who computed what. + +## Pointers + +- Cards: [src/features/financialOperator/components/](../../src/features/financialOperator/components/) +- Overlay: [src/features/financialOperator/components/FinancialOperatorOverlay.tsx](../../src/features/financialOperator/components/FinancialOperatorOverlay.tsx) +- Demo view: [src/features/financialOperator/views/FinancialOperatorDemo.tsx](../../src/features/financialOperator/views/FinancialOperatorDemo.tsx) +- Mobile chip: [src/features/agents/components/FastAgentPanel/QuickCommandChips.tsx](../../src/features/agents/components/FastAgentPanel/QuickCommandChips.tsx) +- Backend orchestrators: [convex/domains/financialOperator/orchestrator.ts](../../convex/domains/financialOperator/orchestrator.ts), [orchestratorExamples.ts](../../convex/domains/financialOperator/orchestratorExamples.ts), [realExtractors.ts](../../convex/domains/financialOperator/realExtractors.ts) +- Sandbox: [convex/domains/financialOperator/sandbox.ts](../../convex/domains/financialOperator/sandbox.ts) +- Token reference: [src/shared/ui/surface-tokens.css](../../src/shared/ui/surface-tokens.css) diff --git a/src/App.tsx b/src/App.tsx index 3b416d1c7..049033cfe 100644 --- 
a/src/App.tsx +++ b/src/App.tsx @@ -19,6 +19,9 @@ import { getReportWorkspaceRouteFromPath } from "@/features/reports/lib/reportNo import type { MainView } from "@/lib/registry/viewRegistry"; import { buildCockpitPathForView } from "@/lib/registry/viewRegistry"; import { initErrorReporting } from "@/lib/errorReporting"; +import { FinancialOperatorOverlay } from "@/features/financialOperator/components/FinancialOperatorOverlay"; +import { WorkspaceModeToggle } from "@/features/financialOperator/components/WorkspaceModeToggle"; +import { WorkspaceModePane } from "@/features/financialOperator/components/WorkspaceModePane"; const ShareableMemoView = lazy(() => import("@/features/founder/views/ShareableMemoView")); const PublicEntityShareView = lazy(() => import("@/features/share/views/PublicEntityShareView")); @@ -386,6 +389,9 @@ function App() { data-mcp-compat="webmcp chrome-devtools-mcp" data-webmcp-enabled={webmcpEnabled ? "true" : "false"} > + + + diff --git a/src/features/financialOperator/components/ApprovalCard.tsx b/src/features/financialOperator/components/ApprovalCard.tsx index 252f42440..cb6a4b301 100644 --- a/src/features/financialOperator/components/ApprovalCard.tsx +++ b/src/features/financialOperator/components/ApprovalCard.tsx @@ -60,11 +60,11 @@ export function ApprovalCard({ runId, stepId, status, data, selectedOptionId }: onClick={() => handleClick(opt.id)} disabled={isLocked || pendingId !== null} aria-pressed={isSelected} - className={`group rounded border px-3 py-2 text-left transition-colors focus-visible:ring-2 focus-visible:ring-[#d97757]/50 focus-visible:outline-none disabled:cursor-not-allowed disabled:opacity-60 ${ + className={`group rounded border px-3 py-2 text-left transition-colors focus-visible:ring-2 focus-visible:ring-[var(--accent-primary)]/50 focus-visible:outline-none disabled:cursor-not-allowed disabled:opacity-60 ${ isPrimary - ? "border-[#d97757]/40 bg-[#d97757]/10 hover:bg-[#d97757]/15 text-[#f5d0b8]" + ? 
"border-[var(--accent-primary)]/40 bg-[var(--accent-primary)]/10 hover:bg-[var(--accent-primary)]/15 text-[#f5d0b8]" : "border-edge bg-surface/50 hover:bg-surface-hover text-content" - } ${isSelected ? "ring-2 ring-[#d97757]/40" : ""}`} + } ${isSelected ? "ring-2 ring-[var(--accent-primary)]/40" : ""}`} >
{opt.label} @@ -74,7 +74,7 @@ export function ApprovalCard({ runId, stepId, status, data, selectedOptionId }: )} {isSelected && !isPending && ( - + chosen )} diff --git a/src/features/financialOperator/components/ArtifactCard.tsx b/src/features/financialOperator/components/ArtifactCard.tsx index 2e4fb3cea..68bf2ec42 100644 --- a/src/features/financialOperator/components/ArtifactCard.tsx +++ b/src/features/financialOperator/components/ArtifactCard.tsx @@ -52,7 +52,7 @@ export function ArtifactCard({ data }: Props) { href={data.url} target="_blank" rel="noreferrer noopener" - className="inline-flex items-center gap-1 rounded border border-edge bg-surface/50 px-2.5 py-1 text-[12px] text-content hover:bg-surface-hover focus-visible:ring-2 focus-visible:ring-[#d97757]/50 focus-visible:outline-none" + className="inline-flex items-center gap-1 rounded border border-edge bg-surface/50 px-2.5 py-1 text-[12px] text-content hover:bg-surface-hover focus-visible:ring-2 focus-visible:ring-[var(--accent-primary)]/50 focus-visible:outline-none" > Open artifact diff --git a/src/features/financialOperator/components/CalculationCard.tsx b/src/features/financialOperator/components/CalculationCard.tsx index d01542679..005bae4e7 100644 --- a/src/features/financialOperator/components/CalculationCard.tsx +++ b/src/features/financialOperator/components/CalculationCard.tsx @@ -73,7 +73,7 @@ function KVList({ return (
{k}
Open source
+ )} + + {/* Field row */} +
+
+ + + +
+