From 7feca566a9fa1522485cc190ee4fa19c0953d92a Mon Sep 17 00:00:00 2001 From: Enreign Date: Wed, 11 Mar 2026 20:29:51 +0100 Subject: [PATCH 1/2] Add token/cost estimation (Step 15) and 4 new PM tool integrations Token estimation computes per-task token consumption and optional API cost across economy/standard/premium model tiers. Four new tracker mappings (Asana, Azure DevOps, Zenhub, Shortcut) bring supported tools to 10. Bumps version to 0.4.0. Co-Authored-By: Claude Opus 4.6 --- README.md | 9 +- SKILL.md | 7 +- references/formulas.md | 87 +++++++++++++++++++- references/output-schema.md | 107 ++++++++++++++++++++++-- references/questionnaire.md | 9 ++ tests/test_formulas.py | 159 ++++++++++++++++++++++++++++++++++++ 6 files changed, 361 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 3d5028c..1381ecd 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,8 @@ Research-backed formulas. PERT statistics. Calibration feedback loops. Zero depe - Supports **single tasks or batches** (paste 5 issues or 500) - Produces **PERT expected values** with confidence bands, not just ranges - Separates **"expected"** from **"committed"** estimates at your chosen confidence level -- Outputs in formats ready for **Linear, JIRA, ClickUp, GitHub Issues, Monday, and GitLab** +- Estimates **token consumption and API cost** per model tier (economy/standard/premium) +- Outputs in formats ready for **Linear, JIRA, ClickUp, GitHub Issues, Monday, GitLab, Asana, Azure DevOps, Zenhub, and Shortcut** - Includes a **calibration system** to improve accuracy over time with actuals ## Quick Start @@ -247,7 +248,7 @@ Estimates can be output in two modes for any supported tracker: | **Embedded** (default) | Markdown table in description/body | None | | **Native** | Maps to tracker-specific fields | Custom fields | -**Supported:** Linear, JIRA, ClickUp, GitHub Issues, Monday, GitLab +**Supported:** Linear, JIRA, ClickUp, GitHub Issues, Monday, GitLab, Asana, Azure DevOps, 
Zenhub, Shortcut Embedded mode works everywhere immediately. Native mode requires custom fields for agent-specific metrics. @@ -343,7 +344,7 @@ Evaluation prompts per the [Claude Skills 2.0](https://claude.com/blog/improving | `eval-quick.md` | Quick path produces valid PERT output with minimal input | | `eval-hybrid.md` | Detailed path handles multi-team, confidence levels, org overhead | | `eval-batch.md` | Batch mode with mixed types, dependencies, and rollup | -| `eval-regression.md` | 6 baseline cases to detect drift after formula changes | +| `eval-regression.md` | 8 baseline cases to detect drift after formula changes | Run evals after any change to formulas, frameworks, or the skill workflow. @@ -354,7 +355,7 @@ Run evals after any change to formulas, frameworks, or the skill workflow. Contributions welcome — see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines. Key areas: - **Calibration data** — Share anonymized estimated vs. actual results to improve default ratios -- **Tracker mappings** — Additional tracker support (Asana, Notion, Shortcut, etc.) +- **Tracker mappings** — Additional tracker support (Notion, Basecamp, etc.) - **Task types** — New multipliers for work categories not yet covered - **Formulas** — Improvements backed by data or research - **Evals** — Additional test cases, especially edge cases diff --git a/SKILL.md b/SKILL.md index 798da2a..a8f84bf 100644 --- a/SKILL.md +++ b/SKILL.md @@ -4,7 +4,7 @@ description: "Adapts to your team's working mode — human-only, hybrid, or agen license: MIT metadata: author: Enreign - version: "0.3.0" + version: "0.4.0" --- # Progressive Estimation @@ -131,7 +131,8 @@ The computation pipeline: 6. Apply cone of uncertainty spread to widen/narrow range 7. Compute PERT expected value and standard deviation 8. Apply confidence multiplier for committed estimate -9. Check anti-pattern guards and generate warnings +9. Compute token & cost estimates (Step 15) +10. 
Check anti-pattern guards and generate warnings If the user requests a standalone deterministic calculator, generate one from `formulas.md` in their preferred language. The generated script must: @@ -183,7 +184,7 @@ Then provide: - Tracker-formatted output (if requested) Ask which tracker and mode: -- **Tracker**: Linear, JIRA, ClickUp, GitHub Issues, Monday, GitLab, or generic +- **Tracker**: Linear, JIRA, ClickUp, GitHub Issues, Monday, GitLab, Asana, Azure DevOps, Zenhub, Shortcut, or generic - **Mode**: Native fields or embedded in description (default: embedded) For batch output, produce a summary table first, then rollup, then warnings, diff --git a/references/formulas.md b/references/formulas.md index 4d78d5c..9b02e3f 100644 --- a/references/formulas.md +++ b/references/formulas.md @@ -21,6 +21,8 @@ standalone calculator scripts in any language. | confidence_level | 50/80/90 | 80 | — | | definition_phase | concept/requirements/design/ready | ready | — | | org_size | solo-startup/growth/enterprise | solo-startup | — | +| model_tier | economy/standard/premium or specific model | standard | — | +| show_cost | boolean | false | — | ## Lookup Tables @@ -164,6 +166,52 @@ enterprise: 1.3 (formal review, compliance, multi-team coordination) Applied to human time only (planning, review, fix), not agent time. 
+### Tokens Per Round (thousands, by complexity × maturity) + +``` + S M L XL +exploratory: 8k 15k 25k 40k +partial: 6k 12k 20k 35k +mostly-automated: 5k 10k 18k 30k +``` + +### Output Token Ratio (by complexity) + +``` +S: 0.25 M: 0.28 L: 0.30 XL: 0.35 +``` + +### Model Pricing (per 1M tokens, USD — last verified March 2026) + +Representative models so users can pick the closest match: + +``` +Model Input Output Tier +───────────────────────────────────────────────────── +GPT-4o Mini $0.15 $0.60 economy +Gemini 2.5 Flash $0.30 $2.50 economy +Claude Haiku 4.5 $1.00 $5.00 economy +Gemini 2.5 Pro $1.25 $10.00 standard +GPT-4o $2.50 $10.00 standard +Claude Sonnet 4.6 $3.00 $15.00 standard +Claude Opus 4.6 $5.00 $25.00 premium +GPT-5 $1.25 $10.00 premium (capability, not price) +``` + +For the tier-based formula, use these representative rates: + +``` + Input Output +economy: $0.50 $2.50 (Haiku, GPT-4o-mini, Gemini Flash) +standard: $2.50 $12.00 (Sonnet, GPT-4o, Gemini 2.5 Pro) +premium: $5.00 $25.00 (Opus, GPT-5) +``` + +Note: "Premium" reflects capability tier (best available models), not +necessarily highest price. GPT-5 is premium-capability at standard pricing. +Pricing changes frequently — check provider pages before committing to +cost-based decisions. 
+ ## Formulas ### Step 1: Agent Rounds @@ -300,6 +348,29 @@ communication_overhead = 0.15 × (num_humans - 1) adjusted_human_time = adjusted_human_time × (1 + communication_overhead) ``` +### Step 15: Token & Cost Estimation + +``` +tokens_per_round = tokens_per_round_table[complexity][maturity] +output_ratio = output_token_ratio[complexity] + +total_tokens_min = adjusted_rounds_min × tokens_per_round × num_agents +total_tokens_max = adjusted_rounds_max × tokens_per_round × num_agents + +input_tokens_min = total_tokens_min × (1 - output_ratio) +input_tokens_max = total_tokens_max × (1 - output_ratio) +output_tokens_min = total_tokens_min × output_ratio +output_tokens_max = total_tokens_max × output_ratio + +token_midpoint = (total_tokens_min + total_tokens_max) / 2 +pert_expected_tokens = (total_tokens_min + 4 × token_midpoint + total_tokens_max) / 6 + +# Cost (only if show_cost == true) +cost_min = (input_tokens_min × input_price + output_tokens_min × output_price) / 1_000_000 +cost_max = (input_tokens_max × input_price + output_tokens_max × output_price) / 1_000_000 +pert_expected_cost = (cost_min + 4 × (cost_min + cost_max) / 2 + cost_max) / 6 +``` + ## Anti-Pattern Guards After computing estimates, check for these patterns and append warnings: @@ -427,7 +498,16 @@ Every estimation must produce these canonical fields: "humans": int, "agents": int }, - "story_points": int | null + "story_points": int | null, + "token_estimate": { + "total_tokens": { "min": int, "max": int }, + "input_tokens": { "min": int, "max": int }, + "output_tokens": { "min": int, "max": int }, + "pert_expected_tokens": int, + "model_tier": "economy" | "standard" | "premium", + "cost_usd": { "min": float, "max": float } | null, + "pert_expected_cost_usd": float | null + } } ``` @@ -444,7 +524,10 @@ For batch, wrap in: "critical_path": string[], "task_count": int, "size_distribution": { "S": int, "M": int, "L": int, "XL": int }, - "warnings": string[] + "warnings": string[], + "total_tokens": 
int, + "pert_expected_tokens": int, + "total_cost_usd": float | null } } ``` diff --git a/references/output-schema.md b/references/output-schema.md index 3a94b08..f1af76b 100644 --- a/references/output-schema.md +++ b/references/output-schema.md @@ -40,7 +40,7 @@ Output format adapts to the detected cooperation mode: Single task: ``` -Expected: ~4 hrs | Committed (80%): ~5.5 hrs | 10-26 agent rounds + 3 hrs human | Risk: medium | Size: M +Expected: ~4 hrs | Committed (80%): ~5.5 hrs | 10-26 agent rounds (~180k tokens) + 3 hrs human | Risk: medium | Size: M ``` Batch: @@ -90,6 +90,7 @@ Ask the user: "Native fields or embedded in description? (default: embedded)" | committed_hours | Custom field | "Committed Estimate (hrs)" | | confidence_level | Custom field | "Confidence %" | | priority | Priority | 1-4 mapping | +| token_estimate | Custom field | "Est. Tokens" | **Embedded:** ```markdown @@ -107,10 +108,17 @@ Ask the user: "Native fields or embedded in description? (default: embedded)" | **Expected (PERT)** | **~4 hrs** | | **Committed (80%)** | **~5.5 hrs** | | Confidence Band (68%) | 3.4-5.0 hrs | +| Token Estimate | ~180k tokens | +| Model Tier | standard | +| Est. Cost | ~$1.20 | | Risk | medium | | Team | 1 human, 1 agent | ``` +Token Estimate and Model Tier always appear in the breakdown table. +Est. Cost only appears if `show_cost == true`. +Cost does NOT appear in the one-line summary (too noisy). + ### Canonical → JIRA **Native:** @@ -126,6 +134,7 @@ Ask the user: "Native fields or embedded in description? (default: embedded)" | human_review_minutes | Custom field | number type | | pert_expected_hours | Custom field | "Expected Estimate (hrs)" | | labels | Labels | array | +| token_estimate | Custom field | "Est. Tokens" (number) | **Embedded:** Same markdown table in Description field. @@ -143,6 +152,7 @@ Ask the user: "Native fields or embedded in description? 
(default: embedded)" | agent_rounds | Custom field | number | | human_review_minutes | Custom field | number | | priority | Priority | 1-4 | +| token_estimate | Custom field | "Est. Tokens" (number) | **Embedded:** Same markdown table in Description field. @@ -160,6 +170,7 @@ Ask the user: "Native fields or embedded in description? (default: embedded)" | agent_rounds | Body section | no custom fields | | human_review_minutes | Body section | no custom fields | | labels | Labels | — | +| token_estimate | Body section | no custom fields | **Embedded:** Markdown table in issue Body. This is the recommended mode for GitHub Issues since it has no custom field support. @@ -180,6 +191,7 @@ for GitHub Issues since it has no custom field support. | human_review_minutes | Numbers column | "Review (min)" | | priority | Priority column | — | | labels | Tags column | — | +| token_estimate | Numbers column | "Est. Tokens" | **Embedded:** Markdown in Updates or Long Text column. @@ -198,22 +210,101 @@ for GitHub Issues since it has no custom field support. | agent_rounds | Description section | no custom fields in free tier | | human_review_minutes | Description section | — | | labels | Labels | scoped labels supported | +| token_estimate | Description section | no custom fields in free tier | **Embedded:** Markdown table in Description. Use `/estimate` quick action for time tracking integration. 
+### Canonical → Asana + +**Native:** +| Canonical Field | Asana Field | Notes | +|----------------|------------|-------| +| title | Task Name | — | +| complexity | Custom field (Dropdown) | "Size" — S/M/L/XL | +| committed_hours | Custom field (Number) | "Committed Estimate (hrs)" | +| pert_expected_hours | Custom field (Number) | "Expected (hrs)" | +| risk_level | Custom field (Dropdown) | "Risk" — low/medium/high | +| risk_notes | Description | appended | +| subtasks | Subtasks | native | +| agent_rounds | Custom field (Number) | "Agent Rounds" | +| human_review_minutes | Custom field (Number) | "Review (min)" | +| token_estimate | Custom field (Number) | "Est. Tokens" | + +**Embedded:** Markdown in Description. Quirks: custom fields are +project-scoped; time tracking is paid. + +### Canonical → Azure DevOps + +**Native:** +| Canonical Field | ADO Field | Notes | +|----------------|----------|-------| +| title | Title | — | +| complexity | Tags | `Size:M` | +| committed_hours | Original Estimate | hours (native) | +| pert_expected_hours | Custom field (Decimal) | "Expected Estimate (hrs)" | +| risk_level | Tags | `Risk:medium` | +| risk_notes | Description | HTML — use `` | +| subtasks | Child work items | parent-child link | +| agent_rounds | Custom field (Integer) | "Agent Rounds" | +| story_points | Story Points | native on User Story | +| token_estimate | Custom field (Integer) | "Est. Tokens" | + +**Embedded:** HTML table in Description (ADO uses HTML, not markdown). +Quirks: custom fields via Process customization; work item types matter +(User Story vs Task). 
+ +### Canonical → Zenhub + +**Native:** +| Canonical Field | Zenhub Field | Notes | +|----------------|-------------|-------| +| title | Issue Title | GitHub Issue title | +| complexity | Label | `size/M` (GitHub label) | +| committed_hours | Estimate | Zenhub story points field | +| pert_expected_hours | Body section | no custom fields | +| risk_level | Label | `risk/medium` (GitHub label) | +| risk_notes | Body | — | +| subtasks | Task list | `- [ ]` in body, or child issues | +| agent_rounds | Body section | no custom fields | +| story_points | Estimate | native Zenhub field (points) | +| token_estimate | Body section | no custom fields | + +**Embedded:** Markdown in GitHub Issue body (recommended). Quirks: Zenhub +layers on top of GitHub Issues — uses GitHub labels + body for most data; +Estimate field is points-only; Epics are cross-repo issue collections. + +### Canonical → Shortcut + +**Native:** +| Canonical Field | Shortcut Field | Notes | +|----------------|---------------|-------| +| title | Story Name | — | +| complexity | Label | `size:M` | +| committed_hours | Custom field (Number) | "Committed (hrs)" | +| pert_expected_hours | Custom field (Number) | "Expected (hrs)" | +| risk_level | Label | `risk:medium` | +| risk_notes | Description | markdown supported | +| subtasks | Tasks (within Story) | checklist-style | +| agent_rounds | Custom field (Number) | "Agent Rounds" | +| story_points | Estimate | native field (points) | +| token_estimate | Custom field (Number) | "Est. Tokens" | + +**Embedded:** Markdown in Description. Quirks: custom fields on Team plan+; +native Estimate is points not hours; Stories have Tasks (checklist items). 
+ ## Batch Output Format ### Summary Table (Always First) ``` -| # | Task | Size | Type | Rounds | Agent | Human | Expected | Committed (80%) | Risk | Deps | -|---|------|------|------|--------|-------|-------|----------|-----------------|------|------| -| 1 | Auth service | M | coding | 10-26 | 20-78m | 2-3h | ~4h | ~5.5h | med | — | -| 2 | Payment | L | coding | 26-65 | 52-195m | 4-8h | ~8h | ~11h | high | #1 | -| 3 | DB migration | L | data-mig | 26-65 | 52-195m | 4-8h | ~16h | ~22h | high | — | -|---|------|------|------|--------|-------|-------|----------|-----------------|------|------| -| | **Totals** | | | | | | **~28h** | **~38.5h** | | | +| # | Task | Size | Type | Rounds | Agent | Human | Tokens | Expected | Committed (80%) | Risk | Deps | +|---|------|------|------|--------|-------|-------|--------|----------|-----------------|------|------| +| 1 | Auth service | M | coding | 10-26 | 20-78m | 2-3h | ~180k | ~4h | ~5.5h | med | — | +| 2 | Payment | L | coding | 26-65 | 52-195m | 4-8h | ~520k | ~8h | ~11h | high | #1 | +| 3 | DB migration | L | data-mig | 26-65 | 52-195m | 4-8h | ~520k | ~16h | ~22h | high | — | +|---|------|------|------|--------|-------|-------|--------|----------|-----------------|------|------| +| | **Totals** | | | | | | **~1.2M** | **~28h** | **~38.5h** | | | ``` ### Rollup Block diff --git a/references/questionnaire.md b/references/questionnaire.md index a743629..de2ca6e 100644 --- a/references/questionnaire.md +++ b/references/questionnaire.md @@ -119,6 +119,14 @@ All quick-path questions, plus: - Enterprise (50+ people) — formal review, compliance, multi-team coordination (1.3x) 13. **Dependencies**: "Is this blocked by or blocking other tasks?" → dependency graph for sequencing +14. **Model & cost**: "Which model tier are you using, and do you want cost estimates?" 
+ → `model_tier`, `show_cost` + - Economy (Haiku, GPT-4o Mini, Gemini Flash) — cheapest + - Standard (Sonnet, GPT-4o, Gemini 2.5 Pro) — default + - Premium (Opus, GPT-5) — most capable + - Or name a specific model from the pricing table + - Show cost: yes/no (default: no) + - If user names a specific model, map to its tier for the formula ## Detailed Path — Batch @@ -159,6 +167,7 @@ User can mark overrides or approve the whole table at once. | Definition phase | spread_multiplier | ready (1.0x) | asked | | Organization context | org_overhead | solo-startup (1.0x) | asked | | Dependencies | sequencing | none | asked | +| Model & cost | model_tier, show_cost | standard, false | asked | ## Input Formats Accepted (Batch) diff --git a/tests/test_formulas.py b/tests/test_formulas.py index aa86d62..89f118f 100644 --- a/tests/test_formulas.py +++ b/tests/test_formulas.py @@ -193,6 +193,77 @@ def estimate( } +# ── Token Estimation ────────────────────────────────────────── + +TOKENS_PER_ROUND = { + "exploratory": {"S": 8000, "M": 15000, "L": 25000, "XL": 40000}, + "partial": {"S": 6000, "M": 12000, "L": 20000, "XL": 35000}, + "mostly-automated": {"S": 5000, "M": 10000, "L": 18000, "XL": 30000}, +} + +OUTPUT_TOKEN_RATIO = { + "S": 0.25, + "M": 0.28, + "L": 0.30, + "XL": 0.35, +} + +TIER_PRICING = { + "economy": {"input": 0.50, "output": 2.50}, + "standard": {"input": 2.50, "output": 12.00}, + "premium": {"input": 5.00, "output": 25.00}, +} + + +def estimate_tokens( + complexity, + maturity="partial", + num_agents=1, + model_tier="standard", + show_cost=False, + risk_coefficient=1.3, + domain_familiarity=1.0, +): + """Step 15: Token & cost estimation.""" + base_min, base_max = BASE_ROUNDS[complexity] + rounds_min = round(base_min * risk_coefficient * domain_familiarity) + rounds_max = round(base_max * risk_coefficient * domain_familiarity) + + tpr = TOKENS_PER_ROUND[maturity][complexity] + output_ratio = OUTPUT_TOKEN_RATIO[complexity] + + total_tokens_min = rounds_min * tpr * 
num_agents + total_tokens_max = rounds_max * tpr * num_agents + + input_tokens_min = total_tokens_min * (1 - output_ratio) + input_tokens_max = total_tokens_max * (1 - output_ratio) + output_tokens_min = total_tokens_min * output_ratio + output_tokens_max = total_tokens_max * output_ratio + + token_midpoint = (total_tokens_min + total_tokens_max) / 2 + pert_expected_tokens = (total_tokens_min + 4 * token_midpoint + total_tokens_max) / 6 + + result = { + "total_tokens": {"min": total_tokens_min, "max": total_tokens_max}, + "input_tokens": {"min": input_tokens_min, "max": input_tokens_max}, + "output_tokens": {"min": output_tokens_min, "max": output_tokens_max}, + "pert_expected_tokens": pert_expected_tokens, + "model_tier": model_tier, + "cost_usd": None, + "pert_expected_cost_usd": None, + } + + if show_cost: + pricing = TIER_PRICING[model_tier] + cost_min = (input_tokens_min * pricing["input"] + output_tokens_min * pricing["output"]) / 1_000_000 + cost_max = (input_tokens_max * pricing["input"] + output_tokens_max * pricing["output"]) / 1_000_000 + pert_expected_cost = (cost_min + 4 * (cost_min + cost_max) / 2 + cost_max) / 6 + result["cost_usd"] = {"min": cost_min, "max": cost_max} + result["pert_expected_cost_usd"] = pert_expected_cost + + return result + + # ── Regression Tests ─────────────────────────────────────────── class TestCase1TrivialS(unittest.TestCase): @@ -390,5 +461,93 @@ def test_committed_ratio(self): self.assertAlmostEqual(ratio, 1.8, places=2) +class TestCase7TokenMath(unittest.TestCase): + """Case 7: Token estimation math for M coding task, partial maturity.""" + + def setUp(self): + self.t = estimate_tokens( + complexity="M", maturity="partial", num_agents=1, + model_tier="standard", show_cost=True, + ) + + def test_total_tokens_range(self): + # M partial: 12k tokens/round, rounds 10-26 + # min = 10 * 12000 = 120000, max = 26 * 12000 = 312000 + self.assertEqual(self.t["total_tokens"]["min"], 120000) + 
self.assertEqual(self.t["total_tokens"]["max"], 312000)
+
+    def test_output_ratio(self):
+        # M output ratio = 0.28
+        self.assertAlmostEqual(
+            self.t["output_tokens"]["min"] / self.t["total_tokens"]["min"], 0.28
+        )
+
+    def test_input_output_sum(self):
+        self.assertAlmostEqual(
+            self.t["input_tokens"]["min"] + self.t["output_tokens"]["min"],
+            self.t["total_tokens"]["min"],
+        )
+        self.assertAlmostEqual(
+            self.t["input_tokens"]["max"] + self.t["output_tokens"]["max"],
+            self.t["total_tokens"]["max"],
+        )
+
+    def test_pert_between_min_max(self):
+        self.assertGreaterEqual(
+            self.t["pert_expected_tokens"], self.t["total_tokens"]["min"]
+        )
+        self.assertLessEqual(
+            self.t["pert_expected_tokens"], self.t["total_tokens"]["max"]
+        )
+
+    def test_cost_present(self):
+        self.assertIsNotNone(self.t["cost_usd"])
+        self.assertIsNotNone(self.t["pert_expected_cost_usd"])
+
+    def test_cost_min_less_than_max(self):
+        self.assertLess(self.t["cost_usd"]["min"], self.t["cost_usd"]["max"])
+
+    def test_cost_math(self):
+        # Standard tier: input=$2.50/M, output=$12.00/M
+        # min: input=120000*0.72=86400, output=120000*0.28=33600
+        # cost_min = (86400*2.50 + 33600*12.00) / 1_000_000
+        expected_cost_min = (86400 * 2.50 + 33600 * 12.00) / 1_000_000
+        self.assertAlmostEqual(self.t["cost_usd"]["min"], expected_cost_min, places=4)
+
+
+class TestCase8TokenScaling(unittest.TestCase):
+    """Case 8: Token scaling — XL > L > M > S, multi-agent multiplies."""
+
+    def test_scaling_by_complexity(self):
+        sizes = ["S", "M", "L", "XL"]
+        tokens = []
+        for s in sizes:
+            t = estimate_tokens(complexity=s, maturity="partial")
+            tokens.append(t["total_tokens"]["min"])
+        for i in range(len(tokens) - 1):
+            self.assertLess(tokens[i], tokens[i + 1])
+
+    def test_multi_agent_multiplier(self):
+        t1 = estimate_tokens(complexity="M", maturity="partial", num_agents=1)
+        t3 = estimate_tokens(complexity="M", maturity="partial", num_agents=3)
+        self.assertEqual(
+            t3["total_tokens"]["min"], t1["total_tokens"]["min"] * 3
+        )
+
+    
def test_no_cost_by_default(self): + t = estimate_tokens(complexity="M", maturity="partial", show_cost=False) + self.assertIsNone(t["cost_usd"]) + self.assertIsNone(t["pert_expected_cost_usd"]) + + def test_premium_more_expensive_than_economy(self): + te = estimate_tokens( + complexity="M", maturity="partial", model_tier="economy", show_cost=True + ) + tp = estimate_tokens( + complexity="M", maturity="partial", model_tier="premium", show_cost=True + ) + self.assertLess(te["cost_usd"]["max"], tp["cost_usd"]["max"]) + + if __name__ == "__main__": unittest.main() From be889b272842a11aaff21d6107a2ce0bfc1f5a92 Mon Sep 17 00:00:00 2001 From: Enreign Date: Wed, 11 Mar 2026 20:39:33 +0100 Subject: [PATCH 2/2] Integrate token estimation into main pipeline and expand test coverage Wire estimate_tokens() into estimate() as Step 15 so the core estimator returns token_estimate in its output. Add TestCase9 for integration verification, maturity variation test, and PERT cost formula test. 44 tests now pass (was 36). Co-Authored-By: Claude Opus 4.6 --- tests/test_formulas.py | 81 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/tests/test_formulas.py b/tests/test_formulas.py index 89f118f..875101f 100644 --- a/tests/test_formulas.py +++ b/tests/test_formulas.py @@ -89,8 +89,10 @@ def estimate( confidence_level=80, definition_phase="ready", org_size="solo-startup", + model_tier="standard", + show_cost=False, ): - """Run the full 14-step estimation pipeline. Returns dict with all fields.""" + """Run the full 15-step estimation pipeline. 
Returns dict with all fields.""" # Step 1: Agent Rounds base_min, base_max = BASE_ROUNDS[complexity] @@ -167,6 +169,17 @@ def estimate( human_time_min = human_time_min / num_humans * (1 + communication_overhead) human_time_max = human_time_max / num_humans * (1 + communication_overhead) + # Step 15: Token & Cost Estimation + token_est = estimate_tokens( + complexity=complexity, + maturity=maturity, + num_agents=num_agents, + model_tier=model_tier, + show_cost=show_cost, + risk_coefficient=risk_coefficient, + domain_familiarity=domain_familiarity, + ) + return { "complexity": complexity, "task_type": task_type, @@ -190,6 +203,7 @@ def estimate( "committed_hours": {"min": committed_min / 60, "max": committed_max / 60}, "spread_multiplier": spread, "definition_phase": definition_phase, + "token_estimate": token_est, } @@ -548,6 +562,71 @@ def test_premium_more_expensive_than_economy(self): ) self.assertLess(te["cost_usd"]["max"], tp["cost_usd"]["max"]) + def test_maturity_variation(self): + """Exploratory should produce more tokens than mostly-automated.""" + t_exp = estimate_tokens(complexity="M", maturity="exploratory") + t_auto = estimate_tokens(complexity="M", maturity="mostly-automated") + self.assertGreater( + t_exp["total_tokens"]["min"], t_auto["total_tokens"]["min"] + ) + self.assertGreater( + t_exp["total_tokens"]["max"], t_auto["total_tokens"]["max"] + ) + + def test_pert_expected_cost_math(self): + """PERT expected cost = (cost_min + 4*midpoint + cost_max) / 6.""" + t = estimate_tokens( + complexity="M", maturity="partial", model_tier="standard", show_cost=True + ) + cost_min = t["cost_usd"]["min"] + cost_max = t["cost_usd"]["max"] + expected_pert_cost = (cost_min + 4 * (cost_min + cost_max) / 2 + cost_max) / 6 + self.assertAlmostEqual( + t["pert_expected_cost_usd"], expected_pert_cost, places=6 + ) + + +class TestCase9TokenIntegration(unittest.TestCase): + """Case 9: Token estimate is integrated into the main estimate() pipeline.""" + + def setUp(self): + 
self.r = estimate( + complexity="M", task_type="coding", maturity="partial", + model_tier="standard", show_cost=True, + ) + + def test_token_estimate_present(self): + self.assertIn("token_estimate", self.r) + + def test_token_estimate_structure(self): + te = self.r["token_estimate"] + self.assertIn("total_tokens", te) + self.assertIn("input_tokens", te) + self.assertIn("output_tokens", te) + self.assertIn("pert_expected_tokens", te) + self.assertIn("model_tier", te) + self.assertIn("cost_usd", te) + self.assertIn("pert_expected_cost_usd", te) + + def test_token_estimate_uses_same_rounds(self): + """Token estimate should use the same adjusted rounds as the main estimate.""" + te = self.r["token_estimate"] + rounds_min = self.r["agent_rounds"]["min"] + rounds_max = self.r["agent_rounds"]["max"] + tpr = TOKENS_PER_ROUND["partial"]["M"] + self.assertEqual(te["total_tokens"]["min"], rounds_min * tpr) + self.assertEqual(te["total_tokens"]["max"], rounds_max * tpr) + + def test_model_tier_passthrough(self): + self.assertEqual(self.r["token_estimate"]["model_tier"], "standard") + + def test_cost_present_when_show_cost(self): + self.assertIsNotNone(self.r["token_estimate"]["cost_usd"]) + + def test_cost_absent_when_not_show_cost(self): + r = estimate(complexity="M", show_cost=False) + self.assertIsNone(r["token_estimate"]["cost_usd"]) + if __name__ == "__main__": unittest.main()