Merged
1 change: 1 addition & 0 deletions _quarto.yml
@@ -31,6 +31,7 @@ book:
- chapters/06-your-first-agent.qmd
- chapters/05-procedures-and-outputs.qmd
- chapters/07-tools.qmd
- chapters/07a-models.qmd
- chapters/08-state-management.qmd
- chapters/09-the-agent-loop.qmd
- chapters/10-human-in-the-loop.qmd
2 changes: 1 addition & 1 deletion chapters/02-transparent-durability.qmd
@@ -45,7 +45,7 @@ Tactus automatically checkpoints these operations:
| `Human.approve()` | The approval request and human's response |
| `Human.input()` | The input request and human's response |
| `Procedure.run()` | Sub-procedure inputs, outputs, and state |
| `Model.predict()` | ML model inputs and outputs |
| `Model("sentiment")({text = "..."})` | Model inputs and outputs |

State changes (`state.*` assignment, `State.increment`) and log entries are also tracked, ensuring consistent replay.

82 changes: 82 additions & 0 deletions chapters/07a-models.qmd
@@ -0,0 +1,82 @@
# Predictable Inference with Models

Agents are great when you want a conversation: multi-turn reasoning, tool use, and flexible behavior.

But sometimes you want something else:

- A single, schema-defined input
- A single, schema-defined output
- No tool calls
- A deterministic, testable control-flow story in *your* code

That is what the **Model primitive** is for.

## Model vs Agent (Quick Rule of Thumb)

Use an **Agent** when you want a conversational loop (LLM turns, tools, planning).

Use a **Model** when you want *inference* (classification, extraction, scoring) that plugs into your procedure like a function call.

In practice, a great pattern is:

1) Use a Model to make a fast, predictable decision (or score).
2) Use an Agent only for the harder cases (low confidence, missing info, complex reasoning).
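A sketch of that routing in Tactus-style Lua, reusing the `imdb_nb` classifier from the example file in this chapter. The confidence threshold and the agent hand-off are illustrative choices, not a fixed API; agents themselves are covered in earlier chapters:

```lua
-- Sketch: decide with a cheap Model prediction first; escalate only when unsure.
local classifier = Model("imdb_nb")
local result = classifier({text = input.text})
local output = result.output or result

if output.confidence ~= nil and output.confidence >= 0.7 then
  -- Confident prediction: decide directly, no agent involved.
  return {decision = output.label == "positive" and "yes" or "no"}
else
  -- Low confidence: this is where you would hand off to an Agent
  -- for the harder, conversational path (omitted here).
  return {decision = "review"}
end
```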

## A Trainable (and Mockable) Model Example

This example is deliberately simple: a sentiment classifier trained on IMDB movie reviews that drives a small piece of business logic:

- confident positive => `decision = "yes"`
- confident negative => `decision = "no"`
- low confidence => `decision = "review"`

The important part is the *testing story*:

- In CI (and when learning), we run in **mock** mode for deterministic behavior.
- When you want to see the real thing, you **train** the model and run against the **registry**.

```lua {file="code/chapter-07a/10-model-naive-bayes-imdb.tac" show-path="true"}
```

## Run It (Mock Mode)

Mock mode never calls external services. It uses the `Mocks { ... }` block in the same file.
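The shape of that block, trimmed here to a single canned response (copied from the example file), maps a model name to conditional mock responses:

```lua
Mocks {
  imdb_nb = {
    conditional = {
      {when = {text = "A wonderful movie with great acting."},
       returns = {label = "positive", confidence = 0.92}}
    }
  }
}
```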

```bash
tactus test code/chapter-07a/10-model-naive-bayes-imdb.tac --mock
```

You should see all three scenarios pass.

## Train It (Real Model)

Training requires extra dependencies (Hugging Face `datasets` and scikit-learn). Install them with:

```bash
pip install "tactus[ml]"
```

Then train the model declared in the file:

```bash
tactus train code/chapter-07a/10-model-naive-bayes-imdb.tac --model imdb_nb
```

Training writes a versioned artifact to the registry under the model's `name` (here: `imdb_nb`). Runtime reads from the registry when you call `Model("imdb_nb")`.
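If you need reproducible runs, you can pin a version instead of tracking `"latest"`. A minimal runtime-only `Model` block (same fields as the example file; the idea of a pinned version string is an assumption about the registry, and no concrete version format is implied):

```lua
Model "imdb_nb" {
  type = "registry",
  name = "imdb_nb",
  -- "latest" follows the newest trained artifact; pinning a specific
  -- registry version instead keeps every run on the same artifact.
  version = "latest",
  input = { text = "string" },
  output = { label = "string", confidence = "float" }
}
```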

After training, run the real (non-mocked) test:

```bash
tactus test code/chapter-07a/10-model-naive-bayes-imdb.tac
```

## Evaluate It (Optional)

If you want metrics (accuracy/precision/recall/F1) on the test split declared in `Model.training.data`:

```bash
tactus models evaluate code/chapter-07a/10-model-naive-bayes-imdb.tac --model imdb_nb
```

This does not re-train; it evaluates a registry-backed model version against the declared test data.

122 changes: 122 additions & 0 deletions code/chapter-07a/10-model-naive-bayes-imdb.tac
@@ -0,0 +1,122 @@
-- Example: Train + run a registry-backed Naive Bayes sentiment classifier (IMDB).
--
-- Train a real model (requires tactus[ml]):
-- tactus train code/chapter-07a/10-model-naive-bayes-imdb.tac --model imdb_nb
--
-- Test your procedure logic only (deterministic, CI-safe):
-- tactus test code/chapter-07a/10-model-naive-bayes-imdb.tac --mock
--
-- After training, you can run the "real" test (loads from the registry):
-- tactus test code/chapter-07a/10-model-naive-bayes-imdb.tac

Model "imdb_nb" {
  type = "registry",
  name = "imdb_nb",
  version = "latest",
  input = { text = "string" },
  output = { label = "string", confidence = "float" },

  -- Training config lives alongside runtime config.
  -- The runtime reads from the registry; training writes to the registry.
  training = {
    data = {
      source = "hf",
      name = "imdb",
      train = "train",
      test = "test",
      shuffle = { train = true, test = true },
      limit = { train = 5000, test = 5000 },
      seed = 42,
      text_field = "text",
      label_field = "label"
    },
    candidates = {
      {
        name = "nb-tfidf",
        trainer = "naive_bayes",
        hyperparameters = {
          alpha = 1.0,
          max_features = 50000,
          ngram_min = 1,
          ngram_max = 2
        }
      }
    }
  }
}

Procedure {
  input = {
    text = field.string{required = true}
  },
  output = {
    label = field.string{required = true},
    confidence = field.number{required = false},
    decision = field.string{required = true}
  },
  function(input)
    -- At runtime, this fetches a trained artifact from the registry.
    local classifier = Model("imdb_nb")
    local result = classifier({text = input.text})
    local output = result.output or result

    -- Example "business logic" driven by the model's prediction:
    --   high-confidence positive => yes
    --   high-confidence negative => no
    --   low confidence           => review
    local decision = "review"
    if output.confidence ~= nil and output.confidence >= 0.7 then
      if output.label == "positive" then
        decision = "yes"
      else
        decision = "no"
      end
    end

    return {
      label = output.label,
      confidence = output.confidence,
      decision = decision
    }
  end
}

-- Mocked model responses for deterministic specs.
-- Run mocked: tactus test code/chapter-07a/10-model-naive-bayes-imdb.tac --mock
Mocks {
  imdb_nb = {
    conditional = {
      {when = {text = "A wonderful movie with great acting."}, returns = {label = "positive", confidence = 0.92}},
      {when = {text = "This was a terrible movie with bad acting."}, returns = {label = "negative", confidence = 0.87}},
      {when = {text = "A confusing movie with uneven pacing."}, returns = {label = "positive", confidence = 0.42}}
    }
  }
}

Specification([[
Feature: Model primitive (mocked + trainable)

  Scenario: Positive review routes to yes
    Given the procedure has started
    And the input text is "A wonderful movie with great acting."
    When the procedure runs
    Then the output decision should be "yes"
    And the output label should be "positive"
    And the procedure should complete successfully

  Scenario: Negative review routes to no
    Given the procedure has started
    And the input text is "This was a terrible movie with bad acting."
    When the procedure runs
    Then the output decision should be "no"
    And the output label should be "negative"
    And the procedure should complete successfully

  Scenario: Low confidence routes to review
    Given the procedure has started
    And the input text is "A confusing movie with uneven pacing."
    When the procedure runs
    Then the output decision should be "review"
    And the output confidence should exist
    And the procedure should complete successfully
]])