diff --git a/benchmarks/kaggle/pack_ternary_bonsai/kernel-metadata.json b/benchmarks/kaggle/pack_ternary_bonsai/kernel-metadata.json new file mode 100644 index 0000000..d341a6c --- /dev/null +++ b/benchmarks/kaggle/pack_ternary_bonsai/kernel-metadata.json @@ -0,0 +1,13 @@ +{ + "id": "superkaiii/pack-ternary-bonsai-4b-tq1_0", + "title": "Pack Ternary-Bonsai-4B TQ1_0", + "code_file": "pack_ternary_bonsai.py", + "language": "python", + "kernel_type": "script", + "is_private": true, + "enable_gpu": false, + "enable_internet": true, + "competition_sources": [], + "dataset_sources": ["superkaiii/hf-token-private"], + "kernel_sources": [] +} diff --git a/benchmarks/kaggle/pack_ternary_bonsai/pack_ternary_bonsai.py b/benchmarks/kaggle/pack_ternary_bonsai/pack_ternary_bonsai.py new file mode 100644 index 0000000..ad48c9e --- /dev/null +++ b/benchmarks/kaggle/pack_ternary_bonsai/pack_ternary_bonsai.py @@ -0,0 +1,197 @@ +"""Kaggle kernel: re-quantize Ternary-Bonsai-4B from unpacked FP16 to TQ1_0 GGUF. + +Why here: the conversion needs ~17 GB peak disk + a llama.cpp build. Doing it on +Kaggle keeps it off the dev machine and publishes the result to HF Hub so every +GraphStore dev pulls the same artifact. + +Inputs (Kaggle kernel config): + - dataset_sources: ["superkaiii/hf-token-private"] (provides HF_TOKEN file) + - enable_internet: true + - enable_gpu: false (conversion is CPU-bound) + +Flow: + 1. Read HF write token from /kaggle/input/hf-token-private/HF_TOKEN + 2. Download prism-ml/Ternary-Bonsai-4B-unpacked (FP16 safetensors, ~8 GB) + 3. Clone + build llama.cpp llama-quantize + 4. convert_hf_to_gguf.py -> F16 GGUF (~8 GB) + 5. llama-quantize TQ1_0 -> ~1 GB GGUF, the pack step + 6. Upload result to superkaiii/Ternary-Bonsai-4B-TQ1_0-GGUF on HF Hub + 7. Delete intermediates so peak disk stays under 20 GB Kaggle quota + +Output (Kaggle kernel): + /kaggle/working/Ternary-Bonsai-4B-TQ1_0.gguf (also on HF Hub after upload) + /kaggle/working/pack_report.json (sizes + checksums) +""" +from __future__ import annotations + +import hashlib +import json +import os +import shutil +import subprocess +from pathlib import Path + + +HF_REPO = "superkaiii/Ternary-Bonsai-4B-TQ1_0-GGUF" +SOURCE_REPO = "prism-ml/Ternary-Bonsai-4B-unpacked" +TARGET_QUANT = "TQ1_0" +OUTPUT_NAME = "Ternary-Bonsai-4B-TQ1_0.gguf" + +SRC_DIR = Path("/kaggle/tmp/bonsai-src") +LC_DIR = Path("/kaggle/tmp/llama.cpp") +F16_GGUF = Path("/kaggle/tmp/bonsai-4b-f16.gguf") +WORKING = Path("/kaggle/working") +OUT_GGUF = WORKING / OUTPUT_NAME +REPORT = WORKING / "pack_report.json" + + +def run(cmd: list[str] | str, **kw) -> None: + """Shell command with live output. Raises on nonzero exit.""" + shell = isinstance(cmd, str) + print(f"$ {cmd if shell else ' '.join(cmd)}", flush=True) + subprocess.run(cmd, check=True, shell=shell, **kw) + + +def sha256(path: Path) -> str: + h = hashlib.sha256() + with path.open("rb") as f: + for chunk in iter(lambda: f.read(1 << 20), b""): + h.update(chunk) + return h.hexdigest() + + +def load_hf_token() -> str: + """Find HF token matching the kaggle_benchmark.py pattern.""" + # Primary: same path the working kernel uses. + primary = Path("/kaggle/input/hf-token-private/hf_token.txt") + if primary.exists(): + return primary.read_text().strip() + # Fallback: glob recursively in case Kaggle mounts at a nested path. 
+ for name in ("hf_token.txt", "HF_TOKEN", "token"): + hits = list(Path("/kaggle/input").rglob(name)) + if hits: + return hits[0].read_text().strip() + raise RuntimeError("HF token not found under /kaggle/input/") + + +def main() -> None: + WORKING.mkdir(parents=True, exist_ok=True) + SRC_DIR.mkdir(parents=True, exist_ok=True) + + hf_token = load_hf_token() + os.environ["HF_TOKEN"] = hf_token + os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token + + print("[1/6] pip deps") + run("pip install -q 'huggingface_hub>=0.24' 'transformers>=4.44' 'safetensors>=0.4' 'torch>=2.3' 'sentencepiece' 'protobuf'") + + print("[2/6] download source F16 safetensors (Python API, not CLI)") + from huggingface_hub import snapshot_download + snapshot_download( + repo_id=SOURCE_REPO, + local_dir=str(SRC_DIR), + token=hf_token, + allow_patterns=["*.safetensors", "*.json", "*.model", "tokenizer*"], + ) + + print("[3/6] clone + build llama.cpp llama-quantize target") + if not LC_DIR.exists(): + run(["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp", str(LC_DIR)]) + run(f"cd {LC_DIR} && cmake -B build -DLLAMA_BUILD_SERVER=OFF -DLLAMA_BUILD_EXAMPLES=ON -DLLAMA_BUILD_TESTS=OFF && cmake --build build --target llama-quantize -j") + + print("[4/6] convert safetensors -> F16 GGUF") + run([ + "python", str(LC_DIR / "convert_hf_to_gguf.py"), + str(SRC_DIR), + "--outfile", str(F16_GGUF), + "--outtype", "f16", + ]) + + print("[5/6] quantize F16 -> TQ1_0 (the pack step)") + run([ + str(LC_DIR / "build" / "bin" / "llama-quantize"), + str(F16_GGUF), + str(OUT_GGUF), + TARGET_QUANT, + ]) + + # Drop intermediates aggressively to stay under the Kaggle 20 GB working quota. + if F16_GGUF.exists(): + F16_GGUF.unlink() + shutil.rmtree(SRC_DIR, ignore_errors=True) + + print("[6/6] upload to HF Hub") + from huggingface_hub import HfApi + + api = HfApi(token=hf_token) + api.create_repo(HF_REPO, repo_type="model", exist_ok=True, private=False) + + readme = WORKING / "README.md" + readme.write_text(_readme()) + + api.upload_file( + path_or_fileobj=str(OUT_GGUF), + path_in_repo=OUTPUT_NAME, + repo_id=HF_REPO, + repo_type="model", + ) + api.upload_file( + path_or_fileobj=str(readme), + path_in_repo="README.md", + repo_id=HF_REPO, + repo_type="model", + ) + + report = { + "source_repo": SOURCE_REPO, + "target_repo": HF_REPO, + "quant": TARGET_QUANT, + "output_bytes": OUT_GGUF.stat().st_size, + "output_sha256": sha256(OUT_GGUF), + } + REPORT.write_text(json.dumps(report, indent=2)) + print(f"\nDONE\n local: {OUT_GGUF} ({OUT_GGUF.stat().st_size / 1e9:.2f} GB)") + print(f" hub: https://huggingface.co/{HF_REPO}") + print(f" report: {REPORT}") + + +def _readme() -> str: + return f"""--- +library_name: gguf +base_model: {SOURCE_REPO} +quantization_config: + method: {TARGET_QUANT} +tags: + - gguf + - ternary + - bonsai + - llama.cpp +--- + +# Ternary-Bonsai-4B TQ1_0 + +Repack of [{SOURCE_REPO}](https://huggingface.co/{SOURCE_REPO}) to llama.cpp's native ternary format ({TARGET_QUANT}). + +Ternary-Bonsai weights are trained as {{-1, 0, +1}}. {TARGET_QUANT} packs these losslessly at 1.6875 bits per weight with a single FP16 scale per group of 256 weights. Unlike Q2_K (which k-means clusters generic floats), {TARGET_QUANT} preserves the original trained weight values exactly. 
+ +## Quickstart + +```python +from llama_cpp import Llama +m = Llama(model_path="{OUTPUT_NAME}", n_ctx=4096) +``` + +## Conversion pipeline + +``` +prism-ml/Ternary-Bonsai-4B-unpacked (F16 safetensors) + -> convert_hf_to_gguf.py --outtype f16 + -> llama-quantize ... {TARGET_QUANT} +``` + +Built on Kaggle with ggerganov/llama.cpp master. +""" + + +if __name__ == "__main__": + main() diff --git a/src/graphstore/bonsai_ingestor.py b/src/graphstore/bonsai_ingestor.py new file mode 100644 index 0000000..f179ec8 --- /dev/null +++ b/src/graphstore/bonsai_ingestor.py @@ -0,0 +1,579 @@ +"""Natural-language to DSL ingestor backed by a local llama.cpp GGUF. + +Target model: Ternary-Bonsai 4B TQ1_0 (CPU + RAM only, offline). + +Download the model once before first use (models/ is gitignored): + + mkdir -p models/Ternary-Bonsai-4B-TQ1_0 && curl -L -o \\ + models/Ternary-Bonsai-4B-TQ1_0/Ternary-Bonsai-4B-TQ1_0.gguf \\ + https://huggingface.co/superkaiii/Ternary-Bonsai-4B-TQ1_0-GGUF/resolve/main/Ternary-Bonsai-4B-TQ1_0.gguf + +Publication pipeline: benchmarks/kaggle/pack_ternary_bonsai/ converts +prism-ml/Ternary-Bonsai-4B-unpacked (FP16) to TQ1_0 via a Kaggle kernel and +publishes the result to superkaiii/Ternary-Bonsai-4B-TQ1_0-GGUF on HF. + +The public surface is `BonsaiIngestor`. Every call: + 1. Loads the skill prompt once, fingerprints it, pins that fingerprint into + the system prompt so KV cache stays coherent across calls. When the skill + file on disk changes, the next call naturally forces a warm re-process + because the system-prompt prefix now differs. + 2. Serializes access to the `Llama` instance with a lock - llama-cpp-python + is NOT thread-safe; concurrent calls corrupt the KV cache / segfault. + 3. Checks the combined prompt + output budget against `n_ctx` and resets the + cache if the request would force a head-trim (which silently evicts the + skill prefix and causes quality collapse). + 4. Treats empty / -only outputs as ingestion errors, not as silent + no-ops (the behaviour-analysis audit called this out - silent failures + hid real bugs). + 5. Dedupes UPSERT NODE by id before handing the DSL to the parser, so a + BatchRollback from a duplicated entity never takes out an otherwise + valid batch. + 6. Emits one structured log line per ingest with counts for every + category: statements emitted, parsed, rejected, executed, duration. + 7. Supports `dry_run=True` to generate the DSL and return it without + touching the GraphStore - used for testing, previewing, and building + training data. + +It also tracks cross-message belief state. After every successful ingest, the +executed ASSERT / RETRACT lines are scraped into a running `fact_id -> FactState` +dict. The next ingest injects the live (non-retracted) facts into the USER +message (not the system prompt - see below), so the model sees prior fact ids +and reuses them when updating the same concept. Without this, the model coins +a new fact_id per message and the graph ends up with multiple beliefs for the +same underlying concept. + +The known-facts block goes in the USER message on purpose: the system prompt +stays byte-identical across calls, which keeps llama.cpp's prefix-match KV +cache warm. If we appended facts to the system prompt, every call would be a +cold one. + +Not handled here: + - Streaming. Generation is blocking. Callers can run it in a thread. + - Multi-user / multi-tenant. Use one BonsaiIngestor per user. + - Model swap. Create a new BonsaiIngestor; don't mutate this one's paths. 
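Typical call shape for the surface described above, as a minimal sketch (the model path matches the download snippet at the top of this docstring; the input sentence is illustrative, and `dry_run=True` means no GraphStore is needed):

```python
from graphstore.bonsai_ingestor import BonsaiIngestor

ing = BonsaiIngestor(
    model_path="models/Ternary-Bonsai-4B-TQ1_0/Ternary-Bonsai-4B-TQ1_0.gguf",
)
result = ing.ingest("Kailash joined OpenAI as DB engineer.", dry_run=True)
for stmt in result.statements:
    print(stmt)                        # generated, parse-checked DSL
for stmt, reason in result.rejected:
    print("rejected:", stmt, reason)   # parse failures and duplicate UPSERTs
```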
+""" +from __future__ import annotations + +import hashlib +import logging +import re +import threading +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +_log = logging.getLogger(__name__) + + +# -------------------------------------------------------------------- +# Errors +# -------------------------------------------------------------------- + +class BonsaiError(Exception): + """Base class for ingestor errors.""" + + +class IngestEmpty(BonsaiError): + """LLM returned empty or -only output. No DSL to execute.""" + + +class IngestOverflow(BonsaiError): + """Requested prompt + output would exceed n_ctx even after a cache reset.""" + + +# -------------------------------------------------------------------- +# Result +# -------------------------------------------------------------------- + +@dataclass +class FactState: + """One live belief tracked across messages within this ingestor. + + Updated by `_scrape_belief_updates` after every successful ingest and + rendered back into the next ingest's user message by + `_render_known_facts_block`, so the model reuses the same fact_id for the + same concept instead of coining a new one each call. + """ + + fact_id: str + kind: str = "" + value: str = "" + confidence: float = 1.0 + source: str = "" + retracted: bool = False + retract_reason: str = "" + + +@dataclass +class IngestResult: + """Everything an ingest call produced, for inspection and tracing.""" + + statements: list[str] = field(default_factory=list) + executed: int = 0 + rejected: list[tuple[str, str]] = field(default_factory=list) + entities_new: list[str] = field(default_factory=list) + beliefs_changed: list[tuple[str, str]] = field(default_factory=list) + duration_ms: int = 0 + raw_output: str = "" + skill_fingerprint: str = "" + dry_run: bool = False + + +# -------------------------------------------------------------------- +# Post-processing helpers +# -------------------------------------------------------------------- + +_THINK_RE = re.compile(r".*?", re.DOTALL | re.IGNORECASE) +_FENCE_RE = re.compile(r"^```[a-zA-Z]*\s*$|^```\s*$") +_UPSERT_RE = re.compile(r'^\s*UPSERT\s+NODE\s+"([^"\\]+(?:\\.[^"\\]*)*)"', re.IGNORECASE) +_ASSERT_LINE_RE = re.compile( + r'^\s*ASSERT\s+"([^"\\]+(?:\\.[^"\\]*)*)"\s+(.*)$', + re.IGNORECASE, +) +_ASSERT_RE = re.compile(r'^\s*ASSERT\s+"([^"\\]+(?:\\.[^"\\]*)*)"', re.IGNORECASE) +_RETRACT_RE = re.compile( + r'^\s*RETRACT\s+"([^"\\]+(?:\\.[^"\\]*)*)"(?:\s+REASON\s+"([^"\\]*(?:\\.[^"\\]*)*)")?\s*$', + re.IGNORECASE, +) +_CREATE_NODE_RE = re.compile(r'^\s*CREATE\s+NODE\s+"([^"\\]+(?:\\.[^"\\]*)*)"', re.IGNORECASE) +_ENT_FROM_ID_RE = re.compile(r'"(ent:[^"\\]+)"') +_KV_VALUE_RE = re.compile(r'value\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"', re.IGNORECASE) +_KV_KIND_RE = re.compile(r'kind\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"', re.IGNORECASE) +_KV_CONFIDENCE_RE = re.compile(r'CONFIDENCE\s+([0-9.]+)', re.IGNORECASE) +_KV_SOURCE_RE = re.compile(r'SOURCE\s+"([^"\\]*(?:\\.[^"\\]*)*)"', re.IGNORECASE) + + +def _strip_think(raw: str) -> str: + """Remove reasoning-model `...` wrappers.""" + return _THINK_RE.sub("", raw).strip() + + +def _split_lines(cleaned: str) -> list[str]: + """One statement per line. 
Drop markdown fences and blanks.""" + out: list[str] = [] + for raw_ln in cleaned.splitlines(): + ln = raw_ln.strip() + if not ln or _FENCE_RE.match(ln): + continue + out.append(ln) + return out + + +def _dedupe_upserts(stmts: list[str]) -> tuple[list[str], list[tuple[str, str]]]: + """Keep the first UPSERT per entity id. Drop later duplicates. + + Duplicate UPSERT NODE with the same id inside a BEGIN/COMMIT batch makes + the rollback snapshot logic trip over itself - entire batch errors out. + Pre-filter here so the transaction never sees the duplicate. + """ + seen: set[str] = set() + kept: list[str] = [] + dropped: list[tuple[str, str]] = [] + for ln in stmts: + m = _UPSERT_RE.match(ln) + if m: + eid = m.group(1) + if eid in seen: + dropped.append((ln, f"duplicate UPSERT of {eid!r}")) + continue + seen.add(eid) + kept.append(ln) + return kept, dropped + + +def _scrape_belief_updates( + executed_lines: list[str], + facts: dict[str, FactState], +) -> None: + """Walk successfully-executed DSL lines and update running fact state. + + Only ASSERT / RETRACT contribute. Other writes ignored. Mutates `facts` + in place. + """ + for line in executed_lines: + m = _ASSERT_LINE_RE.match(line) + if m: + fact_id = m.group(1) + rest = m.group(2) + st = facts.get(fact_id) or FactState(fact_id=fact_id) + km = _KV_KIND_RE.search(rest) + if km: + st.kind = km.group(1) + vm = _KV_VALUE_RE.search(rest) + if vm: + st.value = vm.group(1) + cm = _KV_CONFIDENCE_RE.search(rest) + if cm: + try: + st.confidence = float(cm.group(1)) + except ValueError: + pass + sm = _KV_SOURCE_RE.search(rest) + if sm: + st.source = sm.group(1) + st.retracted = False + st.retract_reason = "" + facts[fact_id] = st + continue + m = _RETRACT_RE.match(line) + if m: + fact_id = m.group(1) + reason = m.group(2) or "" + st = facts.get(fact_id) or FactState(fact_id=fact_id) + st.retracted = True + st.retract_reason = reason + facts[fact_id] = st + + +def _render_known_facts_block(facts: dict[str, FactState], max_facts: int = 40) -> str: + """Format non-retracted facts into a block the LLM reads before the input. + + Placed in the USER message (not the system prompt) to keep the system + prompt byte-identical across calls for llama.cpp KV-cache prefix reuse. + + Retracted facts are hidden so the model cannot re-assert a superseded + belief. If the dict grows beyond `max_facts`, the most-recently-touched + entries win (dict preserves insertion order since Python 3.7). + """ + alive = [f for f in facts.values() if not f.retracted] + if not alive: + return "" + alive = alive[-max_facts:] + lines = [ + "### KNOWN FACTS (reuse these fact_ids; emit RETRACT + ASSERT to update)", + ] + for f in alive: + bits = [f'[{f.fact_id}]'] + if f.kind: + bits.append(f'kind="{f.kind}"') + if f.value: + bits.append(f'value="{f.value}"') + bits.append(f'confidence={f.confidence:.2f}') + if f.source: + bits.append(f'source="{f.source}"') + lines.append(" ".join(bits)) + return "\n".join(lines) + + +# -------------------------------------------------------------------- +# Ingestor +# -------------------------------------------------------------------- + +_DEFAULT_SKILL_PATH = ( + Path(__file__).resolve().parent.parent.parent + / "tools" / "skills" / "graphstore-bonsai-dsl" / "SKILL.md" +) + + +class BonsaiIngestor: + """NL -> DSL via a local llama.cpp GGUF, with correctness guards. + + Parameters + ---------- + model_path : str | Path + Path to a .gguf file. The matching manifest under the same directory + is not required; this class talks to llama.cpp directly. 
+ gs : GraphStore | None + Target store. Required for non-dry-run ingests. Dry-runs don't need one. + skill_path : str | Path | None + Prompt file. Defaults to tools/skills/graphstore-bonsai-dsl/SKILL.md. + n_ctx : int + Context window. 2048 is enough for ~500-token skill + 200-token user + message + 400-token output + headroom. + n_threads : int | None + Physical core count works best on memory-bandwidth-bound CPU inference. + Defaults to os.cpu_count() // 2 via llama.cpp. + chat_format : str + Matches the GGUF. Qwen3-based Bonsai works with 'qwen'. + max_output_tokens : int + Hard cap per call. Generation stops either here or at the model's + natural stop. + temperature : float + Default 0.0 for reproducible DSL. + """ + + # Headroom we leave between (prompt + output) and n_ctx. Below this we + # force a reset so llama.cpp never auto-evicts the skill prefix. + _CTX_HEADROOM = 128 + + def __init__( + self, + model_path: str | Path, + *, + gs: Any | None = None, + skill_path: str | Path | None = None, + n_ctx: int = 2048, + n_threads: int | None = None, + chat_format: str = "qwen", + max_output_tokens: int = 400, + temperature: float = 0.0, + ) -> None: + self._model_path = Path(model_path) + if not self._model_path.exists(): + raise FileNotFoundError(f"bonsai model not found: {self._model_path}") + self._gs = gs + self._skill_path = Path(skill_path) if skill_path else _DEFAULT_SKILL_PATH + self._n_ctx = n_ctx + self._max_output_tokens = max_output_tokens + self._temperature = temperature + self._chat_format = chat_format + self._n_threads = n_threads + + self._skill_text = "" + self._skill_fingerprint = "" + self._system_prompt = "" + self._reload_skill() + + self._llm: Any | None = None + self._lock = threading.Lock() + + # Cross-message belief tracking. fact_id -> FactState. Fed into the + # user message of the next ingest so the model reuses ids. + self._facts: dict[str, FactState] = {} + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + def _reload_skill(self) -> None: + """Read skill from disk, compute fingerprint, pin into system prompt. + + Pinning the fingerprint into the prompt means if the file changes on + disk the system-prompt prefix changes too, which naturally invalidates + the llama.cpp prefix-match KV cache without us having to call reset. + """ + if not self._skill_path.exists(): + raise FileNotFoundError(f"bonsai skill not found: {self._skill_path}") + body = self._skill_path.read_text() + if body.startswith("---"): + _, _, body = body.partition("---") + _, _, body = body.partition("---") + body = body.strip() + self._skill_text = body + self._skill_fingerprint = hashlib.sha256(body.encode()).hexdigest()[:12] + self._system_prompt = f"# skill-sha256={self._skill_fingerprint}\n\n{body}" + + def _ensure_llm(self) -> Any: + """Lazy-load the Llama instance on first use.""" + if self._llm is not None: + return self._llm + from llama_cpp import Llama + kwargs: dict[str, Any] = { + "model_path": str(self._model_path), + "n_ctx": self._n_ctx, + "chat_format": self._chat_format, + "verbose": False, + } + if self._n_threads is not None: + kwargs["n_threads"] = self._n_threads + _log.info( + "bonsai: loading %s n_ctx=%d threads=%s chat_format=%s", + self._model_path.name, self._n_ctx, self._n_threads, self._chat_format, + ) + self._llm = Llama(**kwargs) + return self._llm + + def reset(self) -> None: + """Drop the Llama instance so the next call reloads from scratch. 
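A small sketch of the invalidation mechanic that `_reload_skill` and `_ensure_llm` rely on (the skill body below is illustrative; the hashing and header format mirror `_reload_skill`):

```python
import hashlib

body = "Convert one user message into GraphStore DSL."   # illustrative skill body
fp = hashlib.sha256(body.encode()).hexdigest()[:12]
system_prompt = f"# skill-sha256={fp}\n\n{body}"

# Editing the skill changes fp, so the first bytes of the system prompt
# change too: llama.cpp's prefix-match KV cache misses and the next ingest
# re-processes the skill with no explicit reset() needed.
edited = body + " Emit each entity once per message."
assert hashlib.sha256(edited.encode()).hexdigest()[:12] != fp
```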
+ + Use when the skill file changed and you want to force a cold start, + or when KV state is suspected corrupt (e.g. after a thread crash). + Automatic: only called from internal guards. + """ + with self._lock: + self._llm = None + + # ------------------------------------------------------------------ + # Observability + # ------------------------------------------------------------------ + + @property + def skill_fingerprint(self) -> str: + """12-hex-char sha256 prefix of the loaded skill text. Stable across + processes for the same skill bytes. Emitted in every ingest log line.""" + return self._skill_fingerprint + + @property + def facts(self) -> dict[str, FactState]: + """Live fact state accumulated from successful ingests. Read-only view. + + Use reset_facts() to clear. Dry-run ingests don't update this. + """ + return dict(self._facts) + + def reset_facts(self) -> None: + """Clear the running fact state so the next ingest starts fresh.""" + with self._lock: + self._facts.clear() + + # ------------------------------------------------------------------ + # Ingest + # ------------------------------------------------------------------ + + def warmup(self) -> None: + """Process the system prompt once so the skill-prefix KV is warm. + + Optional - the first real ingest pays this cost anyway. Separate + because long-running daemons want the cost up-front, not on the + first user-facing request. + """ + llm = self._ensure_llm() + with self._lock: + llm.create_chat_completion( + messages=[ + {"role": "system", "content": self._system_prompt}, + {"role": "user", "content": "ready"}, + ], + max_tokens=1, + temperature=0.0, + ) + + def ingest(self, text: str, *, dry_run: bool = False) -> IngestResult: + """Convert `text` to DSL statements and (optionally) execute them. + + `dry_run=True` returns the DSL without touching the store - useful + for previewing or building training data without committing. + """ + if not text or not text.strip(): + raise IngestEmpty("input text is empty or whitespace-only") + if not dry_run and self._gs is None: + raise ValueError("ingest requires a GraphStore (pass gs=...) or dry_run=True") + + self._reload_skill() + with self._lock: + return self._ingest_locked(text, dry_run=dry_run) + + def _ingest_locked(self, text: str, *, dry_run: bool) -> IngestResult: + t0 = time.perf_counter() + llm = self._ensure_llm() + + # Compose the user message: prior facts block + user text. Keeps the + # system prompt byte-identical so the skill stays KV-cache warm. 
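A concrete sketch of the round trip this comment describes, using the module-level helpers defined earlier (the ASSERT line and the follow-up message are illustrative):

```python
facts: dict[str, FactState] = {}
_scrape_belief_updates(
    ['ASSERT "fact:favorite_color" kind = "belief" value = "blue" '
     'CONFIDENCE 0.9 SOURCE "m:s2:0"'],
    facts,
)
block = _render_known_facts_block(facts)
# block starts with "### KNOWN FACTS ..." and lists [fact:favorite_color],
# so the next turn's user message is prefixed with it and the model reuses
# that id instead of coining a new one for the same belief.
user_msg = f"{block}\n\nActually my favorite color is green now."
```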
+ facts_block = _render_known_facts_block(self._facts) + user_msg = f"{facts_block}\n\n{text}" if facts_block else text + + est = self._estimate_tokens(self._system_prompt) + self._estimate_tokens(user_msg) + budget = est + self._max_output_tokens + if budget > self._n_ctx - self._CTX_HEADROOM: + if est + self._max_output_tokens > self._n_ctx - self._CTX_HEADROOM: + raise IngestOverflow( + f"prompt+output ({budget}) exceeds n_ctx-headroom " + f"({self._n_ctx - self._CTX_HEADROOM}); increase n_ctx or " + f"shorten input" + ) + _log.warning("bonsai: KV would overflow, resetting before ingest") + self._llm = None + llm = self._ensure_llm() + + response = llm.create_chat_completion( + messages=[ + {"role": "system", "content": self._system_prompt}, + {"role": "user", "content": user_msg}, + ], + max_tokens=self._max_output_tokens, + temperature=self._temperature, + ) + raw = response["choices"][0]["message"]["content"] or "" + + cleaned = _strip_think(raw) + if not cleaned: + duration_ms = int((time.perf_counter() - t0) * 1000) + self._log_event(text, raw, [], 0, [], [], [], duration_ms, dry_run) + raise IngestEmpty( + "LLM returned empty or -only output. " + f"raw={raw!r}" + ) + + raw_lines = _split_lines(cleaned) + deduped, dup_dropped = _dedupe_upserts(raw_lines) + + from graphstore.dsl.parser import parse as _dsl_parse + + valid: list[str] = [] + rejected: list[tuple[str, str]] = list(dup_dropped) + for ln in deduped: + try: + _dsl_parse(ln) + valid.append(ln) + except Exception as err: + rejected.append((ln, f"parse error: {err}")) + + entities_new: list[str] = [] + beliefs_changed: list[tuple[str, str]] = [] + for ln in valid: + if _UPSERT_RE.match(ln): + m = _ENT_FROM_ID_RE.search(ln) + if m: + entities_new.append(m.group(1)) + elif _ASSERT_RE.match(ln): + m = _ASSERT_RE.match(ln) + if m: + beliefs_changed.append((m.group(1), "assert")) + elif _RETRACT_RE.match(ln): + m = _RETRACT_RE.match(ln) + if m: + beliefs_changed.append((m.group(1), "retract")) + + executed = 0 + executed_lines: list[str] = [] + if not dry_run: + for ln in valid: + try: + self._gs.execute(ln) + executed += 1 + executed_lines.append(ln) + except Exception as err: + rejected.append((ln, f"execute error: {err}")) + # Scrape belief updates so the next ingest sees the current fact + # state. Only lines that actually executed contribute - failed + # ones leave the running state unchanged. + _scrape_belief_updates(executed_lines, self._facts) + + duration_ms = int((time.perf_counter() - t0) * 1000) + self._log_event( + text, raw, valid, executed, rejected, entities_new, + beliefs_changed, duration_ms, dry_run, + ) + return IngestResult( + statements=list(valid), + executed=executed, + rejected=rejected, + entities_new=entities_new, + beliefs_changed=beliefs_changed, + duration_ms=duration_ms, + raw_output=raw, + skill_fingerprint=self._skill_fingerprint, + dry_run=dry_run, + ) + + # ------------------------------------------------------------------ + # Internals + # ------------------------------------------------------------------ + + def _estimate_tokens(self, text: str) -> int: + """Fast char/4 estimate. 
llama-cpp-python's tokenize() is authoritative
+        but costs a forward through the vocab table; for budget guards the
+        cheap estimate is fine and conservative-enough (chars/4 over-counts
+        for ASCII, under-counts for dense tokens - net neutral)."""
+        return len(text) // 4 + 8
+
+    def _log_event(
+        self,
+        input_text: str,
+        raw: str,
+        valid: list[str],
+        executed: int,
+        rejected: list[tuple[str, str]],
+        entities_new: list[str],
+        beliefs_changed: list[tuple[str, str]],
+        duration_ms: int,
+        dry_run: bool,
+    ) -> None:
+        _log.info(
+            "bonsai.ingest: input_chars=%d raw_chars=%d stmts=%d exec=%d "
+            "rejected=%d entities=%d beliefs=%d dur_ms=%d skill=%s dry_run=%s",
+            len(input_text), len(raw), len(valid), executed, len(rejected),
+            len(entities_new), len(beliefs_changed), duration_ms,
+            self._skill_fingerprint, dry_run,
+        )
diff --git a/tests/test_bonsai_ingestor.py b/tests/test_bonsai_ingestor.py
new file mode 100644
index 0000000..dfd7f13
--- /dev/null
+++ b/tests/test_bonsai_ingestor.py
@@ -0,0 +1,327 @@
+"""Unit tests for bonsai_ingestor correctness guards.
+
+The live LLM path needs the 1.09 GB TQ1_0 GGUF on disk and is skipped by
+default. These tests exercise the post-processing and guard logic without
+touching llama.cpp.
+"""
+from __future__ import annotations
+
+import hashlib
+from pathlib import Path
+
+import pytest
+
+from graphstore.bonsai_ingestor import (
+    BonsaiIngestor,
+    FactState,
+    IngestEmpty,
+    IngestOverflow,
+    IngestResult,
+    _dedupe_upserts,
+    _render_known_facts_block,
+    _scrape_belief_updates,
+    _split_lines,
+    _strip_think,
+)
+
+
+# --------------------------------------------------------------------
+# Post-processing helpers
+# --------------------------------------------------------------------
+
+def test_strip_think_removes_single_block():
+    out = _strip_think("<think>reasoning</think>CREATE NODE \"x\" kind = \"k\"")
+    assert "think" not in out.lower()
+    assert "CREATE NODE" in out
+
+
+def test_strip_think_removes_multiple_blocks():
+    inp = "<think>a</think>line1<think>b</think>line2"
+    assert _strip_think(inp) == "line1line2"
+
+
+def test_strip_think_empty_on_only_think():
+    assert _strip_think("<think>foo</think>") == ""
+
+
+def test_split_lines_drops_fences_and_blanks():
+    inp = "```dsl\nCREATE NODE \"a\" kind = \"k\"\n\n```\nCREATE NODE \"b\" kind = \"k\""
+    lines = _split_lines(inp)
+    assert lines == [
+        'CREATE NODE "a" kind = "k"',
+        'CREATE NODE "b" kind = "k"',
+    ]
+
+
+def test_dedupe_upserts_keeps_first_drops_later():
+    stmts = [
+        'UPSERT NODE "ent:priya" kind = "entity" name = "Priya"',
+        'UPSERT NODE "ent:openai" kind = "entity" name = "OpenAI"',
+        'UPSERT NODE "ent:priya" kind = "entity" name = "Priya"',
+        'CREATE EDGE "m:0" -> "ent:priya" kind = "mentions"',
+    ]
+    kept, dropped = _dedupe_upserts(stmts)
+    assert len(kept) == 3
+    assert 'ent:priya' in kept[0]
+    assert 'ent:openai' in kept[1]
+    assert 'CREATE EDGE' in kept[2]
+    assert len(dropped) == 1
+    assert "duplicate" in dropped[0][1]
+
+
+def test_dedupe_upserts_passes_non_upsert():
+    stmts = ['CREATE NODE "m:0" kind = "message"', 'CREATE EDGE "a" -> "b" kind = "k"']
+    kept, dropped = _dedupe_upserts(stmts)
+    assert kept == stmts
+    assert dropped == []
+
+
+# --------------------------------------------------------------------
+# Skill fingerprint
+# --------------------------------------------------------------------
+
+def test_skill_fingerprint_is_stable_across_instances(tmp_path: Path):
+    skill = tmp_path / "skill.md"
+    skill.write_text("content A")
+    model = tmp_path / "fake.gguf"
+    model.write_bytes(b"ignored; no llama init yet")
+
+    
ing1 = BonsaiIngestor(model_path=model, skill_path=skill) + ing2 = BonsaiIngestor(model_path=model, skill_path=skill) + assert ing1.skill_fingerprint == ing2.skill_fingerprint + + expected = hashlib.sha256(b"content A").hexdigest()[:12] + assert ing1.skill_fingerprint == expected + + +def test_skill_fingerprint_changes_on_skill_edit(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("v1") + model = tmp_path / "fake.gguf" + model.write_bytes(b"") + + ing = BonsaiIngestor(model_path=model, skill_path=skill) + fp_v1 = ing.skill_fingerprint + + skill.write_text("v2") + ing._reload_skill() + assert ing.skill_fingerprint != fp_v1 + + +def test_skill_fingerprint_pinned_into_system_prompt(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("rule body") + model = tmp_path / "fake.gguf" + model.write_bytes(b"") + + ing = BonsaiIngestor(model_path=model, skill_path=skill) + assert f"skill-sha256={ing.skill_fingerprint}" in ing._system_prompt + assert "rule body" in ing._system_prompt + + +# -------------------------------------------------------------------- +# Input validation +# -------------------------------------------------------------------- + +def test_empty_input_raises_ingest_empty(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("any") + model = tmp_path / "fake.gguf" + model.write_bytes(b"") + + ing = BonsaiIngestor(model_path=model, skill_path=skill) + with pytest.raises(IngestEmpty): + ing.ingest("") + with pytest.raises(IngestEmpty): + ing.ingest(" \n\t ") + + +def test_non_dry_run_without_store_raises(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("any") + model = tmp_path / "fake.gguf" + model.write_bytes(b"") + + ing = BonsaiIngestor(model_path=model, skill_path=skill) + with pytest.raises(ValueError, match="requires a GraphStore"): + ing.ingest("hello") + + +def test_missing_model_file_raises(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("any") + with pytest.raises(FileNotFoundError): + BonsaiIngestor(model_path=tmp_path / "nope.gguf", skill_path=skill) + + +def test_missing_skill_file_raises(tmp_path: Path): + model = tmp_path / "fake.gguf" + model.write_bytes(b"") + with pytest.raises(FileNotFoundError): + BonsaiIngestor(model_path=model, skill_path=tmp_path / "nope.md") + + +# -------------------------------------------------------------------- +# Frontmatter strip +# -------------------------------------------------------------------- + +def test_yaml_frontmatter_stripped_from_skill(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("---\nname: x\n---\n\nactual rules") + model = tmp_path / "fake.gguf" + model.write_bytes(b"") + + ing = BonsaiIngestor(model_path=model, skill_path=skill) + assert "name: x" not in ing._skill_text + assert "actual rules" in ing._skill_text + + +# -------------------------------------------------------------------- +# IngestResult shape +# -------------------------------------------------------------------- + +def test_ingest_result_defaults(): + r = IngestResult() + assert r.statements == [] + assert r.executed == 0 + assert r.rejected == [] + assert r.entities_new == [] + assert r.beliefs_changed == [] + assert r.duration_ms == 0 + assert r.raw_output == "" + assert r.skill_fingerprint == "" + assert r.dry_run is False + + +# -------------------------------------------------------------------- +# Fact state tracking (cross-message belief identity) +# -------------------------------------------------------------------- + +def 
test_scrape_single_assert_creates_factstate(): + facts: dict[str, FactState] = {} + _scrape_belief_updates( + ['ASSERT "fact:color" kind = "belief" value = "blue" CONFIDENCE 0.9 SOURCE "m:0"'], + facts, + ) + assert "fact:color" in facts + st = facts["fact:color"] + assert st.kind == "belief" + assert st.value == "blue" + assert st.confidence == 0.9 + assert st.source == "m:0" + assert st.retracted is False + + +def test_scrape_assert_then_retract_marks_retracted(): + facts: dict[str, FactState] = {} + _scrape_belief_updates( + ['ASSERT "fact:x" kind = "belief" value = "v" CONFIDENCE 0.9 SOURCE "m:0"'], + facts, + ) + _scrape_belief_updates( + ['RETRACT "fact:x" REASON "wrong"'], + facts, + ) + assert facts["fact:x"].retracted is True + assert facts["fact:x"].retract_reason == "wrong" + + +def test_scrape_retract_then_reassert_un_retracts(): + facts: dict[str, FactState] = {} + _scrape_belief_updates( + ['ASSERT "fact:x" kind = "belief" value = "old" CONFIDENCE 0.9 SOURCE "m:0"', + 'RETRACT "fact:x" REASON "wrong"', + 'ASSERT "fact:x" kind = "belief" value = "new" CONFIDENCE 0.9 SOURCE "m:1"'], + facts, + ) + st = facts["fact:x"] + assert st.value == "new" + assert st.retracted is False + assert st.retract_reason == "" + + +def test_scrape_ignores_non_belief_lines(): + facts: dict[str, FactState] = {} + _scrape_belief_updates( + ['CREATE NODE "m:0" kind = "message"', + 'UPSERT NODE "ent:x" kind = "entity" name = "X"', + 'CREATE EDGE "a" -> "b" kind = "mentions"'], + facts, + ) + assert facts == {} + + +def test_render_known_facts_block_empty_when_no_facts(): + assert _render_known_facts_block({}) == "" + + +def test_render_known_facts_block_hides_retracted(): + facts = { + "fact:a": FactState(fact_id="fact:a", kind="belief", value="alive", confidence=0.9), + "fact:b": FactState(fact_id="fact:b", kind="belief", value="dead", retracted=True), + } + block = _render_known_facts_block(facts) + assert "fact:a" in block + assert "fact:b" not in block + assert "alive" in block + assert "dead" not in block + + +def test_render_known_facts_block_formats_each_fact(): + facts = { + "fact:color": FactState( + fact_id="fact:color", + kind="belief", + value="blue", + confidence=0.9, + source="m:s1:0", + ), + } + block = _render_known_facts_block(facts) + assert "[fact:color]" in block + assert 'kind="belief"' in block + assert 'value="blue"' in block + assert "confidence=0.90" in block + assert 'source="m:s1:0"' in block + assert "KNOWN FACTS" in block + + +def test_render_known_facts_trims_to_max_facts(): + facts = { + f"fact:{i}": FactState(fact_id=f"fact:{i}", value=str(i), confidence=0.9) + for i in range(60) + } + block = _render_known_facts_block(facts, max_facts=5) + for i in range(55, 60): + assert f"[fact:{i}]" in block + for i in range(0, 55): + assert f"[fact:{i}]" not in block + + +def test_ingestor_facts_property_returns_copy(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("any") + model = tmp_path / "fake.gguf" + model.write_bytes(b"") + + ing = BonsaiIngestor(model_path=model, skill_path=skill) + assert ing.facts == {} + # Simulate state set by an earlier ingest: + ing._facts["fact:x"] = FactState(fact_id="fact:x", value="v") + snapshot = ing.facts + assert "fact:x" in snapshot + # Mutating the snapshot should not affect internal state + snapshot["fact:y"] = FactState(fact_id="fact:y") + assert "fact:y" not in ing._facts + + +def test_ingestor_reset_facts_clears_state(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("any") + model = tmp_path / 
"fake.gguf" + model.write_bytes(b"") + + ing = BonsaiIngestor(model_path=model, skill_path=skill) + ing._facts["fact:x"] = FactState(fact_id="fact:x", value="v") + ing.reset_facts() + assert ing._facts == {} diff --git a/tools/skills/graphstore-bonsai-dsl/SKILL.md b/tools/skills/graphstore-bonsai-dsl/SKILL.md new file mode 100644 index 0000000..17fe6b0 --- /dev/null +++ b/tools/skills/graphstore-bonsai-dsl/SKILL.md @@ -0,0 +1,90 @@ +--- +name: graphstore-bonsai-dsl +description: Minimal ingest-only DSL cheat sheet for tiny local LLMs (Ternary-Bonsai 1.7B / 4B / 8B GGUF). Uses few-shot examples instead of reference text. Pair with GBNF grammar-constrained decoding for guaranteed-parseable output. +compatibility: graphstore >= 0.4.0 +metadata: + author: orkait + version: "2.0" + target_tokens: 400 +--- + +Convert one user message into GraphStore DSL. Output **only** DSL lines. No prose, no markdown, no `` tags. + +Valid verbs: +``` +CREATE NODE "id" kind = "K" f = v ... [DOCUMENT "text"] +UPSERT NODE "id" kind = "K" name = "N" +CREATE EDGE "a" -> "b" kind = "K" +ASSERT "fact:X" kind = "belief" value = "V" CONFIDENCE 0.9 SOURCE "msg:id" +RETRACT "fact:X" REASON "why" +``` + +Escape `"` as `\"` inside strings. + +Emit each entity once per message. Entity id = `ent:`. Belief id = `fact:`. + +Required per message (copy this pattern): +1. One `CREATE NODE "m::" kind = "message" session = "..." role = "..." DOCUMENT "..."` +2. For every person / org / named thing in the text: one `UPSERT NODE "ent:" kind = "entity" name = "..."` +3. For every entity from step 2: one `CREATE EDGE "m::" -> "ent:" kind = "mentions"` (always emit both the UPSERT and the matching EDGE, not one without the other) +4. If the user states a personal preference, belief, or fact about themselves, add one `ASSERT "fact:" ...` +5. If the new message contradicts a KNOWN FACT above, add one `RETRACT "" REASON "..."` before its `ASSERT` + +**Fact id reuse (critical).** If the user turn starts with `### KNOWN FACTS`, those lines list existing beliefs that already have ids in the store. When the new message updates or contradicts a concept shown there, reuse the same `fact:` from the block. Do NOT coin a new fact_id for the same underlying belief. On update emit `RETRACT "" REASON "..."` followed by `ASSERT "" ... value = "" ...`. + +--- + +**Input:** +Session s1, msg m:s1:0, user: "Kailash joined OpenAI as DB engineer." + +**Output:** +``` +CREATE NODE "m:s1:0" kind = "message" session = "s1" role = "user" DOCUMENT "Kailash joined OpenAI as DB engineer." +UPSERT NODE "ent:kailash" kind = "entity" name = "Kailash" +UPSERT NODE "ent:openai" kind = "entity" name = "OpenAI" +CREATE EDGE "m:s1:0" -> "ent:kailash" kind = "mentions" +CREATE EDGE "m:s1:0" -> "ent:openai" kind = "mentions" +``` + +--- + +**Input:** +Session s2, msg m:s2:0, user: "My favorite color is blue." + +**Output:** +``` +CREATE NODE "m:s2:0" kind = "message" session = "s2" role = "user" DOCUMENT "My favorite color is blue." +ASSERT "fact:favorite_color" kind = "belief" value = "blue" CONFIDENCE 0.9 SOURCE "m:s2:0" +``` + +--- + +**Input:** +Session s2, msg m:s2:1, user: "Actually my favorite color is green now." + +**Output:** +``` +CREATE NODE "m:s2:1" kind = "message" session = "s2" role = "user" DOCUMENT "Actually my favorite color is green now." 
+RETRACT "fact:favorite_color" REASON "superseded by m:s2:1" +ASSERT "fact:favorite_color" kind = "belief" value = "green" CONFIDENCE 0.9 SOURCE "m:s2:1" +``` + +--- + +**Input with known facts (reuse the existing fact_id, do not invent a new one):** + +``` +### KNOWN FACTS (reuse these fact_ids; emit RETRACT + ASSERT to update) +[fact:favorite_drink] kind="belief" value="coffee" confidence=0.90 source="m:s3:0" + +Session s3, msg m:s3:1, user: "Actually I prefer tea now." +``` + +**Output:** +``` +CREATE NODE "m:s3:1" kind = "message" session = "s3" role = "user" DOCUMENT "Actually I prefer tea now." +RETRACT "fact:favorite_drink" REASON "superseded by m:s3:1" +ASSERT "fact:favorite_drink" kind = "belief" value = "tea" CONFIDENCE 0.9 SOURCE "m:s3:1" +``` + +Wrong: coining `fact:preference` or `fact:drink_pref` when `fact:favorite_drink` already exists in KNOWN FACTS. diff --git a/tools/skills/graphstore-bonsai-dsl/grammar.gbnf b/tools/skills/graphstore-bonsai-dsl/grammar.gbnf new file mode 100644 index 0000000..0ae1175 --- /dev/null +++ b/tools/skills/graphstore-bonsai-dsl/grammar.gbnf @@ -0,0 +1,29 @@ +# GBNF for graphstore ingest-only DSL. +# Locks the model to emit ONLY valid statements from the ingest subset. +# Kills tags, markdown fences, duplicate whitespace by construction. +# +# Use with llama_cpp.LlamaGrammar.from_string(open(...).read()) and pass +# `grammar=` to create_chat_completion. Works with any Qwen/Bonsai model. + +root ::= stmt ( "\n" stmt )* + +stmt ::= create-node | upsert-node | create-edge | assert-stmt | retract-stmt + +create-node ::= "CREATE NODE " id " kind = " string field* document? +upsert-node ::= "UPSERT NODE " id " kind = " string " name = " string +create-edge ::= "CREATE EDGE " id " -> " id " kind = " string +assert-stmt ::= "ASSERT " id " kind = \"belief\" value = " string " CONFIDENCE " confidence " SOURCE " id +retract-stmt ::= "RETRACT " id " REASON " string + +field ::= " " key " = " value +key ::= [a-z] [a-z0-9_]* +value ::= string | number + +document ::= " DOCUMENT " string + +id ::= string +string ::= "\"" char* "\"" +char ::= [^"\\] | "\\\"" | "\\\\" + +number ::= [0-9]+ ("." [0-9]+)? +confidence ::= "0" "." [0-9] [0-9]?