diff --git a/benchmarks/kaggle/pack_ternary_bonsai/kernel-metadata.json b/benchmarks/kaggle/pack_ternary_bonsai/kernel-metadata.json new file mode 100644 index 0000000..d341a6c --- /dev/null +++ b/benchmarks/kaggle/pack_ternary_bonsai/kernel-metadata.json @@ -0,0 +1,13 @@ +{ + "id": "superkaiii/pack-ternary-bonsai-4b-tq1_0", + "title": "Pack Ternary-Bonsai-4B TQ1_0", + "code_file": "pack_ternary_bonsai.py", + "language": "python", + "kernel_type": "script", + "is_private": true, + "enable_gpu": false, + "enable_internet": true, + "competition_sources": [], + "dataset_sources": ["superkaiii/hf-token-private"], + "kernel_sources": [] +} diff --git a/benchmarks/kaggle/pack_ternary_bonsai/pack_ternary_bonsai.py b/benchmarks/kaggle/pack_ternary_bonsai/pack_ternary_bonsai.py new file mode 100644 index 0000000..ad48c9e --- /dev/null +++ b/benchmarks/kaggle/pack_ternary_bonsai/pack_ternary_bonsai.py @@ -0,0 +1,197 @@ +"""Kaggle kernel: re-quantize Ternary-Bonsai-4B from unpacked FP16 to TQ1_0 GGUF. + +Why here: the conversion needs ~17 GB peak disk + a llama.cpp build. Doing it on +Kaggle keeps it off the dev machine and publishes the result to HF Hub so every +GraphStore dev pulls the same artifact. + +Inputs (Kaggle kernel config): + - dataset_sources: ["superkaiii/hf-token-private"] (provides HF_TOKEN file) + - enable_internet: true + - enable_gpu: false (conversion is CPU-bound) + +Flow: + 1. Read HF write token from /kaggle/input/hf-token-private/HF_TOKEN + 2. Download prism-ml/Ternary-Bonsai-4B-unpacked (FP16 safetensors, ~8 GB) + 3. Clone + build llama.cpp llama-quantize + 4. convert_hf_to_gguf.py -> F16 GGUF (~8 GB) + 5. llama-quantize TQ1_0 -> ~1 GB GGUF, the pack step + 6. Upload result to superkaiii/Ternary-Bonsai-4B-TQ1_0-GGUF on HF Hub + 7. Delete intermediates so peak disk stays under 20 GB Kaggle quota + +Output (Kaggle kernel): + /kaggle/working/Ternary-Bonsai-4B-TQ1_0.gguf (also on HF Hub after upload) + /kaggle/working/pack_report.json (sizes + checksums) +""" +from __future__ import annotations + +import hashlib +import json +import os +import shutil +import subprocess +from pathlib import Path + + +HF_REPO = "superkaiii/Ternary-Bonsai-4B-TQ1_0-GGUF" +SOURCE_REPO = "prism-ml/Ternary-Bonsai-4B-unpacked" +TARGET_QUANT = "TQ1_0" +OUTPUT_NAME = "Ternary-Bonsai-4B-TQ1_0.gguf" + +SRC_DIR = Path("/kaggle/tmp/bonsai-src") +LC_DIR = Path("/kaggle/tmp/llama.cpp") +F16_GGUF = Path("/kaggle/tmp/bonsai-4b-f16.gguf") +WORKING = Path("/kaggle/working") +OUT_GGUF = WORKING / OUTPUT_NAME +REPORT = WORKING / "pack_report.json" + + +def run(cmd: list[str] | str, **kw) -> None: + """Shell command with live output. Raises on nonzero exit.""" + shell = isinstance(cmd, str) + print(f"$ {cmd if shell else ' '.join(cmd)}", flush=True) + subprocess.run(cmd, check=True, shell=shell, **kw) + + +def sha256(path: Path) -> str: + h = hashlib.sha256() + with path.open("rb") as f: + for chunk in iter(lambda: f.read(1 << 20), b""): + h.update(chunk) + return h.hexdigest() + + +def load_hf_token() -> str: + """Find HF token matching the kaggle_benchmark.py pattern.""" + # Primary: same path the working kernel uses. + primary = Path("/kaggle/input/hf-token-private/hf_token.txt") + if primary.exists(): + return primary.read_text().strip() + # Fallback: glob recursively in case Kaggle mounts at a nested path. 
+ for name in ("hf_token.txt", "HF_TOKEN", "token"): + hits = list(Path("/kaggle/input").rglob(name)) + if hits: + return hits[0].read_text().strip() + raise RuntimeError("HF token not found under /kaggle/input/") + + +def main() -> None: + WORKING.mkdir(parents=True, exist_ok=True) + SRC_DIR.mkdir(parents=True, exist_ok=True) + + hf_token = load_hf_token() + os.environ["HF_TOKEN"] = hf_token + os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token + + print("[1/6] pip deps") + run("pip install -q 'huggingface_hub>=0.24' 'transformers>=4.44' 'safetensors>=0.4' 'torch>=2.3' 'sentencepiece' 'protobuf'") + + print("[2/6] download source F16 safetensors (Python API, not CLI)") + from huggingface_hub import snapshot_download + snapshot_download( + repo_id=SOURCE_REPO, + local_dir=str(SRC_DIR), + token=hf_token, + allow_patterns=["*.safetensors", "*.json", "*.model", "tokenizer*"], + ) + + print("[3/6] clone + build llama.cpp llama-quantize target") + if not LC_DIR.exists(): + run(["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp", str(LC_DIR)]) + run(f"cd {LC_DIR} && cmake -B build -DLLAMA_BUILD_SERVER=OFF -DLLAMA_BUILD_EXAMPLES=ON -DLLAMA_BUILD_TESTS=OFF && cmake --build build --target llama-quantize -j") + + print("[4/6] convert safetensors -> F16 GGUF") + run([ + "python", str(LC_DIR / "convert_hf_to_gguf.py"), + str(SRC_DIR), + "--outfile", str(F16_GGUF), + "--outtype", "f16", + ]) + + print("[5/6] quantize F16 -> TQ1_0 (the pack step)") + run([ + str(LC_DIR / "build" / "bin" / "llama-quantize"), + str(F16_GGUF), + str(OUT_GGUF), + TARGET_QUANT, + ]) + + # Drop intermediates aggressively to stay under the Kaggle 20 GB working quota. + if F16_GGUF.exists(): + F16_GGUF.unlink() + shutil.rmtree(SRC_DIR, ignore_errors=True) + + print("[6/6] upload to HF Hub") + from huggingface_hub import HfApi + + api = HfApi(token=hf_token) + api.create_repo(HF_REPO, repo_type="model", exist_ok=True, private=False) + + readme = WORKING / "README.md" + readme.write_text(_readme()) + + api.upload_file( + path_or_fileobj=str(OUT_GGUF), + path_in_repo=OUTPUT_NAME, + repo_id=HF_REPO, + repo_type="model", + ) + api.upload_file( + path_or_fileobj=str(readme), + path_in_repo="README.md", + repo_id=HF_REPO, + repo_type="model", + ) + + report = { + "source_repo": SOURCE_REPO, + "target_repo": HF_REPO, + "quant": TARGET_QUANT, + "output_bytes": OUT_GGUF.stat().st_size, + "output_sha256": sha256(OUT_GGUF), + } + REPORT.write_text(json.dumps(report, indent=2)) + print(f"\nDONE\n local: {OUT_GGUF} ({OUT_GGUF.stat().st_size / 1e9:.2f} GB)") + print(f" hub: https://huggingface.co/{HF_REPO}") + print(f" report: {REPORT}") + + +def _readme() -> str: + return f"""--- +library_name: gguf +base_model: {SOURCE_REPO} +quantization_config: + method: {TARGET_QUANT} +tags: + - gguf + - ternary + - bonsai + - llama.cpp +--- + +# Ternary-Bonsai-4B TQ1_0 + +Repack of [{SOURCE_REPO}](https://huggingface.co/{SOURCE_REPO}) to llama.cpp's native ternary format ({TARGET_QUANT}). + +Ternary-Bonsai weights are trained as {{-1, 0, +1}}. {TARGET_QUANT} packs these losslessly at 1.6875 bits per weight with a single FP16 scale per group of 256 weights. Unlike Q2_K (which k-means clusters generic floats), {TARGET_QUANT} preserves the original trained weight values exactly. 
+ +## Quickstart + +```python +from llama_cpp import Llama +m = Llama(model_path="{OUTPUT_NAME}", n_ctx=4096) +``` + +## Conversion pipeline + +``` +prism-ml/Ternary-Bonsai-4B-unpacked (F16 safetensors) + -> convert_hf_to_gguf.py --outtype f16 + -> llama-quantize ... {TARGET_QUANT} +``` + +Built on Kaggle with ggerganov/llama.cpp master. +""" + + +if __name__ == "__main__": + main() diff --git a/src/graphstore/bonsai_ingestor.py b/src/graphstore/bonsai_ingestor.py new file mode 100644 index 0000000..f179ec8 --- /dev/null +++ b/src/graphstore/bonsai_ingestor.py @@ -0,0 +1,579 @@ +"""Natural-language to DSL ingestor backed by a local llama.cpp GGUF. + +Target model: Ternary-Bonsai 4B TQ1_0 (CPU + RAM only, offline). + +Download the model once before first use (models/ is gitignored): + + mkdir -p models/Ternary-Bonsai-4B-TQ1_0 && curl -L -o \\ + models/Ternary-Bonsai-4B-TQ1_0/Ternary-Bonsai-4B-TQ1_0.gguf \\ + https://huggingface.co/superkaiii/Ternary-Bonsai-4B-TQ1_0-GGUF/resolve/main/Ternary-Bonsai-4B-TQ1_0.gguf + +Publication pipeline: benchmarks/kaggle/pack_ternary_bonsai/ converts +prism-ml/Ternary-Bonsai-4B-unpacked (FP16) to TQ1_0 via a Kaggle kernel and +publishes the result to superkaiii/Ternary-Bonsai-4B-TQ1_0-GGUF on HF. + +The public surface is `BonsaiIngestor`. Every call: + 1. Loads the skill prompt once, fingerprints it, pins that fingerprint into + the system prompt so KV cache stays coherent across calls. When the skill + file on disk changes, the next call naturally forces a warm re-process + because the system-prompt prefix now differs. + 2. Serializes access to the `Llama` instance with a lock - llama-cpp-python + is NOT thread-safe; concurrent calls corrupt the KV cache / segfault. + 3. Checks the combined prompt + output budget against `n_ctx` and resets the + cache if the request would force a head-trim (which silently evicts the + skill prefix and causes quality collapse). + 4. Treats empty / -only outputs as ingestion errors, not as silent + no-ops (the behaviour-analysis audit called this out - silent failures + hid real bugs). + 5. Dedupes UPSERT NODE by id before handing the DSL to the parser, so a + BatchRollback from a duplicated entity never takes out an otherwise + valid batch. + 6. Emits one structured log line per ingest with counts for every + category: statements emitted, parsed, rejected, executed, duration. + 7. Supports `dry_run=True` to generate the DSL and return it without + touching the GraphStore - used for testing, previewing, and building + training data. + +It also tracks cross-message belief state. After every successful ingest, the +executed ASSERT / RETRACT lines are scraped into a running `fact_id -> FactState` +dict. The next ingest injects the live (non-retracted) facts into the USER +message (not the system prompt - see below), so the model sees prior fact ids +and reuses them when updating the same concept. Without this, the model coins +a new fact_id per message and the graph ends up with multiple beliefs for the +same underlying concept. + +The known-facts block goes in the USER message on purpose: the system prompt +stays byte-identical across calls, which keeps llama.cpp's prefix-match KV +cache warm. If we appended facts to the system prompt, every call would be a +cold one. + +Not handled here: + - Streaming. Generation is blocking. Callers can run it in a thread. + - Multi-user / multi-tenant. Use one BonsaiIngestor per user. + - Model swap. Create a new BonsaiIngestor; don't mutate this one's paths. 
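Typical call shape for the surface described above, as a minimal sketch (the model path matches the download snippet at the top of this docstring; the input sentence is illustrative, and `dry_run=True` means no GraphStore is needed):

```python
from graphstore.bonsai_ingestor import BonsaiIngestor

ing = BonsaiIngestor(
    model_path="models/Ternary-Bonsai-4B-TQ1_0/Ternary-Bonsai-4B-TQ1_0.gguf",
)
result = ing.ingest("Kailash joined OpenAI as DB engineer.", dry_run=True)
for stmt in result.statements:
    print(stmt)                        # generated, parse-checked DSL
for stmt, reason in result.rejected:
    print("rejected:", stmt, reason)   # parse failures and duplicate UPSERTs
```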
+""" +from __future__ import annotations + +import hashlib +import logging +import re +import threading +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +_log = logging.getLogger(__name__) + + +# -------------------------------------------------------------------- +# Errors +# -------------------------------------------------------------------- + +class BonsaiError(Exception): + """Base class for ingestor errors.""" + + +class IngestEmpty(BonsaiError): + """LLM returned empty or -only output. No DSL to execute.""" + + +class IngestOverflow(BonsaiError): + """Requested prompt + output would exceed n_ctx even after a cache reset.""" + + +# -------------------------------------------------------------------- +# Result +# -------------------------------------------------------------------- + +@dataclass +class FactState: + """One live belief tracked across messages within this ingestor. + + Updated by `_scrape_belief_updates` after every successful ingest and + rendered back into the next ingest's user message by + `_render_known_facts_block`, so the model reuses the same fact_id for the + same concept instead of coining a new one each call. + """ + + fact_id: str + kind: str = "" + value: str = "" + confidence: float = 1.0 + source: str = "" + retracted: bool = False + retract_reason: str = "" + + +@dataclass +class IngestResult: + """Everything an ingest call produced, for inspection and tracing.""" + + statements: list[str] = field(default_factory=list) + executed: int = 0 + rejected: list[tuple[str, str]] = field(default_factory=list) + entities_new: list[str] = field(default_factory=list) + beliefs_changed: list[tuple[str, str]] = field(default_factory=list) + duration_ms: int = 0 + raw_output: str = "" + skill_fingerprint: str = "" + dry_run: bool = False + + +# -------------------------------------------------------------------- +# Post-processing helpers +# -------------------------------------------------------------------- + +_THINK_RE = re.compile(r".*?", re.DOTALL | re.IGNORECASE) +_FENCE_RE = re.compile(r"^```[a-zA-Z]*\s*$|^```\s*$") +_UPSERT_RE = re.compile(r'^\s*UPSERT\s+NODE\s+"([^"\\]+(?:\\.[^"\\]*)*)"', re.IGNORECASE) +_ASSERT_LINE_RE = re.compile( + r'^\s*ASSERT\s+"([^"\\]+(?:\\.[^"\\]*)*)"\s+(.*)$', + re.IGNORECASE, +) +_ASSERT_RE = re.compile(r'^\s*ASSERT\s+"([^"\\]+(?:\\.[^"\\]*)*)"', re.IGNORECASE) +_RETRACT_RE = re.compile( + r'^\s*RETRACT\s+"([^"\\]+(?:\\.[^"\\]*)*)"(?:\s+REASON\s+"([^"\\]*(?:\\.[^"\\]*)*)")?\s*$', + re.IGNORECASE, +) +_CREATE_NODE_RE = re.compile(r'^\s*CREATE\s+NODE\s+"([^"\\]+(?:\\.[^"\\]*)*)"', re.IGNORECASE) +_ENT_FROM_ID_RE = re.compile(r'"(ent:[^"\\]+)"') +_KV_VALUE_RE = re.compile(r'value\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"', re.IGNORECASE) +_KV_KIND_RE = re.compile(r'kind\s*=\s*"([^"\\]*(?:\\.[^"\\]*)*)"', re.IGNORECASE) +_KV_CONFIDENCE_RE = re.compile(r'CONFIDENCE\s+([0-9.]+)', re.IGNORECASE) +_KV_SOURCE_RE = re.compile(r'SOURCE\s+"([^"\\]*(?:\\.[^"\\]*)*)"', re.IGNORECASE) + + +def _strip_think(raw: str) -> str: + """Remove reasoning-model `...` wrappers.""" + return _THINK_RE.sub("", raw).strip() + + +def _split_lines(cleaned: str) -> list[str]: + """One statement per line. 
Drop markdown fences and blanks.""" + out: list[str] = [] + for raw_ln in cleaned.splitlines(): + ln = raw_ln.strip() + if not ln or _FENCE_RE.match(ln): + continue + out.append(ln) + return out + + +def _dedupe_upserts(stmts: list[str]) -> tuple[list[str], list[tuple[str, str]]]: + """Keep the first UPSERT per entity id. Drop later duplicates. + + Duplicate UPSERT NODE with the same id inside a BEGIN/COMMIT batch makes + the rollback snapshot logic trip over itself - entire batch errors out. + Pre-filter here so the transaction never sees the duplicate. + """ + seen: set[str] = set() + kept: list[str] = [] + dropped: list[tuple[str, str]] = [] + for ln in stmts: + m = _UPSERT_RE.match(ln) + if m: + eid = m.group(1) + if eid in seen: + dropped.append((ln, f"duplicate UPSERT of {eid!r}")) + continue + seen.add(eid) + kept.append(ln) + return kept, dropped + + +def _scrape_belief_updates( + executed_lines: list[str], + facts: dict[str, FactState], +) -> None: + """Walk successfully-executed DSL lines and update running fact state. + + Only ASSERT / RETRACT contribute. Other writes ignored. Mutates `facts` + in place. + """ + for line in executed_lines: + m = _ASSERT_LINE_RE.match(line) + if m: + fact_id = m.group(1) + rest = m.group(2) + st = facts.get(fact_id) or FactState(fact_id=fact_id) + km = _KV_KIND_RE.search(rest) + if km: + st.kind = km.group(1) + vm = _KV_VALUE_RE.search(rest) + if vm: + st.value = vm.group(1) + cm = _KV_CONFIDENCE_RE.search(rest) + if cm: + try: + st.confidence = float(cm.group(1)) + except ValueError: + pass + sm = _KV_SOURCE_RE.search(rest) + if sm: + st.source = sm.group(1) + st.retracted = False + st.retract_reason = "" + facts[fact_id] = st + continue + m = _RETRACT_RE.match(line) + if m: + fact_id = m.group(1) + reason = m.group(2) or "" + st = facts.get(fact_id) or FactState(fact_id=fact_id) + st.retracted = True + st.retract_reason = reason + facts[fact_id] = st + + +def _render_known_facts_block(facts: dict[str, FactState], max_facts: int = 40) -> str: + """Format non-retracted facts into a block the LLM reads before the input. + + Placed in the USER message (not the system prompt) to keep the system + prompt byte-identical across calls for llama.cpp KV-cache prefix reuse. + + Retracted facts are hidden so the model cannot re-assert a superseded + belief. If the dict grows beyond `max_facts`, the most-recently-touched + entries win (dict preserves insertion order since Python 3.7). + """ + alive = [f for f in facts.values() if not f.retracted] + if not alive: + return "" + alive = alive[-max_facts:] + lines = [ + "### KNOWN FACTS (reuse these fact_ids; emit RETRACT + ASSERT to update)", + ] + for f in alive: + bits = [f'[{f.fact_id}]'] + if f.kind: + bits.append(f'kind="{f.kind}"') + if f.value: + bits.append(f'value="{f.value}"') + bits.append(f'confidence={f.confidence:.2f}') + if f.source: + bits.append(f'source="{f.source}"') + lines.append(" ".join(bits)) + return "\n".join(lines) + + +# -------------------------------------------------------------------- +# Ingestor +# -------------------------------------------------------------------- + +_DEFAULT_SKILL_PATH = ( + Path(__file__).resolve().parent.parent.parent + / "tools" / "skills" / "graphstore-bonsai-dsl" / "SKILL.md" +) + + +class BonsaiIngestor: + """NL -> DSL via a local llama.cpp GGUF, with correctness guards. + + Parameters + ---------- + model_path : str | Path + Path to a .gguf file. The matching manifest under the same directory + is not required; this class talks to llama.cpp directly. 
+ gs : GraphStore | None + Target store. Required for non-dry-run ingests. Dry-runs don't need one. + skill_path : str | Path | None + Prompt file. Defaults to tools/skills/graphstore-bonsai-dsl/SKILL.md. + n_ctx : int + Context window. 2048 is enough for ~500-token skill + 200-token user + message + 400-token output + headroom. + n_threads : int | None + Physical core count works best on memory-bandwidth-bound CPU inference. + Defaults to os.cpu_count() // 2 via llama.cpp. + chat_format : str + Matches the GGUF. Qwen3-based Bonsai works with 'qwen'. + max_output_tokens : int + Hard cap per call. Generation stops either here or at the model's + natural stop. + temperature : float + Default 0.0 for reproducible DSL. + """ + + # Headroom we leave between (prompt + output) and n_ctx. Below this we + # force a reset so llama.cpp never auto-evicts the skill prefix. + _CTX_HEADROOM = 128 + + def __init__( + self, + model_path: str | Path, + *, + gs: Any | None = None, + skill_path: str | Path | None = None, + n_ctx: int = 2048, + n_threads: int | None = None, + chat_format: str = "qwen", + max_output_tokens: int = 400, + temperature: float = 0.0, + ) -> None: + self._model_path = Path(model_path) + if not self._model_path.exists(): + raise FileNotFoundError(f"bonsai model not found: {self._model_path}") + self._gs = gs + self._skill_path = Path(skill_path) if skill_path else _DEFAULT_SKILL_PATH + self._n_ctx = n_ctx + self._max_output_tokens = max_output_tokens + self._temperature = temperature + self._chat_format = chat_format + self._n_threads = n_threads + + self._skill_text = "" + self._skill_fingerprint = "" + self._system_prompt = "" + self._reload_skill() + + self._llm: Any | None = None + self._lock = threading.Lock() + + # Cross-message belief tracking. fact_id -> FactState. Fed into the + # user message of the next ingest so the model reuses ids. + self._facts: dict[str, FactState] = {} + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + def _reload_skill(self) -> None: + """Read skill from disk, compute fingerprint, pin into system prompt. + + Pinning the fingerprint into the prompt means if the file changes on + disk the system-prompt prefix changes too, which naturally invalidates + the llama.cpp prefix-match KV cache without us having to call reset. + """ + if not self._skill_path.exists(): + raise FileNotFoundError(f"bonsai skill not found: {self._skill_path}") + body = self._skill_path.read_text() + if body.startswith("---"): + _, _, body = body.partition("---") + _, _, body = body.partition("---") + body = body.strip() + self._skill_text = body + self._skill_fingerprint = hashlib.sha256(body.encode()).hexdigest()[:12] + self._system_prompt = f"# skill-sha256={self._skill_fingerprint}\n\n{body}" + + def _ensure_llm(self) -> Any: + """Lazy-load the Llama instance on first use.""" + if self._llm is not None: + return self._llm + from llama_cpp import Llama + kwargs: dict[str, Any] = { + "model_path": str(self._model_path), + "n_ctx": self._n_ctx, + "chat_format": self._chat_format, + "verbose": False, + } + if self._n_threads is not None: + kwargs["n_threads"] = self._n_threads + _log.info( + "bonsai: loading %s n_ctx=%d threads=%s chat_format=%s", + self._model_path.name, self._n_ctx, self._n_threads, self._chat_format, + ) + self._llm = Llama(**kwargs) + return self._llm + + def reset(self) -> None: + """Drop the Llama instance so the next call reloads from scratch. 
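A small sketch of the invalidation mechanic that `_reload_skill` and `_ensure_llm` rely on (the skill body below is illustrative; the hashing and header format mirror `_reload_skill`):

```python
import hashlib

body = "Convert one user message into GraphStore DSL."   # illustrative skill body
fp = hashlib.sha256(body.encode()).hexdigest()[:12]
system_prompt = f"# skill-sha256={fp}\n\n{body}"

# Editing the skill changes fp, so the first bytes of the system prompt
# change too: llama.cpp's prefix-match KV cache misses and the next ingest
# re-processes the skill with no explicit reset() needed.
edited = body + " Emit each entity once per message."
assert hashlib.sha256(edited.encode()).hexdigest()[:12] != fp
```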
+ + Use when the skill file changed and you want to force a cold start, + or when KV state is suspected corrupt (e.g. after a thread crash). + Automatic: only called from internal guards. + """ + with self._lock: + self._llm = None + + # ------------------------------------------------------------------ + # Observability + # ------------------------------------------------------------------ + + @property + def skill_fingerprint(self) -> str: + """12-hex-char sha256 prefix of the loaded skill text. Stable across + processes for the same skill bytes. Emitted in every ingest log line.""" + return self._skill_fingerprint + + @property + def facts(self) -> dict[str, FactState]: + """Live fact state accumulated from successful ingests. Read-only view. + + Use reset_facts() to clear. Dry-run ingests don't update this. + """ + return dict(self._facts) + + def reset_facts(self) -> None: + """Clear the running fact state so the next ingest starts fresh.""" + with self._lock: + self._facts.clear() + + # ------------------------------------------------------------------ + # Ingest + # ------------------------------------------------------------------ + + def warmup(self) -> None: + """Process the system prompt once so the skill-prefix KV is warm. + + Optional - the first real ingest pays this cost anyway. Separate + because long-running daemons want the cost up-front, not on the + first user-facing request. + """ + llm = self._ensure_llm() + with self._lock: + llm.create_chat_completion( + messages=[ + {"role": "system", "content": self._system_prompt}, + {"role": "user", "content": "ready"}, + ], + max_tokens=1, + temperature=0.0, + ) + + def ingest(self, text: str, *, dry_run: bool = False) -> IngestResult: + """Convert `text` to DSL statements and (optionally) execute them. + + `dry_run=True` returns the DSL without touching the store - useful + for previewing or building training data without committing. + """ + if not text or not text.strip(): + raise IngestEmpty("input text is empty or whitespace-only") + if not dry_run and self._gs is None: + raise ValueError("ingest requires a GraphStore (pass gs=...) or dry_run=True") + + self._reload_skill() + with self._lock: + return self._ingest_locked(text, dry_run=dry_run) + + def _ingest_locked(self, text: str, *, dry_run: bool) -> IngestResult: + t0 = time.perf_counter() + llm = self._ensure_llm() + + # Compose the user message: prior facts block + user text. Keeps the + # system prompt byte-identical so the skill stays KV-cache warm. 
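A concrete sketch of the round trip this comment describes, using the module-level helpers defined earlier (the ASSERT line and the follow-up message are illustrative):

```python
facts: dict[str, FactState] = {}
_scrape_belief_updates(
    ['ASSERT "fact:favorite_color" kind = "belief" value = "blue" '
     'CONFIDENCE 0.9 SOURCE "m:s2:0"'],
    facts,
)
block = _render_known_facts_block(facts)
# block starts with "### KNOWN FACTS ..." and lists [fact:favorite_color],
# so the next turn's user message is prefixed with it and the model reuses
# that id instead of coining a new one for the same belief.
user_msg = f"{block}\n\nActually my favorite color is green now."
```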
+ facts_block = _render_known_facts_block(self._facts) + user_msg = f"{facts_block}\n\n{text}" if facts_block else text + + est = self._estimate_tokens(self._system_prompt) + self._estimate_tokens(user_msg) + budget = est + self._max_output_tokens + if budget > self._n_ctx - self._CTX_HEADROOM: + if est + self._max_output_tokens > self._n_ctx - self._CTX_HEADROOM: + raise IngestOverflow( + f"prompt+output ({budget}) exceeds n_ctx-headroom " + f"({self._n_ctx - self._CTX_HEADROOM}); increase n_ctx or " + f"shorten input" + ) + _log.warning("bonsai: KV would overflow, resetting before ingest") + self._llm = None + llm = self._ensure_llm() + + response = llm.create_chat_completion( + messages=[ + {"role": "system", "content": self._system_prompt}, + {"role": "user", "content": user_msg}, + ], + max_tokens=self._max_output_tokens, + temperature=self._temperature, + ) + raw = response["choices"][0]["message"]["content"] or "" + + cleaned = _strip_think(raw) + if not cleaned: + duration_ms = int((time.perf_counter() - t0) * 1000) + self._log_event(text, raw, [], 0, [], [], [], duration_ms, dry_run) + raise IngestEmpty( + "LLM returned empty or -only output. " + f"raw={raw!r}" + ) + + raw_lines = _split_lines(cleaned) + deduped, dup_dropped = _dedupe_upserts(raw_lines) + + from graphstore.dsl.parser import parse as _dsl_parse + + valid: list[str] = [] + rejected: list[tuple[str, str]] = list(dup_dropped) + for ln in deduped: + try: + _dsl_parse(ln) + valid.append(ln) + except Exception as err: + rejected.append((ln, f"parse error: {err}")) + + entities_new: list[str] = [] + beliefs_changed: list[tuple[str, str]] = [] + for ln in valid: + if _UPSERT_RE.match(ln): + m = _ENT_FROM_ID_RE.search(ln) + if m: + entities_new.append(m.group(1)) + elif _ASSERT_RE.match(ln): + m = _ASSERT_RE.match(ln) + if m: + beliefs_changed.append((m.group(1), "assert")) + elif _RETRACT_RE.match(ln): + m = _RETRACT_RE.match(ln) + if m: + beliefs_changed.append((m.group(1), "retract")) + + executed = 0 + executed_lines: list[str] = [] + if not dry_run: + for ln in valid: + try: + self._gs.execute(ln) + executed += 1 + executed_lines.append(ln) + except Exception as err: + rejected.append((ln, f"execute error: {err}")) + # Scrape belief updates so the next ingest sees the current fact + # state. Only lines that actually executed contribute - failed + # ones leave the running state unchanged. + _scrape_belief_updates(executed_lines, self._facts) + + duration_ms = int((time.perf_counter() - t0) * 1000) + self._log_event( + text, raw, valid, executed, rejected, entities_new, + beliefs_changed, duration_ms, dry_run, + ) + return IngestResult( + statements=list(valid), + executed=executed, + rejected=rejected, + entities_new=entities_new, + beliefs_changed=beliefs_changed, + duration_ms=duration_ms, + raw_output=raw, + skill_fingerprint=self._skill_fingerprint, + dry_run=dry_run, + ) + + # ------------------------------------------------------------------ + # Internals + # ------------------------------------------------------------------ + + def _estimate_tokens(self, text: str) -> int: + """Fast char/4 estimate. 
llama-cpp-python's tokenize() is authoritative
+        but costs a forward through the vocab table; for budget guards the
+        cheap estimate is fine and conservative-enough (chars/4 over-counts
+        for ASCII, under-counts for dense tokens - net neutral)."""
+        return len(text) // 4 + 8
+
+    def _log_event(
+        self,
+        input_text: str,
+        raw: str,
+        valid: list[str],
+        executed: int,
+        rejected: list[tuple[str, str]],
+        entities_new: list[str],
+        beliefs_changed: list[tuple[str, str]],
+        duration_ms: int,
+        dry_run: bool,
+    ) -> None:
+        _log.info(
+            "bonsai.ingest: input_chars=%d raw_chars=%d stmts=%d exec=%d "
+            "rejected=%d entities=%d beliefs=%d dur_ms=%d skill=%s dry_run=%s",
+            len(input_text), len(raw), len(valid), executed, len(rejected),
+            len(entities_new), len(beliefs_changed), duration_ms,
+            self._skill_fingerprint, dry_run,
+        )
diff --git a/tests/test_bonsai_ingestor.py b/tests/test_bonsai_ingestor.py
new file mode 100644
index 0000000..dfd7f13
--- /dev/null
+++ b/tests/test_bonsai_ingestor.py
@@ -0,0 +1,327 @@
+"""Unit tests for bonsai_ingestor correctness guards.
+
+The live LLM path needs the 1.09 GB TQ1_0 GGUF on disk and is skipped by
+default. These tests exercise the post-processing and guard logic without
+touching llama.cpp.
+"""
+from __future__ import annotations
+
+import hashlib
+from pathlib import Path
+
+import pytest
+
+from graphstore.bonsai_ingestor import (
+    BonsaiIngestor,
+    FactState,
+    IngestEmpty,
+    IngestOverflow,
+    IngestResult,
+    _dedupe_upserts,
+    _render_known_facts_block,
+    _scrape_belief_updates,
+    _split_lines,
+    _strip_think,
+)
+
+
+# --------------------------------------------------------------------
+# Post-processing helpers
+# --------------------------------------------------------------------
+
+def test_strip_think_removes_single_block():
+    out = _strip_think("<think>reasoning</think>CREATE NODE \"x\" kind = \"k\"")
+    assert "think" not in out.lower()
+    assert "CREATE NODE" in out
+
+
+def test_strip_think_removes_multiple_blocks():
+    inp = "<think>a</think>line1<think>b</think>line2"
+    assert _strip_think(inp) == "line1line2"
+
+
+def test_strip_think_empty_on_only_think():
+    assert _strip_think("<think>foo</think>") == ""
+
+
+def test_split_lines_drops_fences_and_blanks():
+    inp = "```dsl\nCREATE NODE \"a\" kind = \"k\"\n\n```\nCREATE NODE \"b\" kind = \"k\""
+    lines = _split_lines(inp)
+    assert lines == [
+        'CREATE NODE "a" kind = "k"',
+        'CREATE NODE "b" kind = "k"',
+    ]
+
+
+def test_dedupe_upserts_keeps_first_drops_later():
+    stmts = [
+        'UPSERT NODE "ent:priya" kind = "entity" name = "Priya"',
+        'UPSERT NODE "ent:openai" kind = "entity" name = "OpenAI"',
+        'UPSERT NODE "ent:priya" kind = "entity" name = "Priya"',
+        'CREATE EDGE "m:0" -> "ent:priya" kind = "mentions"',
+    ]
+    kept, dropped = _dedupe_upserts(stmts)
+    assert len(kept) == 3
+    assert 'ent:priya' in kept[0]
+    assert 'ent:openai' in kept[1]
+    assert 'CREATE EDGE' in kept[2]
+    assert len(dropped) == 1
+    assert "duplicate" in dropped[0][1]
+
+
+def test_dedupe_upserts_passes_non_upsert():
+    stmts = ['CREATE NODE "m:0" kind = "message"', 'CREATE EDGE "a" -> "b" kind = "k"']
+    kept, dropped = _dedupe_upserts(stmts)
+    assert kept == stmts
+    assert dropped == []
+
+
+# --------------------------------------------------------------------
+# Skill fingerprint
+# --------------------------------------------------------------------
+
+def test_skill_fingerprint_is_stable_across_instances(tmp_path: Path):
+    skill = tmp_path / "skill.md"
+    skill.write_text("content A")
+    model = tmp_path / "fake.gguf"
+    model.write_bytes(b"ignored; no llama init yet")
+
+    
ing1 = BonsaiIngestor(model_path=model, skill_path=skill) + ing2 = BonsaiIngestor(model_path=model, skill_path=skill) + assert ing1.skill_fingerprint == ing2.skill_fingerprint + + expected = hashlib.sha256(b"content A").hexdigest()[:12] + assert ing1.skill_fingerprint == expected + + +def test_skill_fingerprint_changes_on_skill_edit(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("v1") + model = tmp_path / "fake.gguf" + model.write_bytes(b"") + + ing = BonsaiIngestor(model_path=model, skill_path=skill) + fp_v1 = ing.skill_fingerprint + + skill.write_text("v2") + ing._reload_skill() + assert ing.skill_fingerprint != fp_v1 + + +def test_skill_fingerprint_pinned_into_system_prompt(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("rule body") + model = tmp_path / "fake.gguf" + model.write_bytes(b"") + + ing = BonsaiIngestor(model_path=model, skill_path=skill) + assert f"skill-sha256={ing.skill_fingerprint}" in ing._system_prompt + assert "rule body" in ing._system_prompt + + +# -------------------------------------------------------------------- +# Input validation +# -------------------------------------------------------------------- + +def test_empty_input_raises_ingest_empty(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("any") + model = tmp_path / "fake.gguf" + model.write_bytes(b"") + + ing = BonsaiIngestor(model_path=model, skill_path=skill) + with pytest.raises(IngestEmpty): + ing.ingest("") + with pytest.raises(IngestEmpty): + ing.ingest(" \n\t ") + + +def test_non_dry_run_without_store_raises(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("any") + model = tmp_path / "fake.gguf" + model.write_bytes(b"") + + ing = BonsaiIngestor(model_path=model, skill_path=skill) + with pytest.raises(ValueError, match="requires a GraphStore"): + ing.ingest("hello") + + +def test_missing_model_file_raises(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("any") + with pytest.raises(FileNotFoundError): + BonsaiIngestor(model_path=tmp_path / "nope.gguf", skill_path=skill) + + +def test_missing_skill_file_raises(tmp_path: Path): + model = tmp_path / "fake.gguf" + model.write_bytes(b"") + with pytest.raises(FileNotFoundError): + BonsaiIngestor(model_path=model, skill_path=tmp_path / "nope.md") + + +# -------------------------------------------------------------------- +# Frontmatter strip +# -------------------------------------------------------------------- + +def test_yaml_frontmatter_stripped_from_skill(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("---\nname: x\n---\n\nactual rules") + model = tmp_path / "fake.gguf" + model.write_bytes(b"") + + ing = BonsaiIngestor(model_path=model, skill_path=skill) + assert "name: x" not in ing._skill_text + assert "actual rules" in ing._skill_text + + +# -------------------------------------------------------------------- +# IngestResult shape +# -------------------------------------------------------------------- + +def test_ingest_result_defaults(): + r = IngestResult() + assert r.statements == [] + assert r.executed == 0 + assert r.rejected == [] + assert r.entities_new == [] + assert r.beliefs_changed == [] + assert r.duration_ms == 0 + assert r.raw_output == "" + assert r.skill_fingerprint == "" + assert r.dry_run is False + + +# -------------------------------------------------------------------- +# Fact state tracking (cross-message belief identity) +# -------------------------------------------------------------------- + +def 
test_scrape_single_assert_creates_factstate(): + facts: dict[str, FactState] = {} + _scrape_belief_updates( + ['ASSERT "fact:color" kind = "belief" value = "blue" CONFIDENCE 0.9 SOURCE "m:0"'], + facts, + ) + assert "fact:color" in facts + st = facts["fact:color"] + assert st.kind == "belief" + assert st.value == "blue" + assert st.confidence == 0.9 + assert st.source == "m:0" + assert st.retracted is False + + +def test_scrape_assert_then_retract_marks_retracted(): + facts: dict[str, FactState] = {} + _scrape_belief_updates( + ['ASSERT "fact:x" kind = "belief" value = "v" CONFIDENCE 0.9 SOURCE "m:0"'], + facts, + ) + _scrape_belief_updates( + ['RETRACT "fact:x" REASON "wrong"'], + facts, + ) + assert facts["fact:x"].retracted is True + assert facts["fact:x"].retract_reason == "wrong" + + +def test_scrape_retract_then_reassert_un_retracts(): + facts: dict[str, FactState] = {} + _scrape_belief_updates( + ['ASSERT "fact:x" kind = "belief" value = "old" CONFIDENCE 0.9 SOURCE "m:0"', + 'RETRACT "fact:x" REASON "wrong"', + 'ASSERT "fact:x" kind = "belief" value = "new" CONFIDENCE 0.9 SOURCE "m:1"'], + facts, + ) + st = facts["fact:x"] + assert st.value == "new" + assert st.retracted is False + assert st.retract_reason == "" + + +def test_scrape_ignores_non_belief_lines(): + facts: dict[str, FactState] = {} + _scrape_belief_updates( + ['CREATE NODE "m:0" kind = "message"', + 'UPSERT NODE "ent:x" kind = "entity" name = "X"', + 'CREATE EDGE "a" -> "b" kind = "mentions"'], + facts, + ) + assert facts == {} + + +def test_render_known_facts_block_empty_when_no_facts(): + assert _render_known_facts_block({}) == "" + + +def test_render_known_facts_block_hides_retracted(): + facts = { + "fact:a": FactState(fact_id="fact:a", kind="belief", value="alive", confidence=0.9), + "fact:b": FactState(fact_id="fact:b", kind="belief", value="dead", retracted=True), + } + block = _render_known_facts_block(facts) + assert "fact:a" in block + assert "fact:b" not in block + assert "alive" in block + assert "dead" not in block + + +def test_render_known_facts_block_formats_each_fact(): + facts = { + "fact:color": FactState( + fact_id="fact:color", + kind="belief", + value="blue", + confidence=0.9, + source="m:s1:0", + ), + } + block = _render_known_facts_block(facts) + assert "[fact:color]" in block + assert 'kind="belief"' in block + assert 'value="blue"' in block + assert "confidence=0.90" in block + assert 'source="m:s1:0"' in block + assert "KNOWN FACTS" in block + + +def test_render_known_facts_trims_to_max_facts(): + facts = { + f"fact:{i}": FactState(fact_id=f"fact:{i}", value=str(i), confidence=0.9) + for i in range(60) + } + block = _render_known_facts_block(facts, max_facts=5) + for i in range(55, 60): + assert f"[fact:{i}]" in block + for i in range(0, 55): + assert f"[fact:{i}]" not in block + + +def test_ingestor_facts_property_returns_copy(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("any") + model = tmp_path / "fake.gguf" + model.write_bytes(b"") + + ing = BonsaiIngestor(model_path=model, skill_path=skill) + assert ing.facts == {} + # Simulate state set by an earlier ingest: + ing._facts["fact:x"] = FactState(fact_id="fact:x", value="v") + snapshot = ing.facts + assert "fact:x" in snapshot + # Mutating the snapshot should not affect internal state + snapshot["fact:y"] = FactState(fact_id="fact:y") + assert "fact:y" not in ing._facts + + +def test_ingestor_reset_facts_clears_state(tmp_path: Path): + skill = tmp_path / "skill.md" + skill.write_text("any") + model = tmp_path / 
"fake.gguf" + model.write_bytes(b"") + + ing = BonsaiIngestor(model_path=model, skill_path=skill) + ing._facts["fact:x"] = FactState(fact_id="fact:x", value="v") + ing.reset_facts() + assert ing._facts == {} diff --git a/tools/skills/graphstore-bonsai-dsl/SKILL.md b/tools/skills/graphstore-bonsai-dsl/SKILL.md new file mode 100644 index 0000000..17fe6b0 --- /dev/null +++ b/tools/skills/graphstore-bonsai-dsl/SKILL.md @@ -0,0 +1,90 @@ +--- +name: graphstore-bonsai-dsl +description: Minimal ingest-only DSL cheat sheet for tiny local LLMs (Ternary-Bonsai 1.7B / 4B / 8B GGUF). Uses few-shot examples instead of reference text. Pair with GBNF grammar-constrained decoding for guaranteed-parseable output. +compatibility: graphstore >= 0.4.0 +metadata: + author: orkait + version: "2.0" + target_tokens: 400 +--- + +Convert one user message into GraphStore DSL. Output **only** DSL lines. No prose, no markdown, no `` tags. + +Valid verbs: +``` +CREATE NODE "id" kind = "K" f = v ... [DOCUMENT "text"] +UPSERT NODE "id" kind = "K" name = "N" +CREATE EDGE "a" -> "b" kind = "K" +ASSERT "fact:X" kind = "belief" value = "V" CONFIDENCE 0.9 SOURCE "msg:id" +RETRACT "fact:X" REASON "why" +``` + +Escape `"` as `\"` inside strings. + +Emit each entity once per message. Entity id = `ent:`. Belief id = `fact:`. + +Required per message (copy this pattern): +1. One `CREATE NODE "m::" kind = "message" session = "..." role = "..." DOCUMENT "..."` +2. For every person / org / named thing in the text: one `UPSERT NODE "ent:" kind = "entity" name = "..."` +3. For every entity from step 2: one `CREATE EDGE "m::" -> "ent:" kind = "mentions"` (always emit both the UPSERT and the matching EDGE, not one without the other) +4. If the user states a personal preference, belief, or fact about themselves, add one `ASSERT "fact:" ...` +5. If the new message contradicts a KNOWN FACT above, add one `RETRACT "" REASON "..."` before its `ASSERT` + +**Fact id reuse (critical).** If the user turn starts with `### KNOWN FACTS`, those lines list existing beliefs that already have ids in the store. When the new message updates or contradicts a concept shown there, reuse the same `fact:` from the block. Do NOT coin a new fact_id for the same underlying belief. On update emit `RETRACT "" REASON "..."` followed by `ASSERT "" ... value = "" ...`. + +--- + +**Input:** +Session s1, msg m:s1:0, user: "Kailash joined OpenAI as DB engineer." + +**Output:** +``` +CREATE NODE "m:s1:0" kind = "message" session = "s1" role = "user" DOCUMENT "Kailash joined OpenAI as DB engineer." +UPSERT NODE "ent:kailash" kind = "entity" name = "Kailash" +UPSERT NODE "ent:openai" kind = "entity" name = "OpenAI" +CREATE EDGE "m:s1:0" -> "ent:kailash" kind = "mentions" +CREATE EDGE "m:s1:0" -> "ent:openai" kind = "mentions" +``` + +--- + +**Input:** +Session s2, msg m:s2:0, user: "My favorite color is blue." + +**Output:** +``` +CREATE NODE "m:s2:0" kind = "message" session = "s2" role = "user" DOCUMENT "My favorite color is blue." +ASSERT "fact:favorite_color" kind = "belief" value = "blue" CONFIDENCE 0.9 SOURCE "m:s2:0" +``` + +--- + +**Input:** +Session s2, msg m:s2:1, user: "Actually my favorite color is green now." + +**Output:** +``` +CREATE NODE "m:s2:1" kind = "message" session = "s2" role = "user" DOCUMENT "Actually my favorite color is green now." 
+RETRACT "fact:favorite_color" REASON "superseded by m:s2:1" +ASSERT "fact:favorite_color" kind = "belief" value = "green" CONFIDENCE 0.9 SOURCE "m:s2:1" +``` + +--- + +**Input with known facts (reuse the existing fact_id, do not invent a new one):** + +``` +### KNOWN FACTS (reuse these fact_ids; emit RETRACT + ASSERT to update) +[fact:favorite_drink] kind="belief" value="coffee" confidence=0.90 source="m:s3:0" + +Session s3, msg m:s3:1, user: "Actually I prefer tea now." +``` + +**Output:** +``` +CREATE NODE "m:s3:1" kind = "message" session = "s3" role = "user" DOCUMENT "Actually I prefer tea now." +RETRACT "fact:favorite_drink" REASON "superseded by m:s3:1" +ASSERT "fact:favorite_drink" kind = "belief" value = "tea" CONFIDENCE 0.9 SOURCE "m:s3:1" +``` + +Wrong: coining `fact:preference` or `fact:drink_pref` when `fact:favorite_drink` already exists in KNOWN FACTS. diff --git a/tools/skills/graphstore-bonsai-dsl/grammar.gbnf b/tools/skills/graphstore-bonsai-dsl/grammar.gbnf new file mode 100644 index 0000000..0ae1175 --- /dev/null +++ b/tools/skills/graphstore-bonsai-dsl/grammar.gbnf @@ -0,0 +1,29 @@ +# GBNF for graphstore ingest-only DSL. +# Locks the model to emit ONLY valid statements from the ingest subset. +# Kills tags, markdown fences, duplicate whitespace by construction. +# +# Use with llama_cpp.LlamaGrammar.from_string(open(...).read()) and pass +# `grammar=` to create_chat_completion. Works with any Qwen/Bonsai model. + +root ::= stmt ( "\n" stmt )* + +stmt ::= create-node | upsert-node | create-edge | assert-stmt | retract-stmt + +create-node ::= "CREATE NODE " id " kind = " string field* document? +upsert-node ::= "UPSERT NODE " id " kind = " string " name = " string +create-edge ::= "CREATE EDGE " id " -> " id " kind = " string +assert-stmt ::= "ASSERT " id " kind = \"belief\" value = " string " CONFIDENCE " confidence " SOURCE " id +retract-stmt ::= "RETRACT " id " REASON " string + +field ::= " " key " = " value +key ::= [a-z] [a-z0-9_]* +value ::= string | number + +document ::= " DOCUMENT " string + +id ::= string +string ::= "\"" char* "\"" +char ::= [^"\\] | "\\\"" | "\\\\" + +number ::= [0-9]+ ("." [0-9]+)? +confidence ::= "0" "." [0-9] [0-9]?