Hugo0 · Hugo0 · Mar 4, 2026 · Mar 4, 2026 · Mar 4, 2026
diff --git a/tests/test_definitions.py b/tests/test_definitions.py
@@ -322,6 +322,69 @@ def test_no_cache_dir_still_works(self):
         assert result is not None
         assert result["definition"] == "A building"
 
+    def test_kaikki_fallback_when_llm_returns_none(self):
+        """A failed LLM lookup falls through to the native kaikki entry."""
+        native_hit = {
+            "definition": "A native definition",
+            "part_of_speech": None,
+            "source": "kaikki",
+            "url": None,
+        }
+        llm_miss = patch("definitions._call_llm_definition", return_value=None)
+        with llm_miss, patch("definitions.lookup_kaikki_native", return_value=native_hit):
+            result = fetch_definition("word", "nl", cache_dir=None)
+
+        assert result is not None
+        assert result["source"] == "kaikki"
+        assert result["definition"] == "A native definition"
+
+    def test_kaikki_english_fallback_when_native_missing(self):
+        """English kaikki glosses are the last resort after LLM and native miss."""
+        english_hit = {
+            "definition": "An English gloss",
+            "part_of_speech": None,
+            "source": "kaikki-en",
+            "url": None,
+        }
+        native_miss = patch("definitions.lookup_kaikki_native", return_value=None)
+        english = patch("definitions.lookup_kaikki_english", return_value=english_hit)
+        with patch("definitions._call_llm_definition", return_value=None), native_miss, english:
+            result = fetch_definition("word", "ro", cache_dir=None)
+
+        assert result is not None
+        assert result["source"] == "kaikki-en"
+
+    def test_kaikki_fallback_is_cached(self, tmp_path):
+        """A definition served from kaikki is persisted to the disk cache."""
+        native_hit = {
+            "definition": "A native definition",
+            "part_of_speech": None,
+            "source": "kaikki",
+            "url": None,
+        }
+        llm_miss = patch("definitions._call_llm_definition", return_value=None)
+        native = patch("definitions.lookup_kaikki_native", return_value=native_hit)
+        with llm_miss, native:
+            fetch_definition("word", "nl", cache_dir=str(tmp_path))
+
+        written = tmp_path / "nl" / "word.json"
+        assert written.exists()
+        payload = json.loads(written.read_text())
+        assert payload["source"] == "kaikki"
+
+    def test_negative_cache_only_when_all_tiers_fail(self, tmp_path):
+        """A not_found marker is cached once LLM and both kaikki lookups miss."""
+        llm_miss = patch("definitions._call_llm_definition", return_value=None)
+        native_miss = patch("definitions.lookup_kaikki_native", return_value=None)
+        english_miss = patch("definitions.lookup_kaikki_english", return_value=None)
+        with llm_miss, native_miss, english_miss:
+            fetch_definition("xyzzy", "zz", cache_dir=str(tmp_path))
+
+        marker = tmp_path / "zz" / "xyzzy.json"
+        assert marker.exists()
+        payload = json.loads(marker.read_text())
+        assert payload["not_found"] is True
+
 
 # ---------------------------------------------------------------------------
 # LLM_LANG_NAMES coverage

diff --git a/webapp/definitions.py b/webapp/definitions.py
@@ -1,14 +1,13 @@
 """
 Definition fetching for Wordle Global.
 
-Simple 2-tier system: disk cache → LLM (GPT-5.2).
+3-tier system: disk cache → LLM (GPT-5.2) → kaikki (offline Wiktionary).
 Definitions are pre-generated daily via scripts/pregenerate_definitions.py.
 """
 
 import json
 import logging
 import os
-import re
 import time
 import urllib.parse
 import urllib.request as urlreq
@@ -41,36 +40,41 @@ def _load_kaikki_file(cache_key, file_path):
     return _kaikki_cache[cache_key]
 
 
-def lookup_kaikki_native(word, lang_code):
-    """Look up a word in native-language kaikki definitions."""
-    defs = _load_kaikki_file(
-        f"{lang_code}_native", os.path.join(_DEFINITIONS_DIR, f"{lang_code}.json")
-    )
+def _lookup_kaikki(word, lang_code, variant):
+    """Look up lowercased ``word`` in the kaikki definitions for ``lang_code``.
+
+    Args:
+        variant: "native" for native-language defs, "en" for English glosses.
+    Returns: the result dict, or None when the lowercased word has no entry.
+    Raises: ValueError if variant is neither "native" nor "en".
+    """
+    suffix_and_source = {"native": ("", "kaikki"), "en": ("_en", "kaikki-en")}
+    if variant not in suffix_and_source:  # fail loudly instead of defaulting to "en"
+        raise ValueError(f"unknown kaikki variant: {variant!r}")
+    suffix, source = suffix_and_source[variant]
+    cache_key = f"{lang_code}_{variant}"
+    file_name = f"{lang_code}{suffix}.json"
+
+    defs = _load_kaikki_file(cache_key, os.path.join(_DEFINITIONS_DIR, file_name))
     definition = defs.get(word.lower())
     if definition:
         return {
             "definition": definition,
             "part_of_speech": None,
-            "source": "kaikki",
-            "url": None,
+            "source": source,
+            "url": _wiktionary_url(word, lang_code),
         }
     return None
 
 
+def lookup_kaikki_native(word, lang_code):
+    """Return the native-language kaikki entry for a word, or None if absent."""
+    return _lookup_kaikki(word, lang_code, variant="native")
+
+
 def lookup_kaikki_english(word, lang_code):
     """Look up a word in English-gloss kaikki definitions."""
-    defs = _load_kaikki_file(
-        f"{lang_code}_en", os.path.join(_DEFINITIONS_DIR, f"{lang_code}_en.json")
-    )
-    definition = defs.get(word.lower())
-    if definition:
-        return {
-            "definition": definition,
-            "part_of_speech": None,
-            "source": "kaikki-en",
-            "url": None,
-        }
-    return None
+    return _lookup_kaikki(word, lang_code, variant="en")
 
 
 # ---------------------------------------------------------------------------
@@ -87,11 +91,6 @@ def _wiktionary_url(word, lang_code):
     return f"https://{wikt_lang}.wiktionary.org/wiki/{urllib.parse.quote(word)}"
 
 
-def strip_html(text):
-    """Strip HTML tags from a string."""
-    return re.sub(r"<[^>]+>", "", text).strip()
-
-
 # ---------------------------------------------------------------------------
 # LLM definition generation (GPT-5.2)
 # ---------------------------------------------------------------------------
@@ -235,14 +234,14 @@ def _call_llm_definition(word, lang_code):
                 )
                 return None
 
+            def_en = definition_en[:300]
+            def_native = ((definition_native or definition_en))[:300]
             wikt_url = _wiktionary_url(word, lang_code)
             return {
-                # New fields
-                "definition_native": (definition_native or definition_en)[:300],
-                "definition_en": definition_en[:300],
+                "definition_native": def_native,
+                "definition_en": def_en,
+                "definition": def_en,  # backward compat
                 "confidence": confidence,
-                # Backward-compatible fields
-                "definition": definition_en[:300],
                 "part_of_speech": result.get("part_of_speech"),
                 "source": "llm",
                 "url": wikt_url,
@@ -265,7 +264,7 @@ def _call_llm_definition(word, lang_code):
 
 
 def fetch_definition(word, lang_code, cache_dir=None, skip_negative_cache=False):
-    """Fetch a word definition. 2-tier: disk cache → LLM.
+    """Fetch a word definition. 3-tier: disk cache → LLM → kaikki.
 
     Args:
         word: The word to define.
@@ -305,6 +304,10 @@ def fetch_definition(word, lang_code, cache_dir=None, skip_negative_cache=False)
     # --- Tier 2: LLM ---
     result = _call_llm_definition(word, lang_code)
 
+    # --- Tier 3: Kaikki fallback (offline Wiktionary data) ---
+    if not result:
+        result = lookup_kaikki_native(word, lang_code) or lookup_kaikki_english(word, lang_code)
+
     # Cache result (including negative results)
     if lang_cache_dir:
         try: