From 8ee5697695eef90c72b4976a3969122423498356 Mon Sep 17 00:00:00 2001 From: Hugo Montenegro Date: Thu, 26 Feb 2026 14:43:52 +0000 Subject: [PATCH] fix: use kaikki pre-built definitions in words hub and word pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The words hub (/<lang_code>/words) and individual word pages (/<lang_code>/word/<day_idx>) only read from the disk cache for definitions, missing the 206K pre-built kaikki definitions that exist in memory. This caused most words on the words hub to show no definition (e.g. Spanish had 3/7405 cached). Now both routes fall back to kaikki native → kaikki English lookups on cache miss. These are in-memory dict lookups with zero latency. Also adds 5 zero-coverage languages (pau, ie, rw, tlh, qya) to the LLM fallback allowlist — they had no kaikki files, UNRELIABLE parser confidence, AND were missing from the LLM allowlist, so every definition request returned None. --- webapp/app.py | 18 +++++++++++++++--- webapp/wiktionary.py | 5 +++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/webapp/app.py b/webapp/app.py index 4bde14b..de5ed00 100644 --- a/webapp/app.py +++ b/webapp/app.py @@ -20,7 +20,11 @@ import urllib.request as urlreq import logging from pathlib import Path -from wiktionary import fetch_definition_cached as _fetch_definition_cached_impl +from wiktionary import ( + fetch_definition_cached as _fetch_definition_cached_impl, + lookup_kaikki_native, + lookup_kaikki_english, +) # Load .env file if it exists (for local development) _env_path = Path(__file__).resolve().parent.parent / ".env" @@ -1160,7 +1164,7 @@ def language_words_hub(lang_code): word = get_word_for_day(lang_code, day_idx) word_date = idx_to_date(day_idx) - # Load cached definition (fast disk read) + # Load definition: disk cache first, then kaikki pre-built definition = None def_path = os.path.join(WORD_DEFS_DIR, lang_code, f"{word.lower()}.json") if os.path.exists(def_path): @@ -1171,6 +1175,10 @@ def
language_words_hub(lang_code): definition = loaded except Exception: pass + if not definition: + definition = lookup_kaikki_native(word, lang_code) + if not definition: + definition = lookup_kaikki_english(word, lang_code) word_stats = _load_word_stats(lang_code, day_idx) @@ -1407,7 +1415,7 @@ def word_page(lang_code, day_idx): lang_name = config.get("name", lang_code) lang_name_native = config.get("name_native", lang_name) - # Read cached definition if available (fast disk read, no HTTP) + # Read definition: disk cache first, then kaikki pre-built definition = None cache_path = os.path.join(WORD_DEFS_DIR, lang_code, f"{word.lower()}.json") if os.path.exists(cache_path): @@ -1417,6 +1425,10 @@ def word_page(lang_code, day_idx): definition = loaded if loaded else None except Exception: pass + if not definition: + definition = lookup_kaikki_native(word, lang_code) + if not definition: + definition = lookup_kaikki_english(word, lang_code) # Map language code to Wiktionary subdomain wikt_lang_map = {"nb": "no", "nn": "no", "hyw": "hy", "ckb": "ku"} diff --git a/webapp/wiktionary.py b/webapp/wiktionary.py index 19a3f54..641e4d8 100644 --- a/webapp/wiktionary.py +++ b/webapp/wiktionary.py @@ -741,6 +741,11 @@ def _follow_form_of(definition, lang_code): "mt": "Maltese", "hyw": "Western Armenian", "ckb": "Central Kurdish", + "pau": "Palauan", + "ie": "Interlingue", + "rw": "Kinyarwanda", + "tlh": "Klingon", + "qya": "Quenya", }