From 3bebacaa9e87884113b61e98225f5357ef75ae53 Mon Sep 17 00:00:00 2001 From: Kailas Mahavarkar <66670953+KailasMahavarkar@users.noreply.github.com> Date: Mon, 20 Apr 2026 15:27:28 +0530 Subject: [PATCH] chore(config): dedup CONFIG_PATH + fix misleading "legacy" labels Two unrelated config-layer cleanups: 1. One CONFIG_PATH, not two: tools/autoresearch/providers.py and tools/autoresearch/run_loop.py each computed Path(__file__).resolve().parent / "config.json" independently (under two different names, _CONFIG_PATH and CONFIG_FILE). Promoted to providers.CONFIG_PATH and aliased as CONFIG_FILE in run_loop via import so both files point at the same constant now. Also added a docstring on providers.load_config clarifying it is the one-shot variant (use run_loop.load_config for the autoresearch long-running mtime-cached + migrated variant). 2. "legacy kwargs" labels were wrong: config.merge_kwargs and store.py both called GraphStore(embedder=..., ingest_root=..., vault=..., retention=...) "legacy" kwargs. They are in fact the primary public API - every test + docs page uses them. Relabeled as "top-level convenience kwargs" so future reviewers do not incorrectly conclude they are removable. No functional change. 27 tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/graphstore/config.py | 18 ++++++++---------- src/graphstore/store.py | 4 ++-- tools/autoresearch/providers.py | 12 +++++++++--- tools/autoresearch/run_loop.py | 2 +- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/graphstore/config.py b/src/graphstore/config.py index 5ce4663..e2351a0 100644 --- a/src/graphstore/config.py +++ b/src/graphstore/config.py @@ -344,17 +344,19 @@ def apply_env_overrides(config: GraphStoreConfig) -> GraphStoreConfig: def merge_kwargs(config: GraphStoreConfig, **kwargs) -> GraphStoreConfig: - """Override config fields from constructor kwargs. + """Override config fields from GraphStore(...) constructor kwargs. 
- Supports flat shortcuts for common tuning knobs: - ceiling_mb, eviction_target_ratio, remember_weights, recall_decay, - search_oversample, similarity_threshold, duplicate_threshold, fts_tokenizer + Two shapes of kwargs are accepted: - Plus legacy kwargs: embedder, ingest_root, vault, retention (dict). + 1. Flat shortcuts for tuning knobs (see _KWARG_SHORTCUTS), e.g. + ceiling_mb, remember_weights, recall_decay, search_oversample. + 2. Top-level convenience kwargs that map to multi-field config + updates: embedder, ingest_root, vault, retention (dict). + These are the primary public API - GraphStore(embedder=...) is + how users construct the store. """ updates: dict[str, dict[str, object]] = {} - # Flat shortcuts -> section overrides for kwarg_name, (section, field) in _KWARG_SHORTCUTS.items(): if kwarg_name in kwargs: val = kwargs[kwarg_name] @@ -362,7 +364,6 @@ def merge_kwargs(config: GraphStoreConfig, **kwargs) -> GraphStoreConfig: if val != current_val: updates.setdefault(section, {})[field] = val - # Legacy: embedder (string or object -> vector.embedder name) if "embedder" in kwargs: emb = kwargs["embedder"] emb_name = emb if isinstance(emb, str) else "custom" @@ -370,16 +371,13 @@ def merge_kwargs(config: GraphStoreConfig, **kwargs) -> GraphStoreConfig: emb_name = "none" updates.setdefault("vector", {})["embedder"] = emb_name - # Legacy: ingest_root -> server.ingest_root if "ingest_root" in kwargs and kwargs["ingest_root"] is not None: updates.setdefault("server", {})["ingest_root"] = kwargs["ingest_root"] - # Legacy: vault -> vault.enabled + vault.path if "vault" in kwargs and kwargs["vault"] is not None: updates.setdefault("vault", {})["enabled"] = True updates["vault"]["path"] = kwargs["vault"] - # Legacy: retention (dict) if "retention" in kwargs and kwargs["retention"] is not None: r = kwargs["retention"] for key in ("blob_warm_days", "blob_archive_days", "blob_delete_days"): diff --git a/src/graphstore/store.py b/src/graphstore/store.py index 
7556dfb..e2d7b17 100644 --- a/src/graphstore/store.py +++ b/src/graphstore/store.py @@ -130,8 +130,8 @@ def __init__(self, path: str | None = None, ceiling_mb=_UNSET, # Layer 4: constructor kwargs (highest priority). Every kwarg listed # here must also appear in config._KWARG_SHORTCUTS or be one of the - # legacy keys (embedder, ingest_root, vault, retention, auto_optimize, - # enable_wal) that merge_kwargs handles separately. + # top-level convenience keys (embedder, ingest_root, vault, retention, + # auto_optimize, enable_wal) that merge_kwargs handles separately. _kwarg_names = ( "ceiling_mb", "embedder", "ingest_root", "vault", "retention", "remember_weights", "recall_decay", "search_oversample", diff --git a/tools/autoresearch/providers.py b/tools/autoresearch/providers.py index 8c0aa93..620602a 100644 --- a/tools/autoresearch/providers.py +++ b/tools/autoresearch/providers.py @@ -10,12 +10,18 @@ import os from pathlib import Path -_CONFIG_PATH = Path(__file__).resolve().parent / "config.json" +CONFIG_PATH = Path(__file__).resolve().parent / "config.json" def load_config() -> dict: - if _CONFIG_PATH.exists(): - return json.loads(_CONFIG_PATH.read_text()) + """One-shot read of config.json. No caching, no migration. + + Use when you want the providers section now. For the long-running + autoresearch loop use run_loop.load_config which caches + applies + schema migration. 
+ """ + if CONFIG_PATH.exists(): + return json.loads(CONFIG_PATH.read_text()) return {} diff --git a/tools/autoresearch/run_loop.py b/tools/autoresearch/run_loop.py index 8691888..59e2108 100644 --- a/tools/autoresearch/run_loop.py +++ b/tools/autoresearch/run_loop.py @@ -33,7 +33,7 @@ REPO_ROOT = Path(__file__).resolve().parent.parent.parent ALGO_DIR = REPO_ROOT / "src" / "graphstore" / "algos" VENV_PYTHON = REPO_ROOT / ".venv" / "bin" / "python" -CONFIG_FILE = Path(__file__).resolve().parent / "config.json" +from tools.autoresearch.providers import CONFIG_PATH as CONFIG_FILE PROGRAM_FILE = Path(__file__).resolve().parent / "program.md" _current_algo: str = ""