1 change: 1 addition & 0 deletions .env
@@ -196,3 +196,4 @@ INFO_REQUEST_LIMIT=10
INFO_REQUEST_CONTEXT_LINES=5
# INFO_REQUEST_EXPLAIN_DEFAULT=0
# INFO_REQUEST_RELATIONSHIPS=0
COMMIT_VECTOR_SEARCH=0
16 changes: 16 additions & 0 deletions .env.example
@@ -49,6 +49,7 @@ FASTMCP_HTTP_PORT=8002
FASTMCP_HTTP_HEALTH_PORT=18002
FASTMCP_INDEXER_HTTP_PORT=8003
FASTMCP_INDEXER_HTTP_HEALTH_PORT=18003
# MCP_INDEXER_URL=http://localhost:8003/mcp


# Optional: local cross-encoder reranker (ONNX)
@@ -161,6 +162,7 @@ REFRAG_DECODER_MODE=prompt # prompt|soft

# GLM_API_BASE=https://api.z.ai/api/coding/paas/v4/
# GLM_MODEL=glm-4.6
# GLM_API_KEY=your_glm_api_key_here

# GPU Performance Toggle
# Set to 1 to use native GPU-accelerated server on localhost:8081
@@ -201,6 +203,19 @@ SMART_SYMBOL_REINDEXING=0
# INDEX_UPSERT_BACKOFF=0.5
# Debounce file events to coalesce bursts
# WATCH_DEBOUNCE_SECS=1.5
# Optional fs metadata fast-path for unchanged files (skips re-reading files
# when size/mtime match cache.json in the same workspace).
# INDEX_FS_FASTPATH=0
# Optional 2-phase pseudo/tag mode (disabled by default).
# When enabled, indexer/watcher write base-only vectors and a background
# backfill worker adds pseudo/tags via Qdrant.
# PSEUDO_BACKFILL_ENABLED=1
# PSEUDO_BACKFILL_DEBUG=0
# PSEUDO_BACKFILL_TICK_SECS=60
# PSEUDO_BACKFILL_MAX_POINTS=256

# Development Remote Upload Configuration
# HOST_INDEX_PATH=./dev-workspace

# Remote upload git history (used by upload clients)
# Max number of commits to include per bundle (0 disables git history)
@@ -209,6 +224,7 @@
# REMOTE_UPLOAD_GIT_SINCE=
# Enable commit lineage goals for indexing
REFRAG_COMMIT_DESCRIBE=1
COMMIT_VECTOR_SEARCH=0

STRICT_MEMORY_RESTORE=0

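The PSEUDO_BACKFILL_* settings above describe a two-phase flow: the indexer/watcher writes base-only vectors, and a background worker later attaches pseudo/tags via Qdrant. Below is a minimal sketch of how a worker tick loop might consume these settings; the function names and the batch-processing callback are illustrative assumptions, not the repository's actual implementation.

```python
# Hypothetical sketch: a backfill tick loop driven by PSEUDO_BACKFILL_* env vars.
import os
import time


def _truthy(value: str | None, default: bool = False) -> bool:
    # Interpret common truthy strings; fall back to the default when unset.
    if value is None or not value.strip():
        return default
    return value.strip().lower() in {"1", "true", "yes", "on"}


def backfill_loop(process_batch) -> None:
    """Call process_batch(max_points) every tick while backfill is enabled."""
    if not _truthy(os.environ.get("PSEUDO_BACKFILL_ENABLED")):
        return  # base-only vectors stay as written by the indexer/watcher
    debug = _truthy(os.environ.get("PSEUDO_BACKFILL_DEBUG"))
    tick_secs = float(os.environ.get("PSEUDO_BACKFILL_TICK_SECS", "60"))
    max_points = int(os.environ.get("PSEUDO_BACKFILL_MAX_POINTS", "256"))
    while True:
        # process_batch is expected to add pseudo/tags payloads via Qdrant
        # for up to max_points base-only points and return how many it updated.
        updated = process_batch(max_points)
        if debug:
            print(f"pseudo backfill tick: updated {updated} points")
        time.sleep(tick_secs)
```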
42 changes: 42 additions & 0 deletions deploy/kubernetes/indexer-services.yaml
@@ -23,6 +23,20 @@ spec:
runAsUser: 1000
runAsGroup: 1000
fsGroup: 1000
initContainers:
- name: wait-for-qdrant
image: context-engine-indexer-service
imagePullPolicy: IfNotPresent
command:
- /bin/sh
- -c
- /app/scripts/wait-for-qdrant.sh
env:
- name: QDRANT_URL
valueFrom:
configMapKeyRef:
name: context-engine-config
key: QDRANT_URL
containers:
- name: watcher
image: context-engine-indexer-service
@@ -130,6 +144,20 @@ spec:
runAsUser: 1000
runAsGroup: 1000
fsGroup: 1000
initContainers:
- name: wait-for-qdrant
image: context-engine-indexer-service
imagePullPolicy: IfNotPresent
command:
- /bin/sh
- -c
- /app/scripts/wait-for-qdrant.sh
env:
- name: QDRANT_URL
valueFrom:
configMapKeyRef:
name: context-engine-config
key: QDRANT_URL
restartPolicy: OnFailure
containers:
- name: indexer
@@ -207,6 +235,20 @@ spec:
runAsUser: 1000
runAsGroup: 1000
fsGroup: 1000
initContainers:
- name: wait-for-qdrant
image: context-engine-indexer-service
imagePullPolicy: IfNotPresent
command:
- /bin/sh
- -c
- /app/scripts/wait-for-qdrant.sh
env:
- name: QDRANT_URL
valueFrom:
configMapKeyRef:
name: context-engine-config
key: QDRANT_URL
restartPolicy: OnFailure
containers:
- name: init-payload
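Each workload above gates startup on a wait-for-qdrant init container that blocks until QDRANT_URL answers. The script's contents are not shown in this diff; the sketch below only illustrates the kind of readiness polling such a gate typically performs, with the /readyz endpoint, timeout, and interval as assumptions.

```python
# Illustrative readiness poll; endpoint, timeout, and interval are assumptions,
# not the contents of /app/scripts/wait-for-qdrant.sh.
import os
import time
import urllib.request


def wait_for_qdrant(timeout_secs: float = 300.0, interval_secs: float = 2.0) -> bool:
    url = os.environ.get("QDRANT_URL", "http://localhost:6333").rstrip("/") + "/readyz"
    deadline = time.monotonic() + timeout_secs
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if resp.status == 200:
                    return True  # Qdrant reports ready; main containers can start
        except OSError:
            pass  # not reachable yet; keep polling
        time.sleep(interval_secs)
    return False
```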
2 changes: 1 addition & 1 deletion deploy/kubernetes/qdrant.yaml
@@ -24,7 +24,7 @@ spec:
containers:
- name: qdrant
image: qdrant/qdrant:latest
imagePullPolicy: IfNotPresent
imagePullPolicy: Always
ports:
- name: http
containerPort: 6333
5 changes: 4 additions & 1 deletion docs/CLAUDE.example.md
@@ -1,5 +1,6 @@
This file is intended for AI agents (Claude, etc.) using the Context‑Engine Qdrant‑Indexer and Memory MCP tools.

These rules are NOT optional. You may have existing tools for file reading, "grep"-like searching, and 'codebase search' that are unrelated to the qdrant-indexer MCP tools; you are to favour the qdrant-indexer tooling at all costs, as per these guidelines.

Agentic AI Project Rules: When to Use MCP Qdrant-Indexer vs Grep

@@ -37,7 +38,9 @@ Agentic AI Project Rules: When to Use MCP Qdrant-Indexer vs Grep

MCP Tool Patterns:

# DO - Use concept/keyword-style queries (short natural-language fragments)
# DO - Use concept/keyword-style queries (short natural-language fragments).
# repo_search is semantic search, not grep, regex, or boolean syntax.
# Write queries as short descriptions, not as "foo OR bar" expressions.
"input validation mechanisms"
"database connection handling"
"performance bottlenecks in request path"
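As a concrete illustration of the query-style rules above: repo_search expects short natural-language fragments, not grep, regex, or boolean syntax. The call shape below is a hypothetical client wrapper and is only meant to contrast the two styles, not to document the actual MCP API.

```python
# Illustrative only: the client and tool-call shape are assumptions;
# the point is the query style, not the exact API.
good_queries = [
    "input validation mechanisms",       # short concept fragment
    "database connection handling",
]
bad_query = "validate OR sanitize OR escape"  # boolean/grep syntax; not how repo_search works

# results = mcp_client.call("repo_search", {"queries": good_queries})
```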
106 changes: 90 additions & 16 deletions scripts/hybrid_search.py
@@ -1454,7 +1454,7 @@ def dense_query(

# In-process API: run hybrid search and return structured items list
# Optional: pass an existing TextEmbedding instance via model to reuse cache

# Optional: pass mode to adjust implementation/docs weighting (code_first/balanced/docs_first)

def run_hybrid_search(
queries: List[str],
@@ -1473,6 +1473,7 @@ def run_hybrid_search(
expand: bool = True,
model: TextEmbedding | None = None,
collection: str | None = None,
mode: str | None = None,
repo: str | list[str] | None = None, # Filter by repo name(s); "*" to disable auto-filter
) -> List[Dict[str, Any]]:
client = QdrantClient(url=os.environ.get("QDRANT_URL", QDRANT_URL), api_key=API_KEY)
@@ -1594,6 +1595,7 @@ def _norm_under(u: str | None) -> str | None:
str(_collection()),
_env_truthy(os.environ.get("HYBRID_ADAPTIVE_WEIGHTS"), True),
_env_truthy(os.environ.get("HYBRID_MMR"), True),
str(mode or ""),
)
except Exception:
cache_key = None
@@ -1690,6 +1692,7 @@ def _norm_under(u: str | None) -> str | None:
'expand': expand,
'collection': _collection(),
'vector_name': vec_name,
'mode': mode,
}
is_duplicate, similar_fp = is_duplicate_request(request_data)
if is_duplicate:
@@ -1982,7 +1985,15 @@ def _scaled_rrf(rank: int) -> float:
flt_gated = _sanitize_filter_obj(flt_gated)

result_sets: List[List[Any]] = [
dense_query(client, vec_name, v, flt_gated, _scaled_per_query, collection, query_text=queries[i] if i < len(queries) else None)
dense_query(
client,
vec_name,
v,
flt_gated,
_scaled_per_query,
collection,
query_text=queries[i] if i < len(queries) else None,
)
for i, v in enumerate(embedded)
]
if os.environ.get("DEBUG_HYBRID_SEARCH"):
@@ -2239,6 +2250,18 @@ def _scaled_rrf(rank: int) -> float:

# Lexical + boosts
timestamps: List[int] = []
# Mode-aware tweaks for implementation/docs weighting. Modes:
# - None / "code_first": full IMPLEMENTATION_BOOST and DOCUMENTATION_PENALTY
# - "balanced": keep impl boost, halve doc penalty
# - "docs_first": reduce impl boost slightly and disable doc penalty
eff_mode = (mode or "").strip().lower()
impl_boost = IMPLEMENTATION_BOOST
doc_penalty = DOCUMENTATION_PENALTY
if eff_mode in {"balanced"}:
doc_penalty = DOCUMENTATION_PENALTY * 0.5
elif eff_mode in {"docs_first", "docs-first", "docs"}:
impl_boost = IMPLEMENTATION_BOOST * 0.5
doc_penalty = 0.0
for pid, rec in list(score_map.items()):
payload = rec["pt"].payload or {}
base_md = payload.get("metadata") or {}
@@ -2288,22 +2311,25 @@ def _scaled_rrf(rank: int) -> float:
if ext in {".json", ".yml", ".yaml", ".toml", ".ini"} or "/.codebase/" in path_lower or "/.kiro/" in path_lower:
rec["cfg"] = float(rec.get("cfg", 0.0)) - CONFIG_FILE_PENALTY
rec["s"] -= CONFIG_FILE_PENALTY
# Boost likely implementation files
if IMPLEMENTATION_BOOST > 0.0 and path:
# Boost likely implementation files (mode-aware)
if impl_boost > 0.0 and path:
if ext in {".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".java", ".rs", ".rb", ".php", ".cs", ".cpp", ".c", ".hpp", ".h"}:
rec["impl"] = float(rec.get("impl", 0.0)) + IMPLEMENTATION_BOOST
rec["s"] += IMPLEMENTATION_BOOST
# Penalize docs for implementation-style questions
qlow = " ".join(qlist).lower()
if DOCUMENTATION_PENALTY > 0.0 and path:
if ("readme" in path_lower or "/docs/" in path_lower or "/documentation/" in path_lower or path_lower.endswith(".md")):
if any(w in qlow for w in ["how does", "explain", "works", "algorithm"]):
rec["doc"] = float(rec.get("doc", 0.0)) - DOCUMENTATION_PENALTY
rec["s"] -= DOCUMENTATION_PENALTY
rec["impl"] = float(rec.get("impl", 0.0)) + impl_boost
rec["s"] += impl_boost
# Penalize docs (README/docs/markdown) relative to implementation files (mode-aware)
if doc_penalty > 0.0 and path:
if (
"readme" in path_lower
or "/docs/" in path_lower
or "/documentation/" in path_lower
or path_lower.endswith(".md")
):
rec["doc"] = float(rec.get("doc", 0.0)) - doc_penalty
rec["s"] -= doc_penalty

if LANG_MATCH_BOOST > 0.0 and path and eff_language:
lang = str(eff_language).lower()
md_lang = str((md.get("language") or "")).lower()
md_lang = str((md.get("language") or "").lower())
if (lang and md_lang and md_lang == lang) or lang_matches_path(lang, path):
rec["langb"] += LANG_MATCH_BOOST
rec["s"] += LANG_MATCH_BOOST
@@ -2635,6 +2661,20 @@ def _pass_filters(m: Dict[str, Any]) -> bool:
except Exception:
all_paths = set()

# Build path -> host_path map so we can emit related_paths in host space
# when PATH_EMIT_MODE prefers host paths. This keeps human-facing paths
# consistent while still preserving container paths for backend use.
host_map: Dict[str, str] = {}
try:
for _m in merged:
_md = (_m["pt"].payload or {}).get("metadata") or {}
_p = str(_md.get("path") or "").strip()
_h = str(_md.get("host_path") or "").strip()
if _p and _h:
host_map[_p] = _h
except Exception:
host_map = {}

items: List[Dict[str, Any]] = []
if not merged:
if _USE_CACHE and cache_key is not None:
@@ -2762,6 +2802,22 @@ def _resolve(seg: str) -> list[str]:
pass

_related = sorted(_related_set)[:10]
# Align related_paths with PATH_EMIT_MODE when possible: in host/auto
# modes, prefer host paths when we have a mapping; in container mode,
# keep container/path-space values as-is.
_related_out = _related
try:
_mode_related = str(os.environ.get("PATH_EMIT_MODE", "auto")).strip().lower()
except Exception:
_mode_related = "auto"
if _mode_related in {"host", "auto"}:
try:
_mapped: List[str] = []
for rp in _related:
_mapped.append(host_map.get(rp, rp))
_related_out = _mapped
except Exception:
_related_out = _related
# Best-effort snippet text directly from payload for downstream LLM stitching
_payload = (m["pt"].payload or {}) if m.get("pt") is not None else {}
_metadata = _payload.get("metadata", {}) or {}
Expand All @@ -2772,6 +2828,14 @@ def _resolve(seg: str) -> list[str]:
_metadata.get("text") or
""
)
# Carry through pseudo/tags so downstream consumers (e.g., repo_search reranker)
# can incorporate index-time GLM/llm labels into their own scoring or display.
_pseudo = _payload.get("pseudo")
if _pseudo is None:
_pseudo = _metadata.get("pseudo")
_tags = _payload.get("tags")
if _tags is None:
_tags = _metadata.get("tags")
# Skip memory-like points without a real file path
if not _path or not _path.strip():
if os.environ.get("DEBUG_HYBRID_FILTER"):
@@ -2831,10 +2895,12 @@ def _resolve(seg: str) -> list[str]:
"components": comp,
"why": why,
"relations": {"imports": _imports, "calls": _calls, "symbol_path": _symp},
"related_paths": _related,
"related_paths": _related_out,
"span_budgeted": bool(m.get("_merged_start") is not None),
"budget_tokens_used": m.get("_budget_tokens"),
"text": _text,
"pseudo": _pseudo,
"tags": _tags,
}
)
if _USE_CACHE and cache_key is not None:
@@ -3111,7 +3177,15 @@ def _cli_scaled_rrf(rank: int) -> float:

embedded = _embed_queries_cached(model, queries)
result_sets: List[List[Any]] = [
dense_query(client, vec_name, v, flt, _cli_scaled_per_query, eff_collection, query_text=queries[i] if i < len(queries) else None)
dense_query(
client,
vec_name,
v,
flt,
_cli_scaled_per_query,
eff_collection,
query_text=queries[i] if i < len(queries) else None,
)
for i, v in enumerate(embedded)
]

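The new mode parameter is threaded through run_hybrid_search to adjust the implementation/docs weighting shown in the hunks above. A hedged usage sketch follows, assuming the module is importable as hybrid_search and that QDRANT_URL points at a reachable instance; the query string, repo name, and printed fields are illustrative.

```python
# Sketch of calling run_hybrid_search with the new mode parameter.
from hybrid_search import run_hybrid_search  # import path is an assumption

items = run_hybrid_search(
    ["how the pseudo/tag backfill worker selects points"],
    mode="docs_first",      # or "balanced"; None / "code_first" keeps the default weighting
    repo="context-engine",  # illustrative repo name; "*" disables the auto repo filter
)
for item in items[:5]:
    # "related_paths", "text", "pseudo", and "tags" are fields added or kept in this diff.
    print(item.get("related_paths"), item.get("tags"))
```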