Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion .claude/settings.local.json
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,18 @@
"Bash(kill:*)",
"Bash(grep:*)",
"WebFetch(domain:opencode.ai)",
"Bash(find:*)"
"Bash(find:*)",
"WebFetch(domain:www.databricks.com)",
"WebFetch(domain:docs.databricks.com)",
"Bash(env:*)",
"Bash(DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test:*)",
"Bash(DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node:*)",
"Bash(gh pr list:*)",
"Bash(gh pr diff:*)",
"Bash(PREFER_OLLAMA=true node:*)",
"Bash(DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com MODEL_PROVIDER=azure-openai AZURE_OPENAI_ENDPOINT=https://test.openai.azure.com AZURE_OPENAI_API_KEY=test-key node:*)",
"Bash(git stash:*)",
"WebFetch(domain:docs.ollama.com)"
],
"deny": [],
"ask": []
Expand Down
513 changes: 242 additions & 271 deletions .env.example

Large diffs are not rendered by default.

124 changes: 108 additions & 16 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ COPY --from=build --chown=node:node /app/index.js /app/package.json ./
COPY --from=build --chown=node:node /app/node_modules ./node_modules
COPY --from=build --chown=node:node /app/src ./src

VOLUME ["/app/data"]
VOLUME ["/app/data", "/app/logs"]

EXPOSE 8081

Expand All @@ -75,17 +75,27 @@ ENV MODEL_PROVIDER="databricks" \
LOG_LEVEL="info" \
WORKSPACE_ROOT="/workspace" \
WEB_SEARCH_ENDPOINT="http://searxng:8888/search" \
NODE_ENV="production"
NODE_ENV="production" \
REQUEST_JSON_LIMIT="1gb" \
SESSION_DB_PATH="/app/data/sessions.db"

# File Logging (persistent logs with pino-roll rotation)
ENV LOG_FILE_ENABLED="false" \
LOG_FILE_PATH="/app/logs/lynkr.log" \
LOG_FILE_LEVEL="debug" \
LOG_FILE_FREQUENCY="daily" \
LOG_FILE_MAX_FILES="14"

# Databricks Configuration (default provider)
ENV DATABRICKS_API_BASE="https://example.cloud.databricks.com" \
DATABRICKS_API_KEY="replace-with-databricks-pat"

# Ollama Configuration (for hybrid routing)
# Ollama Configuration (for tier-based routing)
# Recommended models: llama3.1:8b, llama3.2, qwen2.5:14b, mistral:7b-instruct
ENV PREFER_OLLAMA="false" \
OLLAMA_ENDPOINT="http://localhost:11434" \
# Configure via TIER_* env vars: TIER_SIMPLE=ollama:llama3.2
ENV OLLAMA_ENDPOINT="http://localhost:11434" \
OLLAMA_MODEL="llama3.1:8b" \
OLLAMA_TIMEOUT_MS="120000" \
OLLAMA_MAX_TOOLS_FOR_ROUTING="3" \
OLLAMA_EMBEDDINGS_MODEL="nomic-embed-text" \
OLLAMA_EMBEDDINGS_ENDPOINT="http://localhost:11434/api/embeddings"
Expand All @@ -99,45 +109,99 @@ ENV OPENROUTER_API_KEY="" \
OPENROUTER_MAX_TOOLS_FOR_ROUTING="15"

# Azure OpenAI Configuration (optional)
# IMPORTANT: Set AZURE_OPENAI_ENDPOINT to the full endpoint URL, including the deployment path
# Example: https://your-resource.openai.azure.com/openai/deployments/YOUR-DEPLOYMENT/chat/completions?api-version=2025-01-01-preview
# Deployment options: gpt-4o, gpt-4o-mini, gpt-5-chat, o1-preview, o3-mini
ENV AZURE_OPENAI_ENDPOINT="" \
AZURE_OPENAI_API_KEY="" \
AZURE_OPENAI_DEPLOYMENT="gpt-4o"
AZURE_OPENAI_DEPLOYMENT="gpt-4o" \
AZURE_OPENAI_API_VERSION="2024-08-01-preview"

# Hybrid Routing & Fallback Configuration
# FALLBACK_PROVIDER options: databricks, azure-openai, azure-anthropic, openrouter, bedrock, openai
# Note: Local providers (ollama, llamacpp, lmstudio) cannot be used as the fallback provider
ENV FALLBACK_ENABLED="true" \
FALLBACK_PROVIDER="databricks"

# Azure Anthropic Configuration (optional)
ENV AZURE_ANTHROPIC_ENDPOINT="" \
AZURE_ANTHROPIC_API_KEY=""
AZURE_ANTHROPIC_API_KEY="" \
AZURE_ANTHROPIC_VERSION="2023-06-01"

# AWS Bedrock Configuration (optional)
# Supports Claude, Titan, Llama, Jurassic, Cohere, Mistral models
ENV AWS_BEDROCK_API_KEY="" \
AWS_BEDROCK_REGION="us-east-1" \
AWS_BEDROCK_MODEL_ID="anthropic.claude-3-5-sonnet-20241022-v2:0"

# llama.cpp Configuration (optional - for local GGUF models)
# llama.cpp Configuration (optional)
ENV LLAMACPP_ENDPOINT="http://localhost:8080" \
LLAMACPP_MODEL="default" \
LLAMACPP_EMBEDDINGS_ENDPOINT="http://localhost:8080/embeddings" \
LLAMACPP_TIMEOUT_MS="120000"

# LM Studio Configuration (optional)
ENV LMSTUDIO_ENDPOINT="http://localhost:1234" \
LMSTUDIO_MODEL="default" \
LMSTUDIO_TIMEOUT_MS="120000"

# OpenAI Configuration (optional)
ENV OPENAI_API_KEY="" \
OPENAI_MODEL="gpt-4o" \
OPENAI_ENDPOINT="https://api.openai.com/v1/chat/completions"

# Z.AI Configuration (optional)
ENV ZAI_API_KEY="" \
ZAI_ENDPOINT="https://api.z.ai/api/anthropic/v1/messages" \
ZAI_MODEL="GLM-4.7"

# Google Vertex AI Configuration (optional)
ENV VERTEX_API_KEY="" \
VERTEX_MODEL="gemini-2.0-flash"

# Embeddings Provider Override (optional)
# EMBEDDINGS_PROVIDER options: ollama, llamacpp, openrouter, openai
# By default, uses the same provider as MODEL_PROVIDER
ENV EMBEDDINGS_PROVIDER=""

# Tool Injection & Suggestion Mode
ENV INJECT_TOOLS_LLAMACPP="true" \
INJECT_TOOLS_OLLAMA="true" \
SUGGESTION_MODE_MODEL="default"

# Rate Limiting
ENV RATE_LIMIT_ENABLED="true" \
RATE_LIMIT_WINDOW_MS="60000" \
RATE_LIMIT_MAX="100" \
RATE_LIMIT_KEY_BY="session"

# Web Search Configuration
ENV WEB_SEARCH_ALLOW_ALL="true" \
WEB_SEARCH_TIMEOUT_MS="10000" \
WEB_FETCH_BODY_PREVIEW_MAX="10000" \
WEB_SEARCH_RETRY_ENABLED="true" \
WEB_SEARCH_MAX_RETRIES="2"

# Policy Configuration
ENV POLICY_MAX_STEPS="20" \
POLICY_MAX_TOOL_CALLS="12" \
POLICY_TOOL_LOOP_THRESHOLD="10" \
POLICY_GIT_ALLOW_PUSH="false" \
POLICY_GIT_ALLOW_PULL="true" \
POLICY_GIT_ALLOW_COMMIT="true" \
POLICY_GIT_REQUIRE_TESTS="false" \
POLICY_GIT_AUTOSTASH="false" \
POLICY_FILE_BLOCKED_PATHS="/.env,.env,/etc/passwd,/etc/shadow" \
POLICY_SAFE_COMMANDS_ENABLED="true"

# Agents Configuration
ENV AGENTS_ENABLED="true" \
AGENTS_MAX_CONCURRENT="10" \
AGENTS_DEFAULT_MODEL="haiku" \
AGENTS_MAX_STEPS="15" \
AGENTS_TIMEOUT="300000"

# Prompt Cache Configuration
ENV PROMPT_CACHE_ENABLED="true" \
PROMPT_CACHE_MAX_ENTRIES="1000" \
PROMPT_CACHE_TTL_MS="300000"

# Semantic Response Cache
ENV SEMANTIC_CACHE_ENABLED="false" \
SEMANTIC_CACHE_THRESHOLD="0.95"

# Production Hardening Defaults
ENV CIRCUIT_BREAKER_FAILURE_THRESHOLD="5" \
CIRCUIT_BREAKER_SUCCESS_THRESHOLD="2" \
Expand All @@ -160,6 +224,34 @@ ENV MEMORY_ENABLED="true" \
MEMORY_DEDUP_ENABLED="true" \
MEMORY_DEDUP_LOOKBACK="5"

# Token Optimization
ENV TOKEN_TRACKING_ENABLED="true" \
TOOL_TRUNCATION_ENABLED="true" \
SYSTEM_PROMPT_MODE="dynamic" \
TOOL_DESCRIPTIONS="minimal" \
HISTORY_COMPRESSION_ENABLED="true" \
HISTORY_KEEP_RECENT_TURNS="10" \
HISTORY_SUMMARIZE_OLDER="true" \
TOKEN_BUDGET_WARNING="100000" \
TOKEN_BUDGET_MAX="180000" \
TOKEN_BUDGET_ENFORCEMENT="true"

# Smart Tool Selection
ENV SMART_TOOL_SELECTION_MODE="heuristic" \
SMART_TOOL_SELECTION_TOKEN_BUDGET="2500"

# Hot Reload
ENV HOT_RELOAD_ENABLED="true" \
HOT_RELOAD_DEBOUNCE_MS="1000"

# Tiered Model Routing (optional)
# Format: TIER_<LEVEL>=provider:model
# All four tiers (TIER_SIMPLE, TIER_MEDIUM, TIER_COMPLEX, TIER_REASONING) must be set to enable tiered routing
# ENV TIER_SIMPLE="ollama:llama3.2" \
# TIER_MEDIUM="openrouter:openai/gpt-4o-mini" \
# TIER_COMPLEX="azure-openai:gpt-4o" \
# TIER_REASONING="azure-openai:gpt-4o"

# Switch to non-root user
USER node

Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ Lynkr supports [ClawdBot](https://github.com/openclaw/openclaw) via its OpenAI-c

### Getting Started
- 📦 **[Installation Guide](documentation/installation.md)** - Detailed installation for all methods
- ⚙️ **[Provider Configuration](documentation/providers.md)** - Complete setup for all 9+ providers
- ⚙️ **[Provider Configuration](documentation/providers.md)** - Complete setup for all 12+ providers
- 🎯 **[Quick Start Examples](documentation/installation.md#quick-start-examples)** - Copy-paste configs

### IDE & CLI Integration
Expand Down Expand Up @@ -277,7 +277,7 @@ Lynkr supports [ClawdBot](https://github.com/openclaw/openclaw) via its OpenAI-c

## Key Features Highlights

- ✅ **Multi-Provider Support** - 9+ providers including local (Ollama, llama.cpp) and cloud (Bedrock, Databricks, OpenRouter)
- ✅ **Multi-Provider Support** - 12+ providers including local (Ollama, llama.cpp) and cloud (Bedrock, Databricks, OpenRouter, Moonshot AI)
- ✅ **60-80% Cost Reduction** - Token optimization with smart tool selection, prompt caching, memory deduplication
- ✅ **100% Local Option** - Run completely offline with Ollama/llama.cpp (zero cloud dependencies)
- ✅ **OpenAI Compatible** - Works with Cursor IDE, Continue.dev, and any OpenAI-compatible client
Expand Down
89 changes: 89 additions & 0 deletions config/model-tiers.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{
"tiers": {
"SIMPLE": {
"description": "Greetings, simple Q&A, confirmations, basic lookups",
"range": [0, 25],
"priority": 1,
"preferred": {
"ollama": ["llama3.2", "gemma2", "phi3", "qwen2.5:7b", "mistral"],
"llamacpp": ["default"],
"lmstudio": ["default"],
"openai": ["gpt-4o-mini", "gpt-3.5-turbo"],
"azure-openai": ["gpt-4o-mini", "gpt-35-turbo"],
"anthropic": ["claude-3-haiku-20240307", "claude-3-5-haiku-20241022"],
"bedrock": ["anthropic.claude-3-haiku-20240307-v1:0", "amazon.nova-lite-v1:0"],
"databricks": ["databricks-claude-haiku-4-5", "databricks-gpt-5-nano"],
"google": ["gemini-2.0-flash", "gemini-1.5-flash"],
"openrouter": ["google/gemini-flash-1.5", "deepseek/deepseek-chat"],
"zai": ["GLM-4-Flash"],
"moonshot": ["kimi-k2-turbo-preview"]
}
},
"MEDIUM": {
"description": "Code reading, simple edits, research, documentation",
"range": [26, 50],
"priority": 2,
"preferred": {
"ollama": ["qwen2.5:32b", "deepseek-coder:33b", "codellama:34b"],
"llamacpp": ["default"],
"lmstudio": ["default"],
"openai": ["gpt-4o", "gpt-4-turbo"],
"azure-openai": ["gpt-4o", "gpt-4"],
"anthropic": ["claude-sonnet-4-20250514", "claude-3-5-sonnet-20241022"],
"bedrock": ["anthropic.claude-3-5-sonnet-20241022-v2:0", "amazon.nova-pro-v1:0"],
"databricks": ["databricks-claude-sonnet-4-5", "databricks-gpt-5-1"],
"google": ["gemini-1.5-pro", "gemini-2.0-pro"],
"openrouter": ["anthropic/claude-3.5-sonnet", "openai/gpt-4o"],
"zai": ["GLM-4.7"],
"moonshot": ["kimi-k2-turbo-preview"]
}
},
"COMPLEX": {
"description": "Multi-file changes, debugging, architecture, refactoring",
"range": [51, 75],
"priority": 3,
"preferred": {
"ollama": ["qwen2.5:72b", "llama3.1:70b", "deepseek-coder-v2:236b"],
"openai": ["o1-mini", "o3-mini", "gpt-4o"],
"azure-openai": ["o1-mini", "gpt-4o"],
"anthropic": ["claude-sonnet-4-20250514", "claude-3-5-sonnet-20241022"],
"bedrock": ["anthropic.claude-3-5-sonnet-20241022-v2:0"],
"databricks": ["databricks-claude-sonnet-4-5", "databricks-gpt-5-1-codex-max"],
"google": ["gemini-2.5-pro", "gemini-1.5-pro"],
"openrouter": ["anthropic/claude-3.5-sonnet", "meta-llama/llama-3.1-405b"],
"zai": ["GLM-4.7"],
"moonshot": ["kimi-k2-turbo-preview"]
}
},
"REASONING": {
"description": "Complex analysis, security audits, novel problems, deep thinking",
"range": [76, 100],
"priority": 4,
"preferred": {
"openai": ["o1", "o1-pro", "o3"],
"azure-openai": ["o1", "o1-pro"],
"anthropic": ["claude-opus-4-20250514", "claude-3-opus-20240229"],
"bedrock": ["anthropic.claude-3-opus-20240229-v1:0"],
"databricks": ["databricks-claude-opus-4-6", "databricks-claude-opus-4-5", "databricks-gpt-5-2"],
"google": ["gemini-2.5-pro"],
"openrouter": ["anthropic/claude-3-opus", "deepseek/deepseek-reasoner", "openai/o1"],
"deepseek": ["deepseek-reasoner", "deepseek-r1"],
"moonshot": ["kimi-k2-thinking", "kimi-k2-turbo-preview"]
}
}
},
"localProviders": {
"ollama": { "free": true, "defaultTier": "SIMPLE" },
"llamacpp": { "free": true, "defaultTier": "SIMPLE" },
"lmstudio": { "free": true, "defaultTier": "SIMPLE" }
},
"providerAliases": {
"azure": "azure-openai",
"aws": "bedrock",
"amazon": "bedrock",
"claude": "anthropic",
"gemini": "google",
"vertex": "google",
"kimi": "moonshot"
}
}
Loading
Loading