Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,27 @@ Supported model families:
> since vectors are not cross-compatible between models. The prompt format is
> automatically adjusted for each model family.

### OpenAI Embeddings (Optional)

As an alternative to local embedding models, you can use OpenAI's API for faster, more reliable embeddings:

```yaml
# ~/.config/qmd/index.yml
embedding:
provider: openai
openai:
api_key: sk-... # Optional, falls back to OPENAI_API_KEY env var
model: text-embedding-3-small # Optional, this is the default
```

Benefits:
- **~10x faster** than local CPU inference
- **No GPU required** - works on any machine
- **More reliable** - no local model loading issues
- **Cost:** ~$0.02 per 1M tokens (very cheap)

When using OpenAI embeddings, query expansion and reranking are skipped to avoid loading local models.

## Installation

```sh
Expand Down
272 changes: 161 additions & 111 deletions bun.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,10 @@
"better-sqlite3": "^12.4.5",
"fast-glob": "^3.3.0",
"node-llama-cpp": "^3.17.1",
"openai": "^4.77.0",
"picomatch": "^4.0.0",
"sqlite-vec": "^0.1.7-alpha.2",
"tiktoken": "^1.0.22",
"web-tree-sitter": "0.26.7",
"yaml": "^2.8.2",
"zod": "4.2.1"
Expand Down
21 changes: 16 additions & 5 deletions src/cli/qmd.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ import {
type ReindexResult,
type ChunkStrategy,
} from "../store.js";
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, getDefaultEmbeddingLLM, withLLMSession, pullModels, setEmbeddingConfig, isUsingOpenAI, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
import {
formatSearchResults,
formatDocuments,
Expand All @@ -97,6 +97,7 @@ import {
listAllContexts,
setConfigIndexName,
loadConfig,
getEmbeddingConfig as getEmbeddingConfigFromYaml,
} from "../collections.js";
import { getEmbeddedQmdSkillContent, getEmbeddedQmdSkillFiles } from "../embedded-skills.js";

Expand Down Expand Up @@ -2132,10 +2133,8 @@ function search(query: string, opts: OutputOptions): void {

// Use large limit for --all, otherwise fetch more than needed and let outputResults filter
const fetchLimit = opts.all ? 100000 : Math.max(50, opts.limit * 2);
const results = filterByCollections(
searchFTS(db, query, fetchLimit, singleCollection),
collectionNames
);
// Pass collections directly to searchFTS (it now supports arrays)
const results = searchFTS(db, query, fetchLimit, collectionNames.length > 0 ? collectionNames : undefined);

// Add context to results
const resultsWithContext = results.map(r => ({
Expand Down Expand Up @@ -2742,6 +2741,18 @@ if (isMain) {
process.exit(cli.values.help ? 0 : 1);
}

// Load embedding configuration from config file
const embeddingYamlConfig = getEmbeddingConfigFromYaml();
if (embeddingYamlConfig.provider === 'openai') {
setEmbeddingConfig({
provider: 'openai',
openai: {
apiKey: embeddingYamlConfig.openai?.api_key,
embedModel: embeddingYamlConfig.openai?.model,
},
});
}

switch (cli.command) {
case "context": {
const subcommand = cli.args[0];
Expand Down
23 changes: 22 additions & 1 deletion src/collections.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,24 @@ export interface Collection {
includeByDefault?: boolean; // Include in queries by default (default: true)
}

/**
 * Embedding provider configuration — the optional `embedding:` section of the
 * config YAML. Field names are snake_case to mirror the YAML keys verbatim
 * (see the README example: `api_key`, `model`).
 */
export interface EmbeddingProviderConfig {
  provider?: 'local' | 'openai'; // Default: 'local' (on-device llama-cpp)
  openai?: {
    api_key?: string; // Optional; falls back to the OPENAI_API_KEY env var
    model?: string; // Default: 'text-embedding-3-small'
  };
}

/**
 * The complete configuration file structure.
 *
 * Loaded from the user's config YAML; `collections` is required, everything
 * else is optional.
 */
export interface CollectionConfig {
  global_context?: string; // Context applied to all collections
  collections: Record<string, Collection>; // Collection name -> config
  embedding?: EmbeddingProviderConfig; // Optional embedding provider settings
}

/**
Expand Down Expand Up @@ -498,3 +510,12 @@ export function isValidCollectionName(name: string): boolean {
// Allow alphanumeric, hyphens, underscores
return /^[a-zA-Z0-9_-]+$/.test(name);
}

/**
 * Read the embedding provider settings from the user's config file.
 *
 * @returns The `embedding` section of the loaded config, or the default
 *          `{ provider: 'local' }` when the section is absent.
 */
export function getEmbeddingConfig(): EmbeddingProviderConfig {
  const config = loadConfig();
  // `??` rather than `||`: fall back only when the section is actually
  // missing/undefined, never on a falsy-but-present value.
  return config.embedding ?? { provider: 'local' };
}
74 changes: 74 additions & 0 deletions src/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,16 @@ export interface LLM {
*/
embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;

/**
* Get embeddings for multiple texts in a batch
*/
embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>;

/**
* Get the model name used for embeddings
*/
getModelName(): string;

/**
* Generate text completion
*/
Expand Down Expand Up @@ -445,6 +455,13 @@ export class LlamaCpp implements LLM {
this.disposeModelsOnInactivity = config.disposeModelsOnInactivity ?? false;
}

  /**
   * Get the identifier of the model used for embeddings.
   *
   * @returns The embed model URI this instance was configured with
   *          (the same string stored in `this.embedModelUri`).
   */
  getModelName(): string {
    return this.embedModelUri;
  }

/**
* Reset the inactivity timer. Called after each model operation.
* When timer fires, models are unloaded to free memory (if no active sessions).
Expand Down Expand Up @@ -1555,3 +1572,60 @@ export async function disposeDefaultLlamaCpp(): Promise<void> {
defaultLlamaCpp = null;
}
}

// =============================================================================
// OpenAI Embedding Support
// =============================================================================

import { OpenAIEmbedding, type OpenAIConfig } from "./openai-llm.js";

/** Which backend produces embeddings: on-device llama-cpp or the OpenAI API. */
export type EmbeddingProvider = 'local' | 'openai';

/** Runtime embedding settings; `openai` is only consulted when provider is 'openai'. */
export type EmbeddingConfig = {
  provider: EmbeddingProvider;
  openai?: OpenAIConfig;
};

// Module-level state: current provider selection (defaults to local
// llama-cpp) and the lazily-created OpenAI client, if any.
let embeddingConfig: EmbeddingConfig = { provider: 'local' };
let openAIEmbedding: OpenAIEmbedding | null = null;

/**
 * Install a new embedding configuration. Must be called before the first
 * embedding request for the change to take effect.
 */
export function setEmbeddingConfig(config: EmbeddingConfig): void {
  // Drop any cached OpenAI client first — it may have been built from the
  // previous config and must not outlive it.
  openAIEmbedding = null;
  embeddingConfig = config;
}

/** Return the embedding configuration currently in effect. */
export function getEmbeddingConfig(): EmbeddingConfig {
  return embeddingConfig;
}

/** True when embeddings are routed to the OpenAI API instead of local models. */
export function isUsingOpenAI(): boolean {
  const { provider } = embeddingConfig;
  return provider === 'openai';
}

/**
 * Resolve the LLM instance to use for embeddings.
 *
 * When the configured provider is 'openai', lazily constructs and caches an
 * OpenAI embedding client; otherwise defers to the shared local LlamaCpp
 * instance.
 */
export function getDefaultEmbeddingLLM(): LLM {
  // Guard clause: anything other than 'openai' means the local backend.
  if (embeddingConfig.provider !== 'openai') {
    return getDefaultLlamaCpp();
  }
  if (openAIEmbedding === null) {
    openAIEmbedding = new OpenAIEmbedding(embeddingConfig.openai);
  }
  return openAIEmbedding;
}
Loading