Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,27 @@ Supported model families:
> since vectors are not cross-compatible between models. The prompt format is
> automatically adjusted for each model family.

### OpenAI Embeddings (Optional)

As an alternative to local embedding models, you can use OpenAI's API for faster, more reliable embeddings:

```yaml
# ~/.config/qmd/index.yml
embedding:
provider: openai
openai:
api_key: sk-... # Optional, falls back to OPENAI_API_KEY env var
model: text-embedding-3-small # Optional, this is the default
```

Benefits:
- **~10x faster** than local CPU inference
- **No GPU required** - works on any machine
- **More reliable** - no local model loading issues
- **Cost:** ~$0.02 per 1M tokens (very cheap)

When using OpenAI embeddings, query expansion and reranking are skipped to avoid loading local models.

## Installation

```sh
Expand Down
272 changes: 161 additions & 111 deletions bun.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,10 @@
"better-sqlite3": "^12.4.5",
"fast-glob": "^3.3.0",
"node-llama-cpp": "^3.17.1",
"openai": "^4.77.0",
"picomatch": "^4.0.0",
"sqlite-vec": "^0.1.7-alpha.2",
"tiktoken": "^1.0.22",
"web-tree-sitter": "0.26.7",
"yaml": "^2.8.2",
"zod": "4.2.1"
Expand Down
21 changes: 16 additions & 5 deletions src/cli/qmd.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ import {
type ReindexResult,
type ChunkStrategy,
} from "../store.js";
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, getDefaultEmbeddingLLM, withLLMSession, pullModels, setEmbeddingConfig, isUsingOpenAI, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
import {
formatSearchResults,
formatDocuments,
Expand All @@ -97,6 +97,7 @@ import {
listAllContexts,
setConfigIndexName,
loadConfig,
getEmbeddingConfig as getEmbeddingConfigFromYaml,
} from "../collections.js";
import { getEmbeddedQmdSkillContent, getEmbeddedQmdSkillFiles } from "../embedded-skills.js";

Expand Down Expand Up @@ -2132,10 +2133,8 @@ function search(query: string, opts: OutputOptions): void {

// Use large limit for --all, otherwise fetch more than needed and let outputResults filter
const fetchLimit = opts.all ? 100000 : Math.max(50, opts.limit * 2);
const results = filterByCollections(
searchFTS(db, query, fetchLimit, singleCollection),
collectionNames
);
// Pass collections directly to searchFTS (it now supports arrays)
const results = searchFTS(db, query, fetchLimit, collectionNames.length > 0 ? collectionNames : undefined);

// Add context to results
const resultsWithContext = results.map(r => ({
Expand Down Expand Up @@ -2742,6 +2741,18 @@ if (isMain) {
process.exit(cli.values.help ? 0 : 1);
}

// Load embedding configuration from config file
const embeddingYamlConfig = getEmbeddingConfigFromYaml();
if (embeddingYamlConfig.provider === 'openai') {
setEmbeddingConfig({
provider: 'openai',
openai: {
apiKey: embeddingYamlConfig.openai?.api_key,
embedModel: embeddingYamlConfig.openai?.model,
},
});
}

switch (cli.command) {
case "context": {
const subcommand = cli.args[0];
Expand Down
23 changes: 22 additions & 1 deletion src/collections.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,24 @@ export interface Collection {
includeByDefault?: boolean; // Include in queries by default (default: true)
}

/**
 * Embedding provider configuration — the optional `embedding:` section of the
 * config YAML. Field names are snake_case to mirror the YAML keys verbatim
 * (see the README example: `api_key`, `model`).
 */
export interface EmbeddingProviderConfig {
  provider?: 'local' | 'openai'; // Default: 'local' (on-device llama-cpp)
  openai?: {
    api_key?: string; // Optional; falls back to the OPENAI_API_KEY env var
    model?: string; // Default: 'text-embedding-3-small'
  };
}

/**
 * The complete configuration file structure.
 *
 * Loaded from the user's config YAML; `collections` is required, everything
 * else is optional.
 */
export interface CollectionConfig {
  global_context?: string; // Context applied to all collections
  collections: Record<string, Collection>; // Collection name -> config
  embedding?: EmbeddingProviderConfig; // Optional embedding provider settings
}

/**
Expand Down Expand Up @@ -498,3 +510,12 @@ export function isValidCollectionName(name: string): boolean {
// Allow alphanumeric, hyphens, underscores
return /^[a-zA-Z0-9_-]+$/.test(name);
}

/**
 * Read the embedding provider settings from the user's config file.
 *
 * @returns The `embedding` section of the loaded config, or the default
 *          `{ provider: 'local' }` when the section is absent.
 */
export function getEmbeddingConfig(): EmbeddingProviderConfig {
  const config = loadConfig();
  // `??` rather than `||`: fall back only when the section is actually
  // missing/undefined, never on a falsy-but-present value.
  return config.embedding ?? { provider: 'local' };
}
74 changes: 74 additions & 0 deletions src/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,16 @@ export interface LLM {
*/
embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;

/**
* Get embeddings for multiple texts in a batch
*/
embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>;

/**
* Get the model name used for embeddings
*/
getModelName(): string;

/**
* Generate text completion
*/
Expand Down Expand Up @@ -445,6 +455,13 @@ export class LlamaCpp implements LLM {
this.disposeModelsOnInactivity = config.disposeModelsOnInactivity ?? false;
}

  /**
   * Get the identifier of the model used for embeddings.
   *
   * @returns The embed model URI this instance was configured with
   *          (the same string stored in `this.embedModelUri`).
   */
  getModelName(): string {
    return this.embedModelUri;
  }

/**
* Reset the inactivity timer. Called after each model operation.
* When timer fires, models are unloaded to free memory (if no active sessions).
Expand Down Expand Up @@ -1555,3 +1572,60 @@ export async function disposeDefaultLlamaCpp(): Promise<void> {
defaultLlamaCpp = null;
}
}

// =============================================================================
// OpenAI Embedding Support
// =============================================================================

import { OpenAIEmbedding, type OpenAIConfig } from "./openai-llm.js";

/** Which backend produces embeddings: on-device llama-cpp or the OpenAI API. */
export type EmbeddingProvider = 'local' | 'openai';

/** Runtime embedding settings; `openai` is only consulted when provider is 'openai'. */
export type EmbeddingConfig = {
  provider: EmbeddingProvider;
  openai?: OpenAIConfig;
};

// Module-level state: current provider selection (defaults to local
// llama-cpp) and the lazily-created OpenAI client, if any.
let embeddingConfig: EmbeddingConfig = { provider: 'local' };
let openAIEmbedding: OpenAIEmbedding | null = null;

/**
 * Install a new embedding configuration. Must be called before the first
 * embedding request for the change to take effect.
 */
export function setEmbeddingConfig(config: EmbeddingConfig): void {
  // Drop any cached OpenAI client first — it may have been built from the
  // previous config and must not outlive it.
  openAIEmbedding = null;
  embeddingConfig = config;
}

/** Return the embedding configuration currently in effect. */
export function getEmbeddingConfig(): EmbeddingConfig {
  return embeddingConfig;
}

/** True when embeddings are routed to the OpenAI API instead of local models. */
export function isUsingOpenAI(): boolean {
  const { provider } = embeddingConfig;
  return provider === 'openai';
}

/**
 * Resolve the LLM instance to use for embeddings.
 *
 * When the configured provider is 'openai', lazily constructs and caches an
 * OpenAI embedding client; otherwise defers to the shared local LlamaCpp
 * instance.
 */
export function getDefaultEmbeddingLLM(): LLM {
  // Guard clause: anything other than 'openai' means the local backend.
  if (embeddingConfig.provider !== 'openai') {
    return getDefaultLlamaCpp();
  }
  if (openAIEmbedding === null) {
    openAIEmbedding = new OpenAIEmbedding(embeddingConfig.openai);
  }
  return openAIEmbedding;
}
Loading