Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 163 additions & 0 deletions crates/zeph-memory/tests/ollama_integration.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

use zeph_llm::ollama::OllamaProvider;
use zeph_llm::provider::LlmProvider;
use zeph_memory::response_cache::ResponseCache;
use zeph_memory::sqlite::SqliteStore;

/// Default local Ollama HTTP endpoint these integration tests connect to.
const OLLAMA_BASE_URL: &str = "http://localhost:11434";
/// Chat model name passed to the provider and recorded with cached responses.
const CHAT_MODEL: &str = "qwen3:8b";
/// Embedding model the tests require to be available locally (see `#[ignore]` notes).
const EMBEDDING_MODEL: &str = "qwen3-embedding";

// Cold Ollama model starts can take 30+ seconds on first embed call.
/// Builds the fixtures shared by every test: a `ResponseCache` backed by a
/// fresh in-memory SQLite store (3600-second TTL) and an `OllamaProvider`
/// pointed at the local default endpoint.
async fn setup_cache_with_ollama() -> (ResponseCache, OllamaProvider) {
    let provider = OllamaProvider::new(OLLAMA_BASE_URL, CHAT_MODEL.into(), EMBEDDING_MODEL.into());
    let store = SqliteStore::new(":memory:")
        .await
        .expect("in-memory SQLite must open");
    (ResponseCache::new(store.pool().clone(), 3600), provider)
}

#[tokio::test]
#[ignore = "requires running Ollama instance with qwen3-embedding model"]
async fn with_ollama_embedding() {
    // Roundtrip: embed a query, store via put_with_embedding, retrieve with the same
    // embedding. Identical vectors must yield score ~1.0.
    let (cache, provider) = setup_cache_with_ollama().await;

    let vector = provider
        .embed("What is the Rust programming language?")
        .await
        .expect("Ollama embed must succeed");

    // Sanity-check the raw embedding before exercising the cache.
    assert!(!vector.is_empty(), "embedding must not be empty");
    assert!(
        vector.len() > 100,
        "embedding must have more than 100 dimensions"
    );
    assert!(
        vector.iter().all(|v| v.is_finite()),
        "all embedding values must be finite"
    );

    cache
        .put_with_embedding(
            "k1",
            "Rust is a systems programming language",
            CHAT_MODEL,
            &vector,
            EMBEDDING_MODEL,
        )
        .await
        .expect("put_with_embedding must succeed");

    // Query back with the exact vector we stored: this must be a hit.
    let (response, score) = cache
        .get_semantic(&vector, EMBEDDING_MODEL, 0.95, 10)
        .await
        .expect("get_semantic must succeed")
        .expect("identical embedding must produce a cache hit");

    assert_eq!(response, "Rust is a systems programming language");
    assert!(
        (score - 1.0_f32).abs() < 1e-5,
        "identical embedding must yield score ~1.0, got {score}"
    );
}

#[tokio::test]
#[ignore = "requires running Ollama instance with qwen3-embedding model"]
async fn hit_on_rephrase() {
    // Threshold 0.80 provides margin for embedding model version variance.
    // Semantically equivalent rephrases typically score 0.85–0.98 with qwen3-embedding.
    let (cache, provider) = setup_cache_with_ollama().await;

    // Store a response keyed by the embedding of the original phrasing.
    let original = "What is the capital of France?";
    let embedding_original = provider
        .embed(original)
        .await
        .expect("Ollama embed must succeed for original query");

    cache
        .put_with_embedding(
            "k1",
            "Paris is the capital of France",
            CHAT_MODEL,
            &embedding_original,
            EMBEDDING_MODEL,
        )
        .await
        .expect("put_with_embedding must succeed");

    // A semantically equivalent rephrase should land close enough to hit.
    let rephrase = "Tell me the capital city of France";
    let embedding_rephrase = provider
        .embed(rephrase)
        .await
        .expect("Ollama embed must succeed for rephrased query");

    let result = cache
        .get_semantic(&embedding_rephrase, EMBEDDING_MODEL, 0.80, 10)
        .await
        .expect("get_semantic must succeed");

    let (_response, score) = result.expect("rephrase must hit semantic cache at threshold 0.80");
    // `>=` rather than `>`: the lookup above already accepted this entry at
    // threshold 0.80, so the assertion must not reject a boundary score the
    // cache itself treated as a hit.
    assert!(
        score >= 0.80,
        "rephrase similarity must be at least 0.80, got {score}"
    );
}

#[tokio::test]
#[ignore = "requires running Ollama instance with qwen3-embedding model"]
async fn threshold_boundary() {
    // Verify that threshold correctly separates hits from misses.
    // An unrelated query (Rust ownership vs pasta recipes) must miss at 0.95
    // but hit at 0.0 since cosine similarity >= 0.0 for any stored entry.
    let (cache, provider) = setup_cache_with_ollama().await;

    let stored_embedding = provider
        .embed("Explain Rust ownership and borrowing")
        .await
        .expect("Ollama embed must succeed for tech query");

    cache
        .put_with_embedding(
            "k1",
            "Rust ownership ensures memory safety without GC",
            CHAT_MODEL,
            &stored_embedding,
            EMBEDDING_MODEL,
        )
        .await
        .expect("put_with_embedding must succeed");

    // Probe with an embedding from a completely different topic.
    let probe = provider
        .embed("Best Italian pasta recipes for beginners")
        .await
        .expect("Ollama embed must succeed for unrelated query");

    // High threshold: the unrelated probe must fall below 0.95 and miss.
    let miss = cache
        .get_semantic(&probe, EMBEDDING_MODEL, 0.95, 10)
        .await
        .expect("get_semantic must succeed");
    assert!(
        miss.is_none(),
        "unrelated query must not hit cache at threshold 0.95"
    );

    // Threshold 0.0 guarantees a hit: cosine similarity for any real embedding pair
    // stored in the cache is >= 0.0 for typical non-adversarial inputs.
    // NOTE(review): cosine similarity can in principle be negative; if this
    // assertion ever flakes, the floor threshold may need to drop below 0.0.
    let hit = cache
        .get_semantic(&probe, EMBEDDING_MODEL, 0.0, 10)
        .await
        .expect("get_semantic must succeed");
    assert!(
        hit.is_some(),
        "any non-negative similarity must pass threshold 0.0"
    );
}
Loading