diff --git a/crates/index/src/hnsw/search.rs b/crates/index/src/hnsw/search.rs index 9b87783..bdc6640 100644 --- a/crates/index/src/hnsw/search.rs +++ b/crates/index/src/hnsw/search.rs @@ -23,7 +23,7 @@ impl HnswIndex { let mut current = ep; loop { let cur_vec = self.get_vec(current)?; - let mut best_score = distance(&query.to_vec(), cur_vec, self.similarity); + let mut best_score = distance(query, cur_vec, self.similarity); let mut best_id = current; let empty: &[PointId] = &[]; @@ -46,7 +46,7 @@ impl HnswIndex { continue; } let n_vec = self.get_vec(n)?; - let score = distance(&query.to_vec(), n_vec, self.similarity); + let score = distance(query, n_vec, self.similarity); if score < best_score { best_score = score; best_id = n; @@ -91,7 +91,7 @@ impl HnswIndex { .unwrap_or(ep), }; - let ep_score = distance(&query.to_vec(), self.get_vec(seed)?, self.similarity); + let ep_score = distance(query, self.get_vec(seed)?, self.similarity); candidates.push((Reverse(OrdF32::new(ep_score)), seed)); w_heap.push((OrdF32::new(ep_score), seed)); visited.insert(seed); @@ -125,7 +125,7 @@ impl HnswIndex { } visited.insert(n); - let score = distance(&query.to_vec(), self.get_vec(n)?, self.similarity); + let score = distance(query, self.get_vec(n)?, self.similarity); let score = OrdF32::new(score); candidates.push((Reverse(score), n)); if w_heap.len() < ef_construction { diff --git a/crates/index/src/lib.rs b/crates/index/src/lib.rs index 3725573..ba72325 100644 --- a/crates/index/src/lib.rs +++ b/crates/index/src/lib.rs @@ -21,41 +21,38 @@ pub trait VectorIndex: Send + Sync { } /// Distance function to get the distance between two vectors (taken from old version) -pub fn distance(a: &DenseVector, b: &DenseVector, dist_type: Similarity) -> f32 { +pub fn distance(a: &[f32], b: &[f32], dist_type: Similarity) -> f32 { assert_eq!(a.len(), b.len()); match dist_type { - Similarity::Euclidean => { - let score: Vec<f32> = a - .iter() - .zip(b.iter()) - .map(|(&x, &y)| (x - y) * (x - y)) - 
.collect(); - score.iter().sum::<f32>().sqrt() - } - Similarity::Manhattan => { - let score: Vec<f32> = a - .iter() - .zip(b.iter()) - .map(|(&x, &y)| (x - y).abs()) - .collect(); - score.iter().sum::<f32>() - } - Similarity::Hamming => { - let score: Vec<f32> = a - .iter() - .zip(b.iter()) - .map(|(&x, &y)| if (x - y).abs() > 1e-8 { 1f32 } else { 0f32 }) - .collect(); - score.iter().sum::<f32>() - } + Similarity::Euclidean => a + .iter() + .zip(b.iter()) + .map(|(&x, &y)| { + let d = x - y; + d * d + }) + .sum::<f32>() + .sqrt(), + Similarity::Manhattan => a + .iter() + .zip(b.iter()) + .map(|(&x, &y)| (x - y).abs()) + .sum::<f32>(), + Similarity::Hamming => a + .iter() + .zip(b.iter()) + .map(|(&x, &y)| if (x - y).abs() > 1e-8 { 1f32 } else { 0f32 }) + .sum::<f32>(), Similarity::Cosine => { - let p_score: Vec<f32> = a.iter().zip(b.iter()).map(|(&x, &y)| x * y).collect(); - let p = p_score.iter().sum::<f32>(); - let q_score: Vec<f32> = a.iter().map(|&n| n * n).collect(); - let q = q_score.iter().sum::<f32>().sqrt(); - let r_score: Vec<f32> = b.iter().map(|&n| n * n).collect(); - let r = r_score.iter().sum::<f32>().sqrt(); - 1.0 - p / (q * r) + let mut dot = 0.0f32; + let mut norm_a = 0.0f32; + let mut norm_b = 0.0f32; + for (&x, &y) in a.iter().zip(b.iter()) { + dot += x * y; + norm_a += x * x; + norm_b += y * y; + } + 1.0 - dot / (norm_a.sqrt() * norm_b.sqrt()) } } }