From f7086742068bd6fe591afdaf9cceebb78e40037d Mon Sep 17 00:00:00 2001
From: MHHamdan
Date: Wed, 12 Feb 2025 00:44:00 -0500
Subject: [PATCH] Add C++ and Java vector search prototypes

---
 app/services/VectorSearchPrototype.java  | 103 ++++++++++++++++++++++
 app/services/vector_search_prototype.cpp |  88 +++++++++++++++++++
 2 files changed, 191 insertions(+)
 create mode 100644 app/services/VectorSearchPrototype.java
 create mode 100644 app/services/vector_search_prototype.cpp

diff --git a/app/services/VectorSearchPrototype.java b/app/services/VectorSearchPrototype.java
new file mode 100644
index 0000000..9045962
--- /dev/null
+++ b/app/services/VectorSearchPrototype.java
@@ -0,0 +1,103 @@
+// VectorSearchPrototype.java
+// Prototype for a simple vector search component in VectorSphere using Java.
+// This example demonstrates calculating cosine similarity between a query vector
+// and a set of stored vectors (simulated vector database).
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class VectorSearchPrototype {
+
+    /**
+     * Computes the dot product of two vectors.
+     * @param v1 The first vector.
+     * @param v2 The second vector; must have as many elements as v1.
+     * @return The dot product as a double.
+     * @throws IllegalArgumentException if the vectors differ in length.
+     */
+    public static double dotProduct(List<Double> v1, List<Double> v2) {
+        // The product is only defined for equal dimensions; fail loudly instead of
+        // indexing past the end of v2 or silently ignoring its extra elements.
+        if (v1.size() != v2.size()) {
+            throw new IllegalArgumentException(
+                    "Vector dimensions differ: " + v1.size() + " vs " + v2.size());
+        }
+        double result = 0.0;
+        for (int i = 0; i < v1.size(); i++) {
+            result += v1.get(i) * v2.get(i);
+        }
+        return result;
+    }
+
+    /**
+     * Computes the Euclidean norm (magnitude) of a vector.
+     * @param v The vector.
+     * @return The norm as a double.
+     */
+    public static double norm(List<Double> v) {
+        double sum = 0.0;
+        for (double val : v) {
+            sum += val * val;
+        }
+        return Math.sqrt(sum);
+    }
+
+    /**
+     * Computes the cosine similarity between two vectors.
+     * Returns a value between -1 and 1, where 1 indicates identical orientation.
+     * If either vector has zero magnitude the similarity is undefined and 0.0 is returned.
+     * @param v1 The first vector.
+     * @param v2 The second vector; must have as many elements as v1.
+     * @return The cosine similarity.
+     * @throws IllegalArgumentException if the vectors differ in length.
+     */
+    public static double cosineSimilarity(List<Double> v1, List<Double> v2) {
+        // Computed first so mismatched dimensions are rejected even when a norm is zero.
+        double dot = dotProduct(v1, v2);
+        double norm1 = norm(v1);
+        double norm2 = norm(v2);
+
+        // To avoid division by zero, check if either norm is zero.
+        if (norm1 == 0.0 || norm2 == 0.0) {
+            return 0.0;
+        }
+        return dot / (norm1 * norm2);
+    }
+
+    /**
+     * Demo entry point: scores a fixed query against a small in-memory set of vectors
+     * and prints each similarity followed by the best match.
+     * @param args Command-line arguments (unused).
+     */
+    public static void main(String[] args) {
+        // Define a query vector (could be an embedding generated by a model).
+        List<Double> query = Arrays.asList(0.1, 0.3, 0.5);
+
+        // Simulated vector database (each vector might represent an embedded document).
+        List<List<Double>> vectorDB = new ArrayList<>();
+        vectorDB.add(Arrays.asList(0.2, 0.4, 0.6));
+        vectorDB.add(Arrays.asList(0.0, 0.1, 0.0));
+        vectorDB.add(Arrays.asList(0.3, 0.3, 0.3));
+
+        // Variables to track the best matching vector. Negative infinity compares below
+        // every finite similarity, so the first scored vector replaces it.
+        int bestMatchIndex = -1;
+        double bestSimilarity = Double.NEGATIVE_INFINITY;
+
+        // Iterate over the vector database to compute similarity scores.
+        for (int i = 0; i < vectorDB.size(); i++) {
+            double sim = cosineSimilarity(query, vectorDB.get(i));
+            System.out.println("Similarity with vector " + i + ": " + sim);
+
+            // Update the best match if the current similarity is higher.
+            if (sim > bestSimilarity) {
+                bestSimilarity = sim;
+                bestMatchIndex = i;
+            }
+        }
+
+        // Output the result.
+        System.out.println("Best match index: " + bestMatchIndex + " with similarity: " + bestSimilarity);
+    }
+}
diff --git a/app/services/vector_search_prototype.cpp b/app/services/vector_search_prototype.cpp
new file mode 100644
index 0000000..2bd7baf
--- /dev/null
+++ b/app/services/vector_search_prototype.cpp
@@ -0,0 +1,88 @@
+// vector_search_prototype.cpp
+// Prototype for a simple vector search component in VectorSphere.
+// This example demonstrates calculating cosine similarity between a query vector
+// and a set of stored vectors (vector database).
+
+#include <cmath>
+#include <iostream>
+#include <limits>
+#include <numeric>
+#include <stdexcept>
+#include <vector>
+
+using std::vector;
+using std::cout;
+
+// Computes the dot product of two vectors.
+// Throws std::invalid_argument if the vectors differ in length, because the
+// product is undefined then and indexing v2 past its end would be undefined behavior.
+float dot_product(const vector<float>& v1, const vector<float>& v2) {
+    if (v1.size() != v2.size()) {
+        throw std::invalid_argument("dot_product: vector dimensions differ");
+    }
+    return std::inner_product(v1.begin(), v1.end(), v2.begin(), 0.0f);
+}
+
+// Computes the Euclidean norm (magnitude) of a vector.
+float norm(const vector<float>& v) {
+    float sum = 0.0f;
+    for (float val : v) {
+        sum += val * val;
+    }
+    return std::sqrt(sum);
+}
+
+// Computes the cosine similarity between two vectors.
+// Returns a value between -1 and 1, where 1 means identical orientation, or
+// 0 when either vector has zero magnitude (the similarity is undefined there).
+// Throws std::invalid_argument if the vectors differ in length.
+float cosine_similarity(const vector<float>& v1, const vector<float>& v2) {
+    // Computed first so mismatched dimensions are rejected even when a norm is zero.
+    const float dot = dot_product(v1, v2);
+    const float n1 = norm(v1);
+    const float n2 = norm(v2);
+
+    // To avoid division by zero, check if any norm is zero.
+    if (n1 == 0.0f || n2 == 0.0f) {
+        return 0.0f;
+    }
+
+    return dot / (n1 * n2);
+}
+
+// Demo entry point: scores a fixed query against a small in-memory set of
+// vectors and prints each similarity followed by the best match.
+int main() {
+    // Define a query vector (this could be an embedding generated by a model).
+    const vector<float> query = {0.1f, 0.3f, 0.5f};
+
+    // Simulated vector database (each vector might represent an embedded document).
+    const vector<vector<float>> vector_db = {
+        {0.2f, 0.4f, 0.6f},
+        {0.0f, 0.1f, 0.0f},
+        {0.3f, 0.3f, 0.3f}
+    };
+
+    // Variables to track the best matching vector. Negative infinity compares below
+    // every finite similarity, so the first scored vector replaces it.
+    int best_match_index = -1;
+    float best_similarity = -std::numeric_limits<float>::infinity();
+
+    // Iterate over the vector database to compute similarity scores.
+    for (std::size_t i = 0; i < vector_db.size(); i++) {
+        const float sim = cosine_similarity(query, vector_db[i]);
+        cout << "Similarity with vector " << i << ": " << sim << '\n';
+
+        // Update the best match if the current similarity is higher.
+        if (sim > best_similarity) {
+            best_similarity = sim;
+            best_match_index = static_cast<int>(i);
+        }
+    }
+
+    // Output the result.
+    cout << "Best match index: " << best_match_index
+         << " with similarity: " << best_similarity << '\n';
+
+    return 0;
+}