Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions jvector-example/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Maven build descriptor for the standalone jvector-example module.
It declares no parent POM, so the coordinates, compiler level and dependency
versions below are all defined locally in this file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<!-- Coordinates of this example module. -->
<groupId>dev.langchain4j</groupId>
<artifactId>jvector-example</artifactId>
<version>1.9.0-beta16-SNAPSHOT</version>

<properties>
<!-- Compile for Java 17 and read source files as UTF-8. -->
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<dependencies>
<!--
JVector-backed EmbeddingStore implementation used by the example.
NOTE(review): ${project.version} resolves to this module's own version
(a -SNAPSHOT), whereas the dependency below pins a released version.
Confirm that a matching snapshot of this artifact is resolvable.
-->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-community-jvector</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Embedding model artifact providing AllMiniLmL6V2QuantizedEmbeddingModel used by the example. -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-embeddings-all-minilm-l6-v2-q</artifactId>
<version>1.8.0-beta15</version>
</dependency>
</dependencies>
</project>
162 changes: 162 additions & 0 deletions jvector-example/src/main/java/JVectorEmbeddingStoreExample.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import dev.langchain4j.community.store.embedding.jvector.JVectorEmbeddingStore;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.embedding.onnx.allminilml6v2q.AllMiniLmL6V2QuantizedEmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingMatch;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
import dev.langchain4j.store.embedding.EmbeddingStore;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

/**
 * Runnable example that embeds every non-blank line of a bundled text document,
 * stores the embeddings in two differently configured {@link JVectorEmbeddingStore}
 * instances (one without a persistence path, one with), and runs a handful of
 * similarity queries against each store.
 */
public class JVectorEmbeddingStoreExample {

    private static final String TEST_DOCUMENT = "test-document.txt";
    public static final String TMP_JVECTOR_EMBEDDING_STORE = "/tmp/jvector-embedding-store";

    /** Vector dimension passed to both store builders. */
    private static final int EMBEDDING_DIMENSION = 384;
    /** Number of randomly chosen lines used as queries per store. */
    private static final int NUMBER_OF_QUERIES = 5;
    /** Maximum number of matches requested (and reported) per query. */
    private static final int MAX_RESULTS = 3;

    /**
     * Entry point: loads the document, runs the workflow against both stores and
     * finally removes whatever was written below {@link #TMP_JVECTOR_EMBEDDING_STORE}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        File tempPath = new File(TMP_JVECTOR_EMBEDDING_STORE);
        // Remembered so that cleanup only removes the directory itself when this run created it.
        boolean createdTempPath = false;
        try {
            // Create the default embedding model
            EmbeddingModel embeddingModel = new AllMiniLmL6V2QuantizedEmbeddingModel();

            // Read all lines from the text file once (throws if the resource is missing)
            List<String> lines = readLinesFromResource(TEST_DOCUMENT);
            System.out.println("Read " + lines.size() + " lines from " + TEST_DOCUMENT);
            System.out.println();

            // isDirectory() rather than exists(): a plain file at this path must not be
            // mistaken for a usable directory.
            if (!tempPath.isDirectory()) {
                if (!tempPath.mkdirs()) {
                    throw new RuntimeException("Failed to create temporary directory: " + TMP_JVECTOR_EMBEDDING_STORE);
                }
                createdTempPath = true;
            }
            File indexPath = new File(tempPath, "example-index");

            // First store: JVector with maxDegree 16, no persistence path
            EmbeddingStore<TextSegment> jvectorStore1 = JVectorEmbeddingStore.builder()
                    .dimension(EMBEDDING_DIMENSION)
                    .maxDegree(16)
                    .build();

            runWithStore(jvectorStore1, embeddingModel, lines, "JVectorStore(maxDegree=16)");

            // Second store: JVector with a different configuration (maxDegree 8) and a persistence path
            EmbeddingStore<TextSegment> jvectorStore2 = JVectorEmbeddingStore.builder()
                    .dimension(EMBEDDING_DIMENSION)
                    .maxDegree(8)
                    .persistencePath(indexPath.getAbsolutePath())
                    .build();

            runWithStore(jvectorStore2, embeddingModel, lines, "JVectorStore(maxDegree=8)");
        } catch (Exception e) {
            System.err.println("Failed to run the example due to: " + e.getMessage());
            // getMessage() is null for many exceptions; the stack trace keeps the failure diagnosable.
            e.printStackTrace();
        } finally {
            // Clean up the temporary index directory
            if (tempPath.isDirectory()) {
                File[] leftovers = tempPath.listFiles();
                if (leftovers != null) {
                    for (File leftover : leftovers) {
                        deleteRecursively(leftover);
                    }
                }
                if (createdTempPath && !tempPath.delete()) {
                    System.err.println("Failed to delete file: " + tempPath.getAbsolutePath());
                }
            }
        }
    }

    /**
     * Runs the common workflow against the provided embedding store: embeds and adds
     * every non-blank line, then queries the store with randomly chosen non-blank
     * lines and prints the matches together with the elapsed time.
     *
     * @param embeddingStore store that receives the embeddings and is searched
     * @param embeddingModel model used to embed both the stored lines and the queries
     * @param lines          raw document lines; blank lines are skipped
     * @param storeName      label used in the console output
     */
    private static void runWithStore(EmbeddingStore<TextSegment> embeddingStore,
                                     EmbeddingModel embeddingModel,
                                     List<String> lines,
                                     String storeName) {

        // nanoTime() is monotonic, unlike the wall clock, so it is the right source for durations.
        long startNanos = System.nanoTime();
        System.out.println("=== Running with store: " + storeName + " ===");
        System.out.println("Adding embeddings to the store...");

        // Filter once so that indexing and querying agree on which lines are usable.
        List<String> nonBlankLines = new ArrayList<>(lines.size());
        for (String line : lines) {
            if (!line.trim().isEmpty()) {
                nonBlankLines.add(line);
            }
        }

        for (String line : nonBlankLines) {
            TextSegment segment = TextSegment.from(line);
            Embedding embedding = embeddingModel.embed(segment).content();
            embeddingStore.add(embedding, segment);
        }
        System.out.println("Successfully added " + nonBlankLines.size() + " embeddings to the store");
        System.out.println();

        if (nonBlankLines.isEmpty()) {
            // Random.nextInt(0) would throw; there is nothing to query anyway.
            System.out.println("No non-blank lines available, skipping queries.");
        } else {
            // Query the store with random lines from the file
            Random random = new Random();

            System.out.println("Querying the embedding store with " + NUMBER_OF_QUERIES + " random lines:");
            System.out.println("=========================================");

            for (int i = 0; i < NUMBER_OF_QUERIES; i++) {
                String randomLine = nonBlankLines.get(random.nextInt(nonBlankLines.size()));
                System.out.println("\nQuery " + (i + 1) + ": " + randomLine);

                Embedding queryEmbedding = embeddingModel.embed(randomLine).content();

                EmbeddingSearchRequest searchRequest = EmbeddingSearchRequest.builder()
                        .queryEmbedding(queryEmbedding)
                        .maxResults(MAX_RESULTS)
                        .build();

                EmbeddingSearchResult<TextSegment> result = embeddingStore.search(searchRequest);
                List<EmbeddingMatch<TextSegment>> matches = result.matches();

                System.out.println("Top " + MAX_RESULTS + " matches:");
                for (int j = 0; j < matches.size(); j++) {
                    EmbeddingMatch<TextSegment> match = matches.get(j);
                    System.out.printf(" %d. Score: %.4f - %s%n",
                            j + 1,
                            match.score(),
                            match.embedded().text());
                }
            }
        }

        long elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000L;
        System.out.printf("=== Finished running with store: %s in %d msec. ===%n",
                storeName, elapsedMillis);
        System.out.println();
    }

    /**
     * Reads all lines from a classpath resource, decoding it as UTF-8.
     *
     * @param resourceName name of the resource, resolved through this class's class loader
     * @return the lines of the resource in file order, including blank ones
     * @throws IllegalStateException if the resource cannot be found
     * @throws RuntimeException      if reading the resource fails
     */
    private static List<String> readLinesFromResource(String resourceName) {
        InputStream inputStream = JVectorEmbeddingStoreExample.class
                .getClassLoader()
                .getResourceAsStream(resourceName);
        if (inputStream == null) {
            throw new IllegalStateException("Resource not found: " + resourceName);
        }

        List<String> lines = new ArrayList<>();
        // Closing the reader also closes the wrapped input stream.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to read resource file: " + resourceName, e);
        }
        return lines;
    }

    /**
     * Deletes a file, or a directory together with everything below it, reporting
     * (but not failing on) entries that cannot be removed.
     *
     * @param file file or directory to delete
     */
    private static void deleteRecursively(File file) {
        // Never descend through a symbolic link: only the link itself is removed.
        if (!Files.isSymbolicLink(file.toPath())) {
            File[] children = file.listFiles(); // null when 'file' is not a directory
            if (children != null) {
                for (File child : children) {
                    deleteRecursively(child);
                }
            }
        }
        if (!file.delete()) {
            System.err.println("Failed to delete file: " + file.getAbsolutePath());
        }
    }
}
40 changes: 40 additions & 0 deletions jvector-example/src/main/resources/test-document.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
The Earth completes one revolution around the Sun in approximately 365.25 days.
Each year the Earth makes a full orbit around the Sun, taking about 365.25 days.
Machine learning is a field of AI that builds models to learn patterns from data.
ML focuses on creating algorithms that learn from examples and improve over time.
The Pacific Ocean covers more area than any other ocean and is also the deepest.
The Pacific is the biggest and deepest ocean on the planet, containing the deepest trenches.
Photosynthesis enables plants to convert light energy into chemical energy and oxygen.
Plants use photosynthesis to transform sunlight into glucose and release oxygen.
The Great Wall of China stretches thousands of kilometers and is a famous historic structure.
China's Great Wall is a renowned fortified structure built over centuries across the landscape.
Python is a versatile high-level language commonly used for data analysis and scripting.
Python's simplicity and libraries make it a preferred language for data science tasks.
Mount Everest stands at about 8,848 meters and is the highest peak on Earth.
At roughly 8,848 meters above sea level, Mount Everest is the planet's tallest mountain.
Shakespeare authored plays such as Hamlet and many other notable works.
William Shakespeare wrote famous plays including Hamlet, Othello, and Macbeth.
DNA stores the genetic code that guides the growth and functioning of living organisms.
The sequence of nucleotides in DNA encodes the hereditary information for organisms.
The Eiffel Tower in Paris was finished in 1889 and measures around 330 meters tall.
Completed in 1889, the Eiffel Tower rises to approximately 330 meters above Paris.
Artificial neural networks are inspired by the structure and function of biological neurons.
Neural network models imitate networks of neurons to process information and learn.
The Amazon rainforest is estimated to produce about twenty percent of the world's oxygen.
About one-fifth of global oxygen production is attributed to the vast Amazon rainforest.
Albert Einstein introduced the theory of relativity in the early twentieth century.
Einstein developed both special and general relativity during the early 1900s.
James Naismith invented the game of basketball in 1891 while in Massachusetts.
Basketball was created by James Naismith in 1891 at a Massachusetts training school.
The adult human brain contains on the order of 86 billion neurons forming complex networks.
Researchers estimate the human brain has roughly 86 billion nerve cells.
Coffee ranks among the most widely consumed beverages worldwide each day.
Globally, coffee is one of the leading beverages consumed by millions daily.
The speed of light in a vacuum is approximately 299,792 kilometers per second.
In vacuum, light travels at about 299,792 km/s, a fundamental physical constant.
Ancient Egyptians constructed pyramids to serve as monumental tombs for their pharaohs.
The pyramids were built by the ancient Egyptians as burial complexes for their rulers.
Quantum computing harnesses quantum phenomena like superposition to perform computation.
Quantum computers use entanglement and superposition to tackle certain computational problems.
The Mona Lisa was painted by Leonardo da Vinci during the sixteenth century.
Leonardo da Vinci created the Mona Lisa in the 1500s, one of the most famous paintings.
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
<module>payara-micro-example</module>
<module>gpullama3.java-example</module>
<module>watsonx-ai-examples</module>
<module>jvector-example</module>
</modules>

</project>