From 311831ec1ceb5ee206da3ab56e6c49d75a1a1b36 Mon Sep 17 00:00:00 2001 From: ianrob Date: Fri, 1 May 2026 14:18:33 +0100 Subject: [PATCH] Update docs so that most examples show enabling only chunk indexes (overriding default of chunk+statement) --- docs-site/src/content/docs/index.mdx | 3 +- .../docs/lexical-graph/batch-extraction.mdx | 5 +- .../content/docs/lexical-graph/indexing.mdx | 15 +++- .../lexical-graph/semantic-guided-search.mdx | 17 +++-- .../lexical-graph/traversal-based-search.mdx | 8 ++- .../vector-store-neptune-analytics.mdx | 5 +- .../vector-store-opensearch-serverless.mdx | 5 +- .../lexical-graph/vector-store-s3-vectors.mdx | 5 +- .../docs/lexical-graph/versioned-updates.mdx | 70 +++++++++++++++---- 9 files changed, 106 insertions(+), 27 deletions(-) diff --git a/docs-site/src/content/docs/index.mdx b/docs-site/src/content/docs/index.mdx index 9375d10b..7692a772 100644 --- a/docs-site/src/content/docs/index.mdx +++ b/docs-site/src/content/docs/index.mdx @@ -52,7 +52,8 @@ with ( 'neptune-db://my-graph.cluster-abcdefghijkl.us-east-1.neptune.amazonaws.com' ) as graph_store, VectorStoreFactory.for_vector_store( - 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com' + 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com', + index_names=['chunk'] ) as vector_store, ): graph_index = LexicalGraphIndex(graph_store, vector_store) diff --git a/docs-site/src/content/docs/lexical-graph/batch-extraction.mdx b/docs-site/src/content/docs/lexical-graph/batch-extraction.mdx index 2395aa3e..7bceeaa9 100644 --- a/docs-site/src/content/docs/lexical-graph/batch-extraction.mdx +++ b/docs-site/src/content/docs/lexical-graph/batch-extraction.mdx @@ -46,7 +46,10 @@ def batch_extract_and_load(): with ( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as
vector_store ): graph_index = LexicalGraphIndex( diff --git a/docs-site/src/content/docs/lexical-graph/indexing.mdx b/docs-site/src/content/docs/lexical-graph/indexing.mdx index 0bcba2bf..f8a0d6d7 100644 --- a/docs-site/src/content/docs/lexical-graph/indexing.mdx +++ b/docs-site/src/content/docs/lexical-graph/indexing.mdx @@ -69,7 +69,10 @@ docs = SimpleWebPageReader( with ( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): graph_index = LexicalGraphIndex( @@ -124,7 +127,10 @@ extracted_docs = S3BasedDocs( with ( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): graph_index = LexicalGraphIndex( @@ -166,7 +172,10 @@ docs = S3BasedDocs( with ( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): graph_index = LexicalGraphIndex( diff --git a/docs-site/src/content/docs/lexical-graph/semantic-guided-search.mdx b/docs-site/src/content/docs/lexical-graph/semantic-guided-search.mdx index 9c200e0a..7718fe88 100644 --- a/docs-site/src/content/docs/lexical-graph/semantic-guided-search.mdx +++ b/docs-site/src/content/docs/lexical-graph/semantic-guided-search.mdx @@ -10,6 +10,11 @@ The recommended method for query and retrieval is to used the [traversal-based s This page contains the semantic-guided search documentation. +### Enable statement indexes + +Semantic-guided search requires statement-level vector indexes. 
By default, the `VectorStoreFactory` will enable both a statement index and a chunk index. In the future, this behaviour may change so that only chunk-based indexes are created by default. Therefore, if you choose to use semantic-guided search, we recommend explicitly enabling both the chunk and statement indexes using the `index_names` parameter. The examples below all include this explicit configuration. + + ### Example The following example uses semantic-guided search with all the default settings to query the graph: @@ -24,7 +29,8 @@ with ( 'neptune-db://my-graph.cluster-abcdefghijkl.us-east-1.neptune.amazonaws.com' ) as graph_store, VectorStoreFactory.for_vector_store( - 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com' + 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com', + index_names=['chunk', 'statement'] ) as vector_store ): @@ -121,7 +127,8 @@ with ( 'neptune-db://my-graph.cluster-abcdefghijkl.us-east-1.neptune.amazonaws.com' ) as graph_store, VectorStoreFactory.for_vector_store( - 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com' + 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com', + index_names=['chunk', 'statement'] ) as vector_store ): @@ -181,7 +188,8 @@ with ( 'neptune-db://my-graph.cluster-abcdefghijkl.us-east-1.neptune.amazonaws.com' ) as graph_store, VectorStoreFactory.for_vector_store( - 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com' + 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com', + index_names=['chunk', 'statement'] ) as vector_store ): @@ -251,7 +259,8 @@ with ( 'neptune-db://my-graph.cluster-abcdefghijkl.us-east-1.neptune.amazonaws.com' ) as graph_store, VectorStoreFactory.for_vector_store( - 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com' + 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com', + index_names=['chunk', 'statement'] ) as vector_store ): diff --git a/docs-site/src/content/docs/lexical-graph/traversal-based-search.mdx
b/docs-site/src/content/docs/lexical-graph/traversal-based-search.mdx index 7f1e5fa8..62d0eb2c 100644 --- a/docs-site/src/content/docs/lexical-graph/traversal-based-search.mdx +++ b/docs-site/src/content/docs/lexical-graph/traversal-based-search.mdx @@ -6,6 +6,7 @@ title: Traversal-Based Search - [Overview](#overview) - [Example](#example) + - [Enable chunk indexes](#enable-chunk-indexes) - [Basic concepts](#basic-concepts) - [Connectivity types](#connectivity-types) - [Entity network contexts](#entity-network-contexts) @@ -24,6 +25,10 @@ For optimal results, users should use traversal-based search in their applicatio Traversal-based search can be used in two ways: retrieval and querying. When you perform a retrieval operation, the system searches the graph and vector stores to find the most relevant information related to your query. It then returns these raw search results directly to you. With a query operation, the system takes an extra step. After finding the relevant information, it passes these results to a Large Language Model (LLM). The LLM processes this information and generates a natural language response that answers your query. +### Enable chunk indexes + +Traversal-based search requires chunk-level vector indexes. By default, the `VectorStoreFactory` will enable both a statement index and a chunk index. If you plan to only use traversal-based search in your application (recommended), then you should disable the creation of statement indexes, thereby saving storage space and embedding costs. Use the `index_names` argument to enable just the chunk index. The examples throughout this documentation explicitly configure only the chunk index using the `index_names` argument.
+ ### Example The following example performs a traversal-based search using the default settings: @@ -38,7 +43,8 @@ with ( 'neptune-db://my-graph.cluster-abcdefghijkl.us-east-1.neptune.amazonaws.com' ) as graph_store, VectorStoreFactory.for_vector_store( - 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com' + 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com', + index_names=['chunk'] ) as vector_store ): diff --git a/docs-site/src/content/docs/lexical-graph/vector-store-neptune-analytics.mdx b/docs-site/src/content/docs/lexical-graph/vector-store-neptune-analytics.mdx index c853f787..03dbce2e 100644 --- a/docs-site/src/content/docs/lexical-graph/vector-store-neptune-analytics.mdx +++ b/docs-site/src/content/docs/lexical-graph/vector-store-neptune-analytics.mdx @@ -22,6 +22,9 @@ from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory neptune_connection_info = 'neptune-graph://g-jbzzaqb209' -with VectorStoreFactory.for_vector_store(neptune_connection_info) as vector_store: +with VectorStoreFactory.for_vector_store( + neptune_connection_info, + index_names=['chunk'] +) as vector_store: ... ``` diff --git a/docs-site/src/content/docs/lexical-graph/vector-store-opensearch-serverless.mdx b/docs-site/src/content/docs/lexical-graph/vector-store-opensearch-serverless.mdx index 87f04334..b18a0839 100644 --- a/docs-site/src/content/docs/lexical-graph/vector-store-opensearch-serverless.mdx +++ b/docs-site/src/content/docs/lexical-graph/vector-store-opensearch-serverless.mdx @@ -33,7 +33,10 @@ from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory opensearch_connection_info = 'aoss://https://123456789012.us-east-1.aoss.amazonaws.com' -with VectorStoreFactory.for_vector_store(opensearch_connection_info) as vector_store: +with VectorStoreFactory.for_vector_store( + opensearch_connection_info, + index_names=['chunk'] +) as vector_store: ... 
``` diff --git a/docs-site/src/content/docs/lexical-graph/vector-store-s3-vectors.mdx b/docs-site/src/content/docs/lexical-graph/vector-store-s3-vectors.mdx index 1a92dc88..c40c92a5 100644 --- a/docs-site/src/content/docs/lexical-graph/vector-store-s3-vectors.mdx +++ b/docs-site/src/content/docs/lexical-graph/vector-store-s3-vectors.mdx @@ -32,7 +32,10 @@ from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory s3_vectors_connection_info = 's3vectors://my-s3-vectors-bucket/app1' -with VectorStoreFactory.for_vector_store(s3_vectors_connection_info) as vector_store: +with VectorStoreFactory.for_vector_store( + s3_vectors_connection_info, + index_names=['chunk'] +) as vector_store: ... ``` diff --git a/docs-site/src/content/docs/lexical-graph/versioned-updates.mdx b/docs-site/src/content/docs/lexical-graph/versioned-updates.mdx index 3aa1f342..4ae46ed1 100644 --- a/docs-site/src/content/docs/lexical-graph/versioned-updates.mdx +++ b/docs-site/src/content/docs/lexical-graph/versioned-updates.mdx @@ -94,7 +94,10 @@ def get_file_metadata(file_path): with( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): graph_index = LexicalGraphIndex( graph_store, @@ -151,7 +154,10 @@ GraphRAGConfig.enable_versioning = True with( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): query_engine = LexicalGraphQueryEngine.for_traversal_based_search( graph_store, @@ -170,7 +176,10 @@ from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine with( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - 
VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): query_engine = LexicalGraphQueryEngine.for_traversal_based_search( graph_store, @@ -192,7 +201,10 @@ GraphRAGConfig.enable_versioning = True with( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): query_engine = LexicalGraphQueryEngine.for_traversal_based_search( graph_store, @@ -216,7 +228,10 @@ from llama_index.core.vector_stores.types import FilterOperator, MetadataFilter with( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): query_engine = LexicalGraphQueryEngine.for_traversal_based_search( graph_store, @@ -282,7 +297,10 @@ from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory with ( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): graph_index = LexicalGraphIndex( @@ -408,7 +426,10 @@ from graphrag_toolkit.lexical_graph.versioning import VersioningConfig, Versioni with ( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): graph_index = LexicalGraphIndex( @@ -437,7 
+458,10 @@ from graphrag_toolkit.lexical_graph.versioning import VersioningConfig, Versioni with ( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): graph_index = LexicalGraphIndex( @@ -466,7 +490,10 @@ from graphrag_toolkit.lexical_graph.versioning import VersioningConfig, Versioni with ( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): graph_index = LexicalGraphIndex( @@ -510,7 +537,10 @@ from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory with ( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): graph_index = LexicalGraphIndex( @@ -542,7 +572,10 @@ from graphrag_toolkit.lexical_graph.versioning import VersioningConfig, Versioni with ( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): graph_index = LexicalGraphIndex( @@ -578,7 +611,10 @@ from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory with ( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + 
) as vector_store ): graph_index = LexicalGraphIndex( @@ -622,7 +658,10 @@ def get_file_metadata(file_path): with( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): graph_index = LexicalGraphIndex( graph_store, @@ -666,7 +705,10 @@ def deletion_protection_filter_fn(metadata): with( GraphStoreFactory.for_graph_store(os.environ['GRAPH_STORE']) as graph_store, - VectorStoreFactory.for_vector_store(os.environ['VECTOR_STORE']) as vector_store + VectorStoreFactory.for_vector_store( + os.environ['VECTOR_STORE'], + index_names=['chunk'] + ) as vector_store ): graph_index = LexicalGraphIndex( graph_store,