Skip to content

Commit 696ed27

Browse files
feat: Adding option to return embeddings and metadata from /vector_stores/*/files/*/content and UI updates
1 parent 47ae606 commit 696ed27

File tree

8 files changed: +173 −9 lines changed

.stats.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 89
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-0ace6fde1fda4dcf15bb1177074f61b15ffa1e574127f1d99c570f2a5fae04e9.yml
3-
openapi_spec_hash: 620ae49556af9e59880cfcf033058def
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-af20fa1866f461e9fef4f7fd226d757b0dddee907e2a083fa582ac0580735e20.yml
3+
openapi_spec_hash: 68caf264f8ade02c34456c526d7300b1
44
config_hash: e8a35d9d37cb4774b4b0fe1b167dc156

api.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -255,7 +255,7 @@ Methods:
255255
- <code title="post /v1/vector_stores/{vector_store_id}/files/{file_id}">client.vector_stores.files.<a href="./src/llama_stack_client/resources/vector_stores/files.py">update</a>(file_id, \*, vector_store_id, \*\*<a href="src/llama_stack_client/types/vector_stores/file_update_params.py">params</a>) -> <a href="./src/llama_stack_client/types/vector_stores/vector_store_file.py">VectorStoreFile</a></code>
256256
- <code title="get /v1/vector_stores/{vector_store_id}/files">client.vector_stores.files.<a href="./src/llama_stack_client/resources/vector_stores/files.py">list</a>(vector_store_id, \*\*<a href="src/llama_stack_client/types/vector_stores/file_list_params.py">params</a>) -> <a href="./src/llama_stack_client/types/vector_stores/vector_store_file.py">SyncOpenAICursorPage[VectorStoreFile]</a></code>
257257
- <code title="delete /v1/vector_stores/{vector_store_id}/files/{file_id}">client.vector_stores.files.<a href="./src/llama_stack_client/resources/vector_stores/files.py">delete</a>(file_id, \*, vector_store_id) -> <a href="./src/llama_stack_client/types/vector_stores/file_delete_response.py">FileDeleteResponse</a></code>
258-
- <code title="get /v1/vector_stores/{vector_store_id}/files/{file_id}/content">client.vector_stores.files.<a href="./src/llama_stack_client/resources/vector_stores/files.py">content</a>(file_id, \*, vector_store_id) -> <a href="./src/llama_stack_client/types/vector_stores/file_content_response.py">FileContentResponse</a></code>
258+
- <code title="get /v1/vector_stores/{vector_store_id}/files/{file_id}/content">client.vector_stores.files.<a href="./src/llama_stack_client/resources/vector_stores/files.py">content</a>(file_id, \*, vector_store_id, \*\*<a href="src/llama_stack_client/types/vector_stores/file_content_params.py">params</a>) -> <a href="./src/llama_stack_client/types/vector_stores/file_content_response.py">FileContentResponse</a></code>
259259

260260
## FileBatches
261261

src/llama_stack_client/resources/vector_stores/files.py

Lines changed: 35 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
)
2626
from ...pagination import SyncOpenAICursorPage, AsyncOpenAICursorPage
2727
from ..._base_client import AsyncPaginator, make_request_options
28-
from ...types.vector_stores import file_list_params, file_create_params, file_update_params
28+
from ...types.vector_stores import file_list_params, file_create_params, file_update_params, file_content_params
2929
from ...types.vector_stores.vector_store_file import VectorStoreFile
3030
from ...types.vector_stores.file_delete_response import FileDeleteResponse
3131
from ...types.vector_stores.file_content_response import FileContentResponse
@@ -286,6 +286,8 @@ def content(
286286
file_id: str,
287287
*,
288288
vector_store_id: str,
289+
include_embeddings: bool | Omit = omit,
290+
include_metadata: bool | Omit = omit,
289291
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
290292
# The extra values given here take precedence over values defined on the client or passed to this method.
291293
extra_headers: Headers | None = None,
@@ -297,6 +299,10 @@ def content(
297299
Retrieves the contents of a vector store file.
298300
299301
Args:
302+
include_embeddings: Whether to include embedding vectors in the response.
303+
304+
include_metadata: Whether to include chunk metadata in the response.
305+
300306
extra_headers: Send extra headers
301307
302308
extra_query: Add additional query parameters to the request
@@ -312,7 +318,17 @@ def content(
312318
return self._get(
313319
f"/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
314320
options=make_request_options(
315-
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
321+
extra_headers=extra_headers,
322+
extra_query=extra_query,
323+
extra_body=extra_body,
324+
timeout=timeout,
325+
query=maybe_transform(
326+
{
327+
"include_embeddings": include_embeddings,
328+
"include_metadata": include_metadata,
329+
},
330+
file_content_params.FileContentParams,
331+
),
316332
),
317333
cast_to=FileContentResponse,
318334
)
@@ -571,6 +587,8 @@ async def content(
571587
file_id: str,
572588
*,
573589
vector_store_id: str,
590+
include_embeddings: bool | Omit = omit,
591+
include_metadata: bool | Omit = omit,
574592
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
575593
# The extra values given here take precedence over values defined on the client or passed to this method.
576594
extra_headers: Headers | None = None,
@@ -582,6 +600,10 @@ async def content(
582600
Retrieves the contents of a vector store file.
583601
584602
Args:
603+
include_embeddings: Whether to include embedding vectors in the response.
604+
605+
include_metadata: Whether to include chunk metadata in the response.
606+
585607
extra_headers: Send extra headers
586608
587609
extra_query: Add additional query parameters to the request
@@ -597,7 +619,17 @@ async def content(
597619
return await self._get(
598620
f"/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
599621
options=make_request_options(
600-
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
622+
extra_headers=extra_headers,
623+
extra_query=extra_query,
624+
extra_body=extra_body,
625+
timeout=timeout,
626+
query=await async_maybe_transform(
627+
{
628+
"include_embeddings": include_embeddings,
629+
"include_metadata": include_metadata,
630+
},
631+
file_content_params.FileContentParams,
632+
),
601633
),
602634
cast_to=FileContentResponse,
603635
)

src/llama_stack_client/types/vector_store_search_response.py

Lines changed: 48 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,45 @@
1111

1212
from .._models import BaseModel
1313

14-
__all__ = ["VectorStoreSearchResponse", "Data", "DataContent"]
14+
__all__ = ["VectorStoreSearchResponse", "Data", "DataContent", "DataContentChunkMetadata"]
15+
16+
17+
class DataContentChunkMetadata(BaseModel):
18+
chunk_embedding_dimension: Optional[int] = None
19+
"""The dimension of the embedding vector for the chunk."""
20+
21+
chunk_embedding_model: Optional[str] = None
22+
"""The embedding model used to create the chunk's embedding."""
23+
24+
chunk_id: Optional[str] = None
25+
"""The ID of the chunk.
26+
27+
If not set, it will be generated based on the document ID and content.
28+
"""
29+
30+
chunk_tokenizer: Optional[str] = None
31+
"""The tokenizer used to create the chunk. Default is Tiktoken."""
32+
33+
chunk_window: Optional[str] = None
34+
"""The window of the chunk, which can be used to group related chunks together."""
35+
36+
content_token_count: Optional[int] = None
37+
"""The number of tokens in the content of the chunk."""
38+
39+
created_timestamp: Optional[int] = None
40+
"""An optional timestamp indicating when the chunk was created."""
41+
42+
document_id: Optional[str] = None
43+
"""The ID of the document this chunk belongs to."""
44+
45+
metadata_token_count: Optional[int] = None
46+
"""The number of tokens in the metadata of the chunk."""
47+
48+
source: Optional[str] = None
49+
"""The source of the content, such as a URL, file path, or other identifier."""
50+
51+
updated_timestamp: Optional[int] = None
52+
"""An optional timestamp indicating when the chunk was last updated."""
1553

1654

1755
class DataContent(BaseModel):
@@ -21,6 +59,15 @@ class DataContent(BaseModel):
2159
type: Literal["text"]
2260
"""Content type, currently only "text" is supported"""
2361

62+
chunk_metadata: Optional[DataContentChunkMetadata] = None
63+
"""Optional chunk metadata"""
64+
65+
embedding: Optional[List[float]] = None
66+
"""Optional embedding vector for this content chunk"""
67+
68+
metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
69+
"""Optional user-defined metadata"""
70+
2471

2572
class Data(BaseModel):
2673
content: List[DataContent]

src/llama_stack_client/types/vector_stores/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from .vector_store_file import VectorStoreFile as VectorStoreFile
1313
from .file_create_params import FileCreateParams as FileCreateParams
1414
from .file_update_params import FileUpdateParams as FileUpdateParams
15+
from .file_content_params import FileContentParams as FileContentParams
1516
from .file_delete_response import FileDeleteResponse as FileDeleteResponse
1617
from .file_content_response import FileContentResponse as FileContentResponse
1718
from .file_batch_create_params import FileBatchCreateParams as FileBatchCreateParams
Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2+
3+
from __future__ import annotations
4+
5+
from typing_extensions import Required, TypedDict
6+
7+
__all__ = ["FileContentParams"]
8+
9+
10+
class FileContentParams(TypedDict, total=False):
11+
vector_store_id: Required[str]
12+
13+
include_embeddings: bool
14+
"""Whether to include embedding vectors in the response."""
15+
16+
include_metadata: bool
17+
"""Whether to include chunk metadata in the response."""

src/llama_stack_client/types/vector_stores/file_content_response.py

Lines changed: 49 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,50 @@
66

77
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
88

9-
from typing import List, Optional
9+
from typing import Dict, List, Union, Optional
1010
from typing_extensions import Literal
1111

1212
from ..._models import BaseModel
1313

14-
__all__ = ["FileContentResponse", "Data"]
14+
__all__ = ["FileContentResponse", "Data", "DataChunkMetadata"]
15+
16+
17+
class DataChunkMetadata(BaseModel):
18+
chunk_embedding_dimension: Optional[int] = None
19+
"""The dimension of the embedding vector for the chunk."""
20+
21+
chunk_embedding_model: Optional[str] = None
22+
"""The embedding model used to create the chunk's embedding."""
23+
24+
chunk_id: Optional[str] = None
25+
"""The ID of the chunk.
26+
27+
If not set, it will be generated based on the document ID and content.
28+
"""
29+
30+
chunk_tokenizer: Optional[str] = None
31+
"""The tokenizer used to create the chunk. Default is Tiktoken."""
32+
33+
chunk_window: Optional[str] = None
34+
"""The window of the chunk, which can be used to group related chunks together."""
35+
36+
content_token_count: Optional[int] = None
37+
"""The number of tokens in the content of the chunk."""
38+
39+
created_timestamp: Optional[int] = None
40+
"""An optional timestamp indicating when the chunk was created."""
41+
42+
document_id: Optional[str] = None
43+
"""The ID of the document this chunk belongs to."""
44+
45+
metadata_token_count: Optional[int] = None
46+
"""The number of tokens in the metadata of the chunk."""
47+
48+
source: Optional[str] = None
49+
"""The source of the content, such as a URL, file path, or other identifier."""
50+
51+
updated_timestamp: Optional[int] = None
52+
"""An optional timestamp indicating when the chunk was last updated."""
1553

1654

1755
class Data(BaseModel):
@@ -21,6 +59,15 @@ class Data(BaseModel):
2159
type: Literal["text"]
2260
"""Content type, currently only "text" is supported"""
2361

62+
chunk_metadata: Optional[DataChunkMetadata] = None
63+
"""Optional chunk metadata"""
64+
65+
embedding: Optional[List[float]] = None
66+
"""Optional embedding vector for this content chunk"""
67+
68+
metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
69+
"""Optional user-defined metadata"""
70+
2471

2572
class FileContentResponse(BaseModel):
2673
data: List[Data]

tests/api_resources/vector_stores/test_files.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -287,6 +287,16 @@ def test_method_content(self, client: LlamaStackClient) -> None:
287287
)
288288
assert_matches_type(FileContentResponse, file, path=["response"])
289289

290+
@parametrize
291+
def test_method_content_with_all_params(self, client: LlamaStackClient) -> None:
292+
file = client.vector_stores.files.content(
293+
file_id="file_id",
294+
vector_store_id="vector_store_id",
295+
include_embeddings=True,
296+
include_metadata=True,
297+
)
298+
assert_matches_type(FileContentResponse, file, path=["response"])
299+
290300
@parametrize
291301
def test_raw_response_content(self, client: LlamaStackClient) -> None:
292302
response = client.vector_stores.files.with_raw_response.content(
@@ -592,6 +602,16 @@ async def test_method_content(self, async_client: AsyncLlamaStackClient) -> None
592602
)
593603
assert_matches_type(FileContentResponse, file, path=["response"])
594604

605+
@parametrize
606+
async def test_method_content_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
607+
file = await async_client.vector_stores.files.content(
608+
file_id="file_id",
609+
vector_store_id="vector_store_id",
610+
include_embeddings=True,
611+
include_metadata=True,
612+
)
613+
assert_matches_type(FileContentResponse, file, path=["response"])
614+
595615
@parametrize
596616
async def test_raw_response_content(self, async_client: AsyncLlamaStackClient) -> None:
597617
response = await async_client.vector_stores.files.with_raw_response.content(

Comments (0)