Skip to content

Barebones NumPy vector store backed by a SQLite document store

License

Notifications You must be signed in to change notification settings

dwisdom0/vector_db_at_home

Repository files navigation

Mom, can we have milvus-lite?

No, we have an embedded vector database at home.

Quickstart

# Quickstart: store a few documents alongside their vectors, then search
# the store for the vectors closest to a query vector.
from pprint import pprint

import numpy as np

from vector_db_at_home import VectorStore

# Each document is a plain dict. The SQLite session further down shows them
# persisted as JSON text in the `doc` column.
docs = [
    {"name": "Alice", "ice_cream_flavor": "Apricot"},
    {"name": "Bob", "ice_cream_flavor": "Banana Nut"},
    {"name": "Christy", "ice_cream_flavor": "Chocolate Chip Cookie Dough"},
    {"name": "Devin", "ice_cream_flavor": "Dulce de Leche"},
]

# usually you would use vectors from an embedding model
# or have some other way to create meaningful vectors
vector_dim = 10
# One random row per document, shape (len(docs), vector_dim). The values are
# unseeded, so every run produces different vectors and distances.
vecs = np.random.rand(len(docs), vector_dim)

# `db_path` names the SQLite file (the same file opened with `sqlite3` below).
# NOTE(review): `dim` is presumably the required length of every stored and
# queried vector -- confirm against the VectorStore implementation.
vs = VectorStore(db_path="my_vector_store.sqlite3", dim=vector_dim)

# Row i of `vecs` is paired with docs[i]; in the sample output the assigned
# ids 0..3 follow the insertion order.
vs.insert(vecs, docs)

# A single query vector of all ones, shaped (1, vector_dim).
search_query = np.ones((1, vector_dim))

# k=len(docs) asks for as many neighbours as there are documents, so every
# document appears in the result.
# NOTE(review): the outer list presumably holds one result list per query
# row -- confirm. In the sample output each hit is a dict with 'distance',
# 'doc', 'id' and 'vec' keys, ordered by ascending distance, with float32
# vectors; the distances are consistent with Euclidean (L2) distance.
results = vs.search(search_query, k=len(docs))

pprint(results)
[[{'distance': np.float32(1.6019362),
   'doc': {'ice_cream_flavor': 'Dulce de Leche', 'name': 'Devin'},
   'id': 3,
   'vec': array([0.49663496, 0.38377824, 0.6321332 , 0.45838   , 0.14038095,
       0.5488281 , 0.8200291 , 0.6347742 , 0.82873976, 0.39435542],
      dtype=float32)},
  {'distance': np.float32(1.6356196),
   'doc': {'ice_cream_flavor': 'Apricot', 'name': 'Alice'},
   'id': 0,
   'vec': array([0.3221011 , 0.70819205, 0.7256356 , 0.93855625, 0.59427375,
       0.79462117, 0.74481624, 0.6318444 , 0.17722918, 0.01658864],
      dtype=float32)},
  {'distance': np.float32(1.754901),
   'doc': {'ice_cream_flavor': 'Chocolate Chip Cookie Dough',
           'name': 'Christy'},
   'id': 2,
   'vec': array([0.4852044 , 0.86260295, 0.20924412, 0.6022649 , 0.10757214,
       0.5913356 , 0.04581964, 0.77801263, 0.77060676, 0.8089424 ],
      dtype=float32)},
  {'distance': np.float32(1.7927729),
   'doc': {'ice_cream_flavor': 'Banana Nut', 'name': 'Bob'},
   'id': 1,
   'vec': array([0.02818148, 0.79818   , 0.52629197, 0.65800613, 0.8545857 ,
       0.6708415 , 0.12327503, 0.9758107 , 0.9879582 , 0.00569335],
      dtype=float32)}]]

You can also inspect the stored vectors and documents directly with the SQLite command-line shell:

% sqlite3 my_vector_store.sqlite3
sqlite> .tables
vector
sqlite> .d vector
{...snip...}
CREATE TABLE vector (
    id INTEGER PRIMARY KEY,
    vec BLOB NOT NULL,
    doc TEXT
);
{...snip...}
sqlite> .mode lines
sqlite> select id, doc, hex(vec) from vector;
      id = 0
     doc = {"name": "Alice", "ice_cream_flavor": "Apricot"}
hex(vec) = 6FEAA43E134C353F41C3393F3945703F5322183F4B6C4B3F47AC3E3F8EC0213F917B353EE7E4873C

      id = 1
     doc = {"name": "Bob", "ice_cream_flavor": "Banana Nut"}
hex(vec) = DBDCE63C86554C3F12BB063F1773283F21C65A3F45BC2B3F9E77FC3DBBCE793FD4EA7C3F498FBA3B

      id = 2
     doc = {"name": "Christy", "ice_cream_flavor": "Chocolate Chip Cookie Dough"}
hex(vec) = B66CF83E8CD35C3F1744563E082E1A3FC84EDC3DC561173F61AD3B3DD62B473F7C46453FD9164F3F

      id = 3
     doc = {"name": "Devin", "ice_cream_flavor": "Dulce de Leche"}
hex(vec) = F046FE3E957EC43E7BD3213FC9B0EA3E06C00F3E00800C3F6DED513F9080223F4A28543FF4E8C93E

About

Barebones NumPy vector store backed by a SQLite document store

Resources

License

Stars

Watchers

Forks

Packages

No packages published

Languages