diff --git a/Cargo.lock b/Cargo.lock index e19cfa0..fbbc655 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -32,11 +32,20 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "api" @@ -45,6 +54,7 @@ dependencies = [ "defs", "index", "snafu", + "snapshot", "storage", "tempfile", "uuid", @@ -67,6 +77,12 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "axum" version = "0.8.8" @@ -176,14 +192,14 @@ version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "cexpr", "clang-sys", "itertools 0.13.0", "proc-macro2", "quote", "regex", - "rustc-hash 2.1.1", + "rustc-hash 2.1.2", "shlex", "syn", ] @@ -196,21 +212,30 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.10.0" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] [[package]] name = "bumpalo" -version = "3.19.1" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "bzip2-sys" @@ -239,9 +264,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.51" +version = "1.2.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203" +checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1" dependencies = [ "find-msvc-tools", "jobserver", @@ -264,6 +289,20 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] + [[package]] name = "clang-sys" version = "1.8.1" @@ -325,19 +364,47 @@ dependencies = [ "libc", ] +[[package]] +name = 
"core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossterm" version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "crossterm_winapi", "libc", "mio 0.8.11", @@ -356,6 +423,22 @@ dependencies = [ "winapi", ] +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "data-encoding" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" + [[package]] name = "defs" version = "0.1.0" @@ -364,6 +447,16 @@ dependencies = [ "uuid", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" 
+dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -428,11 +521,22 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "filetime" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" +dependencies = [ + "cfg-if", + "libc", + "libredox", +] + [[package]] name = "find-msvc-tools" -version = "0.1.6" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "fixedbitset" @@ -440,6 +544,16 @@ version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -476,11 +590,21 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", "futures-sink", 
@@ -488,33 +612,33 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-core", "futures-io", @@ -522,15 +646,24 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" -version = "0.2.16" +version = 
"0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "libc", @@ -545,10 +678,23 @@ checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", - "r-efi", + "r-efi 5.3.0", "wasip2", ] +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + [[package]] name = "gimli" version = "0.32.3" @@ -585,9 +731,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" dependencies = [ "atomic-waker", "bytes", @@ -755,14 +901,13 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.19" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ "base64", "bytes", "futures-channel", - "futures-core", "futures-util", "http 1.4.0", "http-body", @@ -779,6 +924,30 @@ dependencies = [ "windows-registry", ] +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + 
"windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "icu_collections" version = "2.1.1" @@ -860,6 +1029,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "idna" version = "1.1.0" @@ -891,33 +1066,38 @@ checksum = "964de6e86d545b246d84badc0fef527924ace5134f30641c203ef52ba83f58d5" name = "index" version = "0.1.0" dependencies = [ + "bincode", "defs", "rand", + "serde", "snafu", + "storage", "uuid", ] [[package]] name = "indexmap" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown 0.16.1", + "serde", + "serde_core", ] [[package]] name = "ipnet" -version = "2.11.0" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.9" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" +checksum = "d8e7418f59cc01c88316161279a7f665217ae316b388e58a0d10e29f54f1e5eb" dependencies = [ "memchr", "serde", @@ -952,9 +1132,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "jobserver" @@ -968,10 +1148,12 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.83" +version = "0.3.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +checksum = "cc4c90f45aa2e6eacbe8645f77fdea542ac97a494bcd117a67df9ff4d611f995" dependencies = [ + "cfg-if", + "futures-util", "once_cell", "wasm-bindgen", ] @@ -988,11 +1170,17 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "libc" -version = "0.2.178" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "libloading" @@ -1004,6 +1192,18 @@ dependencies = [ "windows-link", ] +[[package]] +name = "libredox" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ddbf48fd451246b1f8c2610bd3b4ac0cc6e149d89832867093ab69a17194f08" +dependencies = [ + "bitflags 2.11.0", + "libc", + "plain", + "redox_syscall 0.7.3", +] + [[package]] name = "librocksdb-sys" version = "0.11.0+8.1.1" @@ -1022,9 +1222,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.23" +version = "1.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15d118bbf3771060e7311cc7bb0545b01d08a8b4a7de949198dec1fa0ca1c0f7" +checksum = 
"d52f4c29e2a68ac30c9087e1b772dc9f44a2b66ed44edf2266cf2be9b03dafc1" dependencies = [ "cc", "pkg-config", @@ -1033,9 +1233,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" @@ -1085,9 +1285,9 @@ checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "mime" @@ -1118,6 +1318,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -1134,9 +1335,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" dependencies = [ "libc", "wasi", @@ -1151,9 +1352,9 @@ checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" [[package]] name = "native-tls" -version = "0.2.14" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" +checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" dependencies = [ "libc", "log", @@ -1185,6 +1386,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num-traits" 
+version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "object" version = "0.37.3" @@ -1196,17 +1406,17 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "openssl" -version = "0.10.75" +version = "0.10.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" +checksum = "951c002c75e16ea2c65b8c7e4d3d51d5530d8dfa7d060b4776828c88cfb18ecf" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "cfg-if", "foreign-types", "libc", @@ -1228,15 +1438,15 @@ dependencies = [ [[package]] name = "openssl-probe" -version = "0.1.6" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" [[package]] name = "openssl-sys" -version = "0.9.111" +version = "0.9.112" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" +checksum = "57d55af3b3e226502be1526dfdba67ab0e9c96fc293004e79576b2b9edb0dbdb" dependencies = [ "cc", "libc", @@ -1246,9 +1456,9 @@ dependencies = [ [[package]] name = "owo-colors" -version = "4.2.3" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c6901729fa79e91a0913333229e9ca5dc725089d1c363b2f4b4760709dc4a52" +checksum = "d211803b9b6b570f68772237e415a029d5a50c65d382910b879fb19d3271f94d" [[package]] name = 
"parking_lot" @@ -1268,7 +1478,7 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.5.18", "smallvec", "windows-link", ] @@ -1293,28 +1503,29 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "petgraph" -version = "0.7.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", + "hashbrown 0.15.5", "indexmap", ] [[package]] name = "pin-project" -version = "1.1.10" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.10" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" dependencies = [ "proc-macro2", "quote", @@ -1323,9 +1534,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] name = "pin-utils" @@ -1339,6 +1550,12 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "plain" +version = "0.2.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + [[package]] name = "potential_utf" version = "0.1.4" @@ -1369,18 +1586,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.103" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "prost" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", "prost-derive", @@ -1388,15 +1605,14 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac6c3320f9abac597dcbc668774ef006702672474aad53c6d596b62e487b40b1" +checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", "itertools 0.14.0", "log", "multimap", - "once_cell", "petgraph", "prettyplease", "prost", @@ -1410,9 +1626,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", "itertools 0.14.0", @@ -1423,38 +1639,38 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" 
+checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ "prost", ] [[package]] name = "pulldown-cmark" -version = "0.13.0" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e8bbe1a966bd2f362681a44f6edce3c2310ac21e4d5067a6e7ec396297a6ea0" +checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "memchr", "unicase", ] [[package]] name = "pulldown-cmark-to-cmark" -version = "21.1.0" +version = "22.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8246feae3db61428fd0bb94285c690b460e4517d83152377543ca802357785f1" +checksum = "50793def1b900256624a709439404384204a5dc3a6ec580281bfaac35e882e90" dependencies = [ "pulldown-cmark", ] [[package]] name = "quote" -version = "1.0.42" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -1465,6 +1681,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "rand" version = "0.9.2" @@ -1487,9 +1709,9 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ "getrandom 0.3.4", ] @@ -1500,7 +1722,7 @@ version = "0.26.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f44c9e68fd46eda15c646fbb85e1040b657a58cdc8c98db1d97a55930d991eef" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "cassowary", "compact_str", "crossterm", @@ -1520,14 +1742,23 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", +] + +[[package]] +name = "redox_syscall" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16" +dependencies = [ + "bitflags 2.11.0", ] [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -1537,9 +1768,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -1548,9 +1779,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "reqwest" @@ -1603,7 +1834,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.17", "libc", 
"untrusted", "windows-sys 0.52.0", @@ -1621,9 +1852,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" +checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" [[package]] name = "rustc-hash" @@ -1633,17 +1864,17 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc-hash" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" [[package]] name = "rustix" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "errno", "libc", "linux-raw-sys", @@ -1652,9 +1883,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.35" +version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ "once_cell", "rustls-pki-types", @@ -1665,18 +1896,18 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.2" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ "zeroize", ] [[package]] name = "rustls-webpki" -version = "0.103.8" 
+version = "0.103.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" +checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" dependencies = [ "ring", "rustls-pki-types", @@ -1691,15 +1922,15 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "schannel" -version = "0.1.28" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" dependencies = [ "windows-sys 0.61.2", ] @@ -1712,12 +1943,12 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "security-framework" -version = "2.11.1" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ - "bitflags 2.10.0", - "core-foundation", + "bitflags 2.11.0", + "core-foundation 0.10.1", "core-foundation-sys", "libc", "security-framework-sys", @@ -1725,14 +1956,20 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.15.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" dependencies = [ "core-foundation-sys", "libc", ] +[[package]] +name 
= "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + [[package]] name = "serde" version = "1.0.228" @@ -1765,9 +2002,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.148" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3084b546a1dd6289475996f182a22aba973866ea8e8b02c51d9f46b1336a22da" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", @@ -1823,6 +2060,17 @@ dependencies = [ "uuid", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -1869,11 +2117,17 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "smallvec" @@ -1902,14 +2156,34 @@ dependencies = [ "syn", ] +[[package]] +name = "snapshot" +version = "0.1.0" +dependencies = [ + "chrono", + "data-encoding", + "defs", + "flate2", + "fs2", + "index", + "semver", + "serde", + "serde_json", + "sha2", + "storage", + "tar", + "tempfile", + "uuid", +] + [[package]] name = "socket2" -version = "0.6.1" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -1940,8 +2214,11 @@ version = "0.1.0" dependencies = [ "bincode", "defs", + "flate2", "rocksdb", + "serde", "snafu", + "tar", "tempfile", "uuid", ] @@ -1976,9 +2253,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.111" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -2007,12 +2284,12 @@ dependencies = [ [[package]] name = "system-configuration" -version = "0.6.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ - "bitflags 2.10.0", - "core-foundation", + "bitflags 2.11.0", + "core-foundation 0.9.4", "system-configuration-sys", ] @@ -2026,14 +2303,25 @@ dependencies = [ "libc", ] +[[package]] +name = "tar" +version = "0.4.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22692a6476a21fa75fdfc11d452fda482af402c008cdbaf3476414e122040973" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "tempfile" -version = "3.24.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.3.4", + "getrandom 0.4.2", "once_cell", "rustix", "windows-sys 
0.61.2", @@ -2060,13 +2348,13 @@ dependencies = [ [[package]] name = "tokio" -version = "1.48.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" dependencies = [ "bytes", "libc", - "mio 1.1.1", + "mio 1.2.0", "parking_lot", "pin-project-lite", "signal-hook-registry", @@ -2077,9 +2365,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" dependencies = [ "proc-macro2", "quote", @@ -2108,9 +2396,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" dependencies = [ "futures-core", "pin-project-lite", @@ -2119,9 +2407,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.17" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", @@ -2132,9 +2420,9 @@ dependencies = [ [[package]] name = "tonic" -version = "0.14.2" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203" +checksum = "fec7c61a0695dc1887c1b53952990f3ad2e3a31453e1f49f10e75424943a93ec" dependencies = [ "async-trait", "axum", @@ -2161,9 +2449,9 @@ 
dependencies = [ [[package]] name = "tonic-build" -version = "0.14.2" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c40aaccc9f9eccf2cd82ebc111adc13030d23e887244bc9cfa5d1d636049de3" +checksum = "1882ac3bf5ef12877d7ed57aad87e75154c11931c2ba7e6cde5e22d63522c734" dependencies = [ "prettyplease", "proc-macro2", @@ -2173,9 +2461,9 @@ dependencies = [ [[package]] name = "tonic-prost" -version = "0.14.2" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67" +checksum = "a55376a0bbaa4975a3f10d009ad763d8f4108f067c7c2e74f3001fb49778d309" dependencies = [ "bytes", "prost", @@ -2184,9 +2472,9 @@ dependencies = [ [[package]] name = "tonic-prost-build" -version = "0.14.2" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4a16cba4043dc3ff43fcb3f96b4c5c154c64cbd18ca8dce2ab2c6a451d058a2" +checksum = "f3144df636917574672e93d0f56d7edec49f90305749c668df5101751bb8f95a" dependencies = [ "prettyplease", "proc-macro2", @@ -2200,9 +2488,9 @@ dependencies = [ [[package]] name = "tower" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", @@ -2223,7 +2511,7 @@ version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "bytes", "futures-util", "http 1.4.0", @@ -2303,9 +2591,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.22" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" dependencies = [ "nu-ansi-term", "sharded-slab", @@ -2340,23 +2628,29 @@ dependencies = [ "uuid", ] +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + [[package]] name = "unicase" -version = "2.8.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" [[package]] name = "unicode-ident" -version = "1.0.22" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" [[package]] name = "unicode-truncate" @@ -2375,6 +2669,12 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "untrusted" version = "0.9.0" @@ -2383,9 +2683,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.7" +version = "2.5.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" dependencies = [ "form_urlencoded", "idna", @@ -2401,11 +2701,11 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.19.0" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" +checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" dependencies = [ - "getrandom 0.3.4", + "getrandom 0.4.2", "js-sys", "serde_core", "wasm-bindgen", @@ -2423,6 +2723,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "want" version = "0.3.1" @@ -2440,18 +2746,27 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.106" +version = "0.2.115" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +checksum = "6523d69017b7633e396a89c5efab138161ed5aafcbc8d3e5c5a42ae38f50495a" dependencies = [ "cfg-if", "once_cell", @@ -2462,22 +2777,19 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.56" +version = "0.4.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" +checksum = "2d1faf851e778dfa54db7cd438b70758eba9755cb47403f3496edd7c8fc212f0" dependencies = [ - "cfg-if", "js-sys", - "once_cell", "wasm-bindgen", - "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.106" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +checksum = "4e3a6c758eb2f701ed3d052ff5737f5bfe6614326ea7f3bbac7156192dc32e67" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2485,9 +2797,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.106" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +checksum = "921de2737904886b52bcbb237301552d05969a6f9c40d261eb0533c8b055fedf" dependencies = [ "bumpalo", "proc-macro2", @@ -2498,18 +2810,52 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.106" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +checksum = "a93e946af942b58934c604527337bad9ae33ba1d5c6900bbb41c2c07c2364a93" dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.11.0", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + [[package]] name = "web-sys" -version = "0.3.83" +version = "0.3.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +checksum = "84cde8507f4d7cfcb1185b8cb5890c494ffea65edbe1ba82cfd63661c805ed94" dependencies = [ "js-sys", "wasm-bindgen", @@ -2537,6 +2883,41 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "windows-link" version = "0.2.1" @@ -2590,15 +2971,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-sys" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" -dependencies = [ - "windows-targets 0.53.5", -] - [[package]] name = "windows-sys" version = "0.61.2" @@ -2632,30 +3004,13 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", + "windows_i686_gnullvm", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] -[[package]] -name = "windows-targets" -version = "0.53.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" -dependencies = [ - "windows-link", - "windows_aarch64_gnullvm 0.53.1", - "windows_aarch64_msvc 0.53.1", - "windows_i686_gnu 0.53.1", - "windows_i686_gnullvm 0.53.1", - "windows_i686_msvc 0.53.1", - "windows_x86_64_gnu 0.53.1", - "windows_x86_64_gnullvm 0.53.1", - "windows_x86_64_msvc 0.53.1", -] - [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -2668,12 +3023,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" - [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -2686,12 +3035,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_aarch64_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" - [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -2704,24 +3047,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" -[[package]] -name = "windows_i686_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" - [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" - [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -2734,12 +3065,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_i686_msvc" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" - [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -2752,12 +3077,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnu" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -2770,12 +3089,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" - [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -2789,16 +3102,92 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] -name = "windows_x86_64_msvc" -version = "0.53.1" +name = "wit-bindgen" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] [[package]] -name = "wit-bindgen" -version = "0.46.0" +name = "wit-bindgen-core" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.11.0", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "writeable" @@ -2806,6 +3195,16 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "xattr" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix", +] + [[package]] name = "yoke" version = "0.8.1" @@ -2831,18 +3230,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.31" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.31" +version = "0.8.48" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", @@ -2911,9 +3310,9 @@ dependencies = [ [[package]] name = "zmij" -version = "1.0.0" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d6085d62852e35540689d1f97ad663e3971fc19cf5eceab364d62c646ea167" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" [[package]] name = "zstd-sys" diff --git a/Cargo.toml b/Cargo.toml index 76c1c7f..86d073e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ members = [ "crates/http", "crates/tui", "crates/grpc", + "crates/snapshot", ] [workspace.package] @@ -51,5 +52,6 @@ grpc = { path = "crates/grpc" } http = { path = "crates/http" } index = { path = "crates/index" } server = { path = "crates/server" } +snapshot = { path = "crates/snapshot" } storage = { path = "crates/storage" } tui = { path = "crates/tui" } diff --git a/crates/api/Cargo.toml b/crates/api/Cargo.toml index 287f94c..7c198a3 100644 --- a/crates/api/Cargo.toml +++ b/crates/api/Cargo.toml @@ -10,6 +10,7 @@ license.workspace = true defs.workspace = true index.workspace = true snafu.workspace = true +snapshot.workspace = true storage.workspace = true tempfile.workspace = true uuid.workspace = true diff --git a/crates/api/src/lib.rs b/crates/api/src/lib.rs index 16e9dab..de028c0 100644 --- a/crates/api/src/lib.rs +++ b/crates/api/src/lib.rs @@ -1,13 +1,15 @@ -use defs::{Dimension, IndexedVector, Similarity}; - +use defs::{DbError, Dimension, IndexedVector, Similarity, SnapshottableDb}; use defs::{DenseVector, Payload, Point, PointId}; use index::hnsw::HnswIndex; -use std::path::PathBuf; +use index::kd_tree::index::KDTree; +use std::path::{Path, PathBuf}; +use tempfile::tempdir; // use std::sync::atomic::{AtomicU64, Ordering}; use 
std::sync::{Arc, RwLock}; -use index::flat::FlatIndex; +use index::flat::index::FlatIndex; use index::{IndexType, VectorIndex}; +use snapshot::Snapshot; use storage::rocks_db::RocksDbStorage; use storage::{StorageEngine, StorageType, VectorPage}; @@ -152,6 +154,33 @@ impl VectorDb { } } +impl SnapshottableDb for VectorDb { + fn create_snapshot(&self, dir_path: &Path) -> Result { + if !dir_path.is_dir() { + return Err(DbError::SnapshotError(format!( + "Invalid path: {}", + dir_path.display() + ))); + } + + let index_snapshot = self + .index + .read() + .map_err(|_| DbError::LockError)? + .snapshot()?; + + let tempdir = tempdir().unwrap(); + let storage_checkpoint = self.storage.checkpoint_at(tempdir.path()).map_err(|e| { + DbError::StorageCheckpointError(format!("Could not create storage checkpoint: {e}")) + })?; + + let snapshot = Snapshot::new(index_snapshot, storage_checkpoint, self.dimension)?; + let snapshot_path = snapshot.save(dir_path)?; + + Ok(snapshot_path) + } +} + #[derive(Debug)] pub struct DbConfig { pub storage_type: StorageType, @@ -161,6 +190,28 @@ pub struct DbConfig { pub similarity: Similarity, } +#[derive(Debug)] +pub struct DbRestoreConfig { + pub data_path: PathBuf, + pub snapshot_path: PathBuf, +} + +impl DbRestoreConfig { + pub fn new(data_path: PathBuf, snapshot_path: PathBuf) -> Self { + Self { + data_path, + snapshot_path, + } + } +} + +pub fn restore_from_snapshot(config: &DbRestoreConfig) -> Result { + // restore the index from the snapshot + let (storage_engine, index, dimensions) = + Snapshot::load(&config.snapshot_path, &config.data_path)?; + Ok(VectorDb::_new(storage_engine, index, dimensions)) +} + pub fn init_api(config: DbConfig) -> Result { // Initialize the storage engine let storage = match config.storage_type { @@ -171,11 +222,11 @@ pub fn init_api(config: DbConfig) -> Result { // Initialize the vector index let index: Arc> = match config.index_type { IndexType::Flat => Arc::new(RwLock::new(FlatIndex::new())), + 
IndexType::KDTree => Arc::new(RwLock::new(KDTree::build_empty(config.dimension))), IndexType::HNSW => Arc::new(RwLock::new(HnswIndex::new( config.similarity, config.dimension, ))), - _ => Arc::new(RwLock::new(FlatIndex::new())), }; // Init the db @@ -192,8 +243,11 @@ mod tests { // TODO: Add more exhaustive tests + use std::sync::Mutex; + use super::*; use defs::ContentType; + use snapshot::{engine::SnapshotEngine, registry::local::LocalRegistry}; use tempfile::{TempDir, tempdir}; // Helper function to create a test database @@ -419,4 +473,143 @@ mod tests { let inserted = db.build_index().unwrap(); assert_eq!(inserted, 10); } + + #[test] + fn test_create_and_load_snapshot() { + let (old_db, temp_dir) = create_test_db(); + + let v1 = vec![0.0, 1.0, 2.0]; + let v2 = vec![3.0, 4.0, 5.0]; + let v3 = vec![6.0, 7.0, 8.0]; + + let id1 = old_db + .insert( + v1.clone(), + Payload { + content_type: ContentType::Text, + content: "test".to_string(), + }, + ) + .unwrap(); + + let id2 = old_db + .insert( + v2.clone(), + Payload { + content_type: ContentType::Text, + content: "test".to_string(), + }, + ) + .unwrap(); + + let temp_snapshot_dir = tempdir().unwrap(); + let snapshot_path = old_db.create_snapshot(temp_snapshot_dir.path()).unwrap(); + + // insert v3 after snapshot + let id3 = old_db + .insert( + v3.clone(), + Payload { + content_type: ContentType::Text, + content: "test".to_string(), + }, + ) + .unwrap(); + + let reload_config = DbRestoreConfig { + data_path: temp_dir.path().to_path_buf(), + snapshot_path, + }; + + std::mem::drop(old_db); + let loaded_db = restore_from_snapshot(&reload_config).unwrap(); + + assert!(loaded_db.get(id1).unwrap_or(None).is_some()); + assert!(loaded_db.get(id2).unwrap_or(None).is_some()); + assert!(loaded_db.get(id3).unwrap_or(None).is_none()); // v3 was inserted after snapshot was taken + + // vector restore check + assert!(loaded_db.get(id1).unwrap().unwrap().vector.unwrap() == v1); + 
assert!(loaded_db.get(id2).unwrap().unwrap().vector.unwrap() == v2); + } + + #[test] + fn test_snapshot_engine() { + let (_db, _temp_dir) = create_test_db(); + let db = Arc::new(Mutex::new(_db)); + + let registry_tempdir = tempdir().unwrap(); + + let registry = Arc::new(Mutex::new( + LocalRegistry::new(registry_tempdir.path()).unwrap(), + )); + + let last_k = 4; + let mut se = SnapshotEngine::new(last_k, db.clone(), registry.clone()); + + let v1 = vec![0.0, 1.0, 2.0]; + let v2 = vec![3.0, 4.0, 5.0]; + let v3 = vec![6.0, 7.0, 8.0]; + + let test_vectors = vec![v1.clone(), v2.clone(), v3.clone()]; + let mut inserted_ids = Vec::new(); + + for (i, vector) in test_vectors.clone().into_iter().enumerate() { + se.snapshot().unwrap(); + let id = db + .lock() + .unwrap() + .insert( + vector.clone(), + Payload { + content_type: ContentType::Text, + content: format!("{}", i), + }, + ) + .unwrap(); + inserted_ids.push(id); + } + se.snapshot().unwrap(); + let snapshots = se.list_alive_snapshots().unwrap(); + + // asserting these cases: + // snapshot 0 : no vectors + // snapshot 1 : v1 + // snapshot 2 : v1, v2 + // snapshot 3 : v1, v2, v3 + + std::mem::drop(db); + std::mem::drop(se); + + for (i, snapshot) in snapshots.iter().enumerate() { + let temp_dir = tempdir().unwrap(); + let db = restore_from_snapshot(&DbRestoreConfig { + data_path: temp_dir.path().to_path_buf(), + snapshot_path: snapshot.path.clone(), + }) + .unwrap(); + for j in 0..i { + // test if point is present + assert!(db.get(inserted_ids[j]).unwrap_or(None).is_some()); + // test vector restore + assert!( + db.get(inserted_ids[j]).unwrap().unwrap().vector.unwrap() == test_vectors[j] + ); + // test payload restore + assert!( + db.get(inserted_ids[j]) + .unwrap() + .unwrap() + .payload + .unwrap() + .content + == format!("{}", j) + ); + } + for absent_id in inserted_ids.iter().skip(i) { + assert!(db.get(*absent_id).unwrap_or(None).is_none()); + } + std::mem::drop(db); + } + } } diff --git a/crates/defs/src/error.rs 
b/crates/defs/src/error.rs index 13f0b60..8354768 100644 --- a/crates/defs/src/error.rs +++ b/crates/defs/src/error.rs @@ -1,5 +1,29 @@ use std::io; +use crate::{Dimension, PointId}; +#[derive(Debug, PartialEq, Eq)] +pub enum DbError { + ParseError, + StorageError(String), + SerializationError(String), + DeserializationError, + IndexError(String), + LockError, + IndexInitError, //TODO: Change this + UnsupportedSimilarity, + DimensionMismatch, + SnapshotError(String), + StorageInitializationError, + StorageCheckpointError(String), + InvalidMagicBytes(String), + VectorNotFound(uuid::Uuid), + SnapshotRegistryError(String), + SnapshotEngineError(String), + InvalidDimension { expected: Dimension, got: Dimension }, + PointAlreadyExists { id: PointId }, + PointNotFound { id: PointId }, +} + #[derive(Debug)] pub enum ServerError { Bind(io::Error), diff --git a/crates/defs/src/lib.rs b/crates/defs/src/lib.rs index c2a79bf..cf3b3f3 100644 --- a/crates/defs/src/lib.rs +++ b/crates/defs/src/lib.rs @@ -3,4 +3,10 @@ pub mod types; // Without re-exports, users would need to write defs::types::SomeType instead of just defs::SomeType. Re-exports simplify the API by flattening the module hierarchy. The * means "everything public" from that module. pub use error::*; +use std::path::{Path, PathBuf}; pub use types::*; + +// hoisted trait so it can be used by the snapshots crate +pub trait SnapshottableDb: Send + Sync { + fn create_snapshot(&self, dir_path: &Path) -> Result; +} diff --git a/crates/defs/src/types.rs b/crates/defs/src/types.rs index 65c861a..525df26 100644 --- a/crates/defs/src/types.rs +++ b/crates/defs/src/types.rs @@ -15,6 +15,8 @@ pub type Dimension = usize; // Sparse vector implementation not supported yet. 
Refer lib/sparse/src/common/sparse_vector.rs pub type DenseVector = Vec; +pub type Magic = [u8; 4]; + pub enum StoredVector { Dense(DenseVector), } @@ -45,7 +47,7 @@ pub struct IndexedVector { pub vector: DenseVector, } -#[derive(Debug, Deserialize, Copy, Clone)] +#[derive(Debug, Serialize, Deserialize, Copy, Clone)] pub enum Similarity { Euclidean, Manhattan, diff --git a/crates/grpc/src/error.rs b/crates/grpc/src/error.rs index ce8fd38..c8bd1fd 100644 --- a/crates/grpc/src/error.rs +++ b/crates/grpc/src/error.rs @@ -120,6 +120,21 @@ impl From for GrpcError { StorageError::Deserialization { id, source: _ } => GrpcError::Internal { message: format!("failed to deserialize point {}", id), }, + StorageError::RocksDbInitialization {} => GrpcError::Internal { + message: "failed to initialize storage".to_string(), + }, + StorageError::RocksDbCheckpointMsg { msg } => GrpcError::Internal { + message: format!("checkpoint error: {}", msg), + }, + StorageError::RocksDbCheckpointIo { msg, source: _ } => GrpcError::Internal { + message: format!("checkpoint io error: {}", msg), + }, + StorageError::RocksDbCheckpoint { source: _ } => GrpcError::Internal { + message: "checkpoint error".to_string(), + }, + StorageError::RocksDbFlush { source: _ } => GrpcError::Internal { + message: "flush error".to_string(), + }, } } } diff --git a/crates/http/src/handler.rs b/crates/http/src/handler.rs index ee49e46..f86599d 100644 --- a/crates/http/src/handler.rs +++ b/crates/http/src/handler.rs @@ -126,7 +126,12 @@ fn api_error_to_response(err: &ApiError) -> (StatusCode, String) { | StorageError::RocksDbDelete { .. } | StorageError::RocksDbIteration { .. } | StorageError::Serialization { .. } - | StorageError::Deserialization { .. } => { + | StorageError::Deserialization { .. } + | StorageError::RocksDbCheckpoint { .. } + | StorageError::RocksDbFlush { .. } + | StorageError::RocksDbInitialization { .. } + | StorageError::RocksDbCheckpointMsg { .. } + | StorageError::RocksDbCheckpointIo { .. 
} => { (StatusCode::INTERNAL_SERVER_ERROR, source.to_string()) } }, diff --git a/crates/index/Cargo.toml b/crates/index/Cargo.toml index fda6a82..9ce8b02 100644 --- a/crates/index/Cargo.toml +++ b/crates/index/Cargo.toml @@ -7,7 +7,10 @@ edition.workspace = true license.workspace = true [dependencies] +bincode.workspace = true defs.workspace = true rand.workspace = true +serde.workspace = true snafu.workspace = true +storage.workspace = true uuid.workspace = true diff --git a/crates/index/src/flat.rs b/crates/index/src/flat.rs deleted file mode 100644 index 80b46da..0000000 --- a/crates/index/src/flat.rs +++ /dev/null @@ -1,274 +0,0 @@ -use defs::{DenseVector, DistanceOrderedVector, IndexedVector, PointId, Similarity}; - -use crate::{IndexError, Result, VectorIndex, distance}; - -pub struct FlatIndex { - index: Vec, -} - -impl FlatIndex { - pub fn new() -> Self { - Self { index: Vec::new() } - } - - pub fn build(vectors: Vec) -> Self { - FlatIndex { index: vectors } - } -} - -impl Default for FlatIndex { - fn default() -> Self { - Self::new() - } -} - -impl VectorIndex for FlatIndex { - fn insert(&mut self, vector: IndexedVector) -> Result<()> { - self.index.push(vector); - Ok(()) - } - - fn delete(&mut self, point_id: PointId) -> Result { - if let Some(pos) = self.index.iter().position(|vector| vector.id == point_id) { - self.index.remove(pos); - Ok(true) - } else { - Ok(false) - } - } - - fn search( - &self, - query_vector: DenseVector, - similarity: Similarity, - k: usize, - ) -> Result> { - // Validate search limit - if k == 0 { - return Err(IndexError::InvalidSearchLimit { limit: k }); - } - let scores = self - .index - .iter() - .map(|point| DistanceOrderedVector { - distance: distance(&point.vector, &query_vector, similarity), - query_vector: &query_vector, - point_id: Some(point.id), - }) - .collect::>(); - - // select k smallest elements in scores using a max heap - let mut heap = std::collections::BinaryHeap::::new(); - for score in scores { - if 
heap.len() < k { - heap.push(score); - } else if score < *heap.peek().unwrap() { - heap.pop(); - heap.push(score); - } - } - Ok(heap - .into_sorted_vec() - .into_iter() - .map(|v| v.point_id.unwrap()) - .collect()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use uuid::Uuid; - - #[test] - fn test_flat_index_new() { - let index = FlatIndex::new(); - assert_eq!(index.index.len(), 0); - } - - #[test] - fn test_flat_index_build() { - let vectors = vec![ - IndexedVector { - id: Uuid::new_v4(), - vector: vec![1.0, 2.0, 3.0], - }, - IndexedVector { - id: Uuid::new_v4(), - vector: vec![4.0, 5.0, 6.0], - }, - ]; - let index = FlatIndex::build(vectors.clone()); - assert_eq!(index.index, vectors); - } - - #[test] - fn test_insert() { - let mut index = FlatIndex::new(); - let vector = IndexedVector { - id: Uuid::new_v4(), - vector: vec![1.0, 2.0, 3.0], - }; - - assert!(index.insert(vector.clone()).is_ok()); - assert_eq!(index.index.len(), 1); - assert_eq!(index.index[0], vector); - } - - #[test] - fn test_delete_existing() { - let mut index = FlatIndex::new(); - let existing_id = Uuid::new_v4(); - let vector = IndexedVector { - id: existing_id, - vector: vec![1.0, 2.0, 3.0], - }; - index.insert(vector).unwrap(); - - let result = index.delete(existing_id).unwrap(); - assert!(result); - assert_eq!(index.index.len(), 0); - } - - #[test] - fn test_delete_non_existing() { - let mut index = FlatIndex::new(); - let vector = IndexedVector { - id: Uuid::new_v4(), - vector: vec![1.0, 2.0, 3.0], - }; - index.insert(vector).unwrap(); - - let result = index.delete(Uuid::new_v4()).unwrap(); - assert!(!result); - assert_eq!(index.index.len(), 1); - } - - #[test] - fn test_search_euclidean() { - let mut index = FlatIndex::new(); - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - index - .insert(IndexedVector { - id: id1, - vector: vec![1.0, 1.0], - }) - .unwrap(); - index - .insert(IndexedVector { - id: id2, - vector: vec![2.0, 2.0], - }) - 
.unwrap(); - index - .insert(IndexedVector { - id: id3, - vector: vec![10.0, 10.0], - }) - .unwrap(); - - let results = index - .search(vec![0.0, 0.0], Similarity::Euclidean, 2) - .unwrap(); - assert_eq!(results, vec![id1, id2]); - } - - #[test] - fn test_search_cosine() { - let mut index = FlatIndex::new(); - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - index - .insert(IndexedVector { - id: id1, - vector: vec![1.0, 0.0], - }) - .unwrap(); - index - .insert(IndexedVector { - id: id2, - vector: vec![0.5, 0.5], - }) - .unwrap(); - index - .insert(IndexedVector { - id: id3, - vector: vec![0.0, 1.0], - }) - .unwrap(); - - let results = index.search(vec![1.0, 1.0], Similarity::Cosine, 2).unwrap(); - assert_eq!(results, vec![id2, id1]); - } - - #[test] - fn test_search_manhattan() { - let mut index = FlatIndex::new(); - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - index - .insert(IndexedVector { - id: id1, - vector: vec![1.0, 1.0], - }) - .unwrap(); - index - .insert(IndexedVector { - id: id2, - vector: vec![2.0, 2.0], - }) - .unwrap(); - index - .insert(IndexedVector { - id: id3, - vector: vec![5.0, 5.0], - }) - .unwrap(); - - let results = index - .search(vec![0.0, 0.0], Similarity::Manhattan, 2) - .unwrap(); - assert_eq!(results, vec![id1, id2]); - } - - #[test] - fn test_search_hamming() { - let mut index = FlatIndex::new(); - let id1 = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - index - .insert(IndexedVector { - id: id1, - vector: vec![1.0, 0.0, 1.0, 1.0], - }) - .unwrap(); - index - .insert(IndexedVector { - id: id2, - vector: vec![1.0, 0.0, 0.0, 0.0], - }) - .unwrap(); - index - .insert(IndexedVector { - id: id3, - vector: vec![0.0, 0.0, 0.0, 0.0], - }) - .unwrap(); - - let results = index - .search(vec![1.0, 0.0, 0.0, 0.0], Similarity::Hamming, 2) - .unwrap(); - assert_eq!(results, vec![id2, id3]); - } - - #[test] - fn test_default() { - let index = 
FlatIndex::default(); - assert_eq!(index.index.len(), 0); - } -} diff --git a/crates/index/src/flat/index.rs b/crates/index/src/flat/index.rs new file mode 100644 index 0000000..927e41c --- /dev/null +++ b/crates/index/src/flat/index.rs @@ -0,0 +1,71 @@ +use crate::{IndexError, VectorIndex, distance}; +use defs::{DenseVector, DistanceOrderedVector, IndexedVector, PointId, Similarity}; + +pub struct FlatIndex { + pub index: Vec, +} + +impl FlatIndex { + pub fn new() -> Self { + Self { index: Vec::new() } + } + + pub fn build(vectors: Vec) -> Self { + FlatIndex { index: vectors } + } +} + +impl Default for FlatIndex { + fn default() -> Self { + Self::new() + } +} + +impl VectorIndex for FlatIndex { + fn insert(&mut self, vector: IndexedVector) -> Result<(), IndexError> { + self.index.push(vector); + Ok(()) + } + + fn delete(&mut self, point_id: PointId) -> Result { + if let Some(pos) = self.index.iter().position(|vector| vector.id == point_id) { + self.index.remove(pos); + Ok(true) + } else { + Ok(false) + } + } + + fn search( + &self, + query_vector: DenseVector, + similarity: Similarity, + k: usize, + ) -> Result, IndexError> { + let scores = self + .index + .iter() + .map(|point| DistanceOrderedVector { + distance: distance(&point.vector, &query_vector, similarity), + query_vector: &query_vector, + point_id: Some(point.id), + }) + .collect::>(); + + // select k smallest elements in scores using a max heap + let mut heap = std::collections::BinaryHeap::::new(); + for score in scores { + if heap.len() < k { + heap.push(score); + } else if score < *heap.peek().unwrap() { + heap.pop(); + heap.push(score); + } + } + Ok(heap + .into_sorted_vec() + .into_iter() + .map(|v| v.point_id.unwrap()) + .collect()) + } +} diff --git a/crates/index/src/flat/mod.rs b/crates/index/src/flat/mod.rs new file mode 100644 index 0000000..5e3f726 --- /dev/null +++ b/crates/index/src/flat/mod.rs @@ -0,0 +1,9 @@ +use defs::Magic; + +pub mod index; +mod serialize; + +#[cfg(test)] +mod tests; 
+ +pub const FLAT_MAGIC_BYTES: Magic = [0x00, 0x00, 0x00, 0x01]; diff --git a/crates/index/src/flat/serialize.rs b/crates/index/src/flat/serialize.rs new file mode 100644 index 0000000..d390120 --- /dev/null +++ b/crates/index/src/flat/serialize.rs @@ -0,0 +1,116 @@ +use super::FLAT_MAGIC_BYTES; +use crate::IndexType; +use crate::flat::index::FlatIndex; +use crate::{IndexSnapshot, SerializableIndex}; +use defs::{DbError, IndexedVector}; +use serde::{Deserialize, Serialize}; +use std::io::{Cursor, Read}; +use storage::StorageEngine; +use uuid::Uuid; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FlatIndexMetadata { + total_points: usize, +} + +impl FlatIndex { + pub fn deserialize( + IndexSnapshot { + index_type, + magic, + topology_b, + metadata_b, + }: &IndexSnapshot, + ) -> Result { + if index_type != &IndexType::Flat { + return Err(DbError::SerializationError( + "Invalid index type".to_string(), + )); + } + + if magic != &FLAT_MAGIC_BYTES { + return Err(DbError::SerializationError( + "Invalid magic bytes".to_string(), + )); + } + + let metadata: FlatIndexMetadata = bincode::deserialize(metadata_b).map_err(|e| { + DbError::SerializationError(format!("Failed to deserialize FlatIndex Metadata: {}", e)) + })?; + let total_points = metadata.total_points; + + let mut cursor = Cursor::new(topology_b); + let mut vectors = Vec::new(); + + for _ in 0..total_points { + let mut uuid_slice = [0u8; 16]; + cursor.read_exact(&mut uuid_slice).map_err(|e| { + DbError::SerializationError(format!( + "Failed to deserialize FlatIndex Topology: {}", + e + )) + })?; + let id = Uuid::from_bytes_le(uuid_slice); + vectors.push(IndexedVector { + id, + vector: Vec::new(), + }); + } + + Ok(FlatIndex { index: vectors }) + } +} + +impl SerializableIndex for FlatIndex { + fn serialize_topology(&self) -> Result, DbError> { + let mut buffer: Vec = Vec::new(); + for point in &self.index { + buffer.extend_from_slice(&point.id.to_bytes_le()); + } + + Ok(buffer) + } + + fn 
serialize_metadata(&self) -> Result, DbError> { + let mut buffer: Vec = Vec::new(); + let metadata = FlatIndexMetadata { + total_points: self.index.len(), + }; + + let metadata_bytes = bincode::serialize(&metadata).map_err(|e| { + DbError::SerializationError(format!("Failed to serialize FlatIndex Metadata: {}", e)) + })?; + + buffer.extend_from_slice(&metadata_bytes); + Ok(buffer) + } + + fn populate_vectors(&mut self, storage: &dyn StorageEngine) -> Result<(), DbError> { + for item in &mut self.index { + item.vector = storage + .get_vector(item.id) + .map_err(|e| { + DbError::SerializationError(format!("Could not get vector from storage: {e}")) + })? + .ok_or_else(|| { + DbError::SerializationError(format!( + "Failed to locate vector for id: {}", + item.id + )) + })?; + } + Ok(()) + } + + fn snapshot(&self) -> Result { + let topology = self.serialize_topology()?; + let metadata = self.serialize_metadata()?; + + Ok(IndexSnapshot { + metadata_b: metadata, + topology_b: topology, + magic: FLAT_MAGIC_BYTES, + index_type: IndexType::Flat, + }) + } +} diff --git a/crates/index/src/flat/tests.rs b/crates/index/src/flat/tests.rs new file mode 100644 index 0000000..6d43c3d --- /dev/null +++ b/crates/index/src/flat/tests.rs @@ -0,0 +1,238 @@ +use super::index::FlatIndex; +use crate::{SerializableIndex, VectorIndex}; +use defs::{IndexedVector, Similarity}; +use uuid::Uuid; + +#[test] +fn test_flat_index_new() { + let index = FlatIndex::new(); + assert_eq!(index.index.len(), 0); +} + +#[test] +fn test_flat_index_build() { + let vectors = vec![ + IndexedVector { + id: Uuid::new_v4(), + vector: vec![1.0, 2.0, 3.0], + }, + IndexedVector { + id: Uuid::new_v4(), + vector: vec![4.0, 5.0, 6.0], + }, + ]; + let index = FlatIndex::build(vectors.clone()); + assert_eq!(index.index, vectors); +} + +#[test] +fn test_insert() { + let mut index = FlatIndex::new(); + let vector = IndexedVector { + id: Uuid::new_v4(), + vector: vec![1.0, 2.0, 3.0], + }; + + 
assert!(index.insert(vector.clone()).is_ok()); + assert_eq!(index.index.len(), 1); + assert_eq!(index.index[0], vector); +} + +#[test] +fn test_delete_existing() { + let mut index = FlatIndex::new(); + let existing_id = Uuid::new_v4(); + let vector = IndexedVector { + id: existing_id, + vector: vec![1.0, 2.0, 3.0], + }; + index.insert(vector).unwrap(); + + let result = index.delete(existing_id).unwrap(); + assert!(result); + assert_eq!(index.index.len(), 0); +} + +#[test] +fn test_delete_non_existing() { + let mut index = FlatIndex::new(); + let vector = IndexedVector { + id: Uuid::new_v4(), + vector: vec![1.0, 2.0, 3.0], + }; + index.insert(vector).unwrap(); + + let result = index.delete(Uuid::new_v4()).unwrap(); + assert!(!result); + assert_eq!(index.index.len(), 1); +} + +#[test] +fn test_search_euclidean() { + let mut index = FlatIndex::new(); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + index + .insert(IndexedVector { + id: id1, + vector: vec![1.0, 1.0], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id2, + vector: vec![2.0, 2.0], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id3, + vector: vec![10.0, 10.0], + }) + .unwrap(); + + let results = index + .search(vec![0.0, 0.0], Similarity::Euclidean, 2) + .unwrap(); + assert_eq!(results, vec![id1, id2]); +} + +#[test] +fn test_search_cosine() { + let mut index = FlatIndex::new(); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + index + .insert(IndexedVector { + id: id1, + vector: vec![1.0, 0.0], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id2, + vector: vec![0.5, 0.5], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id3, + vector: vec![0.0, 1.0], + }) + .unwrap(); + + let results = index.search(vec![1.0, 1.0], Similarity::Cosine, 2).unwrap(); + assert_eq!(results, vec![id2, id1]); +} + +#[test] +fn test_search_manhattan() { + let mut index = FlatIndex::new(); + let id1 = Uuid::new_v4(); + 
let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + index + .insert(IndexedVector { + id: id1, + vector: vec![1.0, 1.0], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id2, + vector: vec![2.0, 2.0], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id3, + vector: vec![5.0, 5.0], + }) + .unwrap(); + + let results = index + .search(vec![0.0, 0.0], Similarity::Manhattan, 2) + .unwrap(); + assert_eq!(results, vec![id1, id2]); +} + +#[test] +fn test_search_hamming() { + let mut index = FlatIndex::new(); + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + index + .insert(IndexedVector { + id: id1, + vector: vec![1.0, 0.0, 1.0, 1.0], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id2, + vector: vec![1.0, 0.0, 0.0, 0.0], + }) + .unwrap(); + index + .insert(IndexedVector { + id: id3, + vector: vec![0.0, 0.0, 0.0, 0.0], + }) + .unwrap(); + + let results = index + .search(vec![1.0, 0.0, 0.0, 0.0], Similarity::Hamming, 2) + .unwrap(); + assert_eq!(results, vec![id2, id3]); +} + +#[test] +fn test_default() { + let index = FlatIndex::default(); + assert_eq!(index.index.len(), 0); +} + +#[test] +fn test_serialize_and_deserialize_topo() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let id4 = Uuid::new_v4(); + + let v1 = IndexedVector { + id: id1, + vector: vec![0.0, 0.0, 0.0, 0.0], + }; + let v2 = IndexedVector { + id: id2, + vector: vec![1.0, 0.0, 0.0, 0.0], + }; + let v3 = IndexedVector { + id: id3, + vector: vec![2.0, 0.0, 0.0, 0.0], + }; + let v4 = IndexedVector { + id: id4, + vector: vec![3.0, 0.0, 0.0, 0.0], + }; + + let vectors = vec![v1.clone(), v2.clone(), v3.clone(), v4.clone()]; + let mut index_before = FlatIndex::build(vectors); + index_before.insert(v4.clone()).unwrap(); + + index_before.delete(id1).unwrap(); + + let snapshot = index_before.snapshot().unwrap(); + + let idx = FlatIndex::deserialize(&snapshot).unwrap(); + + assert_eq!(idx.index.len(), 4); + 
assert!(!idx.index.iter().any(|v| v.id == id1)); + assert!(idx.index.iter().any(|v| v.id == id2)); + assert!(idx.index.iter().any(|v| v.id == id3)); + assert!(idx.index.iter().any(|v| v.id == id3)); + assert!(idx.index.iter().any(|v| v.id == id4)); +} diff --git a/crates/index/src/hnsw/index.rs b/crates/index/src/hnsw/index.rs index d676ddc..8bf91d8 100644 --- a/crates/index/src/hnsw/index.rs +++ b/crates/index/src/hnsw/index.rs @@ -19,7 +19,7 @@ pub struct HnswIndex { // Default query beam width (ef); recommended ef ≥ k at query time pub ef: usize, // In-memory vector cache owned by the index - cache: HashMap, + pub cache: HashMap, // Fixed metric for this index; used consistently in insert and search pub similarity: Similarity, } diff --git a/crates/index/src/hnsw/mod.rs b/crates/index/src/hnsw/mod.rs index dfbc7ae..0cedc17 100644 --- a/crates/index/src/hnsw/mod.rs +++ b/crates/index/src/hnsw/mod.rs @@ -3,9 +3,12 @@ pub mod index; pub mod search; +pub mod serialize; pub mod types; - +use defs::Magic; pub use index::HnswIndex; +pub const HNSW_MAGIC_BYTES: Magic = [0x02, 0x01, 0x03, 0x00]; + #[cfg(test)] mod tests; diff --git a/crates/index/src/hnsw/serialize.rs b/crates/index/src/hnsw/serialize.rs new file mode 100644 index 0000000..d05f485 --- /dev/null +++ b/crates/index/src/hnsw/serialize.rs @@ -0,0 +1,156 @@ +use std::collections::HashMap; + +use defs::{DbError, Dimension, PointId, Similarity}; +use serde::{Deserialize, Serialize}; +use storage::StorageEngine; + +use crate::{ + IndexSnapshot, IndexType, SerializableIndex, + hnsw::{ + HNSW_MAGIC_BYTES, HnswIndex, + types::{LevelGenerator, Node, PointIndexation}, + }, +}; + +#[derive(Serialize, Deserialize)] +pub struct HnswMetadataPack { + pub ef_construction: usize, + pub data_dimension: Dimension, + pub ef: usize, + pub similarity: Similarity, +} + +#[derive(Serialize, Deserialize)] +pub struct HnswIndexPack { + pub max_connections: usize, + pub max_connections_0: usize, + pub max_layer: usize, + pub 
points_by_layer: Vec>, + pub nodes: Vec, + pub entry_point: Option, + pub level_scale: f64, +} + +impl SerializableIndex for HnswIndex { + fn serialize_topology(&self) -> Result, DbError> { + let mut buffer = Vec::new(); + + let nodes: Vec = self.index.nodes.values().cloned().collect(); + let index_pack = HnswIndexPack { + max_connections: self.index.max_connections, + max_connections_0: self.index.max_connections_0, + max_layer: self.index.max_layer, + points_by_layer: self.index.points_by_layer.clone(), + nodes, + entry_point: self.index.entry_point, + level_scale: self.index.level_generator.level_scale, + }; + + let index_bytes = bincode::serialize(&index_pack) + .map_err(|e| DbError::SerializationError(e.to_string()))?; + buffer.extend(index_bytes); + + Ok(buffer) + } + + fn serialize_metadata(&self) -> Result, DbError> { + let mut buffer = Vec::new(); + let index_pack = HnswMetadataPack { + ef_construction: self.ef_construction, + data_dimension: self.data_dimension, + ef: self.ef, + similarity: self.similarity, + }; + + let metadata_bytes = bincode::serialize(&index_pack) + .map_err(|e| DbError::SerializationError(e.to_string()))?; + buffer.extend(metadata_bytes); + Ok(buffer) + } + + fn snapshot(&self) -> Result { + let topology_bytes = self.serialize_topology()?; + let metadata_bytes = self.serialize_metadata()?; + Ok(IndexSnapshot { + index_type: crate::IndexType::HNSW, + magic: HNSW_MAGIC_BYTES, + topology_b: topology_bytes, + metadata_b: metadata_bytes, + }) + } + + fn populate_vectors(&mut self, storage: &dyn StorageEngine) -> Result<(), DbError> { + // assumes index topology is restored + for id in self.index.nodes.keys() { + let vec = storage + .get_vector(*id) + .map_err(|e| { + DbError::SerializationError(format!("Could not get vector from storage: {e}")) + })? 
+ .ok_or_else(|| { + DbError::SerializationError(format!("Failed to locate vector for id: {id}")) + })?; + self.cache.insert(*id, vec); + } + Ok(()) + } +} + +impl HnswIndex { + pub fn deserialize( + IndexSnapshot { + index_type, + magic, + topology_b, + metadata_b, + }: &IndexSnapshot, + ) -> Result { + if index_type != &IndexType::HNSW { + return Err(DbError::SerializationError( + "Invalid index type".to_string(), + )); + } + + if magic != &HNSW_MAGIC_BYTES { + return Err(DbError::SerializationError( + "Invalid magic bytes".to_string(), + )); + } + + let metadata: HnswMetadataPack = bincode::deserialize(metadata_b).map_err(|e| { + DbError::SerializationError(format!("Failed to deserialize HNSW Metadata: {}", e)) + })?; + + let index_pack: HnswIndexPack = bincode::deserialize(topology_b).map_err(|e| { + DbError::SerializationError(format!("Failed to deserialize HNSW Index: {}", e)) + })?; + + let mut hnsw_index_restored = PointIndexation { + max_connections: index_pack.max_connections, + max_connections_0: index_pack.max_connections_0, + max_layer: index_pack.max_layer, + points_by_layer: index_pack.points_by_layer, + entry_point: index_pack.entry_point, + nodes: HashMap::new(), + level_generator: LevelGenerator { + level_scale: index_pack.level_scale, + }, + }; + + // restore nodes hashmap + for i in index_pack.nodes { + hnsw_index_restored.nodes.insert(i.id, i); + } + + let hnsw = HnswIndex { + ef_construction: metadata.ef_construction, + data_dimension: metadata.data_dimension, + ef: metadata.ef, + cache: HashMap::new(), + similarity: metadata.similarity, + index: hnsw_index_restored, + }; + + Ok(hnsw) + } +} diff --git a/crates/index/src/hnsw/tests.rs b/crates/index/src/hnsw/tests.rs index 3f0e4f3..605c8f7 100644 --- a/crates/index/src/hnsw/tests.rs +++ b/crates/index/src/hnsw/tests.rs @@ -1,6 +1,6 @@ use super::*; use crate::VectorIndex; -use crate::flat::FlatIndex; +use crate::flat::index::FlatIndex; use defs::{IndexedVector, Similarity}; use uuid::Uuid; 
diff --git a/crates/index/src/hnsw/types.rs b/crates/index/src/hnsw/types.rs index c16e04b..c52d4ff 100644 --- a/crates/index/src/hnsw/types.rs +++ b/crates/index/src/hnsw/types.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use defs::PointId; use rand::Rng; +use serde::{Deserialize, Serialize}; // Compact storage for layered points and adjacency used by `HnswIndex`. pub struct PointIndexation { @@ -22,6 +23,7 @@ pub struct PointIndexation { } // Node with highest level and per-level neighbor lists +#[derive(Serialize, Deserialize, Clone)] pub struct Node { pub id: PointId, // Highest level (0-based; level 0 is the base layer) diff --git a/crates/index/src/kd_tree/mod.rs b/crates/index/src/kd_tree/mod.rs index fa8a23d..4a29b61 100644 --- a/crates/index/src/kd_tree/mod.rs +++ b/crates/index/src/kd_tree/mod.rs @@ -1,6 +1,11 @@ +use defs::Magic; + pub mod helpers; pub mod index; +mod serialize; pub mod types; #[cfg(test)] mod tests; + +pub const KD_TREE_MAGIC_BYTES: Magic = [0x00, 0x01, 0x02, 0x00]; diff --git a/crates/index/src/kd_tree/serialize.rs b/crates/index/src/kd_tree/serialize.rs new file mode 100644 index 0000000..a651d10 --- /dev/null +++ b/crates/index/src/kd_tree/serialize.rs @@ -0,0 +1,212 @@ +use std::collections::HashSet; +use std::io::{Cursor, Read, Write}; + +use super::KD_TREE_MAGIC_BYTES; +use super::index::KDTree; +use super::types::KDTreeNode; +use crate::{IndexSnapshot, IndexType, SerializableIndex}; +use bincode; +use defs::{DbError, IndexedVector, PointId}; +use serde::{Deserialize, Serialize}; +use storage::StorageEngine; +use uuid::Uuid; + +#[derive(Serialize, Deserialize)] +pub struct KDTreeMetadata { + pub dim: usize, + pub total_nodes: usize, + pub deleted_count: usize, +} + +impl SerializableIndex for KDTree { + fn serialize_topology(&self) -> Result, DbError> { + let mut buffer = Vec::new(); + let mut cursor = Cursor::new(&mut buffer); + serialize_topology_recursive(&self.root, &mut cursor)?; + Ok(buffer) + } + + fn 
serialize_metadata(&self) -> Result, DbError> { + let mut buffer = Vec::new(); + let km = KDTreeMetadata { + dim: self.dim, + total_nodes: self.total_nodes, + deleted_count: self.deleted_count, + }; + let metadata_bytes = bincode::serialize(&km).map_err(|e| { + DbError::SerializationError(format!("Failed to serailize KD Tree Metadata: {}", e)) + })?; + buffer.extend_from_slice(metadata_bytes.as_slice()); + Ok(buffer) + } + + fn snapshot(&self) -> Result { + let topology_bytes = self.serialize_topology()?; + let metadata_bytes = self.serialize_metadata()?; + Ok(IndexSnapshot { + index_type: crate::IndexType::KDTree, + magic: KD_TREE_MAGIC_BYTES, + topology_b: topology_bytes, + metadata_b: metadata_bytes, + }) + } + + fn populate_vectors(&mut self, storage: &dyn StorageEngine) -> Result<(), DbError> { + populate_vectors_recursive(&mut self.root, storage)?; + Ok(()) + } +} + +const NODE_MARKER_BYTE: u8 = 1u8; +const SKIP_MARKER_BYTE: u8 = 0u8; + +const DELETED_MASK: u8 = 2u8; + +impl KDTree { + pub fn deserialize( + IndexSnapshot { + index_type, + magic, + topology_b, + metadata_b, + }: &IndexSnapshot, + ) -> Result { + if index_type != &IndexType::KDTree { + return Err(DbError::SerializationError( + "Invalid index type".to_string(), + )); + } + + if magic != &KD_TREE_MAGIC_BYTES { + return Err(DbError::SerializationError( + "Invalid magic bytes".to_string(), + )); + } + + let metadata: KDTreeMetadata = + bincode::deserialize(metadata_b.as_slice()).map_err(|e| { + DbError::SerializationError(format!( + "Failed to deserailize KD Tree Metadata: {}", + e + )) + })?; + + let mut buf = Cursor::new(topology_b); + let mut non_deleted = HashSet::new(); + let root = deserialize_topology_recursive(metadata.dim, 0, &mut buf, &mut non_deleted)?; + + Ok(KDTree { + dim: metadata.dim, + root, + point_ids: non_deleted, + total_nodes: metadata.total_nodes, + deleted_count: metadata.deleted_count, + }) + } +} + +// helper functions + +fn serialize_topology_recursive( + current_opt: 
&Option>, + buffer: &mut Cursor<&mut Vec>, +) -> Result<(), DbError> { + if let Some(current) = current_opt { + let mut marker = NODE_MARKER_BYTE; + if current.is_deleted { + marker |= DELETED_MASK; + } + buffer + .write_all(&[marker]) + .map_err(|e| DbError::SerializationError(e.to_string()))?; + + let uuid_bytes = current.indexed_vector.id.to_bytes_le(); + buffer + .write_all(&uuid_bytes) + .map_err(|e| DbError::SerializationError(e.to_string()))?; + + // serialize left subtree topology + serialize_topology_recursive(&current.left, buffer)?; + // serialize right subtree topology + serialize_topology_recursive(&current.right, buffer)?; + } else { + buffer + .write_all(&[SKIP_MARKER_BYTE]) + .map_err(|e| DbError::SerializationError(e.to_string()))?; + } + Ok(()) +} + +fn populate_vectors_recursive( + node: &mut Option>, + storage: &dyn StorageEngine, +) -> Result<(), DbError> { + if let Some(node) = node { + let vector = storage + .get_vector(node.indexed_vector.id) + .map_err(|e| { + DbError::SerializationError(format!("Could not get vector from storage: {e}")) + })? 
+ .ok_or_else(|| { + DbError::SerializationError(format!( + "Failed to locate vector for id: {}", + node.indexed_vector.id + )) + })?; + node.indexed_vector.vector = vector; + + populate_vectors_recursive(&mut node.left, storage)?; + populate_vectors_recursive(&mut node.right, storage)?; + } + Ok(()) +} + +fn deserialize_topology_recursive( + dimensions: usize, + depth: usize, + buffer: &mut Cursor<&Vec>, + non_deleted: &mut HashSet, +) -> Result>, DbError> { + let mut current_marker: [u8; 1] = [0u8; 1]; + buffer.read_exact(&mut current_marker).map_err(|e| { + DbError::SerializationError(format!("Failed to deserialize KD Topology: {}", e)) + })?; + + if current_marker[0] == SKIP_MARKER_BYTE { + return Ok(None); + } + + let mut uuid_bytes = [0u8; 16]; + buffer.read_exact(&mut uuid_bytes).map_err(|e| { + DbError::SerializationError(format!("Failed to deserialize KD Topology: {}", e)) + })?; + let uuid = Uuid::from_bytes_le(uuid_bytes); + let indexed_vector = IndexedVector { + id: uuid, + vector: Vec::new(), + }; + + let is_deleted = current_marker[0] & DELETED_MASK == DELETED_MASK; + if !is_deleted { + non_deleted.insert(uuid); + } + + // pre order deserialization + let lower_dim = (depth + 1) % dimensions; + let left_node = deserialize_topology_recursive(dimensions, lower_dim, buffer, non_deleted)?; + let right_node = deserialize_topology_recursive(dimensions, lower_dim, buffer, non_deleted)?; + + let left_size = left_node.as_ref().map_or(0, |n| n.subtree_size); + let right_size = right_node.as_ref().map_or(0, |n| n.subtree_size); + + let current_node = KDTreeNode { + indexed_vector, + left: left_node, + right: right_node, + is_deleted, + axis: depth, + subtree_size: left_size + right_size + 1, + }; + + Ok(Some(Box::new(current_node))) +} diff --git a/crates/index/src/kd_tree/tests.rs b/crates/index/src/kd_tree/tests.rs index d48b308..a5425c8 100644 --- a/crates/index/src/kd_tree/tests.rs +++ b/crates/index/src/kd_tree/tests.rs @@ -1,9 +1,11 @@ use 
super::index::KDTree; use crate::IndexError; +use crate::SerializableIndex; use crate::VectorIndex; use crate::distance; -use crate::flat::FlatIndex; +use crate::flat::index::FlatIndex; use defs::{IndexedVector, Similarity}; + use std::collections::HashSet; use uuid::Uuid; @@ -708,3 +710,33 @@ fn test_kdtree_vs_flat_euclidean_5d() { } } } + +#[test] +fn test_serialize_and_deserialize_topo() { + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + let id3 = Uuid::new_v4(); + let id4 = Uuid::new_v4(); + + let vectors = vec![ + make_vector_with_id(id1, vec![1.0, 2.0, 3.0]), + make_vector_with_id(id2, vec![4.0, 5.0, 6.0]), + make_vector_with_id(id3, vec![7.0, 8.0, 9.0]), + ]; + let mut tree_before = KDTree::build(vectors).unwrap(); + tree_before + .insert(make_vector_with_id(id4, vec![10.0, 11.0, 12.0])) + .unwrap(); + tree_before.delete(id1).unwrap(); + + let snapshot = tree_before.snapshot().unwrap(); + let tree = KDTree::deserialize(&snapshot).unwrap(); + + assert!(tree.root.is_some()); + assert_eq!(tree.dim, 3); + assert_eq!(tree.total_nodes, 4); + assert!(!tree.point_ids.contains(&id1)); + assert!(tree.point_ids.contains(&id2)); + assert!(tree.point_ids.contains(&id3)); + assert!(tree.point_ids.contains(&id3)); +} diff --git a/crates/index/src/kd_tree/types.rs b/crates/index/src/kd_tree/types.rs index aca187c..06d2234 100644 --- a/crates/index/src/kd_tree/types.rs +++ b/crates/index/src/kd_tree/types.rs @@ -1,4 +1,5 @@ use defs::{IndexedVector, OrdF32, PointId}; +use std::cmp::Ordering; // the node which will be the part of the KD Tree pub struct KDTreeNode { @@ -12,8 +13,25 @@ pub struct KDTreeNode { // The struct definition which is present in max heap while search // distance is first for correct Ord derivation (primary sort key) -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Clone, PartialEq)] pub struct Neighbor { pub distance: OrdF32, pub id: PointId, } + +impl Eq for Neighbor {} + +// Custom Ord implementation for the max-heap 
+impl Ord for Neighbor { + fn cmp(&self, other: &Self) -> Ordering { + self.distance + .partial_cmp(&other.distance) + .unwrap_or(Ordering::Equal) + } +} + +impl PartialOrd for Neighbor { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} diff --git a/crates/index/src/lib.rs b/crates/index/src/lib.rs index ba72325..13202d8 100644 --- a/crates/index/src/lib.rs +++ b/crates/index/src/lib.rs @@ -1,12 +1,15 @@ -use defs::{DenseVector, IndexedVector, PointId, Similarity}; +use defs::{DbError, DenseVector, IndexedVector, Magic, PointId, Similarity}; pub use error::{IndexError, Result}; pub mod error; + +use serde::{Deserialize, Serialize}; +use storage::StorageEngine; pub mod flat; pub mod hnsw; pub mod kd_tree; -pub trait VectorIndex: Send + Sync { +pub trait VectorIndex: Send + Sync + SerializableIndex { fn insert(&mut self, vector: IndexedVector) -> Result<()>; // Returns true if point id existed and is deleted, else returns false @@ -57,9 +60,25 @@ pub fn distance(a: &[f32], b: &[f32], dist_type: Similarity) -> f32 { } } -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] pub enum IndexType { Flat, KDTree, HNSW, } + +pub struct IndexSnapshot { + pub index_type: IndexType, + pub magic: Magic, + pub topology_b: Vec, + pub metadata_b: Vec, +} + +pub trait SerializableIndex { + fn serialize_topology(&self) -> Result, DbError>; + fn serialize_metadata(&self) -> Result, DbError>; + + fn snapshot(&self) -> Result; + + fn populate_vectors(&mut self, storage: &dyn StorageEngine) -> Result<(), DbError>; +} diff --git a/crates/snapshot/Cargo.toml b/crates/snapshot/Cargo.toml new file mode 100644 index 0000000..10328f1 --- /dev/null +++ b/crates/snapshot/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "snapshot" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +chrono.workspace = true +data-encoding = "2.9.0" +defs.workspace = true +flate2 = "1.1.5" +fs2 = 
"0.4.3" +index.workspace = true +semver = "1.0.27" +serde.workspace = true +serde_json.workspace = true +sha2 = "0.10.9" +storage.workspace = true +tar = "0.4.44" +tempfile.workspace = true +uuid.workspace = true diff --git a/crates/snapshot/README.md b/crates/snapshot/README.md new file mode 100644 index 0000000..e69de29 diff --git a/crates/snapshot/src/constants.rs b/crates/snapshot/src/constants.rs new file mode 100644 index 0000000..3dd46d3 --- /dev/null +++ b/crates/snapshot/src/constants.rs @@ -0,0 +1,5 @@ +use semver::Version; + +pub const SNAPSHOT_PARSER_VER: Version = Version::new(0, 1, 0); +pub const SMALL_ID_LEN: usize = 8; +pub const MANIFEST_FILE: &str = "manifest.json"; diff --git a/crates/snapshot/src/engine/mod.rs b/crates/snapshot/src/engine/mod.rs new file mode 100644 index 0000000..ceeec4d --- /dev/null +++ b/crates/snapshot/src/engine/mod.rs @@ -0,0 +1,128 @@ +use defs::{DbError, SnapshottableDb}; +use std::{ + collections::VecDeque, + sync::{Arc, Condvar, Mutex}, + thread::JoinHandle, +}; +mod worker; +use crate::{metadata::Metadata, registry::SnapshotRegistry}; + +pub struct SnapshotEngine { + last_k: usize, // only retain the last k snapshots on disk. 
old/stale snapshots are marked as dead on the registry + snapshot_queue: Arc>>, + db: Arc>, + registry: Arc>, + worker_ctx: WorkerContext, +} + +pub struct WorkerContext { + worker_cv: Arc, + worker_running: Arc>, + join_handle: Option>>, +} + +impl WorkerContext { + fn new() -> WorkerContext { + WorkerContext { + worker_cv: Arc::new(Condvar::new()), + worker_running: Arc::new(Mutex::new(false)), + join_handle: None, + } + } +} + +impl SnapshotEngine { + pub fn new( + last_k: usize, + db: Arc>, + registry: Arc>, + ) -> Self { + Self { + last_k, + snapshot_queue: Arc::new(Mutex::new(VecDeque::new())), + db, + registry, + worker_ctx: WorkerContext::new(), + } + } + + // take a snapshot on the callers thread + pub fn snapshot(&mut self) -> Result<(), DbError> { + Self::take_snapshot( + &mut self.db, + &mut self.registry, + &mut self.snapshot_queue, + self.last_k, + ) + } + + pub fn list_alive_snapshots(&mut self) -> Result, DbError> { + Ok(self + .snapshot_queue + .lock() + .map_err(|_| DbError::LockError)? + .iter() + .cloned() + .collect()) + } + + // helper function to take snapshot + fn take_snapshot( + db: &mut Arc>, + registry: &mut Arc>, + snapshot_queue: &mut Arc>>, + last_k: usize, + ) -> Result<(), DbError> { + let snapshot_path = db + .lock() + .map_err(|_| DbError::LockError)? + .create_snapshot( + registry + .lock() + .map_err(|_| DbError::LockError)? + .dir() + .as_path(), + ) + .map_err(|err| { + DbError::SnapshotEngineError(format!("Could not create snapshot : {:?}", err)) + })?; + let snapshot_metadata = Metadata::parse(&snapshot_path).map_err(|err| { + DbError::SnapshotEngineError(format!("Could not parse snapshot metadata: {:?}", err)) + })?; + + // add the snapshot to registry + registry + .lock() + .map_err(|_| DbError::LockError)? 
+ .add_snapshot(&snapshot_path) + .map_err(|err| { + DbError::SnapshotEngineError(format!( + "Could not add snapshot to registry: {:?}", + err + )) + })?; + + { + let mut queue = snapshot_queue.lock().map_err(|_| DbError::LockError)?; + queue.push_back(snapshot_metadata); + + while queue.len() > last_k { + let old = queue.pop_front().ok_or_else(|| { + DbError::SnapshotEngineError("Snapshot metadata queue is empty".to_string()) + })?; + registry + .lock() + .map_err(|_| DbError::LockError)? + .mark_dead(old.small_id) + .map_err(|err| { + DbError::SnapshotEngineError(format!( + "Could not mark snapshot as dead in registry: {:?}", + err + )) + })?; + } + // drop queue lock + } + Ok(()) + } +} diff --git a/crates/snapshot/src/engine/worker.rs b/crates/snapshot/src/engine/worker.rs new file mode 100644 index 0000000..73fe4ad --- /dev/null +++ b/crates/snapshot/src/engine/worker.rs @@ -0,0 +1,144 @@ +use std::{ + collections::VecDeque, + sync::{Arc, Condvar, Mutex}, + time::Duration, +}; + +use defs::{DbError, SnapshottableDb}; + +use crate::{engine::SnapshotEngine, metadata::Metadata, registry::SnapshotRegistry}; + +impl SnapshotEngine { + pub fn stop_worker(&mut self) -> Result<(), DbError> { + { + let mut worker_running = self + .worker_ctx + .worker_running + .lock() + .map_err(|_| DbError::LockError)?; + if !*worker_running { + return Err(DbError::SnapshotEngineError( + "Worker thread not running".to_string(), + )); + } + *worker_running = false; + } + self.worker_ctx.worker_cv.notify_one(); + + if let Some(handle) = self.worker_ctx.join_handle.take() { + handle + .join() + .map_err(|_| { + DbError::SnapshotEngineError("Could not join worker thread".to_string()) + })? 
+ .map_err(|e| { + DbError::SnapshotEngineError(format!("Worker thread errored: {:?}", e)) + })?; + } + Ok(()) + } + + pub fn is_worker_alive(&self) -> Result { + { + let worker_running = self + .worker_ctx + .worker_running + .lock() + .map_err(|_| DbError::LockError)?; + if !*worker_running { + return Ok(false); + } + } + + match &self.worker_ctx.join_handle { + Some(handle) => Ok(!handle.is_finished()), + None => Ok(false), + } + } + + // notify the worker thread to take a snapshot now + pub fn worker_snapshot(&mut self) -> Result<(), DbError> { + // acquire lock for worker_running + let worker_running = self + .worker_ctx + .worker_running + .lock() + .map_err(|_| DbError::LockError)?; + if !*worker_running { + return Err(DbError::SnapshotEngineError( + "Worker thread not running".to_string(), + )); + } + self.worker_ctx.worker_cv.notify_one(); + Ok(()) + } + + pub fn start_worker(&mut self, interval: i64) -> Result<(), DbError> { + // acquire lock for worker_running + { + let mut worker_running = self + .worker_ctx + .worker_running + .lock() + .map_err(|_| DbError::LockError)?; + if *worker_running { + return Err(DbError::SnapshotEngineError( + "Worker thread already running".to_string(), + )); + } + *worker_running = true; + } + + let worker_running_clone = Arc::clone(&self.worker_ctx.worker_running); + let db_clone = Arc::clone(&self.db); + let registry_clone = Arc::clone(&self.registry); + let worker_cv_clone = Arc::clone(&self.worker_ctx.worker_cv); + let snapshot_queue_clone = Arc::clone(&self.snapshot_queue); + let last_k_clone = self.last_k; + + let dur_interval = Duration::from_secs(interval as u64); + self.worker_ctx.join_handle = Some(std::thread::spawn(move || -> Result<(), DbError> { + Self::worker( + dur_interval, + last_k_clone, + worker_running_clone, + db_clone, + registry_clone, + worker_cv_clone, + snapshot_queue_clone, + ) + })); + Ok(()) + } + + // TODO: fix sync issues if any (i dont think there are any) + fn worker( + interval: Duration, 
+ last_k: usize, + worker_running: Arc>, + mut db: Arc>, + mut registry: Arc>, + worker_cv: Arc, + mut snapshot_queue: Arc>>, + ) -> Result<(), DbError> { + loop { + { + // acquire the lock and exit if its false + let worker_running = worker_running.lock().map_err(|_| DbError::LockError)?; + if !*worker_running { + break; + } + } + + Self::take_snapshot(&mut db, &mut registry, &mut snapshot_queue, last_k)?; + + let worker_running = worker_running.lock().map_err(|_| DbError::LockError)?; + let _ = worker_cv + .wait_timeout(worker_running, interval) + .map_err(|err| { + DbError::SnapshotEngineError(format!("Failed to wait on worker cv : {}", err)) + })?; + } + Ok(()) + } +} diff --git a/crates/snapshot/src/lib.rs b/crates/snapshot/src/lib.rs new file mode 100644 index 0000000..51b26fc --- /dev/null +++ b/crates/snapshot/src/lib.rs @@ -0,0 +1,304 @@ +pub mod constants; +pub mod engine; +pub mod manifest; +pub mod metadata; +pub mod registry; +mod util; + +use crate::{ + constants::{MANIFEST_FILE, SNAPSHOT_PARSER_VER}, + manifest::Manifest, + util::{compress_archive, save_index_metadata, save_topology}, +}; + +use chrono::{DateTime, Local}; +use defs::DbError; +use flate2::read::GzDecoder; +use index::{ + IndexSnapshot, IndexType, VectorIndex, flat::index::FlatIndex, hnsw::HnswIndex, + kd_tree::index::KDTree, +}; +use semver::Version; +use std::{ + fs::File, + path::{Path, PathBuf}, + sync::{Arc, RwLock}, + time::SystemTime, +}; +use storage::{ + StorageEngine, StorageType, checkpoint::StorageCheckpoint, rocks_db::RocksDbStorage, +}; +use tar::Archive; +use tempfile::tempdir; +use uuid::Uuid; + +type VectorDbRestore = (Arc, Arc>, usize); + +pub struct Snapshot { + pub id: Uuid, + pub date: SystemTime, + pub sem_ver: Version, + pub index_snapshot: IndexSnapshot, + pub storage_snapshot: StorageCheckpoint, + pub dimensions: usize, +} + +impl Snapshot { + pub fn new( + index_snapshot: IndexSnapshot, + storage_snapshot: StorageCheckpoint, + dimensions: usize, + ) -> Result 
{ + let id = Uuid::new_v4(); + let date = SystemTime::now(); + + Ok(Snapshot { + id, + date, + sem_ver: SNAPSHOT_PARSER_VER, + index_snapshot, + storage_snapshot, + dimensions, + }) + } + + pub fn save(&self, dir_path: &Path) -> Result { + if !dir_path.is_dir() { + return Err(DbError::SnapshotError(format!( + "Invalid path: {}", + dir_path.display() + ))); + } + + let temp_dir = tempdir().map_err(|e| DbError::SnapshotError(e.to_string()))?; + + // save index snapshots + let index_metadata_path = save_index_metadata( + temp_dir.path(), + self.id, + &self.index_snapshot.metadata_b, + &self.index_snapshot.magic, + )?; + + let topology_path = save_topology( + temp_dir.path(), + self.id, + &self.index_snapshot.topology_b, + &self.index_snapshot.magic, + )?; + + // take checksums + let index_metadata_checksum = util::sha256_digest(&index_metadata_path) + .map_err(|e| DbError::SnapshotError(e.to_string()))?; + let index_topo_checksum = util::sha256_digest(&topology_path) + .map_err(|e| DbError::SnapshotError(e.to_string()))?; + let storage_checkpoint_checksum = util::sha256_digest(&self.storage_snapshot.path) + .map_err(|e| DbError::SnapshotError(e.to_string()))?; + + let dt_now_local: DateTime = self.date.into(); + + // need this for manifest + let storage_checkpoint_filename = self + .storage_snapshot + .path + .file_name() + .ok_or(DbError::SnapshotError( + "Storage checkpoint was not properly made".to_string(), + ))? + .to_str() + .ok_or(DbError::SnapshotError( + "Storage checkpoint filename is not valid UTF-8".to_string(), + ))? 
+ .to_string(); + + // create manifest file + let manifest = Manifest { + id: self.id, + date: dt_now_local.timestamp(), + sem_ver: constants::SNAPSHOT_PARSER_VER.to_string(), + index_metadata_checksum, + index_topo_checksum, + storage_checkpoint_checksum, + storage_type: self.storage_snapshot.storage_type, + index_type: self.index_snapshot.index_type, + dimensions: self.dimensions, + storage_checkpoint_filename, + }; + + let manifest_path = manifest + .save(temp_dir.path()) + .map_err(|e| DbError::SnapshotError(e.to_string()))?; + + let tar_filename = format!( + "{}.tar.gz", + metadata::Metadata::new( + self.id, + self.date, + index_metadata_path.clone(), + constants::SNAPSHOT_PARSER_VER + ) + ); + let tar_gz_path = dir_path.join(tar_filename); + + compress_archive( + &tar_gz_path, + &[ + &index_metadata_path, + &topology_path, + &self.storage_snapshot.path, + &manifest_path, + ], + ) + .map_err(|e| DbError::SnapshotError(e.to_string()))?; + Ok(tar_gz_path.to_path_buf()) + } + + pub fn load(path: &Path, storage_data_path: &Path) -> Result { + let tar_gz = File::open(path) + .map_err(|e| DbError::SnapshotError(format!("Couldn't open snapshot: {}", e)))?; + + let tar = GzDecoder::new(tar_gz); + let mut archive = Archive::new(tar); + + let snapshot_filename = path.file_name().ok_or(DbError::SnapshotError( + "Invalid snapshot filename".to_string(), + ))?; + let temp_dir = std::env::temp_dir().join(snapshot_filename); + + // remove any existing data + if temp_dir.exists() && !temp_dir.is_dir() { + std::fs::remove_file(temp_dir.clone()).map_err(|e| { + DbError::SnapshotError(format!("Couldn't remove existing file: {}", e)) + })?; + } else if temp_dir.is_dir() { + std::fs::remove_dir_all(temp_dir.clone()).map_err(|e| { + DbError::SnapshotError(format!("Couldn't remove existing directory: {}", e)) + })?; + } + + std::fs::create_dir(temp_dir.clone()).map_err(|e| { + DbError::SnapshotError(format!("Couldn't create temporary directory: {}", e)) + })?; + + archive + 
.unpack(temp_dir.clone()) + .map_err(|e| DbError::SnapshotError(format!("Couldn't unpack archive: {}", e)))?; + + // read manifest and validate + let manifest_path = temp_dir.join(MANIFEST_FILE); + if !manifest_path.is_file() { + return Err(DbError::SnapshotError( + "Manifest file not found".to_string(), + )); + } + + let manifest = Manifest::load(&manifest_path) + .map_err(|e| DbError::SnapshotError(format!("Couldn't load manifest: {}", e)))?; + + if manifest.sem_ver != SNAPSHOT_PARSER_VER.to_string() { + return Err(DbError::SnapshotError( + "Incompatible snapshot version".to_string(), + )); + } + + // only rocksdb is supported for snapshots as of now + let mut storage_engine: Box = match manifest.storage_type { + StorageType::RocksDb => Box::new( + RocksDbStorage::new(storage_data_path) + .map_err(|e| DbError::StorageError(format!("Could not open storage: {e}")))?, + ), + _ => { + return Err(DbError::SnapshotError( + "Unsupported storage type".to_string(), + )); + } + }; + + let id = manifest.id; + let index_metadata_path = temp_dir.join(util::metadata_filename(&id)); + let topology_path = temp_dir.join(util::topology_filename(&id)); + let storage_checkpoint_path = temp_dir.join(manifest.storage_checkpoint_filename); + + if !index_metadata_path.exists() + || !topology_path.exists() + || !storage_checkpoint_path.exists() + { + return Err(DbError::SnapshotError(format!( + "Missing snapshot files {} , {}, {}", + index_metadata_path.display(), + topology_path.display(), + storage_checkpoint_path.display() + ))); + } + + // match checksums + if util::sha256_digest(&index_metadata_path).map_err(|_| { + DbError::SnapshotError("Could not calculate index metadata hash".to_string()) + })? != manifest.index_metadata_checksum + { + return Err(DbError::SnapshotError( + "Index metadata hash mismatch".to_string(), + )); + } + if util::sha256_digest(&topology_path) + .map_err(|_| DbError::SnapshotError("Could not calculate topology hash".to_string()))? 
+ != manifest.index_topo_checksum + { + return Err(DbError::SnapshotError("Topology hash mismatch".to_string())); + } + if util::sha256_digest(&storage_checkpoint_path).map_err(|_| { + DbError::SnapshotError("Could not calculate storage checkpoint hash".to_string()) + })? != manifest.storage_checkpoint_checksum + { + return Err(DbError::SnapshotError( + "Storage checkpoint hash mismatch".to_string(), + )); + } + + let (mgmeta, meta_bytes) = util::read_index_metadata(&index_metadata_path) + .map_err(|_| DbError::SnapshotError("Could not read metadata".to_string()))?; + let (mgtopo, topo_bytes) = util::read_index_topology(&topology_path) + .map_err(|_| DbError::SnapshotError("Could not read topology".to_string()))?; + + if mgtopo != mgmeta { + return Err(DbError::InvalidMagicBytes( + "Magic bytes don't match".to_string(), + )); + } + + // validates if manifest storage type matches that in the filename of storage checkpoint + let storage_checkpoint = StorageCheckpoint::open(storage_checkpoint_path.as_path())?; + if storage_checkpoint.storage_type != manifest.storage_type { + return Err(DbError::SnapshotError( + "Storage type mismatch from manifest and checkpoint".to_string(), + )); + } + + storage_engine + .restore_checkpoint(&storage_checkpoint) + .map_err(|e| { + DbError::StorageCheckpointError(format!("Could not restore checkpoint: {e}")) + })?; + + let index_snapshot = IndexSnapshot { + index_type: manifest.index_type, + magic: mgmeta, + metadata_b: meta_bytes, + topology_b: topo_bytes, + }; + + // dynamic dispatch based on index type + let vector_index: Arc> = match manifest.index_type { + IndexType::Flat => Arc::new(RwLock::new(FlatIndex::deserialize(&index_snapshot)?)), + IndexType::KDTree => Arc::new(RwLock::new(KDTree::deserialize(&index_snapshot)?)), + IndexType::HNSW => Arc::new(RwLock::new(HnswIndex::deserialize(&index_snapshot)?)), + }; + + vector_index + .write() + .map_err(|_| DbError::LockError)? 
+ .populate_vectors(&*storage_engine)?; + + Ok((storage_engine.into(), vector_index, manifest.dimensions)) + } +} diff --git a/crates/snapshot/src/manifest.rs b/crates/snapshot/src/manifest.rs new file mode 100644 index 0000000..0993435 --- /dev/null +++ b/crates/snapshot/src/manifest.rs @@ -0,0 +1,47 @@ +use index::IndexType; +use serde::{Deserialize, Serialize}; +use std::path::Path; +use std::{ + io::{BufReader, BufWriter, Error, Write}, + path::PathBuf, +}; +use storage::StorageType; +use uuid::Uuid; + +use crate::constants::MANIFEST_FILE; + +type UnixTimestamp = i64; + +#[derive(Serialize, Deserialize)] +pub struct Manifest { + pub id: Uuid, + pub date: UnixTimestamp, + pub sem_ver: String, + pub index_metadata_checksum: String, + pub index_topo_checksum: String, + pub storage_checkpoint_checksum: String, + pub index_type: IndexType, + pub storage_type: StorageType, + pub dimensions: usize, + pub storage_checkpoint_filename: String, +} + +impl Manifest { + pub fn save(&self, path: &Path) -> Result { + let manifest_path = path.join(MANIFEST_FILE); + + let file = std::fs::File::create(manifest_path.clone())?; + let mut writer = BufWriter::new(file); + serde_json::to_writer(&mut writer, self)?; + writer.flush()?; + + Ok(manifest_path) + } + + pub fn load(path: &Path) -> Result { + let file = std::fs::File::open(path)?; + let mut reader = BufReader::new(file); + let manifest: Manifest = serde_json::from_reader(&mut reader)?; + Ok(manifest) + } +} diff --git a/crates/snapshot/src/metadata.rs b/crates/snapshot/src/metadata.rs new file mode 100644 index 0000000..cd73185 --- /dev/null +++ b/crates/snapshot/src/metadata.rs @@ -0,0 +1,114 @@ +use crate::constants::SMALL_ID_LEN; +use chrono::DateTime; +use chrono::Local; +use defs::DbError; +use semver::Version; +use std::{fmt::Display, path::PathBuf, time::SystemTime}; +use std::{fs, path::Path}; +use uuid::Uuid; + +pub type SmallID = String; + +// Metadata is the data that can be parsed from the snapshot filename 
+#[derive(Debug, Clone)] +pub struct Metadata { + pub small_id: SmallID, + pub date: SystemTime, + pub path: PathBuf, + pub sem_ver: Version, +} + +const FILENAME_METADATA_SEPARATOR: &str = "-x"; + +impl Metadata { + pub fn new(id: Uuid, date: SystemTime, path: PathBuf, sem_ver: Version) -> Self { + Metadata { + small_id: id.to_string()[..SMALL_ID_LEN].to_string(), + date, + path, + sem_ver, + } + } + + pub fn parse(path: &Path) -> Result { + if !path.is_file() { + return Err(DbError::SnapshotError("File not found".to_string())); + } + let filename = path + .file_name() + .ok_or(DbError::SnapshotError("No filename".to_string()))? + .to_str() + .ok_or(DbError::SnapshotError( + "Invalid UTF-8 in filename".to_string(), + ))? + .strip_suffix(".tar.gz") + .ok_or(DbError::SnapshotError( + "Snapshot filename doesnt end with .tar.gz".to_string(), + ))?; + + let parts = filename + .split(FILENAME_METADATA_SEPARATOR) + .collect::>(); + + if parts.len() != 3 { + return Err(DbError::SnapshotError("Invalid filename".to_string())); + } + + let id = parts[1]; + if id.len() != SMALL_ID_LEN { + return Err(DbError::SnapshotError("Invalid UUID".to_string())); + } + + let date = chrono::DateTime::parse_from_rfc3339(parts[0]) + .map_err(|_| DbError::SnapshotError("Invalid date".to_string()))?; + let version = Version::parse(parts[2]) + .map_err(|_| DbError::SnapshotError("Invalid version".to_string()))?; + + Ok(Metadata { + small_id: id.to_string(), + date: date.into(), + path: path.to_path_buf(), + sem_ver: version, + }) + } + + pub fn snapshot_dir_metadata(path: &Path) -> Result, DbError> { + if !path.is_dir() { + return Err(DbError::SnapshotError( + "Path is not a directory".to_string(), + )); + } + + let mut metadata_vec = Vec::new(); + + for item in fs::read_dir(path).map_err(|_| { + DbError::SnapshotError(format!("Cannot read directory: {}", path.display())) + })? 
{ + let entry = item.map_err(|_| { + DbError::SnapshotError(format!("Invalid entry: {}", path.display())) + })?; + let path = entry.path(); + if path.is_file() + && let Ok(metadata) = Self::parse(&path) + { + metadata_vec.push(metadata); + } + } + Ok(metadata_vec) + } +} + +impl Display for Metadata { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let dt_now_local: DateTime = self.date.into(); + write!( + f, + "{}{}{}{}{}", + dt_now_local.to_rfc3339_opts(chrono::SecondsFormat::Secs, true), + FILENAME_METADATA_SEPARATOR, + self.small_id, + FILENAME_METADATA_SEPARATOR, + self.sem_ver + ) + } +} diff --git a/crates/snapshot/src/registry/constants.rs b/crates/snapshot/src/registry/constants.rs new file mode 100644 index 0000000..9138454 --- /dev/null +++ b/crates/snapshot/src/registry/constants.rs @@ -0,0 +1 @@ +pub const LOCAL_REGISTRY_LOCKFILE: &str = "LOCKFILE"; diff --git a/crates/snapshot/src/registry/local.rs b/crates/snapshot/src/registry/local.rs new file mode 100644 index 0000000..8f79a73 --- /dev/null +++ b/crates/snapshot/src/registry/local.rs @@ -0,0 +1,271 @@ +use std::{ + collections::HashMap, + fs, + path::{Path, PathBuf}, +}; + +use crate::registry::{INFINITY_LIMIT, NO_OFFSET, SnapshotRegistry}; +use crate::registry::{SnapshotMetaPage, constants::LOCAL_REGISTRY_LOCKFILE}; +use crate::{ + Snapshot, VectorDbRestore, + metadata::{Metadata, SmallID}, +}; +use defs::DbError; +use fs2::FileExt; + +pub struct LocalRegistry { + pub dir: PathBuf, + filename_cache: HashMap, +} + +impl LocalRegistry { + pub fn new(dir: &Path) -> Result { + fs::create_dir_all(dir).map_err(|e| DbError::SnapshotRegistryError(e.to_string()))?; + let lock_file_path = dir.join(LOCAL_REGISTRY_LOCKFILE); + let lock_file = if !lock_file_path.exists() { + fs::File::create(&lock_file_path).map_err(|e| { + DbError::SnapshotRegistryError(format!("Couldn't create LOCKFILE : {}", e)) + })? 
+ } else { + fs::OpenOptions::new() + .read(true) + .write(true) + .open(&lock_file_path) + .map_err(|e| { + DbError::SnapshotRegistryError(format!("Couldn't open LOCKFILE : {}", e)) + })? + }; + + // try to acquire lockfile + lock_file + .try_lock_exclusive() + .map_err(|_| DbError::SnapshotRegistryError("Couldn't acquire LOCKFILE".to_string()))?; + + Ok(LocalRegistry { + dir: dir.to_path_buf(), + filename_cache: HashMap::new(), + }) + } +} + +impl SnapshotRegistry for LocalRegistry { + fn add_snapshot(&mut self, snapshot_path: &Path) -> Result { + // move the snapshot file to the directory and cache its metadata + + let filename = snapshot_path + .file_name() + .ok_or(DbError::SnapshotRegistryError( + "Invalid snapshot path".to_string(), + ))?; + let final_snapshot_path = self.dir.join(filename); + + // if the snapshot is already in the managed directory then do nothing + if snapshot_path != final_snapshot_path.as_path() { + fs::rename(snapshot_path, final_snapshot_path.clone()).map_err(|e| { + DbError::SnapshotRegistryError(format!("Failed to move snapshot: {}", e)) + })?; + } + + let metadata = Metadata::parse(final_snapshot_path.as_path())?; + self.filename_cache.insert( + metadata.small_id.clone(), + filename.to_string_lossy().to_string(), + ); + Ok(metadata) + } + + fn list_snapshots(&mut self, limit: usize, offset: usize) -> Result { + let mut res = Vec::new(); + let filtered_files = fs::read_dir(self.dir.as_path()) + .map_err(|e| { + DbError::SnapshotRegistryError(format!("Cannot read local registry dir: {}", e)) + })? + .skip(offset) + .take(limit); + + for file in filtered_files { + let file = match file { + Ok(file) => file, + Err(_) => continue, + }; + let file_path = file.path(); + + if let Ok(metadata) = Metadata::parse(file_path.as_path()) { + let filename = file_path + .file_name() + .ok_or(DbError::SnapshotRegistryError( + "Could not load filename of snapshot".to_string(), + ))? 
+ .to_string_lossy(); + self.filename_cache + .insert(metadata.small_id.clone(), filename.to_string()); + + res.push(metadata); + } + } + Ok(res) + } + + fn get_latest_snapshot(&mut self) -> Result { + let mut latest_record: Option = None; + for file in fs::read_dir(self.dir.as_path()).map_err(|e| { + DbError::SnapshotRegistryError(format!("Cannot read local registry dir: {}", e)) + })? { + let file = match file { + Ok(file) => file, + Err(_) => continue, + }; + let file_path = file.path(); + + if let Ok(metadata) = Metadata::parse(file_path.as_path()) { + let filename = file_path + .file_name() + .ok_or(DbError::SnapshotRegistryError( + "Could not load filename of snapshot".to_string(), + ))? + .to_string_lossy(); + self.filename_cache + .insert(metadata.small_id.clone(), filename.to_string()); + + latest_record = match latest_record { + None => Some(metadata), + Some(existing) => { + if metadata.date > existing.date { + Some(metadata) + } else { + Some(existing) + } + } + }; + } + } + match latest_record { + Some(metadata) => Ok(metadata), + None => Err(DbError::SnapshotRegistryError( + "No snapshots found".to_string(), + )), + } + } + + fn list_alive_snapshots(&mut self) -> Result { + self.list_snapshots(INFINITY_LIMIT, NO_OFFSET) + } + + fn remove_snapshot(&mut self, small_id: SmallID) -> Result { + if let Some(filename) = self.filename_cache.get(&small_id) { + let snapshot_filepath = self.dir.join(filename); + + let metadata = Metadata::parse(snapshot_filepath.as_path())?; + fs::remove_file(snapshot_filepath.as_path()).map_err(|e| { + DbError::SnapshotRegistryError(format!("Failed to remove snapshot: {}", e)) + })?; + self.filename_cache.remove_entry(&small_id); + Ok(metadata) + } else { + for file in fs::read_dir(self.dir.as_path()).map_err(|e| { + DbError::SnapshotRegistryError(format!("Cannot read local registry dir: {}", e)) + })? 
{ + let file = match file { + Ok(file) => file, + Err(_) => continue, + }; + let file_path = file.path(); + if let Ok(metadata) = Metadata::parse(file_path.as_path()) + && metadata.small_id == small_id + { + fs::remove_file(metadata.path.as_path()).map_err(|e| { + DbError::SnapshotRegistryError(format!("Failed to remove snapshot: {}", e)) + })?; + return Ok(metadata); + } + } + Err(DbError::SnapshotRegistryError( + "Snapshot not found".to_string(), + )) + } + } + + fn get_metadata(&mut self, small_id: SmallID) -> Result { + if let Some(filename) = self.filename_cache.get(&small_id) { + let snapshot_filepath = self.dir.join(filename); + let metadata = Metadata::parse(snapshot_filepath.as_path())?; + Ok(metadata) + } else { + for file in fs::read_dir(self.dir.as_path()).map_err(|e| { + DbError::SnapshotRegistryError(format!("Cannot read local registry dir: {}", e)) + })? { + let file = match file { + Ok(file) => file, + Err(_) => continue, + }; + let file_path = file.path(); + if let Ok(metadata) = Metadata::parse(file_path.as_path()) + && metadata.small_id == small_id + { + return Ok(metadata); + } + } + Err(DbError::SnapshotRegistryError( + "Snapshot not found".to_string(), + )) + } + } + + fn mark_dead(&mut self, small_id: String) -> Result { + self.remove_snapshot(small_id) + } + + fn load( + &mut self, + small_id: String, + storage_data_path: &Path, + ) -> Result { + if let Some(filename) = self.filename_cache.get(&small_id) { + let snapshot_filepath = self.dir.join(filename); + Snapshot::load(snapshot_filepath.as_path(), storage_data_path) + } else { + for file in fs::read_dir(self.dir.as_path()).map_err(|e| { + DbError::SnapshotRegistryError(format!("Cannot read local registry dir: {}", e)) + })? 
{ + let file = match file { + Ok(file) => file, + Err(_) => continue, + }; + let file_path = file.path(); + let metadata = Metadata::parse(file_path.as_path())?; + let filename = file_path + .file_name() + .ok_or(DbError::SnapshotRegistryError( + "Could not load filename of snapshot".to_string(), + ))? + .to_string_lossy(); + self.filename_cache + .insert(metadata.small_id.clone(), filename.to_string()); + if metadata.small_id == small_id { + return Snapshot::load(file_path.as_path(), storage_data_path); + } + } + Err(DbError::SnapshotRegistryError( + "Snapshot not found".to_string(), + )) + } + } + + fn dir(&self) -> PathBuf { + self.dir.clone() + } +} + +impl Drop for LocalRegistry { + fn drop(&mut self) { + // remove exclusive lock on lockfile + let lock_file_path = self.dir.join(LOCAL_REGISTRY_LOCKFILE); + if let Ok(lock_file) = fs::OpenOptions::new() + .read(true) + .write(true) + .open(&lock_file_path) + { + let _ = lock_file.unlock(); + } + } +} diff --git a/crates/snapshot/src/registry/mod.rs b/crates/snapshot/src/registry/mod.rs new file mode 100644 index 0000000..6513d97 --- /dev/null +++ b/crates/snapshot/src/registry/mod.rs @@ -0,0 +1,32 @@ +use std::path::{Path, PathBuf}; + +use defs::DbError; +pub mod constants; +pub mod local; +use crate::{VectorDbRestore, metadata::Metadata}; + +pub type SnapshotMetaPage = Vec; + +pub const INFINITY_LIMIT: usize = 100000; +pub const NO_OFFSET: usize = 0; + +pub trait SnapshotRegistry: Send + Sync { + fn add_snapshot(&mut self, snapshot_path: &Path) -> Result; + + fn list_snapshots(&mut self, limit: usize, offset: usize) -> Result; + fn get_latest_snapshot(&mut self) -> Result; + + fn get_metadata(&mut self, small_id: String) -> Result; + fn remove_snapshot(&mut self, small_id: String) -> Result; + + fn load( + &mut self, + small_id: String, + storage_data_path: &Path, + ) -> Result; + fn dir(&self) -> PathBuf; + + // in the future this could be used to maybe move an old/stale snapshot to cold storage or to a remote 
registry + fn mark_dead(&mut self, small_id: String) -> Result; // current behaviour is to call remove_snapshot; + fn list_alive_snapshots(&mut self) -> Result; // current behaviour is to call list_snapshots; +} diff --git a/crates/snapshot/src/util.rs b/crates/snapshot/src/util.rs new file mode 100644 index 0000000..c6a7ebc --- /dev/null +++ b/crates/snapshot/src/util.rs @@ -0,0 +1,151 @@ +use data_encoding::HEXLOWER; +use sha2::{Digest, Sha256}; +use std::fs::File; +use std::io::{BufReader, Error, ErrorKind, Read}; +use std::path::PathBuf; + +use defs::{DbError, Magic}; +use flate2::{Compression, write::GzEncoder}; +use std::{io::Write, path::Path}; +use tar::Builder; +use uuid::Uuid; + +type BinFileContent = (Magic, Vec); + +#[inline] +pub fn metadata_filename(id: &Uuid) -> String { + format!("{}-index-meta.bin", id) +} + +#[inline] +pub fn topology_filename(id: &Uuid) -> String { + format!("{}-index-topo.bin", id) +} + +// source: https://stackoverflow.com/questions/69787906/how-to-hash-a-binary-file-in-rust +pub fn sha256_digest(path: &PathBuf) -> Result { + let input = File::open(path)?; + let mut reader = BufReader::new(input); + + let digest = { + let mut hasher = Sha256::new(); + let mut buffer = [0; 1024]; + loop { + let count = reader.read(&mut buffer)?; + if count == 0 { + break; + } + hasher.update(&buffer[..count]); + } + hasher.finalize() + }; + Ok(HEXLOWER.encode(digest.as_ref())) +} + +pub fn save_index_metadata( + path: &Path, + uuid: Uuid, + bytes: &[u8], + magic: &Magic, +) -> Result { + let file_name = metadata_filename(&uuid); + let metadata_file_path = path.join(file_name); + + let mut file = std::fs::File::create(metadata_file_path.clone()) + .map_err(|e| DbError::SnapshotError(format!("Could not create metadata file: {}", e)))?; + + file.write_all(magic) + .map_err(|e| DbError::SnapshotError(format!("Could not write metadata file: {}", e)))?; + file.write_all(&bytes.len().to_le_bytes()) + .map_err(|e| DbError::SnapshotError(format!("Could 
not write metadata file: {}", e)))?; + file.write_all(bytes) + .map_err(|e| DbError::SnapshotError(format!("Could not write metadata file: {}", e)))?; + + Ok(metadata_file_path) +} + +pub fn save_topology( + path: &Path, + uuid: Uuid, + bytes: &[u8], + magic: &Magic, +) -> Result { + let file_name = topology_filename(&uuid); + let topology_file_path = path.join(file_name); + + let mut file = std::fs::File::create(topology_file_path.clone()) + .map_err(|e| DbError::SnapshotError(format!("Could not create topology file: {}", e)))?; + + file.write_all(magic) + .map_err(|e| DbError::SnapshotError(format!("Could not write topology file: {}", e)))?; + file.write_all(&bytes.len().to_le_bytes()) + .map_err(|e| DbError::SnapshotError(format!("Could not write topology file: {}", e)))?; + file.write_all(bytes) + .map_err(|e| DbError::SnapshotError(format!("Could not write topology file: {}", e)))?; + + Ok(topology_file_path) +} + +pub fn compress_archive(path: &Path, files: &[&Path]) -> Result<(), Error> { + let tar_gz = File::create(path)?; + let enc = GzEncoder::new(tar_gz, Compression::default()); + let mut tar = Builder::new(enc); + + for file in files { + let rel_path = file.file_name().ok_or(Error::new( + ErrorKind::InvalidFilename, + "Could not create archive : invalid snapshot file", + ))?; + let mut f = File::open(file)?; + tar.append_file(rel_path, &mut f)?; + } + + tar.into_inner()?; + Ok(()) +} + +pub fn read_index_topology(path: &Path) -> Result { + let mut file = File::open(path) + .map_err(|e| DbError::SnapshotError(format!("Couldn't open topology file: {}", e)))?; + + let mut magic = Magic::default(); + file.read_exact(&mut magic).map_err(|e| { + DbError::SnapshotError(format!("Couldn't read magic from topology file: {}", e)) + })?; + + let mut len_bytes = [0u8; size_of::()]; + file.read_exact(&mut len_bytes).map_err(|e| { + DbError::SnapshotError(format!("Couldn't read length from topology file: {}", e)) + })?; + let len = usize::from_le_bytes(len_bytes); + + 
let mut bytes = vec![0u8; len]; + file.read_exact(&mut bytes).map_err(|e| { + DbError::SnapshotError(format!("Couldn't read bytes from topology file: {}", e)) + })?; + + Ok((magic, bytes)) +} + +pub fn read_index_metadata(path: &Path) -> Result { + let mut file = File::open(path) + .map_err(|e| DbError::SnapshotError(format!("Couldn't open metadata file: {}", e)))?; + + let mut magic = Magic::default(); + file.read_exact(&mut magic).map_err(|e| { + DbError::SnapshotError(format!("Couldn't read magic from metadata file: {}", e)) + })?; + + let mut len_bytes = [0u8; size_of::()]; + file.read_exact(&mut len_bytes).map_err(|e| { + DbError::SnapshotError(format!("Couldn't read length from metadata file: {}", e)) + })?; + + let len = usize::from_le_bytes(len_bytes); + let mut bytes = vec![0u8; len]; + file.read_exact(&mut bytes).map_err(|e| { + DbError::SnapshotError(format!("Couldn't read bytes from metadata file: {}", e)) + })?; + + Ok((magic, bytes)) +} diff --git a/crates/storage/Cargo.toml b/crates/storage/Cargo.toml index b04c0ae..9fe9322 100644 --- a/crates/storage/Cargo.toml +++ b/crates/storage/Cargo.toml @@ -9,7 +9,10 @@ license.workspace = true [dependencies] bincode.workspace = true defs.workspace = true +flate2 = "1.1.5" rocksdb.workspace = true +serde.workspace = true snafu.workspace = true +tar = "0.4.44" tempfile.workspace = true uuid.workspace = true diff --git a/crates/storage/src/checkpoint.rs b/crates/storage/src/checkpoint.rs new file mode 100644 index 0000000..09827fd --- /dev/null +++ b/crates/storage/src/checkpoint.rs @@ -0,0 +1,51 @@ +use crate::StorageType; +use crate::in_memory::INMEMORY_CHECKPOINT_FILENAME_MARKER; +use crate::rocks_db::ROCKSDB_CHECKPOINT_FILENAME_MARKER; +use defs::DbError; +use std::path::{Path, PathBuf}; + +impl StorageType { + #[inline] + pub fn checkpoint_filename_marker(&self) -> &str { + match self { + StorageType::InMemory => INMEMORY_CHECKPOINT_FILENAME_MARKER, + StorageType::RocksDb => 
ROCKSDB_CHECKPOINT_FILENAME_MARKER, + } + } +} + +pub struct StorageCheckpoint { + pub path: PathBuf, + pub storage_type: StorageType, +} + +impl StorageCheckpoint { + pub fn open(path: &Path) -> Result { + let filename = path + .file_name() + .ok_or_else(|| DbError::StorageCheckpointError("Invalid filename".to_string()))? + .to_str() + .ok_or_else(|| { + DbError::StorageCheckpointError("Invalid UTF-8 in filename".to_string()) + })? + .to_owned(); + let marker = filename + .split_once("-") + .ok_or_else(|| DbError::StorageCheckpointError("Invalid filename".to_string()))? + .0; + + let storage_type = match marker { + ROCKSDB_CHECKPOINT_FILENAME_MARKER => StorageType::RocksDb, + _ => { + return Err(DbError::StorageCheckpointError( + "Invalid storage type".to_string(), + )); + } + }; + + Ok(StorageCheckpoint { + path: path.to_path_buf(), + storage_type, + }) + } +} diff --git a/crates/storage/src/error.rs b/crates/storage/src/error.rs index ba1772b..d54f67e 100644 --- a/crates/storage/src/error.rs +++ b/crates/storage/src/error.rs @@ -10,6 +10,21 @@ pub enum StorageError { source: rocksdb::Error, }, + #[snafu(display("Failed to intialize rocksdb"))] + RocksDbInitialization {}, + + #[snafu(display("Storage checkpoint error: {}", msg))] + RocksDbCheckpointMsg { msg: String }, + + #[snafu(display("{} : {}", msg, source))] + RocksDbCheckpointIo { msg: String, source: std::io::Error }, + + #[snafu(display("Failed to open rocksdb checkpoint: {source}"))] + RocksDbCheckpoint { source: rocksdb::Error }, + + #[snafu(display("Failed to flush database: {source}"))] + RocksDbFlush { source: rocksdb::Error }, + #[snafu(display("Failed to read point {id} from storage: {source}"))] RocksDbRead { id: PointId, source: rocksdb::Error }, diff --git a/crates/storage/src/in_memory.rs b/crates/storage/src/in_memory.rs index 1f3fd36..096d9ed 100644 --- a/crates/storage/src/in_memory.rs +++ b/crates/storage/src/in_memory.rs @@ -1,6 +1,10 @@ +use crate::StorageType; use 
crate::error::StorageError; -use crate::{StorageEngine, VectorPage}; +use crate::{StorageEngine, VectorPage, checkpoint::StorageCheckpoint}; use defs::{DenseVector, Payload, PointId}; +use std::path::{Path, PathBuf}; + +pub const INMEMORY_CHECKPOINT_FILENAME_MARKER: &str = "inmemory"; pub struct MemoryStorage { // define here how MemoryStorage will be defined @@ -46,4 +50,15 @@ impl StorageEngine for MemoryStorage { ) -> Result, StorageError> { Ok(None) } + + fn checkpoint_at(&self, _path: &Path) -> Result { + Ok(StorageCheckpoint { + path: PathBuf::default(), + storage_type: StorageType::InMemory, + }) + } + + fn restore_checkpoint(&mut self, _checkpoint: &StorageCheckpoint) -> Result<(), StorageError> { + Ok(()) + } } diff --git a/crates/storage/src/lib.rs b/crates/storage/src/lib.rs index 8419687..31f8170 100644 --- a/crates/storage/src/lib.rs +++ b/crates/storage/src/lib.rs @@ -1,8 +1,9 @@ +use crate::rocks_db::RocksDbStorage; use defs::{DenseVector, Payload, PointId}; -use std::path::PathBuf; +use serde::{Deserialize, Serialize}; +use std::path::{Path, PathBuf}; use std::sync::Arc; - -use crate::rocks_db::RocksDbStorage; +pub mod checkpoint; pub type VectorPage = (Vec<(PointId, DenseVector)>, PointId); @@ -24,9 +25,12 @@ pub trait StorageEngine: Send + Sync { fn delete_point(&self, id: PointId) -> Result<()>; fn contains_point(&self, id: PointId) -> Result; fn list_vectors(&self, offset: PointId, limit: usize) -> Result>; + + fn checkpoint_at(&self, path: &Path) -> Result; + fn restore_checkpoint(&mut self, checkpoint: &checkpoint::StorageCheckpoint) -> Result<()>; } -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)] pub enum StorageType { InMemory, RocksDb, diff --git a/crates/storage/src/rocks_db.rs b/crates/storage/src/rocks_db.rs index 4c68290..6d7e4db 100644 --- a/crates/storage/src/rocks_db.rs +++ b/crates/storage/src/rocks_db.rs @@ -1,23 +1,47 @@ // Rewrite needed -use crate::error::{self, StorageError}; 
-use crate::{StorageEngine, VectorPage}; +use crate::checkpoint::StorageCheckpoint; +use crate::error::{ + self, RocksDbCheckpointIoSnafu, RocksDbCheckpointMsgSnafu, RocksDbCheckpointSnafu, + RocksDbFlushSnafu, RocksDbInitializationSnafu, StorageError, +}; +use crate::{StorageEngine, StorageType, VectorPage}; use bincode::{deserialize, serialize}; use defs::{DenseVector, Payload, Point, PointId}; -use rocksdb::{DB, Options}; -use snafu::ResultExt; -use std::path::PathBuf; +use flate2::{Compression, read::GzDecoder, write::GzEncoder}; +use rocksdb::{DB, Error, Options}; +use snafu::{OptionExt, ResultExt}; +use std::fs::File; +use std::path::{Path, PathBuf}; +use tar::{Archive, Builder}; +use tempfile::tempdir; //TODO: Implement RocksDbStorage with necessary fields and implementations //TODO: Optimize the basic design pub struct RocksDbStorage { pub path: PathBuf, - pub db: DB, + pub db: Option, } +pub enum RocksDBStorageError { + RocksDBError(Error), +} + +pub const ROCKSDB_CHECKPOINT_FILENAME_MARKER: &str = "rocksdb"; + impl RocksDbStorage { // Creates new db or switches to existing db pub fn new(path: impl Into) -> Result { + let converted_path = path.into(); + let db = Self::initialize_db(&converted_path)?; + + Ok(RocksDbStorage { + path: converted_path, + db: Some(db), + }) + } + + fn initialize_db(path: &Path) -> Result { // Initialize a db at the given location let mut options = Options::default(); @@ -27,16 +51,11 @@ impl RocksDbStorage { options.create_if_missing(true); - let converted_path = path.into(); - let path_str = converted_path.display().to_string(); - - let db = DB::open(&options, converted_path.clone()) - .context(error::RocksDbOpenSnafu { path: path_str })?; - - Ok(RocksDbStorage { - path: converted_path, - db, - }) + let db = DB::open(&options, path).map_err(|e| StorageError::RocksDbOpen { + path: path.to_string_lossy().into_owned(), + source: e, + })?; + Ok(db) } pub fn get_current_path(&self) -> PathBuf { @@ -60,6 +79,8 @@ impl StorageEngine 
for RocksDbStorage { let value = serialize(&point).context(error::SerializationSnafu { id })?; self.db + .as_ref() + .context(error::RocksDbInitializationSnafu {})? .put(key.as_bytes(), value.as_slice()) .context(error::RocksDbWriteSnafu { id })?; @@ -69,9 +90,16 @@ impl StorageEngine for RocksDbStorage { fn contains_point(&self, id: PointId) -> Result { // Efficient lookup inspired from https://github.com/facebook/rocksdb/issues/11586#issuecomment-1890429488 let key = id.to_string(); - if self.db.key_may_exist(key.clone()) { + if self + .db + .as_ref() + .context(error::RocksDbInitializationSnafu {})? + .key_may_exist(key.clone()) + { let key_exist = self .db + .as_ref() + .context(error::RocksDbInitializationSnafu {})? .get(key) .context(error::RocksDbReadSnafu { id })? .is_some(); @@ -84,6 +112,8 @@ impl StorageEngine for RocksDbStorage { fn delete_point(&self, id: PointId) -> Result<(), StorageError> { let key = id.to_string(); self.db + .as_ref() + .context(error::RocksDbInitializationSnafu {})? .delete(key) .context(error::RocksDbDeleteSnafu { id })?; @@ -92,7 +122,12 @@ impl StorageEngine for RocksDbStorage { fn get_payload(&self, id: PointId) -> Result, StorageError> { let key = id.to_string(); - let Some(value_serialized) = self.db.get(key).context(error::RocksDbReadSnafu { id })? + let Some(value_serialized) = self + .db + .as_ref() + .ok_or(StorageError::RocksDbInitialization {})? + .get(key) + .context(error::RocksDbReadSnafu { id })? else { return Ok(None); // This should not return error but rather give None }; @@ -105,7 +140,12 @@ impl StorageEngine for RocksDbStorage { fn get_vector(&self, id: PointId) -> Result, StorageError> { let key = id.to_string(); - let Some(value_serialized) = self.db.get(key).context(error::RocksDbReadSnafu { id })? + let Some(value_serialized) = self + .db + .as_ref() + .ok_or(StorageError::RocksDbInitialization {})? + .get(key) + .context(error::RocksDbReadSnafu { id })? 
else { return Ok(None); // This should not return error but rather give None }; @@ -126,10 +166,14 @@ impl StorageEngine for RocksDbStorage { } let mut result = Vec::with_capacity(limit); - let iter = self.db.iterator(rocksdb::IteratorMode::From( - offset.to_string().as_bytes(), - rocksdb::Direction::Forward, - )); + let iter = self + .db + .as_ref() + .context(error::RocksDbInitializationSnafu {})? + .iterator(rocksdb::IteratorMode::From( + offset.to_string().as_bytes(), + rocksdb::Direction::Forward, + )); let mut last_id = offset; for item in iter { @@ -151,6 +195,128 @@ impl StorageEngine for RocksDbStorage { } Ok(Some((result, last_id))) } + + fn checkpoint_at(&self, path: &Path) -> Result { + // flush db first for durability + self.db + .as_ref() + .ok_or(StorageError::RocksDbInitialization {})? + .flush() + .context(RocksDbFlushSnafu)?; + + // filename is rocksdb-{uuid}.tar.gz + let checkpoint_filename = format!( + "{}-{}.tar.gz", + ROCKSDB_CHECKPOINT_FILENAME_MARKER, + uuid::Uuid::new_v4() + ); + let checkpoint_path = path.join(checkpoint_filename); + + let temp_dir_parent = tempdir().unwrap(); + let temp_dir = temp_dir_parent.path().join("checkpoint"); + + let db_ref = self + .db + .as_ref() + .ok_or(StorageError::RocksDbInitialization {})?; + + let checkpoint = + rocksdb::checkpoint::Checkpoint::new(db_ref).context(RocksDbCheckpointSnafu)?; + checkpoint + .create_checkpoint(temp_dir.clone()) + .context(RocksDbCheckpointSnafu)?; + + // compress the checkpoint into an archive + let tar_gz = + File::create(checkpoint_path.clone()).with_context(|e| RocksDbCheckpointIoSnafu { + msg: format!("Couldn't create tar.gz archive: {}", e), + })?; + let enc = GzEncoder::new(tar_gz, Compression::default()); + let mut archive = Builder::new(enc); + + archive + .append_dir_all("", temp_dir) + .with_context(|e| RocksDbCheckpointIoSnafu { + msg: format!("Couldn't append directory to archive: {}", e), + })?; + + let enc = archive + .into_inner() + .with_context(|e| 
RocksDbCheckpointIoSnafu { + msg: format!("Couldn't compress tar.gz archive: {}", e), + })?; + + enc.finish().with_context(|e| RocksDbCheckpointIoSnafu { + msg: format!("Couldn't compress tar.gz archive: {}", e), + })?; + + Ok(StorageCheckpoint { + path: checkpoint_path, + storage_type: crate::StorageType::RocksDb, + }) + } + + fn restore_checkpoint(&mut self, checkpoint: &StorageCheckpoint) -> Result<(), StorageError> { + // enforce storage type + if checkpoint.storage_type != StorageType::RocksDb { + return Err(StorageError::RocksDbCheckpointMsg { + msg: "Invalid storage type".to_string(), + }); + } + // enforce filename marker - should have been enforced during StoraegCheckpoint::open anyway + let checkpoint_filename = checkpoint + .path + .file_name() + .ok_or_else(|| StorageError::RocksDbCheckpointMsg { + msg: "Could not read checkpoint filename".to_string(), + })? + .to_str() + .ok_or_else(|| StorageError::RocksDbCheckpointMsg { + msg: "Checkpoint filename is not valid UTF-8".to_string(), + })?; + if !checkpoint_filename.ends_with(".tar.gz") + || !checkpoint_filename.starts_with(ROCKSDB_CHECKPOINT_FILENAME_MARKER) + { + return RocksDbCheckpointMsgSnafu { + msg: "Invalid file name".to_string(), + } + .fail(); + } + + let tar_gz = File::open(&checkpoint.path).with_context(|e| RocksDbCheckpointIoSnafu { + msg: format!("Couldn't open checkpoint file: {}", e), + })?; + let tar = GzDecoder::new(tar_gz); + let mut archive = Archive::new(tar); + + // remove existing stuff in data path + self.db + .as_ref() + .context(RocksDbInitializationSnafu)? 
+ .cancel_all_background_work(true); + // drop db early + self.db = None; + + std::fs::remove_dir_all(&self.path).with_context(|e| RocksDbCheckpointIoSnafu { + msg: format!("Couldn't remove existing data: {}", e), + })?; + + // create new data path + std::fs::create_dir_all(&self.path).with_context(|e| RocksDbCheckpointIoSnafu { + msg: format!("Couldn't create data path: {}", e), + })?; + + archive + .unpack(&self.path) + .with_context(|e| RocksDbCheckpointIoSnafu { + msg: format!("Couldn't unpack tar.gz archive: {}", e), + })?; + + // reinitialize db + self.db = Some(Self::initialize_db(&self.path)?); + + Ok(()) + } } #[cfg(test)] @@ -171,7 +337,7 @@ mod tests { #[test] fn test_new_rocksdb_storage() { let (db, temp_dir) = create_test_db(); - assert_eq!(db.get_current_path(), PathBuf::from(temp_dir.path())); + assert_eq!(db.get_current_path(), temp_dir.path()); } #[test] @@ -282,4 +448,37 @@ mod tests { println!("Debug format: {}", debug_string); } } + + #[test] + fn test_create_and_load_checkpoint() { + let (mut db, temp_dir) = create_test_db(); + + let id1 = Uuid::new_v4(); + let id2 = Uuid::new_v4(); + + let vector = Some(vec![0.1, 0.2, 0.3]); + let payload = Some(Payload { + content_type: ContentType::Text, + content: "Test".to_string(), + }); + + assert!( + db.insert_point(id1, vector.clone(), payload.clone()) + .is_ok() + ); + + let checkpoint = db + .checkpoint_at(temp_dir.path()) + .expect("Failed to create checkpoint"); + + assert!( + db.insert_point(id2, vector.clone(), payload.clone()) + .is_ok() + ); + + db.restore_checkpoint(&checkpoint).unwrap(); + + assert!(db.contains_point(id1).unwrap()); + assert!(!db.contains_point(id2).unwrap()); + } } diff --git a/crates/tui/src/app/events.rs b/crates/tui/src/app/events.rs index e8f373c..b2736cb 100644 --- a/crates/tui/src/app/events.rs +++ b/crates/tui/src/app/events.rs @@ -121,11 +121,9 @@ fn handle_modal_navigation(app: &mut App, key: KeyEvent) -> io::Result<()> { Some(ModalType::DatabaseList) | 
Some(ModalType::DeleteDatabase) => { app.select_previous(); } - Some(ModalType::ListVectors) => { - if app.modal.selected_index() > 0 { - app.modal.select_previous(); - app.vector_list_selected_index = app.modal.selected_index(); - } + Some(ModalType::ListVectors) if app.modal.selected_index() > 0 => { + app.modal.select_previous(); + app.vector_list_selected_index = app.modal.selected_index(); } Some(ModalType::ConfirmDeleteDatabase) => { app.modal.set_selected_index(0, 2);