diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 42d72a81..9f044761 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ name: CI merge_group: env: - toolchain: nightly-2024-11-14 + toolchain: stable CARGO_HTTP_MULTIPLEXING: false CARGO_TERM_COLOR: always CARGO_UNSTABLE_SPARSE_REGISTRY: true diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index ee37ddd9..0e8d5609 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -1,14 +1,7 @@ name: Coverage -on: [push, pull_request] - -env: - toolchain: nightly-2024-11-14 - CARGO_HTTP_MULTIPLEXING: false - CARGO_TERM_COLOR: always - CARGO_UNSTABLE_SPARSE_REGISTRY: true - CARGO_INCREMENTAL: 0 - TERM: unknown +on: + pull_request: concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -17,6 +10,9 @@ concurrency: jobs: coverage: runs-on: ubuntu-22.04 + env: + toolchain: stable + CARGO_TERM_COLOR: always services: postgres: image: postgres:13 @@ -33,6 +29,11 @@ jobs: --health-retries 5 steps: - uses: actions/checkout@v4 + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + with: + toolchain: ${{ env.toolchain }} + components: clippy - name: Install system dependencies run: | @@ -40,13 +41,14 @@ jobs: sudo apt-get install -y pkg-config libtss2-dev - name: Install cargo-llvm-cov - run: cargo install cargo-llvm-cov + uses: taiki-e/install-action@cargo-llvm-cov - name: Generate coverage - run: cargo llvm-cov --all-features --workspace --lcov --output-path lcov.info + run: cargo llvm-cov --all-features --workspace --exclude atoma-p2p-tester --codecov --output-path codecov.json - name: Upload to Codecov uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - files: lcov.info + files: codecov.json + fail_ci_if_error: true diff --git a/.github/workflows/deny.yml b/.github/workflows/deny.yml index 5c716018..8709a570 100644 --- a/.github/workflows/deny.yml +++ b/.github/workflows/deny.yml @@ 
-1,5 +1,6 @@ name: CI -on: [push, pull_request] +on: + pull_request: jobs: cargo-deny: runs-on: ubuntu-22.04 diff --git a/Cargo.lock b/Cargo.lock index 3483610f..59da0862 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,16 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "Inflector" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" +dependencies = [ + "lazy_static", + "regex", +] + [[package]] name = "addchain" version = "0.2.0" @@ -105,6 +115,30 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "allocative" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fac2ce611db8b8cee9b2aa886ca03c924e9da5e5295d0dbd0526e5d0b0710f7" +dependencies = [ + "allocative_derive", + "bumpalo", + "ctor", + "hashbrown 0.14.5", + "num-bigint 0.4.6", +] + +[[package]] +name = "allocative_derive" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe233a377643e0fc1a56421d7c90acdec45c291b30345eb9f08e8d0ddce5a4ab" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + [[package]] name = "allocator-api2" version = "0.2.21" @@ -179,6 +213,15 @@ dependencies = [ "uuid", ] +[[package]] +name = "annotate-snippets" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccaf7e9dfbb6ab22c82e473cd1a8a7bd313c19a5b7e40970f3d89ef5a5c9e81e" +dependencies = [ + "unicode-width 0.1.14", +] + [[package]] name = "anstream" version = "0.6.18" @@ -220,12 +263,12 @@ dependencies = [ [[package]] name = "anstyle-wincon" -version = "3.0.7" +version = "3.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" +checksum = "6680de5231bd6ee4c6191b8a1325daa282b415391ec9d3a37bd34f2060dc73fa" dependencies = [ "anstyle", - 
"once_cell", + "once_cell_polyfill", "windows-sys 0.59.0", ] @@ -263,15 +306,6 @@ dependencies = [ "derive_arbitrary", ] -[[package]] -name = "arc-swap" -version = "1.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" -dependencies = [ - "serde", -] - [[package]] name = "ark-bn254" version = "0.4.0" @@ -474,6 +508,15 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "ascii-canvas" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8824ecca2e851cec16968d54a01dd372ef8f95b244fb84b84e70128be347c3c6" +dependencies = [ + "term", +] + [[package]] name = "asn1-rs" version = "0.6.2" @@ -577,9 +620,9 @@ dependencies = [ [[package]] name = "async-io" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a2b323ccce0a1d90b449fd71f2a06ca7faa7c54c2751f06c9bd851fc061059" +checksum = "1237c0ae75a0f3765f58910ff9cdd0a12eeb39ab2f4c7de23262f337f0aacbb3" dependencies = [ "async-lock", "cfg-if", @@ -588,7 +631,7 @@ dependencies = [ "futures-lite", "parking", "polling", - "rustix 0.38.44", + "rustix", "slab", "tracing", "windows-sys 0.59.0", @@ -1041,29 +1084,6 @@ dependencies = [ "syn 2.0.101", ] -[[package]] -name = "axum-server" -version = "0.6.1" -source = "git+https://github.com/bmwill/axum-server.git?rev=f44323e271afdd1365fd0c8b0a4c0bbdf4956cb7#f44323e271afdd1365fd0c8b0a4c0bbdf4956cb7" -dependencies = [ - "arc-swap", - "bytes", - "futures-util", - "http 1.3.1", - "http-body", - "http-body-util", - "hyper", - "hyper-util", - "pin-project-lite", - "rustls", - "rustls-pemfile", - "rustls-pki-types", - "tokio", - "tokio-rustls", - "tower 0.4.13", - "tower-service", -] - [[package]] name = "backtrace" version = "0.3.75" @@ -1137,6 +1157,12 @@ 
version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d86b93f97252c47b41663388e6d155714a9d0c398b99f1005cbc5f978b29f445" +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + [[package]] name = "bellpepper" version = "0.4.1" @@ -1208,15 +1234,30 @@ dependencies = [ "zeroize", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec 0.6.3", +] + [[package]] name = "bit-set" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" dependencies = [ - "bit-vec", + "bit-vec 0.8.0", ] +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bit-vec" version = "0.8.0" @@ -1246,9 +1287,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.0" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" dependencies = [ "serde", ] @@ -1480,9 +1521,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.22" +version = "1.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32db95edf998450acc7881c932f94cd9b05c87b4b2599e8bab064753da4acfd1" +checksum = "d0fc897dc1e865cc67c0e05a836d9d3f1df3cbe442aa4a9473b18e12624a4951" dependencies = [ "jobserver", "libc", @@ -1501,6 +1542,12 @@ version = 
"1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" + [[package]] name = "cfg_aliases" version = "0.2.1" @@ -1625,6 +1672,21 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +[[package]] +name = "clipboard-win" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15efe7a882b08f34e38556b14f2fb3daa98769d06c7f0c1b076dfd0d983bc892" +dependencies = [ + "error-code", +] + +[[package]] +name = "cmp_any" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9b18233253483ce2f65329a24072ec414db782531bdbb7d0bbc4bd2ce6b7e21" + [[package]] name = "codespan" version = "0.11.1" @@ -1703,7 +1765,7 @@ dependencies = [ [[package]] name = "consensus-config" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "fastcrypto 0.1.8", "mysten-network", @@ -1784,9 +1846,9 @@ dependencies = [ [[package]] name = "core-foundation" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b55271e5c8c478ad3f38ad24ef34923091e0548492a266d19b3c0b4d82574c63" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" dependencies = [ "core-foundation-sys", "libc", @@ -1955,6 +2017,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "ctor" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" +dependencies = [ + "quote", + "syn 1.0.109", +] + [[package]] name = "ctr" version = "0.9.2" @@ -2137,6 +2209,17 @@ dependencies = [ "uuid", ] +[[package]] +name = "debugserver-types" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf6834a70ed14e8e4e41882df27190bea150f1f6ecf461f1033f8739cd8af4a" +dependencies = [ + "schemafy", + "serde", + "serde_json", +] + [[package]] name = "der" version = "0.6.1" @@ -2289,12 +2372,19 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" dependencies = [ + "convert_case 0.6.0", "proc-macro2", "quote", "syn 2.0.101", "unicode-xid", ] +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + [[package]] name = "digest" version = "0.9.0" @@ -2378,6 +2468,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "display_container" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a110a75c96bedec8e65823dea00a1d710288b7a369d95fd8a0f5127639466fa" +dependencies = [ + "either", + "indenter", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -2416,6 +2516,26 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" +[[package]] +name = "dupe" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed2bc011db9c93fbc2b6cdb341a53737a55bafb46dbb74cf6764fc33a2fbf9c" +dependencies = [ + "dupe_derive", +] + +[[package]] +name = "dupe_derive" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"83e195b4945e88836d826124af44fdcb262ec01ef94d44f14f4fb5103f19892a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.101", +] + [[package]] name = "dyn-clone" version = "1.0.19" @@ -2537,6 +2657,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "ena" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d248bdd43ce613d87415282f69b9bb99d947d290b10962dd6c56233312c2ad5" +dependencies = [ + "log", +] + [[package]] name = "encode_unicode" version = "1.0.0" @@ -2552,6 +2681,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "endian-type" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + [[package]] name = "enum-as-inner" version = "0.6.1" @@ -2567,7 +2702,7 @@ dependencies = [ [[package]] name = "enum-compat-util" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "serde_yaml 0.8.26", ] @@ -2599,16 +2734,31 @@ dependencies = [ "typeid", ] +[[package]] +name = "erased-serde" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c138974f9d5e7fe373eb04df7cae98833802ae4b11c24ac7039a21d5af4b26c" +dependencies = [ + "serde", +] + [[package]] name = "errno" -version = "0.3.11" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e" +checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" dependencies = [ "libc", "windows-sys 0.59.0", ] +[[package]] +name = "error-code" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" + [[package]] name = "esaxx-rs" version = "0.1.10" @@ -2631,9 +2781,9 @@ dependencies = [ [[package]] name = "ethnum" -version = "1.5.1" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0939f82868b77ef93ce3c3c3daf2b3c526b456741da5a1a4559e590965b6026b" +checksum = "ca81e6b4777c89fd810c25a4be2b1bd93ea034fbe58e6a75216a34c6b82c539b" [[package]] name = "event-listener" @@ -2848,6 +2998,17 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "fd-lock" +version = "4.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" +dependencies = [ + "cfg-if", + "rustix", + "windows-sys 0.59.0", +] + [[package]] name = "ff" version = "0.12.1" @@ -2922,6 +3083,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "flate2" version = "1.1.1" @@ -3131,17 +3298,27 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generator" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc6bd114ceda131d3b1d665eba35788690ad37f5916457286b32ab6fd3c438dd" +checksum = "d18470a76cb7f8ff746cf1f7470914f900252ec36bbc40b569d74b1258446827" dependencies = [ + 
"cc", "cfg-if", "libc", "log", "rustversion", - "windows 0.58.0", + "windows 0.61.1", ] [[package]] @@ -3365,15 +3542,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - -[[package]] -name = "hermit-abi" -version = "0.4.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" +checksum = "f154ce46856750ed433c8649605bf7ed2de3bc35fd9d2a9f30cddd873c80cb08" [[package]] name = "hex" @@ -3596,8 +3767,8 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.5" -source = "git+https://github.com/rustls/hyper-rustls?branch=main#8f9728a8cb9b5c52b0ef23169dd968e98b8ef42a" +version = "0.27.6" +source = "git+https://github.com/rustls/hyper-rustls?branch=main#e6a23710aa02b81ccf03d54801df8faace53eb68" dependencies = [ "http 1.3.1", "hyper", @@ -3609,7 +3780,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", - "webpki-roots 0.26.11", + "webpki-roots 1.0.0", ] [[package]] @@ -3643,22 +3814,28 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.11" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497bbc33a26fdd4af9ed9c70d63f61cf56a938375fbb32df34db9b1cd6d643f2" +checksum = "b1c293b6b3d21eca78250dc7dbebd6b9210ec5530e038cbfe0661b5c47ab06e8" dependencies = [ + "base64 0.22.1", "bytes", "futures-channel", + "futures-core", "futures-util", "http 1.3.1", "http-body", "hyper", + "ipnet", "libc", + "percent-encoding", "pin-project-lite", "socket2", + "system-configuration", "tokio", "tower-service", "tracing", + "windows-registry", ] [[package]] @@ -3673,7 +3850,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.61.0", + "windows-core 0.61.2", ] 
[[package]] @@ -3734,9 +3911,9 @@ checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" [[package]] name = "icu_properties" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2549ca8c7241c82f59c80ba2a6f415d931c5b58d24fb8412caa1a1f02c49139a" +checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" dependencies = [ "displaydoc", "icu_collections", @@ -3750,9 +3927,9 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8197e866e47b68f8f7d95249e172903bec06004b18b2937f1095d40a0c57de04" +checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" [[package]] name = "icu_provider" @@ -3964,6 +4141,15 @@ dependencies = [ "similar", ] +[[package]] +name = "inventory" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab08d7cd2c5897f2c949e5383ea7c7db03fb19130ffcfbf7eda795137ae3cb83" +dependencies = [ + "rustversion", +] + [[package]] name = "ipconfig" version = "0.3.2" @@ -3992,6 +4178,17 @@ dependencies = [ "serde", ] +[[package]] +name = "is-terminal" +version = "0.4.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.59.0", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -4313,6 +4510,37 @@ dependencies = [ "cpufeatures", ] +[[package]] +name = "lalrpop" +version = "0.19.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a1cbf952127589f2851ab2046af368fd20645491bb4b376f04b7f94d7a9837b" +dependencies = [ + "ascii-canvas", + "bit-set 0.5.3", + "diff", + "ena", + "is-terminal", + "itertools 0.10.5", + "lalrpop-util", + "petgraph 0.6.5", + "regex", + "regex-syntax 0.6.29", 
+ "string_cache", + "term", + "tiny-keccak", + "unicode-xid", +] + +[[package]] +name = "lalrpop-util" +version = "0.19.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3c48237b9604c5a4702de6b824e02006c3214327564636aef27c1028a8fa0ed" +dependencies = [ + "regex", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -4322,6 +4550,15 @@ dependencies = [ "spin", ] +[[package]] +name = "lcov" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ccfa6d5e585a884db65b37f38184e4364eaf74d884ac35d0a90fe9baf80b723" +dependencies = [ + "thiserror 1.0.69", +] + [[package]] name = "leb128" version = "0.2.5" @@ -4336,9 +4573,9 @@ checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libloading" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a793df0d7afeac54f95b471d3af7f0d4fb975699f972341a4b76988d49cdf0c" +checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" dependencies = [ "cfg-if", "windows-targets 0.53.0", @@ -4769,7 +5006,7 @@ dependencies = [ "thiserror 2.0.12", "tracing", "yamux 0.12.1", - "yamux 0.13.4", + "yamux 0.13.5", ] [[package]] @@ -4778,7 +5015,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "libc", ] @@ -4809,30 +5046,24 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" [[package]] name = "linkme" -version = "0.3.32" +version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22d227772b5999ddc0690e733f734f95ca05387e329c4084fe65678c51198ffe" +checksum = "a1b1703c00b2a6a70738920544aa51652532cacddfec2e162d2e29eae01e665c" dependencies = [ "linkme-impl", ] [[package]] name = "linkme-impl" -version = "0.3.32" 
+version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71a98813fa0073a317ed6a8055dcd4722a49d9b862af828ee68449adb799b6be" +checksum = "04d55ca5d5a14363da83bf3c33874b8feaa34653e760d5216d7ef9829c88001a" dependencies = [ "proc-macro2", "quote", "syn 2.0.101", ] -[[package]] -name = "linux-raw-sys" -version = "0.4.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" - [[package]] name = "linux-raw-sys" version = "0.9.4" @@ -4847,9 +5078,9 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" [[package]] name = "lock_api" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" dependencies = [ "autocfg", "scopeguard", @@ -4865,13 +5096,36 @@ dependencies = [ ] [[package]] -name = "loki-api" -version = "0.1.3" +name = "logos" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdc38a304f59a03e6efa3876766a48c70a766a93f88341c3fff4212834b8e327" +checksum = "bf8b031682c67a8e3d5446840f9573eb7fe26efe7ec8d195c9ac4c0647c502f1" dependencies = [ - "prost", - "prost-types", + "logos-derive", +] + +[[package]] +name = "logos-derive" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d849148dbaf9661a6151d1ca82b13bb4c4c128146a88d05253b38d4e2f496c" +dependencies = [ + "beef", + "fnv", + "proc-macro2", + "quote", + "regex-syntax 0.6.29", + "syn 1.0.109", +] + +[[package]] +name = "loki-api" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdc38a304f59a03e6efa3876766a48c70a766a93f88341c3fff4212834b8e327" +dependencies = [ + "prost", + "prost-types", ] [[package]] @@ -4911,6 
+5165,19 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "lsp-types" +version = "0.94.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66bfd44a06ae10647fe3f8214762e9369fd4248df1350924b4ef9e770a85ea1" +dependencies = [ + "bitflags 1.3.2", + "serde", + "serde_json", + "serde_repr", + "url", +] + [[package]] name = "lsp-types" version = "0.95.1" @@ -4926,9 +5193,9 @@ dependencies = [ [[package]] name = "macro_rules_attribute" -version = "0.2.0" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a82271f7bc033d84bbca59a3ce3e4159938cb08a9c3aebbe54d215131518a13" +checksum = "65049d7923698040cd0b1ddcced9b0eb14dd22c5f86ae59c3740eab64a676520" dependencies = [ "macro_rules_attribute-proc_macro", "paste", @@ -4936,9 +5203,15 @@ dependencies = [ [[package]] name = "macro_rules_attribute-proc_macro" -version = "0.2.0" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30" + +[[package]] +name = "maplit" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" [[package]] name = "matchers" @@ -4983,6 +5256,15 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.17" @@ -5016,13 +5298,13 @@ 
dependencies = [ [[package]] name = "mio" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ "libc", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -5068,7 +5350,7 @@ dependencies = [ [[package]] name = "move-abstract-interpreter" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "move-binary-format", "move-bytecode-verifier-meter", @@ -5077,12 +5359,12 @@ dependencies = [ [[package]] name = "move-abstract-stack" version = "0.0.1" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" [[package]] name = "move-binary-format" version = "0.0.3" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "enum-compat-util", @@ -5097,12 +5379,12 @@ dependencies = [ [[package]] name = "move-borrow-graph" version = "0.0.1" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" [[package]] name = "move-bytecode-source-map" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = 
"git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "bcs", @@ -5118,20 +5400,20 @@ dependencies = [ [[package]] name = "move-bytecode-utils" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "indexmap 2.9.0", "move-binary-format", "move-core-types", - "petgraph", + "petgraph 0.5.1", "serde-reflection", ] [[package]] name = "move-bytecode-verifier" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "move-abstract-interpreter", "move-abstract-stack", @@ -5140,13 +5422,13 @@ dependencies = [ "move-bytecode-verifier-meter", "move-core-types", "move-vm-config", - "petgraph", + "petgraph 0.5.1", ] [[package]] name = "move-bytecode-verifier-meter" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "move-binary-format", "move-core-types", @@ -5156,7 +5438,7 @@ dependencies = [ [[package]] name = "move-command-line-common" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "bcs", @@ -5177,7 +5459,7 @@ dependencies = [ [[package]] name = "move-compiler" version = "0.0.1" -source = 
"git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "bcs", @@ -5186,7 +5468,7 @@ dependencies = [ "dunce", "hex", "insta", - "lsp-types", + "lsp-types 0.95.1", "move-binary-format", "move-borrow-graph", "move-bytecode-source-map", @@ -5198,7 +5480,7 @@ dependencies = [ "move-proc-macros", "move-symbol-pool", "once_cell", - "petgraph", + "petgraph 0.5.1", "rayon", "regex", "serde", @@ -5212,7 +5494,7 @@ dependencies = [ [[package]] name = "move-core-types" version = "0.0.4" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "bcs", @@ -5236,7 +5518,7 @@ dependencies = [ [[package]] name = "move-coverage" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "bcs", @@ -5244,20 +5526,23 @@ dependencies = [ "codespan", "colored", "indexmap 2.9.0", + "lcov", "move-abstract-interpreter", "move-binary-format", "move-bytecode-source-map", "move-command-line-common", + "move-compiler", "move-core-types", "move-ir-types", - "petgraph", + "move-trace-format", + "petgraph 0.5.1", "serde", ] [[package]] name = "move-disassembler" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "bcs", @@ -5278,7 +5563,7 @@ dependencies = [ [[package]] name = "move-ir-to-bytecode" version = 
"0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "codespan-reporting", @@ -5296,7 +5581,7 @@ dependencies = [ [[package]] name = "move-ir-to-bytecode-syntax" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "hex", @@ -5309,7 +5594,7 @@ dependencies = [ [[package]] name = "move-ir-types" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "hex", "move-command-line-common", @@ -5322,7 +5607,7 @@ dependencies = [ [[package]] name = "move-proc-macros" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "enum-compat-util", "quote", @@ -5332,17 +5617,29 @@ dependencies = [ [[package]] name = "move-symbol-pool" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "once_cell", "phf", "serde", ] +[[package]] +name = "move-trace-format" +version = "0.0.1" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" +dependencies = [ + "move-binary-format", + "move-core-types", + "serde", + "serde_json", + 
"zstd", +] + [[package]] name = "move-vm-config" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "move-binary-format", "once_cell", @@ -5351,7 +5648,7 @@ dependencies = [ [[package]] name = "move-vm-profiler" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "move-vm-config", "once_cell", @@ -5363,7 +5660,7 @@ dependencies = [ [[package]] name = "move-vm-test-utils" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "move-binary-format", @@ -5377,7 +5674,7 @@ dependencies = [ [[package]] name = "move-vm-types" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "bcs", "move-binary-format", @@ -5499,7 +5796,7 @@ dependencies = [ [[package]] name = "mysten-common" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "antithesis_sdk", "anyhow", @@ -5509,13 +5806,10 @@ dependencies = [ "mysten-metrics", "once_cell", "parking_lot", - "prometheus", "rand 0.8.5", "reqwest", "snap", "sui-macros", - "sui-tls", - "sui-types", "tempfile", "tokio", 
"tracing", @@ -5524,7 +5818,7 @@ dependencies = [ [[package]] name = "mysten-metrics" version = "0.7.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "async-trait", "axum 0.8.4", @@ -5545,7 +5839,7 @@ dependencies = [ [[package]] name = "mysten-network" version = "0.2.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anemo", "anemo-tower", @@ -5684,6 +5978,21 @@ dependencies = [ "tokio", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + [[package]] name = "nix" version = "0.26.4" @@ -5695,6 +6004,18 @@ dependencies = [ "libc", ] +[[package]] +name = "nix" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" +dependencies = [ + "bitflags 2.9.1", + "cfg-if", + "cfg_aliases 0.1.1", + "libc", +] + [[package]] name = "no-std-compat" version = "0.4.1" @@ -5867,11 +6188,11 @@ dependencies = [ [[package]] name = "num_cpus" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" dependencies 
= [ - "hermit-abi 0.3.9", + "hermit-abi", "libc", ] @@ -5916,7 +6237,7 @@ name = "nvml-wrapper" version = "0.10.0" source = "git+https://github.com/atoma-network/nvml-wrapper.git?branch=main#0d416436404473bc11795dacc1c0c5a995d9aa09" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "libloading", "nvml-wrapper-sys", "static_assertions", @@ -5997,13 +6318,19 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + [[package]] name = "onig" -version = "6.4.0" +version = "6.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f" +checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.9.1", "libc", "once_cell", "onig_sys", @@ -6011,9 +6338,9 @@ dependencies = [ [[package]] name = "onig_sys" -version = "69.8.1" +version = "69.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7" +checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc" dependencies = [ "cc", "pkg-config", @@ -6027,11 +6354,11 @@ checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" [[package]] name = "openssl" -version = "0.10.72" +version = "0.10.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" +checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "cfg-if", "foreign-types", "libc", @@ -6059,9 +6386,9 @@ 
checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-sys" -version = "0.9.108" +version = "0.9.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e145e1651e858e820e4860f7b9c5e169bc1d8ce1c86043be79fa7b7634821847" +checksum = "90096e2e47630d78b7d1c20952dc621f957103f8bc2c8359ec81290d75238571" dependencies = [ "cc", "libc", @@ -6292,9 +6619,9 @@ checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" [[package]] name = "parking_lot" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" dependencies = [ "lock_api", "parking_lot_core", @@ -6302,9 +6629,9 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.10" +version = "0.9.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" dependencies = [ "cfg-if", "libc", @@ -6319,7 +6646,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77144664f6aac5f629d7efa815f5098a054beeeca6ccafee5ec453fd2b0c53f9" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "ciborium", "coset", "data-encoding", @@ -6458,10 +6785,20 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "467d164a6de56270bd7c4d070df81d07beace25012d5103ced4e9ff08d6afdb7" dependencies = [ - "fixedbitset", + "fixedbitset 0.2.0", "indexmap 1.9.3", ] +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset 
0.4.2", + "indexmap 2.9.0", +] + [[package]] name = "phf" version = "0.11.3" @@ -6587,15 +6924,15 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "polling" -version = "3.7.4" +version = "3.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a604568c3202727d1507653cb121dbd627a58684eb09a820fd746bee38b4442f" +checksum = "b53a684391ad002dd6a596ceb6c74fd004fdce75f4be2e3f615068abbea5fd50" dependencies = [ "cfg-if", "concurrent-queue", - "hermit-abi 0.4.0", + "hermit-abi", "pin-project-lite", - "rustix 0.38.44", + "rustix", "tracing", "windows-sys 0.59.0", ] @@ -6653,6 +6990,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "primeorder" version = "0.13.6" @@ -6789,7 +7132,7 @@ dependencies = [ [[package]] name = "prometheus-closure-metric" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "prometheus", @@ -6828,9 +7171,9 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14cae93065090804185d3b75f0bf93b8eeda30c7a9b4a33d3bdb3988d6229e50" dependencies = [ - "bit-set", - "bit-vec", - "bitflags 2.9.0", + "bit-set 0.8.0", + "bit-vec 0.8.0", + "bitflags 2.9.1", "lazy_static", "num-traits", "rand 0.8.5", @@ -6963,7 +7306,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "626214629cda6781b6dc1d316ba307189c85ba657213ce642d9c77670f8202c8" dependencies = [ "bytes", - "cfg_aliases", + "cfg_aliases 0.2.1", "futures-io", "pin-project-lite", "quinn-proto", @@ -7004,7 +7347,7 @@ version = "0.5.12" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "ee4e529991f949c5e25755532370b8af5d114acae52326361d68d47af64aa842" dependencies = [ - "cfg_aliases", + "cfg_aliases 0.2.1", "libc", "once_cell", "socket2", @@ -7039,6 +7382,16 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +[[package]] +name = "radix_trie" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] + [[package]] name = "rand" version = "0.8.5" @@ -7122,7 +7475,7 @@ version = "11.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", ] [[package]] @@ -7186,7 +7539,7 @@ version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", ] [[package]] @@ -7287,9 +7640,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.15" +version = "0.12.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d19c46a6fdd48bc4dab94b6103fccc55d34c67cc0ad04653aad4ea2a07cd7bbb" +checksum = "a2f8e5513d63f2e5b386eb5106dc67eaf3f84e95258e210489136b8b92ad6119" dependencies = [ "base64 0.22.1", "bytes", @@ -7316,33 +7669,31 @@ dependencies = [ "quinn", "rustls", "rustls-native-certs", - "rustls-pemfile", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", - "system-configuration", "tokio", "tokio-native-tls", "tokio-rustls", "tokio-util", "tower 0.5.2", + "tower-http 0.6.5", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", "wasm-streams", "web-sys", - 
"webpki-roots 0.26.11", - "windows-registry", + "webpki-roots 1.0.0", ] [[package]] name = "resolv-conf" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7c8f7f733062b66dc1c63f9db168ac0b97a9210e247fa90fdc9ad08f51b302" +checksum = "95325155c684b1c89f7765e30bc1c42e4a6da51ca513615660cb8a62ef9a88e3" [[package]] name = "rfc6979" @@ -7405,7 +7756,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94" dependencies = [ "base64 0.21.7", - "bitflags 2.9.0", + "bitflags 2.9.1", "serde", "serde_derive", ] @@ -7479,16 +7830,16 @@ dependencies = [ "netlink-packet-utils", "netlink-proto", "netlink-sys", - "nix", + "nix 0.26.4", "thiserror 1.0.69", "tokio", ] [[package]] name = "rust-embed" -version = "8.7.1" +version = "8.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60e425e204264b144d4c929d126d0de524b40a961686414bab5040f7465c71be" +checksum = "025908b8682a26ba8d12f6f2d66b987584a4a87bc024abc5bbc12553a8cd178a" dependencies = [ "rust-embed-impl", "rust-embed-utils", @@ -7497,9 +7848,9 @@ dependencies = [ [[package]] name = "rust-embed-impl" -version = "8.7.0" +version = "8.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bf418c9a2e3f6663ca38b8a7134cc2c2167c9d69688860e8961e3faa731702e" +checksum = "6065f1a4392b71819ec1ea1df1120673418bf386f50de1d6f54204d836d4349c" dependencies = [ "proc-macro2", "quote", @@ -7510,9 +7861,9 @@ dependencies = [ [[package]] name = "rust-embed-utils" -version = "8.7.0" +version = "8.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d55b95147fe01265d06b3955db798bdaed52e60e2211c41137701b3aba8e21" +checksum = "f6cc0c81648b20b70c491ff8cce00c1c3b223bb8ed2b5d41f0e54c6c4c0a3594" dependencies = [ "sha2 0.10.9", "walkdir", @@ -7580,29 +7931,16 @@ dependencies = [ "nom", ] -[[package]] -name = 
"rustix" -version = "0.38.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" -dependencies = [ - "bitflags 2.9.0", - "errno", - "libc", - "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", -] - [[package]] name = "rustix" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "errno", "libc", - "linux-raw-sys 0.9.4", + "linux-raw-sys", "windows-sys 0.59.0", ] @@ -7658,7 +7996,7 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19787cda76408ec5404443dc8b31795c87cd8fec49762dc75fa727740d34acc1" dependencies = [ - "core-foundation 0.10.0", + "core-foundation 0.10.1", "core-foundation-sys", "jni", "log", @@ -7702,9 +8040,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.20" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" [[package]] name = "rusty-fork" @@ -7718,6 +8056,28 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "rustyline" +version = "14.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7803e8936da37efd9b6d4478277f4b2b9bb5cdb37a113e8d63222e58da647e63" +dependencies = [ + "bitflags 2.9.1", + "cfg-if", + "clipboard-win", + "fd-lock", + "home", + "libc", + "log", + "memchr", + "nix 0.28.0", + "radix_trie", + "unicode-segmentation", + "unicode-width 0.1.14", + "utf8parse", + "windows-sys 0.52.0", +] + [[package]] name = "rw-stream-sink" version = "0.4.0" @@ -7762,6 +8122,48 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "schemafy" +version = "0.5.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8aea5ba40287dae331f2c48b64dbc8138541f5e97ee8793caa7948c1f31d86d5" +dependencies = [ + "Inflector", + "schemafy_core", + "schemafy_lib", + "serde", + "serde_derive", + "serde_json", + "serde_repr", + "syn 1.0.109", +] + +[[package]] +name = "schemafy_core" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41781ae092f4fd52c9287efb74456aea0d3b90032d2ecad272bd14dbbcb0511b" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "schemafy_lib" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e953db32579999ca98c451d80801b6f6a7ecba6127196c5387ec0774c528befa" +dependencies = [ + "Inflector", + "proc-macro2", + "quote", + "schemafy_core", + "serde", + "serde_derive", + "serde_json", + "syn 1.0.109", +] + [[package]] name = "schemars" version = "0.8.22" @@ -7858,7 +8260,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "core-foundation 0.9.4", "core-foundation-sys", "libc", @@ -7871,8 +8273,8 @@ version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" dependencies = [ - "bitflags 2.9.0", - "core-foundation 0.10.0", + "bitflags 2.9.1", + "core-foundation 0.10.1", "core-foundation-sys", "libc", "security-framework-sys", @@ -8267,7 +8669,7 @@ dependencies = [ [[package]] name = "shared-crypto" version = "0.0.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "bcs", "eyre", @@ -8386,18 +8788,18 @@ dependencies = [ [[package]] name = "snafu" 
-version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" +checksum = "320b01e011bf8d5d7a4a4a4be966d9160968935849c83b918827f6a435e7f627" dependencies = [ "snafu-derive", ] [[package]] name = "snafu-derive" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" +checksum = "1961e2ef424c1424204d3a5d6975f934f56b6d50ff5732382d84ebf460e147f7" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -8430,9 +8832,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.9" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" dependencies = [ "libc", "windows-sys 0.52.0", @@ -8597,7 +8999,7 @@ checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" dependencies = [ "atoi", "base64 0.22.1", - "bitflags 2.9.0", + "bitflags 2.9.1", "byteorder", "bytes", "crc", @@ -8639,7 +9041,7 @@ checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" dependencies = [ "atoi", "base64 0.22.1", - "bitflags 2.9.0", + "bitflags 2.9.1", "byteorder", "crc", "dotenvy", @@ -8711,12 +9113,114 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "starlark" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f53849859f05d9db705b221bd92eede93877fd426c1b4a3c3061403a5912a8f" +dependencies = [ + "allocative", + "anyhow", + "bumpalo", + "cmp_any", + "debugserver-types", + "derivative", + "derive_more 1.0.0", + "display_container", + "dupe", + "either", + "erased-serde", + "hashbrown 0.14.5", + "inventory", + "itertools 0.13.0", + "maplit", + "memoffset", + "num-bigint 
0.4.6", + "num-traits", + "once_cell", + "paste", + "ref-cast", + "regex", + "rustyline", + "serde", + "serde_json", + "starlark_derive", + "starlark_map", + "starlark_syntax", + "static_assertions", + "strsim 0.10.0", + "textwrap", + "thiserror 1.0.69", +] + +[[package]] +name = "starlark_derive" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe58bc6c8b7980a1fe4c9f8f48200c3212db42ebfe21ae6a0336385ab53f082a" +dependencies = [ + "dupe", + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "starlark_map" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92659970f120df0cc1c0bb220b33587b7a9a90e80d4eecc5c5af5debb950173d" +dependencies = [ + "allocative", + "dupe", + "equivalent", + "fxhash", + "hashbrown 0.14.5", + "serde", +] + +[[package]] +name = "starlark_syntax" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe53b3690d776aafd7cb6b9fed62d94f83280e3b87d88e3719cc0024638461b3" +dependencies = [ + "allocative", + "annotate-snippets", + "anyhow", + "derivative", + "derive_more 1.0.0", + "dupe", + "lalrpop", + "lalrpop-util", + "logos", + "lsp-types 0.94.1", + "memchr", + "num-bigint 0.4.6", + "num-traits", + "once_cell", + "starlark_map", + "thiserror 1.0.69", +] + [[package]] name = "static_assertions" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "string_cache" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared", + "precomputed-hash", +] + [[package]] name = "stringprep" version = "0.1.5" @@ -8799,7 +9303,7 @@ checksum = 
"734676eb262c623cec13c3155096e08d1f8f29adce39ba17948b18dad1e54142" [[package]] name = "sui-config" version = "0.0.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anemo", "anyhow", @@ -8818,19 +9322,22 @@ dependencies = [ "rand 0.8.5", "reqwest", "serde", + "serde_json", "serde_with", "serde_yaml 0.8.26", + "starlark", "sui-keys", "sui-protocol-config", "sui-rpc-api", "sui-types", + "thiserror 1.0.69", "tracing", ] [[package]] name = "sui-enum-compat-util" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "serde_yaml 0.8.26", ] @@ -8838,7 +9345,7 @@ dependencies = [ [[package]] name = "sui-http" version = "0.0.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "bytes", "http 1.3.1", @@ -8858,7 +9365,7 @@ dependencies = [ [[package]] name = "sui-json" version = "0.0.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "bcs", @@ -8875,7 +9382,7 @@ dependencies = [ [[package]] name = "sui-json-rpc-api" version = "0.0.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "fastcrypto 0.1.8", @@ 
-8895,7 +9402,7 @@ dependencies = [ [[package]] name = "sui-json-rpc-types" version = "0.0.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "bcs", @@ -8927,7 +9434,7 @@ dependencies = [ [[package]] name = "sui-keys" version = "0.0.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "bip32", @@ -8946,7 +9453,7 @@ dependencies = [ [[package]] name = "sui-macros" version = "0.7.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "futures", "once_cell", @@ -8956,8 +9463,8 @@ dependencies = [ [[package]] name = "sui-open-rpc" -version = "1.47.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +version = "1.49.1" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "bcs", "schemars", @@ -8969,7 +9476,7 @@ dependencies = [ [[package]] name = "sui-open-rpc-macros" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "derive-syn-parse", "itertools 0.13.0", @@ -8982,7 +9489,7 @@ dependencies = [ [[package]] name = "sui-package-resolver" version = "0.1.0" -source = 
"git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "async-trait", "bcs", @@ -9001,7 +9508,7 @@ dependencies = [ [[package]] name = "sui-proc-macros" version = "0.7.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "msim-macros", "proc-macro2", @@ -9013,7 +9520,7 @@ dependencies = [ [[package]] name = "sui-protocol-config" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "clap", "move-vm-config", @@ -9028,7 +9535,7 @@ dependencies = [ [[package]] name = "sui-protocol-config-macros" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "proc-macro2", "quote", @@ -9038,7 +9545,7 @@ dependencies = [ [[package]] name = "sui-rpc-api" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "async-stream", @@ -9064,7 +9571,6 @@ dependencies = [ "serde_with", "sui-protocol-config", "sui-sdk-types", - "sui-transaction-builder 0.0.4", "sui-types", "tap", "thiserror 1.0.69", @@ -9081,8 +9587,8 @@ dependencies = [ [[package]] name = "sui-sdk" -version = "1.47.0" -source = 
"git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +version = "1.49.1" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "async-trait", @@ -9105,7 +9611,7 @@ dependencies = [ "sui-json-rpc-api", "sui-json-rpc-types", "sui-keys", - "sui-transaction-builder 0.0.0", + "sui-transaction-builder", "sui-types", "thiserror 1.0.69", "tokio", @@ -9131,32 +9637,10 @@ dependencies = [ "winnow", ] -[[package]] -name = "sui-tls" -version = "0.0.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" -dependencies = [ - "anyhow", - "arc-swap", - "axum 0.8.4", - "axum-server", - "ed25519", - "fastcrypto 0.1.8", - "pkcs8 0.10.2", - "rcgen", - "reqwest", - "rustls", - "rustls-webpki 0.103.3", - "tokio", - "tokio-rustls", - "tower-layer", - "x509-parser 0.17.0", -] - [[package]] name = "sui-transaction-builder" version = "0.0.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anyhow", "async-trait", @@ -9170,24 +9654,10 @@ dependencies = [ "sui-types", ] -[[package]] -name = "sui-transaction-builder" -version = "0.0.4" -source = "git+https://github.com/MystenLabs/sui-rust-sdk.git?rev=83ff809bc11cbabda21b60130e1f5420170548bf#83ff809bc11cbabda21b60130e1f5420170548bf" -dependencies = [ - "base64ct", - "bcs", - "serde", - "serde_json", - "serde_with", - "sui-sdk-types", - "thiserror 2.0.12", -] - [[package]] name = "sui-types" version = "0.1.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "anemo", "anyhow", @@ 
-9215,6 +9685,7 @@ dependencies = [ "move-core-types", "move-vm-profiler", "move-vm-test-utils", + "mysten-common", "mysten-metrics", "mysten-network", "nonempty", @@ -9328,7 +9799,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "core-foundation 0.9.4", "system-configuration-sys", ] @@ -9388,10 +9859,21 @@ dependencies = [ "fastrand", "getrandom 0.3.3", "once_cell", - "rustix 1.0.7", + "rustix", "windows-sys 0.59.0", ] +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -9407,10 +9889,19 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45c6481c4829e4cc63825e62c49186a34538b7b2750b73b266581ffb612fb5ed" dependencies = [ - "rustix 1.0.7", + "rustix", "windows-sys 0.59.0", ] +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width 0.1.14", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -9630,7 +10121,7 @@ dependencies = [ [[package]] name = "tokio-rustls" version = "0.26.2" -source = "git+https://github.com/rustls/tokio-rustls?branch=main#8092a899759480b86544207c434b58ace3083346" +source = "git+https://github.com/rustls/tokio-rustls?branch=main#6a775e132632340d7f788cf1eba1f618d0d9e7b2" dependencies = [ "rustls", "tokio", @@ -9897,7 +10388,7 @@ checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" dependencies = [ "async-compression", "base64 0.21.7", - "bitflags 2.9.0", + "bitflags 2.9.1", 
"bytes", "futures-core", "futures-util", @@ -9926,10 +10417,14 @@ version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5cc2d9e086a412a451384326f521c8123a99a466b329941a9403696bff9b0da2" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "bytes", + "futures-util", "http 1.3.1", + "http-body", + "iri-string", "pin-project-lite", + "tower 0.5.2", "tower-layer", "tower-service", ] @@ -10110,7 +10605,7 @@ dependencies = [ [[package]] name = "typed-store-error" version = "0.4.0" -source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.47.0#f3e72b60708b682f1d286b8d218977c4338e39d9" +source = "git+https://github.com/mystenlabs/sui?tag=testnet-v1.49.1#3b1d6b3bd63f175b774da557f89af3619b74d783" dependencies = [ "serde", "thiserror 1.0.69", @@ -10400,13 +10895,15 @@ dependencies = [ [[package]] name = "uuid" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" +checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" dependencies = [ "getrandom 0.3.3", + "js-sys", "rand 0.9.1", "serde", + "wasm-bindgen", ] [[package]] @@ -10741,12 +11238,24 @@ dependencies = [ [[package]] name = "windows" -version = "0.58.0" +version = "0.61.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" +checksum = "c5ee8f3d025738cb02bad7868bbb5f8a6327501e870bf51f1b455b0a2454a419" dependencies = [ - "windows-core 0.58.0", - "windows-targets 0.52.6", + "windows-collections", + "windows-core 0.61.2", + "windows-future", + "windows-link", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" +dependencies = [ + "windows-core 0.61.2", ] 
[[package]] @@ -10773,46 +11282,33 @@ dependencies = [ [[package]] name = "windows-core" -version = "0.58.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" -dependencies = [ - "windows-implement 0.58.0", - "windows-interface 0.58.0", - "windows-result 0.2.0", - "windows-strings 0.1.0", - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-core" -version = "0.61.0" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4763c1de310c86d75a878046489e2e5ba02c649d185f21c67d4cf8a56d098980" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ "windows-implement 0.60.0", "windows-interface 0.59.1", "windows-link", - "windows-result 0.3.2", - "windows-strings 0.4.0", + "windows-result 0.3.4", + "windows-strings 0.4.2", ] [[package]] -name = "windows-implement" -version = "0.57.0" +name = "windows-future" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" +checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", + "windows-core 0.61.2", + "windows-link", + "windows-threading", ] [[package]] name = "windows-implement" -version = "0.58.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" +checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" dependencies = [ "proc-macro2", "quote", @@ -10841,17 +11337,6 @@ dependencies = [ "syn 2.0.101", ] -[[package]] -name = "windows-interface" -version = "0.58.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" -dependencies = [ - 
"proc-macro2", - "quote", - "syn 2.0.101", -] - [[package]] name = "windows-interface" version = "0.59.1" @@ -10869,13 +11354,23 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" +[[package]] +name = "windows-numerics" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" +dependencies = [ + "windows-core 0.61.2", + "windows-link", +] + [[package]] name = "windows-registry" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" dependencies = [ - "windows-result 0.3.2", + "windows-result 0.3.4", "windows-strings 0.3.1", "windows-targets 0.53.0", ] @@ -10891,32 +11386,13 @@ dependencies = [ [[package]] name = "windows-result" -version = "0.2.0" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" -dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-result" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ "windows-link", ] -[[package]] -name = "windows-strings" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" -dependencies = [ - "windows-result 0.2.0", - "windows-targets 0.52.6", -] - [[package]] name = "windows-strings" version = "0.3.1" @@ -10928,9 +11404,9 @@ dependencies = [ [[package]] name = "windows-strings" -version = "0.4.0" +version = "0.4.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ba9642430ee452d5a7aa78d72907ebe8cfda358e8cb7918a2050581322f97" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ "windows-link", ] @@ -11033,6 +11509,15 @@ dependencies = [ "windows_x86_64_msvc 0.53.0", ] +[[package]] +name = "windows-threading" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" +dependencies = [ + "windows-link", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.2" @@ -11238,7 +11723,7 @@ version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", ] [[package]] @@ -11373,16 +11858,16 @@ dependencies = [ [[package]] name = "yamux" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17610762a1207ee816c6fadc29220904753648aba0a9ed61c7b8336e80a559c4" +checksum = "3da1acad1c2dc53f0dde419115a38bd8221d8c3e47ae9aeceaf453266d29307e" dependencies = [ "futures", "log", "nohash-hasher", "parking_lot", "pin-project", - "rand 0.8.5", + "rand 0.9.1", "static_assertions", "web-time", ] diff --git a/Cargo.toml b/Cargo.toml index 37ce53e5..9bb327bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,8 +70,8 @@ serde_yaml = "0.9.34" serial_test = "3.1.1" sha2 = "0.10.9" sqlx = "0.8.6" -sui-keys = { git = "https://github.com/mystenlabs/sui", package = "sui-keys", tag = "testnet-v1.47.0" } -sui-sdk = { git = "https://github.com/mystenlabs/sui", package = "sui-sdk", tag = "testnet-v1.47.0" } +sui-keys = { git = "https://github.com/mystenlabs/sui", package = "sui-keys", tag = "testnet-v1.49.1" } +sui-sdk = { git = "https://github.com/mystenlabs/sui", package = "sui-sdk", tag = "testnet-v1.49.1" } sysinfo = 
"0.33.1" tempfile = "3.20.0" thiserror = "2.0.12" diff --git a/Dockerfile b/Dockerfile index cecda550..6aea2fb9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,8 +22,8 @@ RUN echo "* soft nofile 65535" >> /etc/security/limits.conf && \ echo "* soft nproc 65535" >> /etc/security/limits.conf && \ echo "* hard nproc 65535" >> /etc/security/limits.conf -# Install Rust 1.84.0 -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.84.0 \ +# Install Rust 1.87.0 +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.87.0 \ && . "$HOME/.cargo/env" # Add cargo to PATH diff --git a/atoma-bin/atoma_node.rs b/atoma-bin/atoma_node.rs index b8103256..ed234a1c 100644 --- a/atoma-bin/atoma_node.rs +++ b/atoma-bin/atoma_node.rs @@ -213,11 +213,13 @@ async fn main() -> Result<()> { let keystore = FileBasedKeystore::new(&config.sui.sui_keystore_path().into()) .context("Failed to initialize keystore")?; - let mut wallet_ctx = WalletContext::new( - &PathBuf::from(config.sui.sui_config_path()), - config.sui.request_timeout(), - config.sui.max_concurrent_requests(), - )?; + let mut wallet_ctx = WalletContext::new(&PathBuf::from(config.sui.sui_config_path()))?; + if let Some(request_timeout) = config.sui.request_timeout() { + wallet_ctx = wallet_ctx.with_request_timeout(request_timeout); + } + if let Some(max_concurrent_requests) = config.sui.max_concurrent_requests() { + wallet_ctx = wallet_ctx.with_max_concurrent_requests(max_concurrent_requests); + } let address = wallet_ctx.active_address()?; let address_index = args.address_index.unwrap_or_else(|| { wallet_ctx @@ -373,9 +375,34 @@ async fn main() -> Result<()> { keystore: Arc::new(keystore), address_index, whitelist_sui_addresses_for_fiat: config.service.whitelist_sui_addresses_for_fiat, + too_many_requests: Arc::new(DashMap::new()), + too_many_requests_timeout_ms: u128::from(config.service.too_many_requests_timeout_ms), running_num_requests: 
Arc::new(RequestCounter::new()), + memory_lower_threshold: config.service.memory_lower_threshold, + memory_upper_threshold: config.service.memory_upper_threshold, + max_num_queued_requests: config.service.max_num_queued_requests, }; + let chat_completions_service_urls = app_state + .chat_completions_service_urls + .iter() + .flat_map(|(model, urls)| { + urls.iter() + .map(|(url, job, max_number_of_running_requests)| { + ( + model.clone(), + url.clone(), + job.clone(), + *max_number_of_running_requests, + ) + }) + }) + .collect(); + atoma_service::handlers::inference_service_metrics::start_metrics_updater( + chat_completions_service_urls, + config.service.metrics_update_interval, + ); + let daemon_app_state = DaemonState { atoma_state: AtomaState::new_from_url(&config.state.database_url).await?, client, diff --git a/atoma-daemon/src/components/openapi.rs b/atoma-daemon/src/components/openapi.rs index 2f90dd6d..ab879e02 100644 --- a/atoma-daemon/src/components/openapi.rs +++ b/atoma-daemon/src/components/openapi.rs @@ -46,7 +46,7 @@ pub fn openapi_routes() -> Router { let spec_path = docs_dir.join("openapi.yml"); fs::write(&spec_path, spec).expect("Failed to write OpenAPI spec to file"); - println!("OpenAPI spec written to: {spec_path:?}"); + println!("OpenAPI spec written to: {}", spec_path.display()); } Router::new() diff --git a/atoma-p2p-tester/Dockerfile b/atoma-p2p-tester/Dockerfile index 24495bc7..63684040 100644 --- a/atoma-p2p-tester/Dockerfile +++ b/atoma-p2p-tester/Dockerfile @@ -18,8 +18,8 @@ RUN apt-get update && apt-get install -y \ ca-certificates \ && rm -rf /var/lib/apt/lists/* -# Install Rust 1.84.0 -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.84.0 \ +# Install Rust 1.87.0 +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.87.0 \ && . 
"$HOME/.cargo/env" # Add cargo to PATH diff --git a/atoma-p2p/src/service.rs b/atoma-p2p/src/service.rs index 64a43e25..5117d8be 100644 --- a/atoma-p2p/src/service.rs +++ b/atoma-p2p/src/service.rs @@ -1029,7 +1029,8 @@ impl AtomaP2pNode { return Ok(()); } // Directly deserialize SignedNodeMessage using new method - let signed_node_message = SignedNodeMessage::deserialize_with_signature(&message_data)?; + let signed_node_message = + SignedNodeMessage::deserialize_with_signature(&message_data).map_err(|e| *e)?; let signature_len = signed_node_message.signature.len(); trace!( target = "atoma-p2p", @@ -1058,7 +1059,7 @@ impl AtomaP2pNode { ); // NOTE: We should reject the message if it fails to validate // as it means the node is not being following the current protocol - if let AtomaP2pNodeError::UrlParseError(_) = e { + if let AtomaP2pNodeError::UrlParseError(_) = *e { // We remove the peer from the gossipsub topic, because it is not a valid URL and therefore cannot be reached // by clients for processing OpenAI api compatible AI requests, so these peers are not useful for the network self.swarm @@ -1192,7 +1193,9 @@ impl AtomaP2pNode { node_message, signature: Bytes::copy_from_slice(signature.as_ref()), }; - let serialized_signed_node_message = signed_node_message.serialize_with_signature()?; + let serialized_signed_node_message = signed_node_message + .serialize_with_signature() + .map_err(|e| *e)?; let topic = gossipsub::IdentTopic::new(METRICS_GOSPUBSUB_TOPIC); self.swarm .behaviour_mut() diff --git a/atoma-p2p/src/tests.rs b/atoma-p2p/src/tests.rs index 7f32cebc..0d3e24f2 100644 --- a/atoma-p2p/src/tests.rs +++ b/atoma-p2p/src/tests.rs @@ -161,7 +161,10 @@ async fn test_validate_usage_metrics_message_invalid_url() { &tx, ) .await; - assert!(matches!(result, Err(AtomaP2pNodeError::UrlParseError(_)))); + assert!(matches!( + result, + Err(e) if matches!(*e, AtomaP2pNodeError::UrlParseError(_)) + )); } #[tokio::test] @@ -185,7 +188,7 @@ async fn 
test_validate_usage_metrics_message_expired_timestamp() { .await; assert!(matches!( result, - Err(AtomaP2pNodeError::InvalidPublicAddressError(_)) + Err(e) if matches!(*e, AtomaP2pNodeError::InvalidPublicAddressError(_)) )); } @@ -210,7 +213,7 @@ async fn test_validate_usage_metrics_message_future_timestamp() { .await; assert!(matches!( result, - Err(AtomaP2pNodeError::InvalidPublicAddressError(_)) + Err(e) if matches!(*e, AtomaP2pNodeError::InvalidPublicAddressError(_)) )); } @@ -244,7 +247,7 @@ async fn test_validate_usage_metrics_message_invalid_signature() { .await; assert!(matches!( result, - Err(AtomaP2pNodeError::SignatureVerificationError(_)) + Err(e) if matches!(*e, AtomaP2pNodeError::SignatureVerificationError(_)) )); } @@ -283,7 +286,7 @@ async fn test_validate_usage_metrics_message_invalid_node_ownership() { assert!(matches!( result, - Err(AtomaP2pNodeError::NodeSmallIdOwnershipVerificationError(_)) + Err(e) if matches!(*e, AtomaP2pNodeError::NodeSmallIdOwnershipVerificationError(_)) )); } @@ -312,7 +315,7 @@ async fn test_validate_usage_metrics_message_state_manager_error() { .await; assert!(matches!( result, - Err(AtomaP2pNodeError::StateManagerError(_)) + Err(e) if matches!(*e, AtomaP2pNodeError::StateManagerError(_)) )); } @@ -352,6 +355,6 @@ async fn test_validate_usage_metrics_message_response_channel_error() { .await; assert!(matches!( result, - Err(AtomaP2pNodeError::NodeSmallIdOwnershipVerificationError(_)) + Err(e) if matches!(*e, AtomaP2pNodeError::NodeSmallIdOwnershipVerificationError(_)) )); } diff --git a/atoma-p2p/src/types.rs b/atoma-p2p/src/types.rs index 99d3d81b..31a95310 100644 --- a/atoma-p2p/src/types.rs +++ b/atoma-p2p/src/types.rs @@ -17,7 +17,7 @@ pub const SECP256K1_SIGNATURE_LENGTH: usize = 98; /// see pub const SECP256R1_SIGNATURE_LENGTH: usize = 98; -type Result = std::result::Result; +type Result> = std::result::Result; /// An enum representing different types of events that can be emitted by the Atoma P2P node. 
pub enum AtomaP2pEvent { @@ -138,8 +138,7 @@ pub struct NodeMessage { impl SerializeWithHash for NodeMessage { fn serialize_with_hash(&self) -> Result { let mut buffer = BytesMut::new(); - ciborium::into_writer(self, (&mut buffer).writer()) - .map_err(AtomaP2pNodeError::UsageMetricsSerializeError)?; + ciborium::into_writer(self, (&mut buffer).writer()).map_err(|e| Box::new(e.into()))?; Ok(SerializedMessage { hash: blake3::hash(buffer.as_ref()), message: buffer.freeze(), @@ -185,35 +184,36 @@ pub trait SerializeWithSignature { } impl SerializeWithSignature for SignedNodeMessage { - fn serialize_with_signature(&self) -> Result { + fn serialize_with_signature(&self) -> Result { let mut buffer = BytesMut::with_capacity(1024); buffer.extend_from_slice(&self.signature); // Serialize node message ciborium::into_writer(&self.node_message, (&mut buffer).writer()) - .map_err(AtomaP2pNodeError::UsageMetricsSerializeError)?; + .map_err(|e| Box::new(e.into()))?; Ok(buffer.freeze()) } - fn deserialize_with_signature(data: &[u8]) -> Result { + fn deserialize_with_signature(data: &[u8]) -> Result { let signature_len = data .first() .map(|&flag| match flag { f if f == Ed25519SuiSignature::SCHEME.flag() => Ok(ED25519_SIGNATURE_LENGTH), f if f == Secp256k1SuiSignature::SCHEME.flag() => Ok(SECP256K1_SIGNATURE_LENGTH), f if f == Secp256r1SuiSignature::SCHEME.flag() => Ok(SECP256R1_SIGNATURE_LENGTH), - f => Err(AtomaP2pNodeError::SignatureParseError(format!( + f => Err(Box::new(AtomaP2pNodeError::SignatureParseError(format!( "Invalid signature scheme, expected 0x00, 0x01 or 0x02, received {f:#04x}", - ))), + )))), }) .ok_or_else(|| { - AtomaP2pNodeError::SignatureParseError( + Box::new(AtomaP2pNodeError::SignatureParseError( "Invalid signature scheme: the data is empty".to_string(), - ) + )) })??; let signature = Bytes::copy_from_slice(&data[0..signature_len]); - let node_message = ciborium::from_reader(&data[signature_len..])?; + let node_message = + 
ciborium::from_reader(&data[signature_len..]).map_err(|e| Box::new(e.into()))?; Ok(Self { node_message, signature, diff --git a/atoma-p2p/src/utils.rs b/atoma-p2p/src/utils.rs index 21774be5..1c726462 100644 --- a/atoma-p2p/src/utils.rs +++ b/atoma-p2p/src/utils.rs @@ -75,7 +75,7 @@ const EXPIRED_TIMESTAMP_THRESHOLD: u64 = 10 * 60; // 10 minutes #[instrument(level = "debug", skip_all)] pub fn validate_node_message_country_url_timestamp( node_message: &NodeMessage, -) -> Result<(), AtomaP2pNodeError> { +) -> Result<(), Box> { let now = std::time::Instant::now().elapsed().as_secs(); let country = node_message.node_metadata.country.as_str(); @@ -91,7 +91,7 @@ pub fn validate_node_message_country_url_timestamp( "Invalid URL format, received address: {}", node_message.node_metadata.node_public_url ); - AtomaP2pNodeError::UrlParseError(e) + Box::new(AtomaP2pNodeError::UrlParseError(e)) })?; // Check if the timestamp is within a reasonable time frame @@ -105,18 +105,20 @@ pub fn validate_node_message_country_url_timestamp( node_message.node_metadata.timestamp, now ); - return Err(AtomaP2pNodeError::InvalidPublicAddressError( + return Err(Box::new(AtomaP2pNodeError::InvalidPublicAddressError( "Timestamp is too far in the past".to_string(), - )); + ))); } Ok(()) } /// Custom validation function for ISO 3166-1 alpha-2 country codes -fn validate_country_code(code: &str) -> Result<(), AtomaP2pNodeError> { +fn validate_country_code(code: &str) -> Result<(), Box> { isocountry::CountryCode::for_alpha2(code).map_err(|_| { - AtomaP2pNodeError::InvalidCountryCodeError("Country code is invalid.".to_string()) + Box::new(AtomaP2pNodeError::InvalidCountryCodeError( + "Country code is invalid.".to_string(), + )) })?; Ok(()) } @@ -331,7 +333,7 @@ pub async fn validate_signed_node_message( node_message_hash: &[u8; 32], signature: &[u8], state_manager_sender: &Sender, -) -> Result<(), AtomaP2pNodeError> { +) -> Result<(), Box> { // Validate the message's node public URL and timestamp 
validate_node_message_country_url_timestamp(node_message)?; // Verify the signature of the message diff --git a/atoma-service/src/components/openapi.rs b/atoma-service/src/components/openapi.rs index c963eb0a..8887dfd6 100644 --- a/atoma-service/src/components/openapi.rs +++ b/atoma-service/src/components/openapi.rs @@ -62,7 +62,7 @@ pub fn openapi_routes() -> Router { let spec_path = docs_dir.join("openapi.yml"); fs::write(&spec_path, spec).expect("Failed to write OpenAPI spec to file"); - println!("OpenAPI spec written to: {:?}", spec_path); + println!("OpenAPI spec written to: {}", spec_path.display()); } Router::new() diff --git a/atoma-service/src/config.rs b/atoma-service/src/config.rs index 9538fb49..9e2aa509 100644 --- a/atoma-service/src/config.rs +++ b/atoma-service/src/config.rs @@ -11,9 +11,7 @@ use serde::Deserialize; pub struct AtomaServiceConfig { /// URL for the chat completions service with maximum concurrency settings. /// - /// This is an optional field that, if provided, specifies the endpoint - /// for the chat completions service used by the Atoma Service, together with its - /// associated Prometheus job name. + /// This field specifies the endpoint for the chat completions service used by the Atoma Service. pub chat_completions_service_urls: HashMap>, /// URL for the embeddings service. @@ -59,6 +57,18 @@ pub struct AtomaServiceConfig { /// List of allowed sui addresses for fiat payments. pub whitelist_sui_addresses_for_fiat: Vec, + + /// The timeout for the too many requests error in milliseconds. + pub too_many_requests_timeout_ms: u64, + + ///Lower threshold for memory usage, if the memory usage goes below this value, the service will not be considered overloaded + pub memory_lower_threshold: f64, + + /// Upper threshold for memory usage, if the memory usage goes above this value, the service will be considered overloaded + pub memory_upper_threshold: f64, + + /// The maximum number of queued requests for each inference service. 
+ pub max_num_queued_requests: f64, } impl AtomaServiceConfig { diff --git a/atoma-service/src/handlers/chat_completions.rs b/atoma-service/src/handlers/chat_completions.rs index eb320143..f53815c1 100644 --- a/atoma-service/src/handlers/chat_completions.rs +++ b/atoma-service/src/handlers/chat_completions.rs @@ -3,7 +3,9 @@ use crate::{ handle_concurrent_requests_count_decrement, metrics::{ CHAT_COMPLETIONS_CONFIDENTIAL_NUM_REQUESTS, CHAT_COMPLETIONS_ESTIMATED_TOTAL_TOKENS, - TOTAL_FAILED_CHAT_CONFIDENTIAL_REQUESTS, TOTAL_FAILED_CHAT_REQUESTS, + TOTAL_BAD_REQUESTS, TOTAL_FAILED_CHAT_CONFIDENTIAL_REQUESTS, + TOTAL_FAILED_CHAT_REQUESTS, TOTAL_LOCKED_REQUESTS, TOTAL_TOO_EARLY_REQUESTS, + TOTAL_TOO_MANY_REQUESTS, TOTAL_UNAUTHORIZED_REQUESTS, }, sign_response_and_update_stack_hash, update_fiat_amount, update_stack_num_compute_units, }, @@ -266,8 +268,30 @@ pub async fn chat_completions_handler( Ok(response) } Err(e) => { - TOTAL_FAILED_CHAT_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + _ => { + TOTAL_FAILED_CHAT_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + } + // NOTE: We need to update the stack number of tokens as the service failed to generate // a proper response. 
For this reason, we set the total number of tokens to 0. // This will ensure that the stack number of tokens is not updated, and the stack @@ -476,9 +500,29 @@ pub async fn confidential_chat_completions_handler( Ok(response) } Err(e) => { - TOTAL_FAILED_CHAT_CONFIDENTIAL_REQUESTS - .add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + _ => { + TOTAL_FAILED_CHAT_CONFIDENTIAL_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + } // NOTE: We need to update the stack number of tokens as the service failed to generate // a proper response. For this reason, we set the total number of tokens to 0. 
// This will ensure that the stack number of tokens is not updated, and the stack @@ -863,7 +907,9 @@ async fn handle_streaming_response( get_best_available_chat_completions_service_url( &state.running_num_requests, chat_completions_service_urls, - model, + &model.to_lowercase(), + state.memory_upper_threshold, + state.max_num_queued_requests, ) .await .map_err(|e| AtomaServiceError::ChatCompletionsServiceUnavailable { @@ -871,6 +917,9 @@ async fn handle_streaming_response( endpoint: endpoint.clone(), })?; if status_code == StatusCode::TOO_MANY_REQUESTS { + state + .too_many_requests + .insert(model.to_string(), Instant::now()); return Err(AtomaServiceError::ChatCompletionsServiceUnavailable { message: "Too many requests".to_string(), endpoint: endpoint.clone(), @@ -1294,6 +1343,8 @@ pub mod utils { &state.running_num_requests, chat_completions_service_url_services, model, + state.memory_upper_threshold, + state.max_num_queued_requests, ) .await .map_err(|e| AtomaServiceError::ChatCompletionsServiceUnavailable { @@ -1301,6 +1352,9 @@ pub mod utils { endpoint: endpoint.to_string(), })?; if status_code == StatusCode::TOO_MANY_REQUESTS { + state + .too_many_requests + .insert(model.to_string(), Instant::now()); return Err(AtomaServiceError::ChatCompletionsServiceUnavailable { message: "Too many requests".to_string(), endpoint: endpoint.to_string(), diff --git a/atoma-service/src/handlers/completions.rs b/atoma-service/src/handlers/completions.rs index fac65537..4b44ded0 100644 --- a/atoma-service/src/handlers/completions.rs +++ b/atoma-service/src/handlers/completions.rs @@ -3,7 +3,9 @@ use crate::{ handle_concurrent_requests_count_decrement, metrics::{ CHAT_COMPLETIONS_CONFIDENTIAL_NUM_REQUESTS, CHAT_COMPLETIONS_ESTIMATED_TOTAL_TOKENS, - TOTAL_FAILED_CHAT_CONFIDENTIAL_REQUESTS, TOTAL_FAILED_CHAT_REQUESTS, + TOTAL_BAD_REQUESTS, TOTAL_FAILED_CHAT_CONFIDENTIAL_REQUESTS, + TOTAL_FAILED_CHAT_REQUESTS, TOTAL_LOCKED_REQUESTS, TOTAL_TOO_EARLY_REQUESTS, + 
TOTAL_TOO_MANY_REQUESTS, TOTAL_UNAUTHORIZED_REQUESTS, }, sign_response_and_update_stack_hash, update_fiat_amount, update_stack_num_compute_units, }, @@ -240,8 +242,29 @@ pub async fn completions_handler( Ok(response) } Err(e) => { - TOTAL_FAILED_CHAT_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + _ => { + TOTAL_FAILED_CHAT_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + } // NOTE: We need to update the stack number of tokens as the service failed to generate // a proper response. For this reason, we set the total number of tokens to 0. 
// This will ensure that the stack number of tokens is not updated, and the stack @@ -450,9 +473,29 @@ pub async fn confidential_completions_handler( Ok(response) } Err(e) => { - TOTAL_FAILED_CHAT_CONFIDENTIAL_REQUESTS - .add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + _ => { + TOTAL_FAILED_CHAT_CONFIDENTIAL_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.to_owned())]); + } + } if let Some(stack_small_id) = stack_small_id { // NOTE: We need to update the stack number of tokens as the service failed to generate // a proper response. For this reason, we set the total number of tokens to 0. 
@@ -838,6 +881,8 @@ async fn handle_streaming_response( &state.running_num_requests, chat_completions_service_urls, model, + state.memory_upper_threshold, + state.max_num_queued_requests, ) .await .map_err(|e| AtomaServiceError::ChatCompletionsServiceUnavailable { @@ -845,6 +890,9 @@ async fn handle_streaming_response( endpoint: endpoint.clone(), })?; if status_code == StatusCode::TOO_MANY_REQUESTS { + state + .too_many_requests + .insert(model.to_string(), Instant::now()); return Err(AtomaServiceError::ChatCompletionsServiceUnavailable { message: "Too many requests".to_string(), endpoint: endpoint.clone(), @@ -1257,6 +1305,8 @@ pub mod utils { &state.running_num_requests, completions_service_url_services, model, + state.memory_upper_threshold, + state.max_num_queued_requests, ) .await .map_err(|e| AtomaServiceError::ChatCompletionsServiceUnavailable { @@ -1264,6 +1314,9 @@ pub mod utils { endpoint: endpoint.to_string(), })?; if status_code == StatusCode::TOO_MANY_REQUESTS { + state + .too_many_requests + .insert(model.to_string(), Instant::now()); return Err(AtomaServiceError::ChatCompletionsServiceUnavailable { message: "Too many requests".to_string(), endpoint: endpoint.to_string(), diff --git a/atoma-service/src/handlers/embeddings.rs b/atoma-service/src/handlers/embeddings.rs index 31ce2a8f..7f3ba54d 100644 --- a/atoma-service/src/handlers/embeddings.rs +++ b/atoma-service/src/handlers/embeddings.rs @@ -7,9 +7,10 @@ use crate::{ handle_confidential_compute_encryption_response, metrics::{ TEXT_EMBEDDINGS_CONFIDENTIAL_NUM_REQUESTS, TEXT_EMBEDDINGS_LATENCY_METRICS, - TEXT_EMBEDDINGS_NUM_REQUESTS, TOTAL_COMPLETED_REQUESTS, TOTAL_FAILED_REQUESTS, - TOTAL_FAILED_TEXT_EMBEDDING_CONFIDENTIAL_REQUESTS, - TOTAL_FAILED_TEXT_EMBEDDING_REQUESTS, + TEXT_EMBEDDINGS_NUM_REQUESTS, TOTAL_BAD_REQUESTS, TOTAL_COMPLETED_REQUESTS, + TOTAL_FAILED_REQUESTS, TOTAL_FAILED_TEXT_EMBEDDING_CONFIDENTIAL_REQUESTS, + TOTAL_FAILED_TEXT_EMBEDDING_REQUESTS, TOTAL_LOCKED_REQUESTS, 
TOTAL_TOO_EARLY_REQUESTS, + TOTAL_TOO_MANY_REQUESTS, TOTAL_UNAUTHORIZED_REQUESTS, }, sign_response_and_update_stack_hash, update_fiat_amount, update_stack_num_compute_units, }, @@ -18,6 +19,7 @@ use crate::{ types::{ConfidentialComputeRequest, ConfidentialComputeResponse}, }; use axum::{extract::State, Extension, Json}; +use hyper::StatusCode; use opentelemetry::KeyValue; use reqwest::Client; use serde_json::Value; @@ -138,9 +140,28 @@ pub async fn embeddings_handler( Ok(response) } Err(e) => { - TOTAL_FAILED_TEXT_EMBEDDING_REQUESTS - .add(1, &[KeyValue::new("model", model.as_str().to_owned())]); - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new("model", model.as_str().to_owned())]); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + _ => { + TOTAL_FAILED_TEXT_EMBEDDING_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + } if let Some(stack_small_id) = stack_small_id { let concurrent_requests = handle_concurrent_requests_count_decrement( &state.concurrent_requests_per_stack, @@ -312,9 +333,28 @@ pub async fn confidential_embeddings_handler( Ok(response) } Err(e) => { - TOTAL_FAILED_TEXT_EMBEDDING_CONFIDENTIAL_REQUESTS - .add(1, &[KeyValue::new("model", model.as_str().to_owned())]); - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new("model", model.as_str().to_owned())]); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + 
TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + _ => { + TOTAL_FAILED_TEXT_EMBEDDING_CONFIDENTIAL_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + } if let Some(stack_small_id) = stack_small_id { let concurrent_requests = handle_concurrent_requests_count_decrement( &state.concurrent_requests_per_stack, diff --git a/atoma-service/src/handlers/image_generations.rs b/atoma-service/src/handlers/image_generations.rs index 12b2251a..b1fb79c4 100644 --- a/atoma-service/src/handlers/image_generations.rs +++ b/atoma-service/src/handlers/image_generations.rs @@ -6,8 +6,10 @@ use crate::{ handle_concurrent_requests_count_decrement, metrics::{ IMAGE_GEN_CONFIDENTIAL_NUM_REQUESTS, IMAGE_GEN_LATENCY_METRICS, IMAGE_GEN_NUM_REQUESTS, - TOTAL_COMPLETED_REQUESTS, TOTAL_FAILED_IMAGE_CONFIDENTIAL_GENERATION_REQUESTS, - TOTAL_FAILED_IMAGE_GENERATION_REQUESTS, TOTAL_FAILED_REQUESTS, + TOTAL_BAD_REQUESTS, TOTAL_COMPLETED_REQUESTS, + TOTAL_FAILED_IMAGE_CONFIDENTIAL_GENERATION_REQUESTS, + TOTAL_FAILED_IMAGE_GENERATION_REQUESTS, TOTAL_FAILED_REQUESTS, TOTAL_LOCKED_REQUESTS, + TOTAL_TOO_EARLY_REQUESTS, TOTAL_TOO_MANY_REQUESTS, TOTAL_UNAUTHORIZED_REQUESTS, }, update_fiat_amount, update_stack_num_compute_units, }, @@ -16,6 +18,7 @@ use crate::{ types::{ConfidentialComputeRequest, ConfidentialComputeResponse}, }; use axum::{extract::State, Extension, Json}; +use hyper::StatusCode; use opentelemetry::KeyValue; use reqwest::Client; 
use serde_json::Value; @@ -118,28 +121,48 @@ pub async fn image_generations_handler( let model = payload .get(MODEL_KEY) .and_then(|m| m.as_str()) - .unwrap_or("unknown"); + .unwrap_or("unknown") + .to_string(); match handle_image_generations_response( &state, - payload.clone(), + payload, payload_hash, stack_small_id, client_encryption_metadata, &endpoint, timer, - model.to_string(), + model.clone(), ) .await { Ok(response) => { - TOTAL_COMPLETED_REQUESTS.add(1, &[KeyValue::new("model", model.to_owned())]); + TOTAL_COMPLETED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); Ok(response) } Err(e) => { - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new("model", model.to_owned())]); - TOTAL_FAILED_IMAGE_GENERATION_REQUESTS - .add(1, &[KeyValue::new("model", model.to_owned())]); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + _ => { + TOTAL_FAILED_IMAGE_GENERATION_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + } if let Some(stack_small_id) = stack_small_id { let concurrent_requests = handle_concurrent_requests_count_decrement( &state.concurrent_requests_per_stack, @@ -263,13 +286,13 @@ pub async fn confidential_image_generations_handler( match handle_image_generations_response( &state, - payload.clone(), + payload, payload_hash, stack_small_id, client_encryption_metadata, &endpoint, timer, - model.to_string(), + model.clone(), ) 
.await { @@ -306,9 +329,28 @@ pub async fn confidential_image_generations_handler( Ok(response) } Err(e) => { - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new("model", model.clone())]); - TOTAL_FAILED_IMAGE_CONFIDENTIAL_GENERATION_REQUESTS - .add(1, &[KeyValue::new("model", model.clone())]); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + _ => { + TOTAL_FAILED_IMAGE_CONFIDENTIAL_GENERATION_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + } + } if let Some(stack_small_id) = stack_small_id { let concurrent_requests = handle_concurrent_requests_count_decrement( &state.concurrent_requests_per_stack, diff --git a/atoma-service/src/handlers/metrics.rs b/atoma-service/src/handlers/metrics.rs index 4497f291..f10a1f5d 100644 --- a/atoma-service/src/handlers/metrics.rs +++ b/atoma-service/src/handlers/metrics.rs @@ -1,6 +1,6 @@ use opentelemetry::{ global, - metrics::{Counter, Histogram, Meter, UpDownCounter}, + metrics::{Counter, Gauge, Histogram, Meter, UpDownCounter}, }; use std::sync::LazyLock; @@ -346,6 +346,81 @@ pub static TOTAL_FAILED_CHAT_REQUESTS: LazyLock> = LazyLock::new(|| .build() }); +/// Counter metric that tracks the total number of too many requests. 
+/// +/// # Metric Details +/// - Name: `atoma_total_too_many_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_TOO_MANY_REQUESTS: LazyLock> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_too_many_requests") + .with_description("Total number of too many requests") + .with_unit("requests") + .build() +}); + +/// Counter metric that tracks the total number of unauthorized requests. +/// +/// # Metric Details +/// - Name: `atoma_total_unauthorized_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_UNAUTHORIZED_REQUESTS: LazyLock> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_unauthorized_requests") + .with_description("Total number of unauthorized requests") + .with_unit("requests") + .build() +}); + +/// Counter metric that tracks the total number of too early requests. +/// +/// # Metric Details +/// - Name: `atoma_total_too_early_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_TOO_EARLY_REQUESTS: LazyLock> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_too_early_requests") + .with_description("Total number of too early requests") + .with_unit("requests") + .build() +}); + +/// Counter metric that tracks the total number of locked requests. +/// +/// # Metric Details +/// - Name: `atoma_total_locked_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_LOCKED_REQUESTS: LazyLock> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_locked_requests") + .with_description("Total number of locked requests") + .with_unit("requests") + .build() +}); + +/// Counter metric that tracks the total number of bad request requests. 
+/// +/// # Metric Details +/// - Name: `atoma_TOTAL_BAD_REQUESTS` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_BAD_REQUESTS: LazyLock> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_TOTAL_BAD_REQUESTS") + .with_description("Total number of bad request requests") + .with_unit("requests") + .build() +}); + /// Counter metric that tracks the total number of confidential chat requests. /// /// # Metric Details @@ -541,3 +616,18 @@ pub static SIGNATURE_VERIFICATION_MIDDLEWARE_SUCCESSFUL_TIME: LazyLock> = LazyLock::new(|| { + GLOBAL_METER + .u64_gauge("atoma_num_running_requests") + .with_description("Number of running requests") + .with_unit("requests") + .build() +}); diff --git a/atoma-service/src/handlers/mod.rs b/atoma-service/src/handlers/mod.rs index 7b86d750..ed5dadab 100644 --- a/atoma-service/src/handlers/mod.rs +++ b/atoma-service/src/handlers/mod.rs @@ -53,9 +53,11 @@ pub const COMPLETION_TOKENS_KEY: &str = "completion_tokens"; const VLLM_RUNNING_REQUESTS_QUERY: &str = "num_requests_running"; const VLLM_QUEUED_REQUESTS_QUERY: &str = "num_requests_waiting"; +const VLLM_MEMORY_USAGE_QUERY: &str = "gpu_cache_usage_perc"; const VLLM_SERVICE_PREFIX: &str = "vllm:"; const SGLANG_RUNNING_REQUESTS_QUERY: &str = "num_running_reqs"; const SGLANG_QUEUED_REQUESTS_QUERY: &str = "num_queue_reqs"; +const SGLANG_MEMORY_USAGE_QUERY: &str = "token_usage"; const SGLANG_SERVICE_PREFIX: &str = "sglang:"; #[derive(Debug, Clone)] @@ -81,6 +83,14 @@ impl InferenceService { } } + #[must_use] + pub const fn get_usage(&self) -> &'static str { + match self { + Self::Vllm => VLLM_MEMORY_USAGE_QUERY, + Self::SgLang => SGLANG_MEMORY_USAGE_QUERY, + } + } + #[must_use] pub const fn get_service_prefix(&self) -> &'static str { match self { @@ -572,14 +582,505 @@ pub fn handle_status_code_error( } pub mod inference_service_metrics { + use futures::future::join_all; + use prometheus_parse::Scrape; + use prometheus_parse::Value; + use 
rand::Rng; + use std::sync::Arc; + use std::sync::LazyLock; + use std::time::Duration; + use tokio::sync::RwLock; + use tokio::time; + use tracing::info; + use crate::handlers::InferenceService; use hyper::StatusCode; - use rand::seq::SliceRandom; use tracing::instrument; use super::request_counter::RequestCounter; pub type Result = std::result::Result; + type MetricValue = ChatCompletionsMetrics; + type MetricResult = Result; + type MetricsVec = Vec; + type CachedMetrics = Option; + type MetricsLock = Arc>; + + /// The default interval for updating the metrics + const DEFAULT_METRICS_UPDATE_INTERVAL_MILLIS: u64 = 35; + + /// The timeout for the Prometheus metrics queries + const METRICS_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(2); + + /// The HTTP client for the metrics queries + static HTTP_CLIENT: LazyLock = LazyLock::new(|| { + reqwest::Client::builder() + .timeout(METRICS_TIMEOUT) + .build() + .expect("Failed to create HTTP client") + }); + + /// Chat completions metrics + #[derive(Debug, Clone, PartialEq)] + pub struct ChatCompletionsMetrics { + /// The model name + model: String, + /// The chat completions service url + chat_completions_service_url: String, + /// The number of queue requests + num_queued_requests: f64, + /// The number of running requests + num_running_requests: f64, + /// The memory usage in fraction, e.g. 1.00 means 100% memory usage + memory_usage: f64, + /// The maximum number of running requests allowed for this url. 
+ max_number_of_running_requests: usize, + } + + impl Eq for ChatCompletionsMetrics {} + + impl PartialOrd for ChatCompletionsMetrics { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } + } + + impl Ord for ChatCompletionsMetrics { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.num_queued_requests + .total_cmp(&other.num_queued_requests) + .then_with(|| { + self.memory_usage + .total_cmp(&other.memory_usage) + .then_with(|| { + self.num_running_requests + .total_cmp(&other.num_running_requests) + }) + }) + } + } + + impl ChatCompletionsMetrics { + #[must_use] + pub fn above_upper_threshold_exceeded(&self, threshold: f64) -> bool { + self.memory_usage > threshold + } + + #[must_use] + pub fn under_lower_threshold(&self, threshold: f64) -> bool { + self.memory_usage <= threshold + } + } + + /// Cache structure to store metrics + #[derive(Debug, Default)] + struct MetricsCache { + metrics: MetricsLock, + } + + impl MetricsCache { + fn new() -> Self { + Self { + metrics: Arc::new(RwLock::new(None)), + } + } + + async fn get_metrics(&self) -> Option { + self.metrics.read().await.clone() + } + + async fn update_metrics(&self, new_metrics: Vec>) { + *self.metrics.write().await = Some(new_metrics); + } + } + + /// Global vLLM metrics cache + #[allow(clippy::redundant_closure)] + static VLLM_METRICS_CACHE: LazyLock = LazyLock::new(|| MetricsCache::new()); + + /// Global SgLang metrics cache + #[allow(clippy::redundant_closure)] + static SGLANG_METRICS_CACHE: LazyLock = LazyLock::new(|| MetricsCache::new()); + + /// Start the background task to update metrics periodically (every `DEFAULT_METRICS_UPDATE_INTERVAL_MILLIS` milliseconds unless `metrics_update_interval` is set) + /// + /// # Arguments + /// + /// * `chat_completions_service_urls` - A vector of tuples containing the model name, the chat completions service URL, the job name and the maximum number of running requests. + /// * `metrics_update_interval` - The interval in milliseconds to update the metrics. 
+ #[instrument(level = "info", skip_all)] + pub fn start_metrics_updater( + chat_completions_service_urls: Vec<(String, String, String, usize)>, + metrics_update_interval: Option, + ) { + type ChatCompletionsServiceUrls = Vec<(String, String, String, usize)>; + info!( + target = "atoma-service", + module = "inference_service_metrics", + level = "info", + "Starting metrics updater with {chat_completions_service_urls:?}" + ); + let (vllm_chat_completions_service_urls, sglang_chat_completions_service_urls): ( + ChatCompletionsServiceUrls, + ChatCompletionsServiceUrls, + ) = chat_completions_service_urls + .iter() + .cloned() + .partition(|(_, _, job, _)| job.contains("vllm")); + info!( + target = "atoma-service", + module = "inference_service_metrics", + level = "info", + "Partitioned chat completions service urls: vllm: {vllm_chat_completions_service_urls:?}, sglang: {sglang_chat_completions_service_urls:?}" + ); + let vllm_chat_completions_service_urls = Arc::new(vllm_chat_completions_service_urls); + let sglang_chat_completions_service_urls = Arc::new(sglang_chat_completions_service_urls); + tokio::spawn(async move { + let metrics_interval = + metrics_update_interval.unwrap_or(DEFAULT_METRICS_UPDATE_INTERVAL_MILLIS); + info!( + target = "atoma-service", + module = "inference_service_metrics", + level = "info", + "Metrics update interval: {metrics_interval} milliseconds" + ); + let mut interval = time::interval(Duration::from_millis(metrics_interval)); + loop { + interval.tick().await; + if !vllm_chat_completions_service_urls.is_empty() { + let vllm_metrics = + get_metrics(&InferenceService::Vllm, &vllm_chat_completions_service_urls) + .await; + if vllm_metrics.iter().any(std::result::Result::is_ok) { + VLLM_METRICS_CACHE.update_metrics(vllm_metrics).await; + } else { + tracing::warn!( + "Failed to retrieve any valid vLLM metrics, not updating cache" + ); + } + } + if !sglang_chat_completions_service_urls.is_empty() { + let sglang_metrics = get_metrics( + 
&InferenceService::SgLang, + &sglang_chat_completions_service_urls, + ) + .await; + if sglang_metrics.iter().any(std::result::Result::is_ok) { + SGLANG_METRICS_CACHE.update_metrics(sglang_metrics).await; + } else { + tracing::warn!( + "Failed to retrieve any valid SgLang metrics, not updating cache" + ); + } + } + } + }); + } + + /// Fetches metrics from the specified chat completions service URL. + /// + /// This function retrieves metrics from the specified chat completions service URL + /// and parses the response to extract relevant metrics such as the number of queue + /// requests and running requests. It handles errors gracefully and returns a vector + /// of results, where each result contains the metrics for a specific service URL. + /// + /// # Arguments + /// + /// * `inference_service` - The inference service type (vLLM or SgLang). + /// * `jobs_with_url` - A slice of tuples containing model name, the chat completions service URL + /// and the job name (e.g., "vllm-service", "sglang-service"). + /// + /// # Returns + /// + /// Returns a `Vec>`, where each result contains + /// the metrics for a specific service URL. If an error occurs while fetching or parsing + /// the metrics, the error is returned in the result. + /// + /// # Errors + /// + /// * `ChatCompletionsMetricsError::NoMetricsFound`: If no metrics are found for the + /// specified job or if the metrics response is invalid. + /// * Other variants of `ChatCompletionsMetricsError` may be returned if underlying + /// issues occur during metric collection from Prometheus (e.g., network errors, + /// parsing errors), though the function attempts to handle missing individual metrics + /// gracefully. 
+ async fn get_metrics( + inference_service: &InferenceService, + jobs_with_url: &[(String, String, String, usize)], // (model, url, job, max_concurrent_requests) + ) -> Vec> { + let tasks = + jobs_with_url + .iter() + .map(|(model, chat_completions_service_url, job, max_number_of_running_requests)| async move { + let response = HTTP_CLIENT + .get(format!("{chat_completions_service_url}/metrics")) + .send() + .await + .map_err(|_| { + ChatCompletionsMetricsError::NoMetricsFound(job.to_string()) + })?; + let body = response.text().await?; + let lines = body + .lines() + .map(|line| Ok(line.replace(inference_service.get_service_prefix(), ""))); + let metrics = Scrape::parse(lines).unwrap(); + let num_queued_requests = extract_metric( + &metrics, + inference_service.get_queued_requests_metric_name(), + job, + )?; + let num_running_requests = extract_metric( + &metrics, + inference_service.get_running_requests_metric_name(), + job, + )?; + let memory_usage = extract_metric( + &metrics, + inference_service.get_usage(), + job, + )?; + + Ok(ChatCompletionsMetrics { + model: model.clone(), + chat_completions_service_url: chat_completions_service_url.clone(), + num_queued_requests, + num_running_requests, + memory_usage, + max_number_of_running_requests: *max_number_of_running_requests, + }) + }); + join_all(tasks).await + } + + /// Extracts a specific metric from the Scrape response. + /// + /// This function searches for a metric with the specified name in the + /// Scrape response and returns its value if found. + /// + /// # Arguments + /// + /// * `metrics` - The Scrape response containing the metrics. + /// * `name` - The name of the metric to extract. + /// * `job` - The job name used for error reporting. + /// + /// # Returns + /// + /// Returns a `Result` containing the metric value if found, + /// or an error if not found or if the value is not a Gauge. 
+ /// + /// # Errors + /// + /// * `ChatCompletionsMetricsError::NoMetricsFound`: If the specified metric is not found + /// or if the value is not a Gauge. + /// * Other variants of `ChatCompletionsMetricsError` may be returned if underlying + /// issues occur during metric collection from Prometheus (e.g., network errors, + /// parsing errors), + /// though the function attempts to handle missing individual metrics gracefully. + fn extract_metric(metrics: &Scrape, name: &str, job: &str) -> Result { + metrics + .samples + .iter() + .find(|s| s.metric == name) + .ok_or_else(|| ChatCompletionsMetricsError::NoMetricsFound(job.to_string())) + .and_then(|sample| { + if let Value::Gauge(value) = sample.value { + Ok(value) + } else { + Err(ChatCompletionsMetricsError::NoMetricsFound(job.to_string())) + } + }) + } + + /// Retrieves all chat completions metrics for the specified model. + /// + /// This function fetches metrics from both vLLM and SgLang services, + /// partitions the service URLs based on the job type, and retrieves metrics + /// for each service. It returns a vector of `ChatCompletionsMetrics` for the specified model. + /// + /// # Arguments + /// + /// * `chat_completions_service_urls` - A vector of tuples containing the chat completions service URLs, + /// job names, and maximum concurrent requests. + /// * `model` - The model name for which to retrieve metrics. + /// + /// # Returns + /// + /// Returns a `Result>` containing the metrics for the specified model. + /// + /// # Errors + /// + /// * `ChatCompletionsMetricsError::NoChatCompletionsServiceUrlsFound`: If no chat completions service URLs are provided. + /// * Other variants of `ChatCompletionsMetricsError` may be returned if underlying + /// issues occur during metric collection from Prometheus (e.g., network errors, + /// parsing errors), though the function attempts to handle missing individual metrics gracefully. 
+ #[instrument( + level = "info", + skip(chat_completions_service_urls, model), + fields(model = model) + )] + pub async fn get_all_metrics( + chat_completions_service_urls: &[(String, String, usize)], // (url, job, max_concurrent_requests) + model: &str, + ) -> Result> { + type ChatCompletionsServiceUrls = Vec<(String, String, usize)>; + + if chat_completions_service_urls.is_empty() { + tracing::warn!( + target = "atoma-service", + model = model, + "No chat completions service URLs provided for model." + ); + return Err( + ChatCompletionsMetricsError::NoChatCompletionsServiceUrlsFound(model.to_string()), + ); + } + tracing::debug!( + target = "atoma-service", + module = "inference_service_metrics", + level = "info", + "Getting best available chat completions service URL for model: {model} and urls: {chat_completions_service_urls:?}" + ); + let (vllm_chat_completions_service_urls, sglang_chat_completions_service_urls): ( + ChatCompletionsServiceUrls, + ChatCompletionsServiceUrls, + ) = chat_completions_service_urls + .iter() + .cloned() + .partition(|(_, job, _)| job.contains("vllm")); + + tracing::debug!( + target = "atoma-service", + module = "inference_service_metrics", + level = "info", + "Partitioned chat completions service urls: vllm: {vllm_chat_completions_service_urls:?}, sglang: {sglang_chat_completions_service_urls:?}" + ); + + // Get cached metrics + let vllm_metrics = if vllm_chat_completions_service_urls.is_empty() { + vec![] + } else if let Some(metrics) = VLLM_METRICS_CACHE.get_metrics().await { + metrics + } else { + info!( + target = "atoma-service", + module = "inference_service_metrics", + level = "info", + "No cached vLLM metrics, getting them directly" + ); + let vllm_chat_completions_service_urls_with_model: Vec<( + String, + String, + String, + usize, + )> = vllm_chat_completions_service_urls + .iter() + .map(|(url, job, max_concurrent_requests)| { + ( + model.to_string(), + url.clone(), + job.clone(), + *max_concurrent_requests, + ) + }) + 
.collect(); + get_metrics( + &InferenceService::Vllm, + &vllm_chat_completions_service_urls_with_model, + ) + .await + }; + let sglang_metrics = if sglang_chat_completions_service_urls.is_empty() { + vec![] + } else if let Some(metrics) = SGLANG_METRICS_CACHE.get_metrics().await { + metrics + } else { + info!( + target = "atoma-service", + module = "inference_service_metrics", + level = "info", + "No cached SgLang metrics, getting them directly" + ); + let sglang_chat_completions_service_urls_with_model: Vec<( + String, + String, + String, + usize, + )> = sglang_chat_completions_service_urls + .iter() + .map(|(url, job, max_concurrent_requests)| { + ( + model.to_string(), + url.clone(), + job.clone(), + *max_concurrent_requests, + ) + }) + .collect(); + get_metrics( + &InferenceService::SgLang, + &sglang_chat_completions_service_urls_with_model, + ) + .await + }; + + tracing::debug!( + target = "atoma-service", + module = "inference_service_metrics", + level = "info", + "Received vLLM metrics: {vllm_metrics:?}, SgLang metrics: {sglang_metrics:?}" + ); + + let mut metrics_results = Vec::new(); + for metric in vllm_metrics.into_iter().chain(sglang_metrics.into_iter()) { + match metric { + Ok(ChatCompletionsMetrics { + model: current_model, + chat_completions_service_url, + num_queued_requests, + num_running_requests, + memory_usage, + max_number_of_running_requests, + }) => { + tracing::info!( + target = "atoma-service", + module = "inference_service_metrics", + level = "info", + "current_model = {current_model}, model = {model}, they are equal = {}", + current_model == model + ); + if current_model.to_lowercase() != model.to_lowercase() { + // NOTE: We only want to consider metrics for the current model + continue; + } + info!( + target = "atoma-service", + module = "vllm_metrics", + level = "info", + "Received vLLM/SgLang metrics response for {chat_completions_service_url}:\n + num_queued_requests={num_queued_requests}, + 
num_running_requests={num_running_requests}" + ); + metrics_results.push(ChatCompletionsMetrics { + model: current_model, + chat_completions_service_url, + num_queued_requests, + num_running_requests, + memory_usage, + max_number_of_running_requests, + }); + } + Err(e) => { + tracing::warn!( + target = "atoma-service", + module = "vllm_metrics", + level = "error", + "Failed to get metrics for chat completions service url with error: {e}", + ); + } + } + } + Ok(metrics_results) + } /// Selects the best available chat completions service URL for a given model based on performance metrics. /// @@ -645,38 +1146,109 @@ pub mod inference_service_metrics { running_num_requests: &RequestCounter, chat_completions_service_urls: &[(String, String, usize)], // (url, job, max_concurrent_requests) model: &str, + memory_upper_threshold: f64, + max_num_queued_requests: f64, ) -> Result<(String, StatusCode)> { - // Ensure there are service URLs to choose from. - if chat_completions_service_urls.is_empty() { + let mut metrics_results = get_all_metrics(chat_completions_service_urls, model) + .await + .map_err(|e| { + tracing::error!( + target = "atoma-service", + level = "error", + "Failed to get metrics for model: {model} with error: {e}" + ); + e + })?; + if metrics_results.is_empty() { tracing::warn!( target = "atoma-service", - model = model, - "No chat completions service URLs provided for model." 
- ); - return Err( - ChatCompletionsMetricsError::NoChatCompletionsServiceUrlsFound(model.to_string()), + level = "warn", + "No metrics found for model: {model}", ); + // NOTE: In this case, we pick one of the urls at random + let random_index = rand::thread_rng().gen_range(0..chat_completions_service_urls.len()); + let best_url = chat_completions_service_urls[random_index].0.clone(); + return Ok((best_url, StatusCode::OK)); } - let mut shuffled_chat_completions_service_urls = chat_completions_service_urls.to_vec(); - shuffled_chat_completions_service_urls.shuffle(&mut rand::thread_rng()); - for (url_str, _job_name, max_concurrent_val) in &shuffled_chat_completions_service_urls { - if running_num_requests.increment(url_str, *max_concurrent_val) { - return Ok((url_str.clone(), StatusCode::OK)); + + // Select the best available chat completions service URL based on the number of queued and running requests. + metrics_results.sort(); + + for metric in metrics_results { + if running_num_requests.increment( + &metric.chat_completions_service_url, + metric.max_number_of_running_requests, + ) { + if metric.num_queued_requests > max_num_queued_requests { + tracing::debug!( + target = "atoma-service", + level = "debug", + "Number of queued requests for model: {model} is too high: {}", + metric.num_queued_requests + ); + continue; + } + if metric.above_upper_threshold_exceeded(memory_upper_threshold) { + tracing::debug!( + target = "atoma-service", + level = "debug", + "Memory usage for model: {model} is too high: {}", + metric.memory_usage + ); + continue; + } + let best_url = metric.chat_completions_service_url.clone(); + tracing::info!( + target = "atoma-service", + level = "info", + "Best available chat completions service URL for model: {model} is: {best_url} with and {} queue requests", + metric.num_queued_requests + ); + return Ok((best_url, StatusCode::OK)); } } - tracing::warn!( - target = "atoma-service", - model = model, - "No chat completions service URLs below 
max capacity found, returning TOO_MANY_REQUESTS status." - ); - return Ok((String::new(), StatusCode::TOO_MANY_REQUESTS)); } #[derive(Debug, thiserror::Error, Clone)] pub enum ChatCompletionsMetricsError { + #[error("Failed to get metrics: {0}")] + GetMetricsError(String), #[error("No chat completions service urls found for model: {0}")] NoChatCompletionsServiceUrlsFound(String), + #[error("Invalid metrics value: {0}")] + InvalidMetricsValue(String), + #[error("Invalid metrics response: {0}")] + InvalidMetricsResponse(String), + #[error("Failed to create HTTP client: {0}")] + FailedToCreateHttpClient(String), + #[error("No metrics found for job: {0}")] + NoMetricsFound(String), + } + + // From implementations to handle conversions from error types to our cloneable error type + impl From for ChatCompletionsMetricsError { + fn from(err: reqwest::Error) -> Self { + Self::GetMetricsError(err.to_string()) + } + } + + impl From for ChatCompletionsMetricsError { + fn from(err: std::num::ParseFloatError) -> Self { + Self::InvalidMetricsValue(err.to_string()) + } + } + + impl From for ChatCompletionsMetricsError { + fn from(err: serde_json::Error) -> Self { + Self::InvalidMetricsResponse(err.to_string()) + } + } + + impl From for ChatCompletionsMetricsError { + fn from(err: prometheus_http_query::Error) -> Self { + Self::FailedToCreateHttpClient(err.to_string()) + } } } diff --git a/atoma-service/src/handlers/request_counter.rs b/atoma-service/src/handlers/request_counter.rs index 73444fca..a0570e85 100644 --- a/atoma-service/src/handlers/request_counter.rs +++ b/atoma-service/src/handlers/request_counter.rs @@ -1,8 +1,9 @@ -use atoma_p2p::metrics::RUNNING_REQUESTS; use dashmap::{DashMap, Entry}; use opentelemetry::KeyValue; use tracing::error; +use super::metrics::NUM_RUNNING_REQUESTS; + /// A thread-safe request counter that tracks the number of requests being processed for each inference service. 
#[derive(Clone, Debug)] pub struct RequestCounter { @@ -35,7 +36,8 @@ impl RequestCounter { false } else { *entry += 1; - RUNNING_REQUESTS.record(*entry as u64, &[KeyValue::new("service", key.to_string())]); + NUM_RUNNING_REQUESTS + .record(*entry as u64, &[KeyValue::new("service", key.to_string())]); true } } @@ -46,7 +48,7 @@ impl RequestCounter { Entry::Occupied(mut entry) => { let count = entry.get_mut(); *count -= 1; - RUNNING_REQUESTS + NUM_RUNNING_REQUESTS .record(*count as u64, &[KeyValue::new("service", key.to_string())]); if *count == 0 { entry.remove(); diff --git a/atoma-service/src/middleware.rs b/atoma-service/src/middleware.rs index 8371a089..010d8667 100644 --- a/atoma-service/src/middleware.rs +++ b/atoma-service/src/middleware.rs @@ -811,6 +811,7 @@ pub async fn verify_permissions( message: "Model is not a string".to_string(), endpoint: endpoint.clone(), })?; + utils::check_if_too_many_requests(&state, model, &endpoint).await?; if !state.models.contains(&model.to_string()) { return Err(AtomaServiceError::InvalidBody { message: format!("Model not supported, supported models: {:?}", state.models), @@ -1015,6 +1016,7 @@ pub mod utils { completions::RequestModelCompletions, embeddings::RequestModelEmbeddings, image_generations::RequestModelImageGenerations, + inference_service_metrics::get_all_metrics, request_model::{RequestModel, TokensEstimate}, }; @@ -1587,4 +1589,77 @@ pub mod utils { } Ok(()) } + + /// Checks if the model has too many requests. + /// + /// This function checks if the model has too many requests by checking if the elapsed time since the first occurrence is less than the timeout. 
+ /// + /// # Arguments + /// * `state` - The application state containing the too many requests map + /// * `model` - The model to check + /// * `endpoint` - The API endpoint path being accessed (used for error context) + /// + /// # Returns + /// * `Ok(())` - If the model is not in the too many requests map, if the timeout since the first occurrence has not yet elapsed, or once the metrics have been re-checked + /// * `Err(AtomaServiceError)` - If the metrics re-check for the model could not be performed + /// + /// # Errors + /// This function will return an error if: + /// - No chat completions service URLs are found for the model + /// - The metrics for the model cannot be retrieved + #[instrument(level = "info", skip_all, err)] + pub async fn check_if_too_many_requests( + state: &AppState, + model: &str, + endpoint: &str, + ) -> Result<(), AtomaServiceError> { + if let Some(a) = state.too_many_requests.get(model) { + if a.elapsed().as_millis() < state.too_many_requests_timeout_ms { + tracing::debug!( + target = "atoma-service", + level = "debug", + "Model {} is in the `too_many_requests` map, but the elapsed time since the first occurrence is less than the timeout.", + model + ); + return Ok(()); + } + let chat_completions_service_urls = state + .chat_completions_service_urls + .get(&model.to_lowercase()) + .ok_or_else(|| { + AtomaServiceError::InternalError { + message: format!( + "Chat completions service URL not found, likely that model is not supported by the current node: {}", + model + ), + endpoint: endpoint.to_string(), + } + })?; + let metrics = get_all_metrics(chat_completions_service_urls, model) + .await + .map_err(|e| AtomaServiceError::InternalError { + message: format!("Failed to get metrics for model {model}, with error: {e}"), + endpoint: endpoint.to_string(), + })?; + if metrics + .iter() + .any(|metric| metric.under_lower_threshold(state.memory_lower_threshold)) + { + state.too_many_requests.remove(model); + tracing::debug!( + target = "atoma-service", + level = "debug", + "Model {} is in the `too_many_requests` map, but metrics indicate that it is no longer exceeding the lower 
threshold. Removing from the map.", + model + ); + } + } else { + tracing::debug!( + target = "atoma-service", + level = "debug", + "Model is not in the `too_many_requests` map, so no action is needed here. Processing can continue." + ); + } + Ok(()) + } } diff --git a/atoma-service/src/server.rs b/atoma-service/src/server.rs index 7f4638f6..575870ff 100644 --- a/atoma-service/src/server.rs +++ b/atoma-service/src/server.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, sync::Arc}; +use std::{collections::HashMap, sync::Arc, time::Instant}; use atoma_confidential::types::{ ConfidentialComputeDecryptionRequest, ConfidentialComputeDecryptionResponse, @@ -206,8 +206,25 @@ pub struct AppState { /// The Sui address of the clients that are allowed to use fiat. pub whitelist_sui_addresses_for_fiat: Vec, + /// When was the too many requests triggered for each model. + pub too_many_requests: Arc>, + + /// The time for which we trigger too many requests since the first occurrence. + pub too_many_requests_timeout_ms: u128, + /// Number of running requests for each inference service. pub running_num_requests: Arc, + + /// The upper memory threshold for the node. + /// This threshold is used to determine when the node should start rejecting requests. + pub memory_upper_threshold: f64, + + /// The lower memory threshold for the node. + /// This threshold is used to determine when the node can start accepting requests again. + pub memory_lower_threshold: f64, + + /// The maximum number of queued requests for each inference service. + pub max_num_queued_requests: f64, } /// Creates and configures the main router for the application. diff --git a/atoma-service/src/streamer.rs b/atoma-service/src/streamer.rs index abe26f78..9ded19e1 100644 --- a/atoma-service/src/streamer.rs +++ b/atoma-service/src/streamer.rs @@ -134,6 +134,7 @@ pub struct Streamer { /// kills the connection before the final chunk is sent. 
If, instead, /// the last chunk is handled, the value is updated to the actual number of tokens /// returned by the LLM inference service + #[allow(clippy::struct_field_names)] streamer_computed_num_tokens: i64, /// The number of input tokens for the request num_input_tokens: i64, diff --git a/atoma-service/src/tests.rs b/atoma-service/src/tests.rs index c5f726cc..c180ab41 100644 --- a/atoma-service/src/tests.rs +++ b/atoma-service/src/tests.rs @@ -341,7 +341,12 @@ mod middleware { address_index: 0, stack_retrieve_sender, whitelist_sui_addresses_for_fiat: vec![], + too_many_requests: Arc::new(DashMap::new()), + too_many_requests_timeout_ms: 0, running_num_requests: Arc::new(RequestCounter::new()), + memory_lower_threshold: 1.0, + memory_upper_threshold: 1.0, + max_num_queued_requests: 0.0, }, public_key, signature, diff --git a/atoma-sui/src/client.rs b/atoma-sui/src/client.rs index 43ac0237..0e078f34 100644 --- a/atoma-sui/src/client.rs +++ b/atoma-sui/src/client.rs @@ -86,11 +86,13 @@ impl Client { pub async fn new(config: SuiConfig) -> Result { let sui_config_path = config.sui_config_path(); let sui_config_path = Path::new(&sui_config_path); - let mut wallet_ctx = WalletContext::new( - sui_config_path, - config.request_timeout(), - config.max_concurrent_requests(), - )?; + let mut wallet_ctx = WalletContext::new(sui_config_path)?; + if let Some(request_timeout) = config.request_timeout() { + wallet_ctx = wallet_ctx.with_request_timeout(request_timeout); + } + if let Some(max_concurrent_requests) = config.max_concurrent_requests() { + wallet_ctx = wallet_ctx.with_max_concurrent_requests(max_concurrent_requests); + } let active_address = wallet_ctx.active_address()?; info!("Current active address: {}", active_address); let node_badge = utils::get_node_badge( diff --git a/config.example.toml b/config.example.toml index afbc3765..7a91191c 100644 --- a/config.example.toml +++ b/config.example.toml @@ -46,11 +46,15 @@ image_generations_service_url = 
"http://image-generations:80" # List of models to be used by the service, the current value here is just a placeholder, please change it to the models you want to deploy environment = "development" # or "production" (for use in sentry, you need to set the Sentry DSN) heartbeat_url = "my-heartbeat-url" +max_num_queued_requests = 1 # Maximum number of queued requests for each inference service, this is used to limit the number of requests that can be queued for each service, if the number of queued requests exceeds this value, the service will be considered overloaded and will not accept new requests +memory_lower_threshold = 0.75 # Lower threshold for memory usage, if the memory usage goes below this value, the service will not be considered overloaded +memory_upper_threshold = 0.9 # Upper threshold for memory usage, if the memory usage goes above this value, the service will be considered overloaded metrics_update_interval = 30 models = [ "Infermatic/Llama-3.3-70B-Instruct-FP8-Dynamic" ] revisions = [ "main" ] sentry_dsn = "" # Sentry DSN (for use in sentry, you need to set the Sentry DSN) service_bind_address = "0.0.0.0:3000" +too_many_requests_timeout_ms = 2000 # Timeout for too many requests flag in milliseconds whitelist_sui_addresses_for_fiat = [ ] # Sui addresses that are allowed to use fiat payments [atoma_sui] diff --git a/helm/atoma-node/Chart.lock b/helm/atoma-node/Chart.lock new file mode 100644 index 00000000..04c3290e --- /dev/null +++ b/helm/atoma-node/Chart.lock @@ -0,0 +1,15 @@ +dependencies: +- name: prometheus + repository: https://prometheus-community.github.io/helm-charts + version: 25.0.0 +- name: grafana + repository: https://grafana.github.io/helm-charts + version: 7.0.0 +- name: loki + repository: https://grafana.github.io/helm-charts + version: 5.41.3 +- name: tempo + repository: https://grafana.github.io/helm-charts + version: 1.5.0 +digest: sha256:9ccf73ebce95cc136f31ff45eca85232279cee14c36992a71b4d9052fac45572 +generated: 
"2025-06-06T14:57:32.451129313Z" diff --git a/helm/atoma-node/Chart.yaml b/helm/atoma-node/Chart.yaml new file mode 100644 index 00000000..467b68ed --- /dev/null +++ b/helm/atoma-node/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: atoma-node +description: A Helm chart for deploying Atoma Node and its dependencies +type: application +version: 0.1.0 +appVersion: "1.0.0" +dependencies: + - name: prometheus + version: "25.0.0" + repository: https://prometheus-community.github.io/helm-charts + condition: prometheus.enabled + - name: grafana + version: "7.0.0" + repository: https://grafana.github.io/helm-charts + condition: grafana.enabled + - name: loki + version: "5.41.3" + repository: https://grafana.github.io/helm-charts + condition: loki.enabled + - name: tempo + version: "1.5.0" + repository: https://grafana.github.io/helm-charts + condition: tempo.enabled diff --git a/helm/atoma-node/README.md b/helm/atoma-node/README.md new file mode 100644 index 00000000..2cb50c8f --- /dev/null +++ b/helm/atoma-node/README.md @@ -0,0 +1,253 @@ +# Atoma Node Helm Chart + +This Helm chart deploys the Atoma Node and its dependencies, including VLLM and SGLang inference servers, along with monitoring stack (Prometheus, Grafana, Loki, and Tempo). + +## Prerequisites + +- Kubernetes cluster with GPU support (for VLLM and SGLang) +- Helm 3.x +- NVIDIA device plugin installed in the cluster +- Storage class with sufficient capacity for persistent volumes +- MetalLB (for LoadBalancer services) +- Ingress NGINX (for external access) +- Cert-Manager (for TLS certificates) + +## Infrastructure Components + +The infrastructure components are managed by a separate Helm chart in the `infrastructure` directory. 
These components include: + +- MetalLB: For LoadBalancer services +- NVIDIA Device Plugin: For GPU support +- Ingress NGINX: For external access +- Cert-Manager: For TLS certificate management + +To install the infrastructure components: + +```bash +# Add required repositories +helm repo add metallb https://metallb.github.io/metallb +helm repo add nvidia https://nvidia.github.io/k8s-device-plugin +helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx +helm repo add jetstack https://charts.jetstack.io +helm repo update + +# Install infrastructure +helm dependency update ./helm/infrastructure +helm install infrastructure ./helm/infrastructure \ + --namespace infrastructure \ + --create-namespace +``` + +## Local Testing with Minikube + +For local testing, you can use the provided scripts to set up a Minikube environment: + +1. Make the scripts executable: +```bash +chmod +x scripts/setup-minikube.sh +chmod +x scripts/cleanup-minikube.sh +``` + +2. Run the setup script: +```bash +./scripts/setup-minikube.sh +``` + +3. Install the Atoma Node chart: +```bash +helm install atoma-node ./helm/atoma-node -f values-local.yaml -n atoma +``` + +4. To clean up: +```bash +./scripts/cleanup-minikube.sh +``` + +Note: The Minikube setup script will: +- Start Minikube with GPU support +- Install all required infrastructure components +- Configure MetalLB for LoadBalancer services +- Create necessary namespaces and secrets +- Generate a values file for local development + +## Installation + +1. Add the required Helm repositories: + +```bash +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo add grafana https://grafana.github.io/helm-charts +helm repo update +``` + +2. 
Create a values file for your environment (e.g., `my-values.yaml`): + +```yaml +# Example values file +atomaNode: + image: + repository: ghcr.io/atoma-network/atoma-node + tag: latest + + config: + environment: "production" + heartbeatUrl: "your-heartbeat-url" + # Add other configuration as needed + +vllm: + enabled: true + replicas: 8 # Adjust based on your GPU count + model: "your-model-name" + maxModelLen: 4096 + +sglang: + enabled: true + modelPath: "your-model-path" + +# Configure monitoring stack +prometheus: + enabled: true + server: + persistentVolume: + size: 50Gi + +grafana: + enabled: true + adminPassword: "your-secure-password" + +loki: + enabled: true + persistence: + size: 50Gi + +tempo: + enabled: true + persistence: + size: 50Gi +``` + +3. Create required secrets: + +```bash +# Create SUI config secret +kubectl create secret generic atoma-node-sui-config \ + --from-file=client.yaml=/path/to/sui/client.yaml \ + --from-file=sui.keystore=/path/to/sui/sui.keystore + +# Create SGLang secrets +kubectl create secret generic atoma-node-sglang-secrets \ + --from-literal=hf-token=your-huggingface-token +``` + +4. 
Install the chart: + +```bash +helm install atoma-node ./helm/atoma-node \ + -f my-values.yaml \ + --namespace atoma \ + --create-namespace +``` + +## Configuration + +### Atoma Node + +The Atoma Node can be configured through the `atomaNode` section in values: + +- `image`: Container image configuration +- `resources`: Resource requests and limits +- `service`: Service configuration +- `config`: Application configuration +- `persistence`: Storage configuration + +### VLLM + +VLLM inference servers can be configured through the `vllm` section: + +- `enabled`: Enable/disable VLLM deployment +- `replicas`: Number of VLLM instances +- `resources`: Resource configuration including GPU requests +- `model`: Model name to load +- `maxModelLen`: Maximum model context length + +### SGLang + +SGLang inference server can be configured through the `sglang` section: + +- `enabled`: Enable/disable SGLang deployment +- `resources`: Resource configuration including GPU requests +- `modelPath`: Path to the model + +### Monitoring Stack + +The monitoring stack includes: + +- Prometheus: Metrics collection +- Grafana: Metrics visualization +- Loki: Log aggregation +- Tempo: Distributed tracing + +Each component can be enabled/disabled and configured through its respective section in values. + +## Accessing Services + +- Atoma Node API: `http://atoma-node:3000` +- Grafana: `http://grafana:3000` +- Prometheus: `http://prometheus:9090` +- Loki: `http://loki:3100` +- Tempo: `http://tempo:3200` + +## Upgrading + +To upgrade the deployment: + +```bash +helm upgrade atoma-node ./helm/atoma-node \ + -f my-values.yaml \ + --namespace atoma +``` + +## Uninstalling + +To uninstall the deployment: + +```bash +helm uninstall atoma-node --namespace atoma +``` + +## Troubleshooting + +1. Check pod status: +```bash +kubectl get pods -n atoma +``` + +2. Check pod logs: +```bash +kubectl logs <pod-name> -n atoma +``` + +3. Check persistent volume claims: +```bash +kubectl get pvc -n atoma +``` + +4. 
Check services: +```bash +kubectl get svc -n atoma +``` + +5. Check GPU availability: +```bash +kubectl describe node | grep nvidia.com/gpu +``` + +## Notes + +- Ensure your cluster has sufficient GPU resources for VLLM and SGLang +- Adjust resource requests and limits based on your cluster capacity +- Configure appropriate storage classes for persistent volumes +- Set up proper network policies for security +- Consider using an ingress controller for external access +- For local testing, use the provided Minikube scripts +- Make sure MetalLB is properly configured for LoadBalancer services diff --git a/helm/atoma-node/charts/grafana-7.0.0.tgz b/helm/atoma-node/charts/grafana-7.0.0.tgz new file mode 100644 index 00000000..1ec95bb7 Binary files /dev/null and b/helm/atoma-node/charts/grafana-7.0.0.tgz differ diff --git a/helm/atoma-node/charts/loki-5.41.3.tgz b/helm/atoma-node/charts/loki-5.41.3.tgz new file mode 100644 index 00000000..c44d5228 Binary files /dev/null and b/helm/atoma-node/charts/loki-5.41.3.tgz differ diff --git a/helm/atoma-node/charts/prometheus-25.0.0.tgz b/helm/atoma-node/charts/prometheus-25.0.0.tgz new file mode 100644 index 00000000..e82764bc Binary files /dev/null and b/helm/atoma-node/charts/prometheus-25.0.0.tgz differ diff --git a/helm/atoma-node/charts/tempo-1.5.0.tgz b/helm/atoma-node/charts/tempo-1.5.0.tgz new file mode 100644 index 00000000..58635bfb Binary files /dev/null and b/helm/atoma-node/charts/tempo-1.5.0.tgz differ diff --git a/helm/atoma-node/files/config.toml b/helm/atoma-node/files/config.toml new file mode 100644 index 00000000..c2312a62 --- /dev/null +++ b/helm/atoma-node/files/config.toml @@ -0,0 +1,86 @@ +[atoma_sui] +atoma_db = "0x02920289f426dd1f3c2572d613f7dc92be95041720864a73d44d65585530efc5" +atoma_package_id = "0x8903298ba49a8e83d438e014b2cfd18404324f3a0274b9507b520d5745b85208" +cursor_path = "./cursor.toml" +http_rpc_node_addr = "https://fullnode.testnet.sui.io:443" +limit = 100 +max_concurrent_requests = 10 
+request_timeout = { secs = 300, nanos = 0 } +sui_config_path = "/root/.sui/sui_config/client.yaml" +sui_keystore_path = "/root/.sui/sui_config/sui.keystore" +usdc_package_id = "0xa1ec7fc00a6f40db9693ad1415d0c193ad3906494428cf252621037bd7117e29" + +[node] +address = "0.0.0.0" +port = 3000 + +[daemon] +address = "0.0.0.0" +port = 3001 + +[p2p] +address = "0.0.0.0" +port = 4001 + +[database] +url = "postgresql://atoma_node_dev:dev_password@atoma-node-dev-postgresql:5432/atoma_node_dev" + +[logging] +level = "debug" + +[inference_services] +vllm_endpoint = "http://atoma-node-dev-vllm:8000" +sglang_endpoint = "http://atoma-node-dev-sglang:30000" + +[atoma_state] +database_url = "postgresql://atoma_node_dev:dev_password@atoma-node-dev-postgresql:5432/atoma_node_dev" + +[atoma_state.metrics_collection] +metrics_url = "http://prometheus:9090" +models = [ + [ + "Chat Completions", + "Llama-3.2-3B-Instruct", + ], +] +top_k = 10 + +[atoma_service] +chat_completions_service_urls = { "mistralai/Mistral-Nemo-Instruct-2407" = "http://atoma-node-dev-vllm-0:8000" } +service_bind_address = "0.0.0.0:8080" +password = "password" +open_router_models_file = "/app/open_router.json" +models = [ "Infermatic/Llama-3.3-70B-Instruct-FP8-Dynamic", "mistralai/Mistral-Nemo-Instruct-2407", "deepseek-ai/DeepSeek-V3-0324" ] +revisions = ["main", "main", "main"] +hf_token = "${HF_TOKEN}" +modalities = [["Chat Completions"], ["Chat Completions"], ["Chat Completions"]] +heartbeat_url = "https://hc-ping.com/6981b450-3453-40f9-8b53-4f95a1f30e30" +sentry_dsn = "https://bf63999f90e261523847f4474f4d510b@o4509203694419968.ingest.us.sentry.io/4509203836370944" +environment = "development" + +[atoma_proxy_service] +service_bind_address = "0.0.0.0:8081" +grafana_url = "http://213.130.147.75:3000" +grafana_api_token = "${GRAFANA_API_TOKEN}" +grafana_dashboard_tag = "proxy" +grafana_stats_tag = "stats" + +[atoma_auth] +secret_key = "atoma_auth123" +access_token_lifetime = 1 +refresh_token_lifetime = 1 
+google_client_id = "135471414073-41r9t89rejgfr6bc9aptjpm75o4oedk2.apps.googleusercontent.com" + +[atoma_p2p] +heartbeat_interval = { secs = 30, nanos = 0 } +idle_connection_timeout = { secs = 60, nanos = 0 } +listen_addrs = [ + "/ip4/0.0.0.0/tcp/8083", + "/ip4/0.0.0.0/udp/8083/quic-v1", +] +bootstrap_node_addrs = [ + "/ip4/213.130.147.75/tcp/8083", + "/ip4/213.130.147.75/udp/8083/quic-v1", +] +metrics_endpoints = {} +local_key = "/app/data/local_key" diff --git a/helm/atoma-node/templates/atoma-node-deployment.yaml b/helm/atoma-node/templates/atoma-node-deployment.yaml new file mode 100644 index 00000000..1187392e --- /dev/null +++ b/helm/atoma-node/templates/atoma-node-deployment.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }}-atoma-node + labels: + app: atoma-node + release: {{ .Release.Name }} +spec: + replicas: 1 + selector: + matchLabels: + app: atoma-node + release: {{ .Release.Name }} + template: + metadata: + labels: + app: atoma-node + release: {{ .Release.Name }} + spec: + containers: + - name: atoma-node + image: "{{ .Values.atomaNode.image.repository }}:{{ .Values.atomaNode.image.tag }}" + imagePullPolicy: {{ .Values.atomaNode.image.pullPolicy }} + ports: + - name: service + containerPort: {{ .Values.atomaNode.service.port }} + - name: daemon + containerPort: {{ .Values.atomaNode.service.daemonPort }} + - name: p2p + containerPort: {{ .Values.atomaNode.service.p2pPort }} + resources: + {{- toYaml .Values.atomaNode.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ .Release.Name }}-env + volumeMounts: + - name: config + mountPath: /app/config.toml + subPath: config.toml + - name: data + mountPath: /app/data + - name: logs + mountPath: /app/logs + - name: sui-config + mountPath: /root/.sui/sui_config + volumes: + - name: config + configMap: + name: {{ .Release.Name }}-config + - name: data + emptyDir: {} + - name: logs + emptyDir: {} + - name: sui-config + configMap: + name: {{ 
.Release.Name }}-sui-config \ No newline at end of file diff --git a/helm/atoma-node/templates/configmap.yaml b/helm/atoma-node/templates/configmap.yaml new file mode 100644 index 00000000..fc715914 --- /dev/null +++ b/helm/atoma-node/templates/configmap.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-config + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} +data: + config.toml: |- +{{- if .Values.atomaNode.config.content }} +{{ .Values.atomaNode.config.content | indent 4 }} +{{- else }} +{{ .Files.Get "files/config.toml" | indent 4 }} +{{- end }} + environment: {{ .Values.atomaNode.config.environment | quote }} + log_level: {{ .Values.atomaNode.config.logLevel | quote }} diff --git a/helm/atoma-node/templates/env-configmap.yaml b/helm/atoma-node/templates/env-configmap.yaml new file mode 100644 index 00000000..df0543f9 --- /dev/null +++ b/helm/atoma-node/templates/env-configmap.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-env + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} +data: + .env: |- +{{- range $key, $value := .Values.atomaNode.env }} +{{ $key }}={{ $value }} +{{- end }} \ No newline at end of file diff --git a/helm/atoma-node/templates/ingress.yaml b/helm/atoma-node/templates/ingress.yaml new file mode 100644 index 00000000..a56f7d3f --- /dev/null +++ b/helm/atoma-node/templates/ingress.yaml @@ -0,0 +1,53 @@ +{{- if .Values.atomaNode.ingress.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ .Release.Name }}-ingress + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} + {{- with .Values.atomaNode.ingress.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + ingressClassName: {{ .Values.atomaNode.ingress.className }} + tls: + - hosts: + {{- range .Values.atomaNode.ingress.hosts }} + - {{ .host }} + {{- end }} + secretName: {{ .Release.Name }}-tls + rules: + {{- range .Values.atomaNode.ingress.hosts }} + - host: {{ .host }} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: {{ $.Release.Name }}-atoma-node + port: + number: {{ $.Values.atomaNode.service.port }} + {{- if $.Values.vllm.enabled }} + - path: /vllm + pathType: Prefix + backend: + service: + name: {{ $.Release.Name }}-vllm-0 + port: + number: {{ $.Values.vllm.service.port }} + {{- end }} + {{- if $.Values.sglang.enabled }} + - path: /sglang + pathType: Prefix + backend: + service: + name: {{ $.Release.Name }}-sglang + port: + number: {{ $.Values.sglang.service.port }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atoma-node/templates/pvc.yaml b/helm/atoma-node/templates/pvc.yaml new file mode 100644 index 00000000..d19c0b7a --- /dev/null +++ b/helm/atoma-node/templates/pvc.yaml @@ -0,0 +1,17 @@ +{{- if .Values.atomaNode.persistence.enabled }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Release.Name }}-atoma-node-data + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} +spec: + accessModes: + - {{ .Values.atomaNode.persistence.accessMode }} + resources: + requests: + storage: {{ .Values.atomaNode.persistence.size }} + storageClassName: {{ .Values.atomaNode.persistence.storageClass }} +{{- end }} diff --git a/helm/atoma-node/templates/services.yaml b/helm/atoma-node/templates/services.yaml new file mode 100644 index 00000000..285a684f --- /dev/null +++ b/helm/atoma-node/templates/services.yaml @@ -0,0 +1,39 @@ +{{- if .Values.vllm.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ .Release.Name }}-vllm + labels: + app: vllm + release: 
{{ .Release.Name }} +spec: + type: ClusterIP + ports: + - port: {{ .Values.vllm.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + app: vllm + release: {{ .Release.Name }} +--- +{{- end }} +{{- if .Values.sglang.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ .Release.Name }}-sglang + labels: + app: sglang + release: {{ .Release.Name }} +spec: + type: {{ .Values.sglang.service.type | default "ClusterIP" }} + ports: + - port: {{ .Values.sglang.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + app: sglang + release: {{ .Release.Name }} +{{- end }} diff --git a/helm/atoma-node/templates/sglang-deployment.yaml b/helm/atoma-node/templates/sglang-deployment.yaml new file mode 100644 index 00000000..d8bd0e29 --- /dev/null +++ b/helm/atoma-node/templates/sglang-deployment.yaml @@ -0,0 +1,67 @@ +{{- if .Values.sglang.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }}-sglang + labels: + app: sglang + release: {{ .Release.Name }} +spec: + replicas: 1 + selector: + matchLabels: + app: sglang + release: {{ .Release.Name }} + template: + metadata: + labels: + app: sglang + release: {{ .Release.Name }} + spec: + containers: + - name: sglang + image: "{{ .Values.sglang.image.repository }}:{{ .Values.sglang.image.tag }}" + imagePullPolicy: {{ .Values.sglang.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.sglang.service.port }} + resources: + {{- toYaml .Values.sglang.resources | nindent 12 }} + env: + - name: CUDA_VISIBLE_DEVICES + value: "0" + {{- with .Values.sglang.extraEnv }} + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- if .Values.sglang.model.args }} + args: + {{- toYaml .Values.sglang.model.args | nindent 12 }} + {{- else }} + command: ["python", "-m", "sglang.launch_server"] + args: + - "--model-path" + - "{{ .Values.sglang.model.name }}" + - "--host" + - "0.0.0.0" + - "--port" + - "{{ .Values.sglang.service.port }}" + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ .Release.Name }}-sglang + labels: + app: sglang + release: {{ .Release.Name }} +spec: + type: {{ .Values.sglang.service.type | default "ClusterIP" }} + ports: + - port: {{ .Values.sglang.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + app: sglang + release: {{ .Release.Name }} +{{- end }} diff --git a/helm/atoma-node/templates/sui-config-configmap.yaml b/helm/atoma-node/templates/sui-config-configmap.yaml new file mode 100644 index 00000000..312a6fed --- /dev/null +++ b/helm/atoma-node/templates/sui-config-configmap.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-sui-config + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} +data: + client.yaml: |- +{{ .Files.Get "files/sui_config/client.yaml" | indent 4 }} + sui.keystore: |- +{{ .Files.Get "files/sui_config/sui.keystore" | indent 4 }} + sui.aliases: |- +{{ .Files.Get "files/sui_config/sui.aliases" | indent 4 }} \ No newline at end of file diff --git a/helm/atoma-node/templates/vllm-deployment.yaml b/helm/atoma-node/templates/vllm-deployment.yaml new file mode 100644 index 00000000..0bdcecc1 --- /dev/null +++ b/helm/atoma-node/templates/vllm-deployment.yaml @@ -0,0 +1,73 @@ +{{- if .Values.vllm.enabled }} +{{- range $i, $e := until (.Values.vllm.replicas | int) }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ $.Release.Name }}-vllm-{{ $i }} + labels: + app: vllm + instance: "{{ $i }}" + release: {{ $.Release.Name }} +spec: + 
replicas: 1 + selector: + matchLabels: + app: vllm + instance: "{{ $i }}" + release: {{ $.Release.Name }} + template: + metadata: + labels: + app: vllm + instance: "{{ $i }}" + release: {{ $.Release.Name }} + spec: + containers: + - name: vllm + image: "{{ $.Values.vllm.image.repository }}:{{ $.Values.vllm.image.tag }}" + imagePullPolicy: {{ $.Values.vllm.image.pullPolicy }} + ports: + - name: http + containerPort: {{ $.Values.vllm.service.port }} + resources: + {{- toYaml $.Values.vllm.resources | nindent 12 }} + env: + - name: VLLM_ATTENTION_BACKEND + value: "FLASH_ATTN" + - name: VLLM_FLASH_ATTN_VERSION + value: "3" + - name: VLLM_USE_V1 + value: "1" + - name: CUDA_VISIBLE_DEVICES + value: "{{ $i }}" + {{- with $.Values.vllm.extraEnv }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if $.Values.vllm.model.args }} + args: + {{- toYaml $.Values.vllm.model.args | nindent 12 }} + {{- else }} + command: {{ $.Values.vllm.command | default (list "--model" $.Values.vllm.model "--max-model-len" $.Values.vllm.maxModelLen) }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ $.Release.Name }}-vllm-{{ $i }} + labels: + app: vllm + instance: "{{ $i }}" + release: {{ $.Release.Name }} +spec: + type: {{ $.Values.vllm.service.type | default "ClusterIP" }} + ports: + - port: {{ $.Values.vllm.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + app: vllm + instance: "{{ $i }}" + release: {{ $.Release.Name }} +{{- end }} +{{- end }} diff --git a/helm/atoma-node/values-dev.yaml b/helm/atoma-node/values-dev.yaml new file mode 100644 index 00000000..45066b34 --- /dev/null +++ b/helm/atoma-node/values-dev.yaml @@ -0,0 +1,246 @@ +# Development environment settings +global: + environment: development + domain: atoma.network + +# Main application settings +atomaNode: + image: + repository: ghcr.io/atoma-network/atoma-node + tag: latest + pullPolicy: Always + replicas: 1 + resources: + requests: + memory: "4Gi" + cpu: "2" + limits: + 
memory: "8Gi" + cpu: "4" + service: + port: 3000 + daemonPort: 3001 + p2pPort: 4001 + ingress: + enabled: true + className: "nginx" + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-staging" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + hosts: + - host: node-dev.atoma.network + paths: + - path: / + pathType: Prefix + service: main + - host: vllm-dev.atoma.network + paths: + - path: / + pathType: Prefix + service: vllm + - host: sglang-dev.atoma.network + paths: + - path: / + pathType: Prefix + service: sglang + persistence: + enabled: true + storageClass: "gp2" + size: 10Gi + accessMode: ReadWriteOnce + config: + environment: "development" + logLevel: "debug" + # Atoma node configuration will be read from files/config.toml + extraEnv: + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: http://otel-collector:4317 + - name: RUST_LOG + value: "info" + +# VLLM settings for Mistral-Nemo +vllm: + enabled: true + replicas: 1 + image: + repository: vllm/vllm-openai + tag: v0.6.2 + pullPolicy: Always + resources: + requests: + memory: "16Gi" + cpu: "4" + nvidia.com/gpu: 1 + limits: + memory: "32Gi" + cpu: "8" + nvidia.com/gpu: 1 + service: + port: 8000 + model: + name: "mistralai/Mistral-Nemo-Instruct-2407" + args: + - "--model" + - "mistralai/Mistral-Nemo-Instruct-2407" + - "--served-model-name" + - "mistral-nemo" + - "--host" + - "0.0.0.0" + - "--port" + - "8000" + - "--tensor-parallel-size" + - "1" + - "--gpu-memory-utilization" + - "0.9" + - "--max-model-len" + - "8192" + extraEnv: + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: http://otel-collector:4317 + - name: OTEL_SERVICE_NAME + value: vllm + - name: OTEL_LOGS_EXPORTER + value: otlp + - name: HF_TOKEN + value: "${HF_TOKEN}" + +# SGLang settings for Mistral-Nemo +sglang: + enabled: true + image: + repository: lmsysorg/sglang + tag: v0.3.5.post1 + pullPolicy: Always + resources: + requests: + memory: "32Gi" + cpu: "8" + nvidia.com/gpu: 1 + limits: + memory: "64Gi" + cpu: "16" + nvidia.com/gpu: 1 + 
service: + port: 30000 + type: ClusterIP + model: + name: "mistralai/Mistral-Nemo-Instruct-2407" + args: + - "--model-path" + - "mistralai/Mistral-Nemo-Instruct-2407" + - "--host" + - "0.0.0.0" + - "--port" + - "30000" + - "--tp-size" + - "1" + - "--mem-fraction-static" + - "0.9" + extraEnv: + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: http://otel-collector:4317 + - name: OTEL_SERVICE_NAME + value: sglang + - name: OTEL_LOGS_EXPORTER + value: otlp + - name: HF_TOKEN + value: "${HF_TOKEN}" + +# PostgreSQL settings +postgresql: + enabled: true + auth: + database: atoma_node_dev + username: atoma_node_dev + password: "dev_password" + primary: + persistence: + size: 5Gi + service: + ports: + postgresql: 5432 + +# Monitoring stack settings +prometheus: + enabled: true + server: + persistentVolume: + size: 5Gi + ingress: + enabled: true + className: "nginx" + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-staging" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + hosts: + - prometheus-dev.atoma.network + service: + type: LoadBalancer + servicePort: 9090 + targetPort: 9090 + +grafana: + enabled: true + persistence: + size: 5Gi + adminUser: admin + adminPassword: admin123 + ingress: + enabled: true + hosts: + - grafana-dev.atoma.network + service: + type: LoadBalancer + port: 3000 + targetPort: 3000 + annotations: + metallb.universe.tf/address-pool: grafana-pool + +loki: + enabled: true + deploymentMode: SingleBinary + loki: + auth_enabled: false + server: + http_listen_port: 3100 + common: + path_prefix: /var/loki + replication_factor: 1 + ring: + kvstore: + store: inmemory + schema_config: + configs: + - from: 2020-10-24 + store: boltdb-shipper + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h + storage_config: + boltdb_shipper: + active_index_directory: /var/loki/index + shared_store: filesystem + filesystem: + directory: /var/loki/chunks + singleBinary: + replicas: 1 + persistence: + enabled: true + size: 10Gi + write: + 
replicas: 0 + read: + replicas: 0 + backend: + replicas: 0 + +tempo: + enabled: true + persistence: + size: 5Gi + ingress: + enabled: true + hosts: + - tempo-dev.atoma.network diff --git a/helm/atoma-node/values.yaml b/helm/atoma-node/values.yaml new file mode 100644 index 00000000..c15817e8 --- /dev/null +++ b/helm/atoma-node/values.yaml @@ -0,0 +1,179 @@ +# Global settings +global: + environment: production + domain: atoma.network + +# Main application settings +atomaNode: + image: + repository: ghcr.io/atoma-network/atoma-node + tag: latest + pullPolicy: IfNotPresent + replicas: 1 + resources: + requests: + memory: "4Gi" + cpu: "2" + limits: + memory: "8Gi" + cpu: "4" + service: + main: + port: 3000 + daemon: + port: 3001 + p2p: + port: 4001 + ingress: + enabled: true + className: "nginx" + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-prod" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + hosts: + - host: node.atoma.network + paths: + - path: / + pathType: Prefix + service: main + - host: vllm.atoma.network + paths: + - path: / + pathType: Prefix + service: vllm + - host: sglang.atoma.network + paths: + - path: / + pathType: Prefix + service: sglang + persistence: + enabled: true + storageClass: "standard" + size: 10Gi + accessMode: ReadWriteOnce + +# VLLM settings +vllm: + enabled: true + replicas: 8 + image: + repository: vllm/vllm-openai + tag: v0.8.1 + pullPolicy: IfNotPresent + resources: + requests: + memory: "16Gi" + cpu: "4" + nvidia.com/gpu: 1 + limits: + memory: "32Gi" + cpu: "8" + nvidia.com/gpu: 1 + service: + port: 8000 + +# SGLang settings +sglang: + enabled: true + image: + repository: lmsysorg/sglang + tag: latest + pullPolicy: IfNotPresent + resources: + requests: + memory: "32Gi" + cpu: "8" + nvidia.com/gpu: 8 + limits: + memory: "64Gi" + cpu: "16" + nvidia.com/gpu: 8 + service: + port: 3000 + +# PostgreSQL settings +postgresql: + enabled: true + auth: + database: atoma_node + username: atoma_node + password: "" + primary: + 
persistence: + size: 10Gi + service: + port: 5432 + +# Monitoring stack settings +prometheus: + enabled: true + server: + persistentVolume: + size: 10Gi + alertmanager: + persistentVolume: + size: 2Gi + +grafana: + adminUser: admin + adminPassword: "" # no default credential in production values; supply at install time (e.g. --set grafana.adminPassword=...) + enabled: true + persistence: + size: 10Gi + ingress: + enabled: true + hosts: + - grafana.atoma.network + +loki: + enabled: true + persistence: + size: 10Gi + ingress: + enabled: true + hosts: + - loki.atoma.network + +tempo: + enabled: true + persistence: + size: 10Gi + ingress: + enabled: true + hosts: + - tempo.atoma.network + +# OpenTelemetry Collector settings +otelCollector: + enabled: true + config: + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + processors: + batch: {} + exporters: + prometheus: + endpoint: 0.0.0.0:8889 # address the collector listens on for Prometheus to scrape, not the Prometheus server address + loki: + endpoint: http://loki:3100/loki/api/v1/push # the loki exporter requires the full push URL + otlp: + endpoint: tempo:4317 # NOTE(review): the OTLP/gRPC exporter uses TLS by default; confirm Tempo serves TLS or set tls.insecure + service: + pipelines: + traces: + receivers: [otlp] + processors: [batch] + exporters: [otlp] + metrics: + receivers: [otlp] + processors: [batch] + exporters: [prometheus] + logs: + receivers: [otlp] + processors: [batch] + exporters: [loki] diff --git a/helm/infrastructure/Chart.yaml b/helm/infrastructure/Chart.yaml new file mode 100644 index 00000000..ca58e447 --- /dev/null +++ b/helm/infrastructure/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: infrastructure +description: Infrastructure components for Atoma Node deployment +type: application +version: 0.1.0 +appVersion: "1.0.0" +dependencies: + - name: metallb + version: "0.13.10" + repository: https://metallb.github.io/metallb + condition: metallb.enabled + - name: nvidia-device-plugin + version: "0.14.0" + repository: https://nvidia.github.io/k8s-device-plugin + condition: nvidiaDevicePlugin.enabled + - name: ingress-nginx + version: "4.7.1" + repository: https://kubernetes.github.io/ingress-nginx + condition: ingressNginx.enabled + - name: cert-manager + version: "v1.13.3" + repository:
https://charts.jetstack.io + condition: certManager.enabled diff --git a/helm/infrastructure/metallb-config.yaml b/helm/infrastructure/metallb-config.yaml new file mode 100644 index 00000000..0f93290f --- /dev/null +++ b/helm/infrastructure/metallb-config.yaml @@ -0,0 +1,32 @@ +apiVersion: metallb.io/v1beta1 +kind: IPAddressPool +metadata: + name: traefik-pool + namespace: metallb-system +spec: + addresses: + - 10.0.235.50/32 +--- +apiVersion: metallb.io/v1beta1 +kind: IPAddressPool +metadata: + name: prometheus-pool + namespace: metallb-system +spec: + addresses: + - 10.0.235.51/32 +--- +apiVersion: metallb.io/v1beta1 +kind: L2Advertisement +metadata: + name: l2 + namespace: metallb-system +--- +apiVersion: metallb.io/v1beta1 +kind: IPAddressPool +metadata: + name: grafana-pool + namespace: metallb-system +spec: + addresses: + - 10.0.235.52/32 diff --git a/helm/infrastructure/values.yaml b/helm/infrastructure/values.yaml new file mode 100644 index 00000000..3cce875d --- /dev/null +++ b/helm/infrastructure/values.yaml @@ -0,0 +1,53 @@ +# NOTE(review): Helm forwards parent values to a subchart only under the dependency's name or alias. +# Chart.yaml declares nvidia-device-plugin, ingress-nginx and cert-manager without aliases, so the +# nvidiaDevicePlugin / ingressNginx / certManager blocks below only drive the `condition:` toggles; +# confirm whether their other settings (e.g. installCRDs, controller.*) are meant to reach the subcharts. + +# MetalLB Configuration +metallb: + enabled: true + # configInline is no longer supported since MetalLB v0.13.0, and Chart.yaml pins chart 0.13.10. + # Address pools are configured through IPAddressPool / L2Advertisement resources instead + # (see metallb-config.yaml). + +# NVIDIA Device Plugin Configuration +nvidiaDevicePlugin: + enabled: true + runtimeClassName: nvidia + resources: + requests: + cpu: "100m" + memory: "100Mi" + limits: + cpu: "200m" + memory: "200Mi" + +# Ingress NGINX Configuration +ingressNginx: + enabled: true + controller: + service: + type: LoadBalancer + resources: + requests: + cpu: "100m" + memory: "128Mi" + limits: + cpu: "200m" + memory: "256Mi" + metrics: + enabled: true + serviceMonitor: + enabled: true + +# Cert-Manager Configuration +certManager: + enabled: true + installCRDs: true + resources: + requests: + cpu: "100m" + memory: "128Mi" + limits: + cpu: "200m" + memory: "256Mi" diff --git a/scripts/cleanup-minikube.sh b/scripts/cleanup-minikube.sh new file mode 100644 index
00000000..b9816ea4 --- /dev/null +++ b/scripts/cleanup-minikube.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# Exit on error +set -e + +echo "Cleaning up Minikube environment..." + +# Delete Atoma Node release +echo "Deleting Atoma Node release..." +helm uninstall atoma-node -n atoma || true + +# Delete infrastructure release +echo "Deleting infrastructure release..." +helm uninstall infrastructure -n infrastructure || true + +# Delete namespaces +echo "Deleting namespaces..." +kubectl delete namespace atoma || true +kubectl delete namespace infrastructure || true + +# Stop Minikube (best-effort, like the steps above: under set -e a failed stop must not skip the delete below) +echo "Stopping Minikube..." +minikube stop || true + +# Delete Minikube cluster +echo "Deleting Minikube cluster..." +minikube delete + +echo "Cleanup complete!" diff --git a/scripts/setup-minikube.sh b/scripts/setup-minikube.sh new file mode 100644 index 00000000..fa6f1123 --- /dev/null +++ b/scripts/setup-minikube.sh @@ -0,0 +1,141 @@ +#!/bin/bash + +# Exit on error +set -e + +# Check if minikube is installed +if ! command -v minikube &> /dev/null; then + echo "Minikube is not installed. Please install it first." + exit 1 +fi + +# Check if docker is installed +if ! command -v docker &> /dev/null; then + echo "Docker is not installed. Please install it first." + exit 1 +fi + +# Check if helm is installed +if ! command -v helm &> /dev/null; then + echo "Helm is not installed. Please install it first." + exit 1 +fi + +# Start minikube with GPU support +echo "Starting Minikube with GPU support..." +minikube start \ + --driver=docker \ + --container-runtime=containerd \ + --feature-gates=DevicePlugins=true \ + --addons=ingress \ + --cpus=8 \ + --memory=16384 \ + --gpus=1 + +# Enable GPU support +echo "Enabling GPU support..." +minikube ssh "sudo nvidia-smi" + +# Add required Helm repositories +echo "Adding Helm repositories..."
+helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo add grafana https://grafana.github.io/helm-charts +helm repo add metallb https://metallb.github.io/metallb +helm repo add nvidia https://nvidia.github.io/k8s-device-plugin +helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx +helm repo add jetstack https://charts.jetstack.io +helm repo update + +# Install infrastructure components +echo "Installing infrastructure components..." +helm dependency update ./helm/infrastructure +helm install infrastructure ./helm/infrastructure \ + --namespace infrastructure \ + --create-namespace \ + --wait + +# Configure MetalLB for Minikube +echo "Configuring MetalLB..." +kubectl apply -f - < values-local.yaml <