From e9f7914a6a3e2d64109ae6e4431ef5f862e3df44 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Mon, 14 Jul 2025 17:05:47 -0400 Subject: [PATCH 01/14] add bootnodes to cmd; impl kad actions and handlers; worker upload to dht on startup --- .env.example | 4 +- Cargo.lock | 1 + Makefile | 22 +-- crates/orchestrator/src/main.rs | 5 + crates/orchestrator/src/p2p/mod.rs | 3 +- crates/p2p/Cargo.toml | 1 + crates/p2p/src/behaviour.rs | 47 +++++- crates/p2p/src/discovery.rs | 122 +++++++++++++++ crates/p2p/src/lib.rs | 60 +++++++- crates/shared/src/p2p/service.rs | 31 +++- crates/validator/src/main.rs | 234 ++++++++++++++++++++++------- crates/validator/src/p2p/mod.rs | 12 +- crates/worker/src/cli/command.rs | 203 ++++++++++++++++++------- crates/worker/src/p2p/mod.rs | 59 +++++--- 14 files changed, 637 insertions(+), 167 deletions(-) create mode 100644 crates/p2p/src/discovery.rs diff --git a/.env.example b/.env.example index 7555ff66..0b7c4e08 100644 --- a/.env.example +++ b/.env.example @@ -14,6 +14,8 @@ WORK_VALIDATION_CONTRACT=0x0B306BF915C4d645ff596e518fAf3F9669b97016 # Discovery # --------- DISCOVERY_URLS=http://localhost:8089 +ORCHESTRATOR_P2P_ADDRESS=/ip4/127.0.0.1/tcp/4004/p2p/12D3KooWJj3haDEzxGSbGSAvXCiE9pDYC9xHDdtQe8B2donhfwXL +ORCHESTRATOR_LIBP2P_PRIVATE_KEY="d0884c9823a0a2c846dbf5e71853bc5f80b2ec5d2de46532cdbe8ab46f020836845c655bb6fb3fd7f45d09a9ab687656606e8e2a841bf0f9cb376c618e6a3887" # Accounts (Anvil Test Accounts - DO NOT USE IN PRODUCTION) # --------------------------------------------------------- @@ -62,4 +64,4 @@ VALIDATOR_API_KEY=prime # Webhooks (Optional) # ------------------- -WEBHOOK_CONFIGS='[]' \ No newline at end of file +WEBHOOK_CONFIGS='[]' diff --git a/Cargo.lock b/Cargo.lock index 67fc79bd..998525cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6215,6 +6215,7 @@ name = "p2p" version = "0.3.11" dependencies = [ "anyhow", + "hex", "libp2p", "log", "nalgebra", diff --git a/Makefile b/Makefile index decd07f6..b75ea346 100644 --- a/Makefile +++ 
b/Makefile @@ -76,19 +76,19 @@ up: @tmux set -t prime-dev pane-border-format " #{pane_title} " @# Start Worker pane first (pane 0) @tmux select-pane -t prime-dev:services.0 -T "Worker" - @# Discovery pane (pane 1) + # @# Discovery pane (pane 1) + # @tmux split-window -h -t prime-dev:services + # @tmux select-pane -t prime-dev:services.1 -T "Discovery" + # @tmux send-keys -t prime-dev:services.1 'make watch-discovery' C-m + @# Orchestrator pane (pane 1) @tmux split-window -h -t prime-dev:services - @tmux select-pane -t prime-dev:services.1 -T "Discovery" - @tmux send-keys -t prime-dev:services.1 'make watch-discovery' C-m + @tmux select-pane -t prime-dev:services.1 -T "Orchestrator" + @tmux send-keys -t prime-dev:services.1 'make watch-orchestrator' C-m + @tmux select-layout -t prime-dev:services even-horizontal @# Validator pane (pane 2) @tmux split-window -h -t prime-dev:services.1 @tmux select-pane -t prime-dev:services.2 -T "Validator" @tmux send-keys -t prime-dev:services.2 'make watch-validator' C-m - @# Orchestrator pane (pane 3) - @tmux split-window -h -t prime-dev:services.2 - @tmux select-pane -t prime-dev:services.3 -T "Orchestrator" - @tmux send-keys -t prime-dev:services.3 'make watch-orchestrator' C-m - @tmux select-layout -t prime-dev:services even-horizontal @# Create background window for docker logs @tmux new-window -t prime-dev -n background @tmux send-keys -t prime-dev:background 'docker compose logs -f reth redis' C-m @@ -121,7 +121,7 @@ watch-discovery: watch-worker: set -a; source ${ENV_FILE}; set +a; \ - cargo watch -w crates/worker/src -x "run --bin worker -- run --port 8091 --discovery-url $${DISCOVERY_URLS:-$${DISCOVERY_URL:-http://localhost:8089}} --compute-pool-id $$WORKER_COMPUTE_POOL_ID --skip-system-checks $${LOKI_URL:+--loki-url $${LOKI_URL}} --log-level $${LOG_LEVEL:-info}" + cargo watch -w crates/worker/src -x "run --bin worker -- run --bootnodes $${ORCHESTRATOR_P2P_ADDRESS} --port 8091 --discovery-url 
$${DISCOVERY_URLS:-$${DISCOVERY_URL:-http://localhost:8089}} --compute-pool-id $$WORKER_COMPUTE_POOL_ID --skip-system-checks $${LOKI_URL:+--loki-url $${LOKI_URL}} --log-level $${LOG_LEVEL:-info}" watch-worker-two: set -a; source ${ENV_FILE}; set +a; \ @@ -132,11 +132,11 @@ watch-check: watch-validator: set -a; source ${ENV_FILE}; set +a; \ - cargo watch -w crates/validator/src -x "run --bin validator -- --validator-key $${PRIVATE_KEY_VALIDATOR} --rpc-url $${RPC_URL} --discovery-urls $${DISCOVERY_URLS:-$${DISCOVERY_URL:-http://localhost:8089}} --pool-id $${WORKER_COMPUTE_POOL_ID} $${BUCKET_NAME:+--bucket-name $${BUCKET_NAME}} -l $${LOG_LEVEL:-info} --toploc-grace-interval $${TOPLOC_GRACE_INTERVAL:-30} --incomplete-group-grace-period-minutes $${INCOMPLETE_GROUP_GRACE_PERIOD_MINUTES:-1} --use-grouping" + cargo watch -w crates/validator/src -x "run --bin validator -- --bootnodes $${ORCHESTRATOR_P2P_ADDRESS} --validator-key $${PRIVATE_KEY_VALIDATOR} --rpc-url $${RPC_URL} --discovery-urls $${DISCOVERY_URLS:-$${DISCOVERY_URL:-http://localhost:8089}} --pool-id $${WORKER_COMPUTE_POOL_ID} $${BUCKET_NAME:+--bucket-name $${BUCKET_NAME}} -l $${LOG_LEVEL:-info} --toploc-grace-interval $${TOPLOC_GRACE_INTERVAL:-30} --incomplete-group-grace-period-minutes $${INCOMPLETE_GROUP_GRACE_PERIOD_MINUTES:-1} --use-grouping" watch-orchestrator: set -a; source ${ENV_FILE}; set +a; \ - cargo watch -w crates/orchestrator/src -x "run --bin orchestrator -- -r $$RPC_URL -k $$POOL_OWNER_PRIVATE_KEY -d 0 -p 8090 -i 10 -u http://localhost:8090 --discovery-urls $${DISCOVERY_URLS:-$${DISCOVERY_URL:-http://localhost:8089}} --compute-pool-id $$WORKER_COMPUTE_POOL_ID $${BUCKET_NAME:+--bucket-name $$BUCKET_NAME} -l $${LOG_LEVEL:-info} --hourly-s3-upload-limit $${HOURLY_S3_LIMIT:-3} --max-healthy-nodes-with-same-endpoint $${MAX_HEALTHY_NODES_WITH_SAME_ENDPOINT:-2}" + cargo watch -w crates/orchestrator/src -x "run --bin orchestrator -- -r $$RPC_URL -k $$POOL_OWNER_PRIVATE_KEY -d 0 -p 8090 -i 10 -u 
http://localhost:8090 --libp2p-private-key $${ORCHESTRATOR_LIBP2P_PRIVATE_KEY} --discovery-urls $${DISCOVERY_URLS:-$${DISCOVERY_URL:-http://localhost:8089}} --compute-pool-id $$WORKER_COMPUTE_POOL_ID $${BUCKET_NAME:+--bucket-name $$BUCKET_NAME} -l $${LOG_LEVEL:-info} --hourly-s3-upload-limit $${HOURLY_S3_LIMIT:-3} --max-healthy-nodes-with-same-endpoint $${MAX_HEALTHY_NODES_WITH_SAME_ENDPOINT:-2}" build-worker: cargo build --release --bin worker diff --git a/crates/orchestrator/src/main.rs b/crates/orchestrator/src/main.rs index 5f8e2af2..686392f7 100644 --- a/crates/orchestrator/src/main.rs +++ b/crates/orchestrator/src/main.rs @@ -96,6 +96,11 @@ struct Args { /// Libp2p port #[arg(long, default_value = "4004")] libp2p_port: u16, + + /// Hex-encoded libp2p private key. + /// A new key is generated if this is not provided. + #[arg(long)] + libp2p_private_key: Option, } #[tokio::main] diff --git a/crates/orchestrator/src/p2p/mod.rs b/crates/orchestrator/src/p2p/mod.rs index c11ca2bf..836eaca9 100644 --- a/crates/orchestrator/src/p2p/mod.rs +++ b/crates/orchestrator/src/p2p/mod.rs @@ -32,9 +32,10 @@ impl Service { let (invite_tx, invite_rx) = tokio::sync::mpsc::channel(100); let (get_task_logs_tx, get_task_logs_rx) = tokio::sync::mpsc::channel(100); let (restart_task_tx, restart_task_rx) = tokio::sync::mpsc::channel(100); - let (inner, outgoing_message_tx) = P2PService::new( + let (inner, outgoing_message_tx, kademlia_action_tx) = P2PService::new( keypair, port, + vec![], cancellation_token.clone(), wallet, Protocols::new() diff --git a/crates/p2p/Cargo.toml b/crates/p2p/Cargo.toml index 498fbd29..651902a4 100644 --- a/crates/p2p/Cargo.toml +++ b/crates/p2p/Cargo.toml @@ -14,6 +14,7 @@ tokio = {workspace = true, features = ["sync"]} tokio-util = { workspace = true, features = ["rt"] } tracing = { workspace = true } log = { workspace = true } +hex = {workspace = true} [lints] workspace = true diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index 
76f959e8..c458f1cb 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -5,15 +5,19 @@ use libp2p::connection_limits; use libp2p::connection_limits::ConnectionLimits; use libp2p::identify; use libp2p::identity; -use libp2p::kad; -// use libp2p::kad::store::MemoryStore; +use libp2p::kad::store::MemoryStore; +use libp2p::kad::{self, QueryId}; use libp2p::mdns; use libp2p::ping; use libp2p::request_response; use libp2p::swarm::NetworkBehaviour; use log::debug; +use std::collections::HashMap; +use std::sync::Arc; use std::time::Duration; +use tokio::sync::Mutex; +use crate::discovery::OngoingKademliaQuery; use crate::message::IncomingMessage; use crate::message::{Request, Response}; use crate::Protocols; @@ -29,8 +33,7 @@ pub(crate) struct Behaviour { // discovery mdns: mdns::tokio::Behaviour, - // comment out kademlia for now as it requires bootnodes to be provided - // kademlia: kad::Behaviour, + kademlia: kad::Behaviour, // protocols identify: identify::Behaviour, @@ -116,7 +119,7 @@ impl Behaviour { let mdns = mdns::tokio::Behaviour::new(mdns::Config::default(), peer_id) .context("failed to create mDNS behaviour")?; - // let kademlia = kad::Behaviour::new(peer_id, MemoryStore::new(peer_id)); + let kademlia = kad::Behaviour::new(peer_id, MemoryStore::new(peer_id)); let identify = identify::Behaviour::new( identify::Config::new(PRIME_STREAM_PROTOCOL.to_string(), keypair.public()) @@ -127,7 +130,7 @@ impl Behaviour { Ok(Self { autonat, connection_limits, - // kademlia, + kademlia, mdns, identify, ping, @@ -143,14 +146,42 @@ impl Behaviour { ) -> &mut request_response::cbor::Behaviour { &mut self.request_response } + + pub(crate) fn kademlia(&mut self) -> &mut kad::Behaviour { + &mut self.kademlia + } } impl BehaviourEvent { - pub(crate) async fn handle(self, message_tx: tokio::sync::mpsc::Sender) { + pub(crate) async fn handle( + self, + message_tx: tokio::sync::mpsc::Sender, + ongoing_kademlia_queries: Arc>>, + ) { match self { 
BehaviourEvent::Autonat(_event) => {} BehaviourEvent::Identify(_event) => {} - BehaviourEvent::Kademlia(_event) => { // TODO: potentially on outbound queries + BehaviourEvent::Kademlia(event) => { + match event { + kad::Event::OutboundQueryProgressed { + id, + result, + stats: _, + step, + } => { + debug!("kademlia query {id:?} progressed with step {step:?} and result {result:?}"); + + let mut ongoing_queries = ongoing_kademlia_queries.lock().await; + if let Some(query) = ongoing_queries.get_mut(&id) { + let _ = query.result_tx.send(Ok(result)).await; + } + + if step.last { + ongoing_queries.remove(&id); + } + } + _ => {} + } } BehaviourEvent::Mdns(_event) => {} BehaviourEvent::Ping(_event) => {} diff --git a/crates/p2p/src/discovery.rs b/crates/p2p/src/discovery.rs new file mode 100644 index 00000000..10d33b29 --- /dev/null +++ b/crates/p2p/src/discovery.rs @@ -0,0 +1,122 @@ +use anyhow::{Context as _, Result}; +use libp2p::kad::QueryResult; +use libp2p::kad::{self, store::RecordStore, QueryId, Quorum}; +use std::collections::HashMap; +use std::sync::Arc; +use tokio::sync::Mutex; + +pub const WORKER_DHT_KEY: &str = "prime-worker/1.0.0"; + +pub struct KademliaActionWithChannel { + kad_action: KademliaAction, + result_tx: tokio::sync::mpsc::Sender>, +} + +impl KademliaActionWithChannel { + pub fn new( + kad_action: KademliaAction, + result_tx: tokio::sync::mpsc::Sender>, + ) -> Self { + Self { + kad_action, + result_tx, + } + } + + pub(crate) fn result_tx(&self) -> tokio::sync::mpsc::Sender> { + self.result_tx.clone() + } +} + +pub enum KademliaAction { + PutRecord { key: Vec, value: Vec }, + GetRecord(Vec), + StartProviding(Vec), + StopProviding(Vec), + GetProviders(Vec), +} + +impl KademliaAction { + pub fn into_kademlia_action_with_channel( + self, + ) -> ( + KademliaActionWithChannel, + tokio::sync::mpsc::Receiver>, + ) { + let (result_tx, result_rx) = tokio::sync::mpsc::channel(1); + ( + KademliaActionWithChannel { + kad_action: self, + result_tx, + }, + 
result_rx, + ) + } +} + +pub(crate) struct OngoingKademliaQuery { + pub(crate) result_tx: tokio::sync::mpsc::Sender>, +} + +pub(crate) async fn handle_kademlia_action( + kademlia: &mut kad::Behaviour, + action: KademliaActionWithChannel, + ongoing_kademlia_queries: Arc>>, +) -> Result<()> { + match action.kad_action { + KademliaAction::PutRecord { key, value } => { + let query_id = kademlia + .put_record( + kad::Record { + key: key.into(), + value: value.into(), + publisher: None, + expires: None, + }, + Quorum::One, + ) + .context("failed to put record in dht")?; + let mut ongoing_queries = ongoing_kademlia_queries.lock().await; + ongoing_queries.insert( + query_id, + OngoingKademliaQuery { + result_tx: action.result_tx, + }, + ); + } + KademliaAction::GetRecord(key) => { + let query_id = kademlia.get_record(key.into()); + let mut ongoing_queries = ongoing_kademlia_queries.lock().await; + ongoing_queries.insert( + query_id, + OngoingKademliaQuery { + result_tx: action.result_tx, + }, + ); + } + KademliaAction::StartProviding(key) => { + let query_id = kademlia + .start_providing(key.into()) + .context("failed to start providing key in dht")?; + let mut ongoing_queries = ongoing_kademlia_queries.lock().await; + ongoing_queries.insert( + query_id, + OngoingKademliaQuery { + result_tx: action.result_tx, + }, + ); + } + KademliaAction::StopProviding(key) => kademlia.stop_providing(&key.into()), + KademliaAction::GetProviders(key) => { + let query_id = kademlia.get_providers(key.into()); + let mut ongoing_queries = ongoing_kademlia_queries.lock().await; + ongoing_queries.insert( + query_id, + OngoingKademliaQuery { + result_tx: action.result_tx, + }, + ); + } + } + Ok(()) +} diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index f5bc648c..92b78ad3 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -1,5 +1,6 @@ use anyhow::Context; use anyhow::Result; +use libp2p::kad::QueryId; use libp2p::noise; use libp2p::swarm::SwarmEvent; use libp2p::tcp; 
@@ -8,14 +9,20 @@ use libp2p::Swarm; use libp2p::SwarmBuilder; use libp2p::{identity, Transport}; use log::debug; +use log::warn; +use std::collections::HashMap; +use std::sync::Arc; use std::time::Duration; +use tokio::sync::Mutex; mod behaviour; +mod discovery; mod message; mod protocol; use behaviour::Behaviour; +pub use discovery::*; pub use message::*; pub use protocol::*; @@ -24,6 +31,11 @@ pub type ResponseChannel = libp2p::request_response::ResponseChannel; pub type PeerId = libp2p::PeerId; pub type Multiaddr = libp2p::Multiaddr; pub type Keypair = libp2p::identity::Keypair; +pub type KademliaQueryResult = libp2p::kad::QueryResult; +pub type KademliaGetProvidersOk = libp2p::kad::GetProvidersOk; +pub type KademliaGetRecordOk = libp2p::kad::GetRecordOk; + +type MultiaddrProtocol<'a> = libp2p::multiaddr::Protocol<'a>; pub const PRIME_STREAM_PROTOCOL: libp2p::StreamProtocol = libp2p::StreamProtocol::new("/prime/1.0.0"); @@ -42,6 +54,10 @@ pub struct Node { // channel for receiving outgoing messages from the consumer of this library outgoing_message_rx: tokio::sync::mpsc::Receiver, + + // channel for receiving kademlia actions from the consumer of this library + kademlia_action_rx: tokio::sync::mpsc::Receiver, + ongoing_kademlia_queries: Arc>>, } impl Node { @@ -76,6 +92,8 @@ impl Node { cancellation_token, incoming_message_tx, mut outgoing_message_rx, + mut kademlia_action_rx, + ongoing_kademlia_queries, } = self; for addr in listen_addrs { @@ -84,11 +102,21 @@ impl Node { .context("swarm failed to listen on multiaddr")?; } - for bootnode in bootnodes { - match swarm.dial(bootnode.clone()) { - Ok(_) => {} + for mut multiaddr in bootnodes { + let Some(MultiaddrProtocol::P2p(peer_id)) = multiaddr.pop() else { + warn!("bootnode {multiaddr} does not have a peer ID, skipping"); + continue; + }; + + match swarm.dial(multiaddr.clone()) { + Ok(()) => { + swarm + .behaviour_mut() + .kademlia() + .add_address(&peer_id, multiaddr); + } Err(e) => { - debug!("failed to dial 
bootnode {bootnode}: {e:?}"); + debug!("failed to dial bootnode {multiaddr}: {e:?}"); } } } @@ -116,6 +144,13 @@ impl Node { } } } + Some(kademlia_action) = kademlia_action_rx.recv() => { + let result_tx = kademlia_action.result_tx(); + if let Err(e) = discovery::handle_kademlia_action(swarm.behaviour_mut().kademlia(), kademlia_action, ongoing_kademlia_queries.clone()).await { + debug!("failed to handle kademlia action: {e:?}"); + let _ = result_tx.send(Err(e)).await; + } + } event = swarm.select_next_some() => { match event { SwarmEvent::NewListenAddr { @@ -140,7 +175,7 @@ impl Node { } => { debug!("connection closed with peer {peer_id}: {cause:?}"); } - SwarmEvent::Behaviour(event) => event.handle(incoming_message_tx.clone()).await, + SwarmEvent::Behaviour(event) => event.handle(incoming_message_tx.clone(), ongoing_kademlia_queries.clone()).await, _ => continue, } }, @@ -263,6 +298,7 @@ impl NodeBuilder { Node, tokio::sync::mpsc::Receiver, tokio::sync::mpsc::Sender, + tokio::sync::mpsc::Sender, )> { let Self { port, @@ -275,6 +311,10 @@ impl NodeBuilder { } = self; let keypair = keypair.unwrap_or(identity::Keypair::generate_ed25519()); + println!( + "keypair: {}", + hex::encode(keypair.clone().try_into_ed25519().unwrap().to_bytes()) + ); let peer_id = keypair.public().to_peer_id(); let transport = create_transport(&keypair)?; @@ -304,6 +344,8 @@ impl NodeBuilder { let (incoming_message_tx, incoming_message_rx) = tokio::sync::mpsc::channel(100); let (outgoing_message_tx, outgoing_message_rx) = tokio::sync::mpsc::channel(100); + let (kademlia_action_tx, kademlia_action_rx) = tokio::sync::mpsc::channel(100); + let ongoing_kademlia_queries = Arc::new(Mutex::new(HashMap::new())); Ok(( Node { @@ -313,10 +355,13 @@ impl NodeBuilder { bootnodes, incoming_message_tx, outgoing_message_rx, + kademlia_action_rx, cancellation_token: cancellation_token.unwrap_or_default(), + ongoing_kademlia_queries, }, incoming_message_rx, outgoing_message_tx, + kademlia_action_tx, )) } } @@ 
-341,11 +386,12 @@ mod test { #[tokio::test] async fn two_nodes_can_connect_and_do_request_response() { - let (node1, mut incoming_message_rx1, outgoing_message_tx1) = + let (node1, mut incoming_message_rx1, outgoing_message_tx1, _) = NodeBuilder::new().with_get_task_logs().try_build().unwrap(); let node1_peer_id = node1.peer_id(); - let (node2, mut incoming_message_rx2, outgoing_message_tx2) = NodeBuilder::new() + println!("{:?}", node1.multiaddrs()); + let (node2, mut incoming_message_rx2, outgoing_message_tx2, _) = NodeBuilder::new() .with_get_task_logs() .with_bootnodes(node1.multiaddrs()) .try_build() diff --git a/crates/shared/src/p2p/service.rs b/crates/shared/src/p2p/service.rs index bf776009..cff13592 100644 --- a/crates/shared/src/p2p/service.rs +++ b/crates/shared/src/p2p/service.rs @@ -1,6 +1,7 @@ use crate::web3::wallet::Wallet; use anyhow::{bail, Context as _, Result}; use futures::stream::FuturesUnordered; +use p2p::KademliaActionWithChannel; use p2p::{ AuthenticationInitiationRequest, AuthenticationResponse, AuthenticationSolutionRequest, IncomingMessage, Libp2pIncomingMessage, Node, NodeBuilder, OutgoingMessage, PeerId, Protocol, @@ -36,13 +37,23 @@ impl Service { pub fn new( keypair: p2p::Keypair, port: u16, + bootnodes: Vec, cancellation_token: CancellationToken, wallet: Wallet, protocols: Protocols, - ) -> Result<(Self, Sender)> { - let (node, incoming_messages_rx, outgoing_messages) = - build_p2p_node(keypair, port, cancellation_token.clone(), protocols.clone()) - .context("failed to build p2p node")?; + ) -> Result<( + Self, + Sender, + Sender, + )> { + let (node, incoming_messages_rx, outgoing_message_tx, kademlia_action_tx) = build_p2p_node( + keypair, + port, + bootnodes, + cancellation_token.clone(), + protocols.clone(), + ) + .context("failed to build p2p node")?; let (outgoing_messages_tx, outgoing_messages_rx) = tokio::sync::mpsc::channel(100); Ok(( @@ -51,9 +62,10 @@ impl Service { incoming_messages_rx, outgoing_messages_rx, 
cancellation_token, - context: Context::new(outgoing_messages, wallet, protocols), + context: Context::new(outgoing_message_tx, wallet, protocols), }, outgoing_messages_tx, + kademlia_action_tx, )) } @@ -106,12 +118,19 @@ impl Service { fn build_p2p_node( keypair: p2p::Keypair, port: u16, + bootnodes: Vec, cancellation_token: CancellationToken, protocols: Protocols, -) -> Result<(Node, Receiver, Sender)> { +) -> Result<( + Node, + Receiver, + Sender, + Sender, +)> { NodeBuilder::new() .with_keypair(keypair) .with_port(port) + .with_bootnodes(bootnodes) .with_authentication() .with_protocols(protocols) .with_cancellation_token(cancellation_token) diff --git a/crates/validator/src/main.rs b/crates/validator/src/main.rs index f3b80d4b..c7126e11 100644 --- a/crates/validator/src/main.rs +++ b/crates/validator/src/main.rs @@ -1,10 +1,10 @@ use actix_web::{web, App, HttpRequest, HttpResponse, HttpServer, Responder}; use alloy::primitives::utils::Unit; use alloy::primitives::{Address, U256}; -use anyhow::{Context, Result}; +use anyhow::{bail, Context as _, Result}; use clap::Parser; use log::{debug, LevelFilter}; -use log::{error, info}; +use log::{error, info, warn}; use serde_json::json; use shared::models::api::ApiResponse; use shared::models::node::DiscoveryNode; @@ -13,6 +13,7 @@ use shared::security::request_signer::sign_request_with_nonce; use shared::utils::google_cloud::GcsStorageProvider; use shared::web3::contracts::core::builder::ContractBuilder; use shared::web3::wallet::Wallet; +use std::collections::HashSet; use std::str::FromStr; use std::sync::atomic::{AtomicI64, Ordering}; use std::sync::Arc; @@ -200,6 +201,16 @@ struct Args { /// Libp2p port #[arg(long, default_value = "4003")] libp2p_port: u16, + + /// Comma-separated list of libp2p bootnode multiaddresses + /// Example: `/ip4/104.131.131.82/tcp/4001/p2p/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ,/ip4/104.131.131.82/udp/4001/quic-v1/p2p/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ` + #[arg(long, 
default_value = "")] + bootnodes: String, + + /// Path to the libp2p private key file which contains the hex-encoded private key + /// A new key is generated if this is not provided + #[arg(long)] + libp2p_private_key_file: Option, } #[tokio::main] @@ -274,10 +285,27 @@ async fn main() -> anyhow::Result<()> { // Initialize P2P client if enabled let keypair = p2p::Keypair::generate_ed25519(); - let (p2p_service, hardware_challenge_tx) = { + let bootnodes: Vec = args + .bootnodes + .split(',') + .filter_map(|addr| match addr.to_string().try_into() { + Ok(multiaddr) => Some(multiaddr), + Err(e) => { + error!("Invalid bootnode address '{addr}': {e}"); + None + } + }) + .collect(); + if bootnodes.is_empty() { + error!("No valid bootnodes provided. Please provide at least one valid bootnode address."); + std::process::exit(1); + } + + let (p2p_service, hardware_challenge_tx, kademlia_action_tx) = { match P2PService::new( keypair, args.libp2p_port, + bootnodes, cancellation_token.clone(), validator_wallet.clone(), ) { @@ -454,58 +482,65 @@ async fn main() -> anyhow::Result<()> { } if !args.disable_hardware_validation { - async fn _fetch_nodes_from_discovery_url( - discovery_url: &str, - validator_wallet: &Wallet, - ) -> Result> { - let address = validator_wallet - .wallet - .default_signer() - .address() - .to_string(); - - let discovery_route = "/api/validator"; - let signature = sign_request_with_nonce(discovery_route, validator_wallet, None) - .await - .map_err(|e| anyhow::anyhow!("{}", e))?; - - let mut headers = reqwest::header::HeaderMap::new(); - headers.insert( - "x-address", - reqwest::header::HeaderValue::from_str(&address) - .context("Failed to create address header")?, - ); - headers.insert( - "x-signature", - reqwest::header::HeaderValue::from_str(&signature.signature) - .context("Failed to create signature header")?, - ); - - debug!("Fetching nodes from: {discovery_url}{discovery_route}"); - let response = reqwest::Client::new() - 
.get(format!("{discovery_url}{discovery_route}")) - .query(&[("nonce", signature.nonce)]) - .headers(headers) - .timeout(Duration::from_secs(10)) - .send() - .await - .context("Failed to fetch nodes")?; - - let response_text = response - .text() - .await - .context("Failed to get response text")?; - - let parsed_response: ApiResponse> = - serde_json::from_str(&response_text).context("Failed to parse response")?; - - if !parsed_response.success { - error!("Failed to fetch nodes from {discovery_url}: {parsed_response:?}"); - return Ok(vec![]); - } - - Ok(parsed_response.data) - } + // async fn _fetch_nodes_from_discovery_url( + // discovery_url: &str, + // validator_wallet: &Wallet, + // ) -> Result> { + // let address = validator_wallet + // .wallet + // .default_signer() + // .address() + // .to_string(); + + // let discovery_route = "/api/validator"; + // let signature = sign_request_with_nonce(discovery_route, validator_wallet, None) + // .await + // .map_err(|e| anyhow::anyhow!("{}", e))?; + + // let mut headers = reqwest::header::HeaderMap::new(); + // headers.insert( + // "x-address", + // reqwest::header::HeaderValue::from_str(&address) + // .context("Failed to create address header")?, + // ); + // headers.insert( + // "x-signature", + // reqwest::header::HeaderValue::from_str(&signature.signature) + // .context("Failed to create signature header")?, + // ); + + // debug!("Fetching nodes from: {discovery_url}{discovery_route}"); + // let response = reqwest::Client::new() + // .get(format!("{discovery_url}{discovery_route}")) + // .query(&[("nonce", signature.nonce)]) + // .headers(headers) + // .timeout(Duration::from_secs(10)) + // .send() + // .await + // .context("Failed to fetch nodes")?; + + // let response_text = response + // .text() + // .await + // .context("Failed to get response text")?; + + // let parsed_response: ApiResponse> = + // serde_json::from_str(&response_text).context("Failed to parse response")?; + + // if !parsed_response.success { + 
// error!("Failed to fetch nodes from {discovery_url}: {parsed_response:?}"); + // return Ok(vec![]); + // } + + // Ok(parsed_response.data) + // } + + let nodes = get_worker_nodes_from_dht(kademlia_action_tx.clone()) + .await + .unwrap_or_else(|e| { + error!("Failed to fetch nodes from DHT: {e}"); + vec![] + }); let nodes = match async { let mut all_nodes = Vec::new(); @@ -632,6 +667,95 @@ async fn main() -> anyhow::Result<()> { Ok(()) } +async fn get_worker_nodes_from_dht( + kademlia_action_tx: tokio::sync::mpsc::Sender, +) -> Result, anyhow::Error> { + let (kad_action, mut result_rx) = + p2p::KademliaAction::GetProviders(p2p::WORKER_DHT_KEY.as_bytes().to_vec()) + .into_kademlia_action_with_channel(); + if let Err(e) = kademlia_action_tx.send(kad_action).await { + bail!("failed to send Kademlia action: {e}"); + } + + info!("🔄 Fetching worker nodes from DHT..."); + let mut workers = HashSet::new(); + while let Some(result) = result_rx.recv().await { + match result { + Ok(res) => { + match res { + p2p::KademliaQueryResult::GetProviders(res) => match res { + Ok(res) => match res { + p2p::KademliaGetProvidersOk::FoundProviders { key: _, providers } => { + workers.extend(providers.into_iter()); + } + _ => {} + }, + Err(e) => { + bail!("failed to get providers from DHT: {e}"); + } + }, + _ => { + // this case should never happen + bail!("unexpected Kademlia query result: {res:?}"); + } + } + } + Err(e) => { + bail!("kademlia action failed: {e}"); + } + } + } + + let mut nodes = Vec::new(); + for peer_id in workers { + let record_key = format!("{}:{}", p2p::WORKER_DHT_KEY, peer_id); + let (kad_action, mut result_rx) = + p2p::KademliaAction::GetRecord(record_key.as_bytes().to_vec()) + .into_kademlia_action_with_channel(); + if let Err(e) = kademlia_action_tx.send(kad_action).await { + bail!("failed to send Kademlia action: {e}"); + } + + while let Some(result) = result_rx.recv().await { + match result { + Ok(res) => { + match res { + 
p2p::KademliaQueryResult::GetRecord(res) => match res { + Ok(res) => match res { + p2p::KademliaGetRecordOk::FoundRecord(record) => { + match serde_json::from_slice::( + &record.record.value, + ) { + Ok(node) => { + nodes.push(node); + } + Err(e) => { + warn!("failed to deserialize node record: {e}"); + } + } + } + _ => {} + }, + Err(e) => { + warn!("failed to get record from DHT: {e}"); + } + }, + _ => { + // this case should never happen + bail!("unexpected Kademlia query result: {res:?}"); + } + } + } + Err(e) => { + warn!("Kademlia action failed: {e}"); + } + } + } + } + + Ok(nodes) +} + #[cfg(test)] mod tests { use actix_web::{test, App}; diff --git a/crates/validator/src/p2p/mod.rs b/crates/validator/src/p2p/mod.rs index 7d199eb2..42950c42 100644 --- a/crates/validator/src/p2p/mod.rs +++ b/crates/validator/src/p2p/mod.rs @@ -1,5 +1,6 @@ use anyhow::{bail, Context as _, Result}; use futures::stream::FuturesUnordered; +use p2p::KademliaActionWithChannel; use p2p::{Keypair, Protocols}; use shared::p2p::OutgoingRequest; use shared::p2p::Service as P2PService; @@ -19,13 +20,19 @@ impl Service { pub fn new( keypair: Keypair, port: u16, + bootnodes: Vec, cancellation_token: CancellationToken, wallet: Wallet, - ) -> Result<(Self, Sender)> { + ) -> Result<( + Self, + Sender, + Sender, + )> { let (hardware_challenge_tx, hardware_challenge_rx) = tokio::sync::mpsc::channel(100); - let (inner, outgoing_message_tx) = P2PService::new( + let (inner, outgoing_message_tx, kademlia_action_tx) = P2PService::new( keypair, port, + bootnodes, cancellation_token.clone(), wallet, Protocols::new() @@ -40,6 +47,7 @@ impl Service { hardware_challenge_rx, }, hardware_challenge_tx, + kademlia_action_tx, )) } diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 1e9e5825..e8062adf 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -9,8 +9,8 @@ use crate::metrics::store::MetricsStore; use 
crate::operations::compute_node::ComputeNodeOperations; use crate::operations::heartbeat::service::HeartbeatService; use crate::operations::provider::ProviderOperations; -use crate::services::discovery::DiscoveryService; -use crate::services::discovery_updater::DiscoveryUpdater; +// use crate::services::discovery::DiscoveryService; +// use crate::services::discovery_updater::DiscoveryUpdater; use crate::state::system_state::SystemState; use crate::TaskHandles; use alloy::primitives::utils::format_ether; @@ -20,6 +20,7 @@ use alloy::signers::local::PrivateKeySigner; use alloy::signers::Signer; use clap::{Parser, Subcommand}; use log::{error, info}; +use p2p::KademliaAction; use shared::models::node::ComputeRequirements; use shared::models::node::Node; use shared::web3::contracts::core::builder::ContractBuilder; @@ -61,6 +62,11 @@ pub enum Commands { #[arg(long, default_value = "4002")] libp2p_port: u16, + /// Comma-separated list of libp2p bootnode multiaddresses + /// Example: `/ip4/104.131.131.82/tcp/4001/p2p/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ,/ip4/104.131.131.82/udp/4001/quic-v1/p2p/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ` + #[arg(long, default_value = "")] + bootnodes: String, + /// External IP address for the worker to advertise #[arg(long)] external_ip: Option, @@ -85,10 +91,6 @@ pub enum Commands { #[arg(long, default_value = "false")] no_auto_recover: bool, - /// Discovery service URL - #[arg(long)] - discovery_url: Option, - /// Private key for the provider (not recommended, use environment variable PRIVATE_KEY_PROVIDER instead) #[arg(long)] private_key_provider: Option, @@ -194,11 +196,11 @@ pub async fn execute_command( Commands::Run { port: _, libp2p_port, + bootnodes, external_ip, compute_pool_id, dry_run: _, rpc_url, - discovery_url, state_dir_overwrite, disable_state_storing, no_auto_recover, @@ -302,14 +304,14 @@ pub async fn execute_command( compute_node_state, ); - let discovery_urls = vec![discovery_url - .clone() - 
.unwrap_or("http://localhost:8089".to_string())]; - let discovery_service = - DiscoveryService::new(node_wallet_instance.clone(), discovery_urls, None); - let discovery_state = state.clone(); - let discovery_updater = - DiscoveryUpdater::new(discovery_service.clone(), discovery_state.clone()); + // let discovery_urls = vec![discovery_url + // .clone() + // .unwrap_or("http://localhost:8089".to_string())]; + // let discovery_service = + // DiscoveryService::new(node_wallet_instance.clone(), discovery_urls, None); + // let discovery_state = state.clone(); + // let discovery_updater = + // DiscoveryUpdater::new(discovery_service.clone(), discovery_state.clone()); let pool_id = U256::from(*compute_pool_id); let pool_info = loop { @@ -733,9 +735,25 @@ pub async fn execute_command( allowed_addresses.extend(validators); let validator_addresses = std::collections::HashSet::from_iter(allowed_addresses); - let p2p_service = match crate::p2p::Service::new( + let bootnodes: Vec = bootnodes + .split(',') + .filter_map(|addr| match addr.to_string().try_into() { + Ok(multiaddr) => Some(multiaddr), + Err(e) => { + error!("❌ Invalid bootnode address '{addr}': {e}"); + None + } + }) + .collect(); + if bootnodes.is_empty() { + error!("❌ No valid bootnodes provided. 
Please provide at least one valid bootnode address."); + std::process::exit(1); + } + + let (p2p_service, kademlia_action_tx) = match crate::p2p::Service::new( state.get_p2p_keypair().clone(), *libp2p_port, + bootnodes, node_wallet_instance.clone(), validator_addresses, docker_service.clone(), @@ -766,53 +784,128 @@ pub async fn execute_command( ); tokio::task::spawn(p2p_service.run()); - Console::success(&format!("P2P service started with ID: {peer_id}",)); + Console::success(&format!("P2P service started with ID: {peer_id}")); - let mut attempts = 0; - let max_attempts = 100; - while attempts < max_attempts { - Console::title("📦 Uploading discovery info"); - match discovery_service.upload_discovery_info(&node_config).await { - Ok(_) => break, - Err(e) => { - attempts += 1; - let error_msg = e.to_string(); - - // Check if this is a Cloudflare block - if error_msg.contains("403 Forbidden") - && (error_msg.contains("Cloudflare") - || error_msg.contains("Sorry, you have been blocked") - || error_msg.contains("Attention Required!")) - { - error!( - "Attempt {attempts}: ❌ Discovery service blocked by Cloudflare protection. 
This may indicate:" - ); - error!(" • Your IP address has been flagged by Cloudflare security"); - error!(" • Too many requests from your location"); - error!(" • Network configuration issues"); - error!(" • Discovery service may be under DDoS protection"); - error!( - "Please contact support or try from a different network/IP address" - ); - } else { - error!("Attempt {attempts}: ❌ Failed to upload discovery info: {e}"); + let record_key = format!("{}:{}", p2p::WORKER_DHT_KEY, peer_id); + let (kad_action, mut result_rx) = KademliaAction::PutRecord { + key: record_key.as_bytes().to_vec(), + value: serde_json::to_vec(&node_config).unwrap(), + } + .into_kademlia_action_with_channel(); + if let Err(e) = kademlia_action_tx.send(kad_action).await { + error!("❌ Failed to send Kademlia action: {e}"); + std::process::exit(1); + } + + while let Some(result) = result_rx.recv().await { + match result { + Ok(res) => { + match res { + p2p::KademliaQueryResult::PutRecord(res) => match res { + Ok(_) => { + Console::success("Worker info published to DHT"); + } + Err(e) => { + error!("❌ Failed to put record in DHT: {e}"); + std::process::exit(1); + } + }, + _ => { + // this case should never happen + error!("❌ Unexpected result from putting record in DHT: {res:?}"); + std::process::exit(1); + } } + } + Err(e) => { + error!("❌ Failed to publish worker info to DHT: {e}"); + std::process::exit(1); + } + } + } - if attempts >= max_attempts { - if error_msg.contains("403 Forbidden") - && (error_msg.contains("Cloudflare") - || error_msg.contains("Sorry, you have been blocked")) - { - error!("❌ Unable to reach discovery service due to Cloudflare blocking after {max_attempts} attempts"); - error!("This is likely a network/IP issue rather than a worker configuration problem"); + let (kad_action, mut result_rx) = + KademliaAction::StartProviding(p2p::WORKER_DHT_KEY.as_bytes().to_vec()) + .into_kademlia_action_with_channel(); + if let Err(e) = kademlia_action_tx.send(kad_action).await { + 
error!("❌ Failed to send Kademlia action: {e}"); + std::process::exit(1); + } + + while let Some(result) = result_rx.recv().await { + match result { + Ok(res) => { + match res { + p2p::KademliaQueryResult::StartProviding(res) => match res { + Ok(_) => { + Console::success( + "Advertising ourselves as a worker in the DHT", + ); + } + Err(e) => { + error!("❌ Failed to start providing worker info in DHT: {e}"); + std::process::exit(1); + } + }, + _ => { + // this case should never happen + error!("❌ Unexpected result from starting providing worker info in DHT: {res:?}"); + std::process::exit(1); } - std::process::exit(1); } } + Err(e) => { + error!("❌ Failed to start providing worker info in DHT: {e}"); + std::process::exit(1); + } } - tokio::time::sleep(tokio::time::Duration::from_secs(10)).await; } + // let mut attempts = 0; + // let max_attempts = 100; + // while attempts < max_attempts { + // Console::title("📦 Uploading discovery info"); + // match discovery_service.upload_discovery_info(&node_config).await { + // Ok(_) => break, + // Err(e) => { + // attempts += 1; + // let error_msg = e.to_string(); + + // // Check if this is a Cloudflare block + // if error_msg.contains("403 Forbidden") + // && (error_msg.contains("Cloudflare") + // || error_msg.contains("Sorry, you have been blocked") + // || error_msg.contains("Attention Required!")) + // { + // error!( + // "Attempt {attempts}: ❌ Discovery service blocked by Cloudflare protection. 
This may indicate:" + // ); + // error!(" • Your IP address has been flagged by Cloudflare security"); + // error!(" • Too many requests from your location"); + // error!(" • Network configuration issues"); + // error!(" • Discovery service may be under DDoS protection"); + // error!( + // "Please contact support or try from a different network/IP address" + // ); + // } else { + // error!("Attempt {attempts}: ❌ Failed to upload discovery info: {e}"); + // } + + // if attempts >= max_attempts { + // if error_msg.contains("403 Forbidden") + // && (error_msg.contains("Cloudflare") + // || error_msg.contains("Sorry, you have been blocked")) + // { + // error!("❌ Unable to reach discovery service due to Cloudflare blocking after {max_attempts} attempts"); + // error!("This is likely a network/IP issue rather than a worker configuration problem"); + // } + // std::process::exit(1); + // } + // } + // } + // tokio::time::sleep(tokio::time::Duration::from_secs(10)).await; + // } + Console::success("Discovery info uploaded"); Console::section("Starting Worker with Task Bridge"); @@ -827,7 +920,7 @@ pub async fn execute_command( std::process::exit(1); } - discovery_updater.start_auto_update(node_config); + // discovery_updater.start_auto_update(node_config); if recover_last_state { info!("Recovering from previous state: {recover_last_state}"); diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 94fe10a3..0eadd191 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -2,6 +2,7 @@ use anyhow::Context as _; use anyhow::Result; use futures::stream::FuturesUnordered; use p2p::InviteRequestUrl; +use p2p::KademliaActionWithChannel; use p2p::Node; use p2p::NodeBuilder; use p2p::PeerId; @@ -34,6 +35,7 @@ impl Service { pub(crate) fn new( keypair: p2p::Keypair, port: u16, + bootnodes: Vec, wallet: Wallet, validator_addresses: HashSet, docker_service: Arc, @@ -42,25 +44,28 @@ impl Service { contracts: Contracts, provider_wallet: 
Wallet, cancellation_token: CancellationToken, - ) -> Result { - let (node, incoming_messages, outgoing_messages) = - build_p2p_node(keypair, port, cancellation_token.clone()) + ) -> Result<(Self, Sender)> { + let (node, incoming_messages, outgoing_messages, kademlia_action_tx) = + build_p2p_node(keypair, port, bootnodes, cancellation_token.clone()) .context("failed to build p2p node")?; - Ok(Self { - node, - incoming_messages, - cancellation_token, - context: Context::new( - wallet, - outgoing_messages, - validator_addresses, - docker_service, - heartbeat_service, - system_state, - contracts, - provider_wallet, - ), - }) + Ok(( + Self { + node, + incoming_messages, + cancellation_token, + context: Context::new( + wallet, + outgoing_messages, + validator_addresses, + docker_service, + heartbeat_service, + system_state, + contracts, + provider_wallet, + ), + }, + kademlia_action_tx, + )) } pub(crate) fn peer_id(&self) -> PeerId { @@ -110,11 +115,18 @@ impl Service { fn build_p2p_node( keypair: p2p::Keypair, port: u16, + bootnodes: Vec, cancellation_token: CancellationToken, -) -> Result<(Node, Receiver, Sender)> { - let (node, incoming_message_rx, outgoing_message_tx) = NodeBuilder::new() +) -> Result<( + Node, + Receiver, + Sender, + Sender, +)> { + let (node, incoming_message_rx, outgoing_message_tx, kademlia_action_tx) = NodeBuilder::new() .with_keypair(keypair) .with_port(port) + .with_bootnodes(bootnodes) .with_authentication() .with_hardware_challenge() .with_invite() @@ -123,7 +135,12 @@ fn build_p2p_node( .with_cancellation_token(cancellation_token) .try_build() .context("failed to build p2p node")?; - Ok((node, incoming_message_rx, outgoing_message_tx)) + Ok(( + node, + incoming_message_rx, + outgoing_message_tx, + kademlia_action_tx, + )) } #[derive(Clone)] From 07b45338e77ebb8001cc232907923d225842975c Mon Sep 17 00:00:00 2001 From: elizabeth Date: Tue, 15 Jul 2025 13:06:41 -0400 Subject: [PATCH 02/14] implement validator which fetches nodes from dht for 
validation --- crates/shared/src/models/node.rs | 129 ++++- crates/validator/src/lib.rs | 3 + crates/validator/src/main.rs | 535 +++++------------- crates/validator/src/validator.rs | 308 ++++++++++ crates/validator/src/validators/hardware.rs | 99 ++-- .../src/validators/hardware_challenge.rs | 10 +- 6 files changed, 618 insertions(+), 466 deletions(-) create mode 100644 crates/validator/src/validator.rs diff --git a/crates/shared/src/models/node.rs b/crates/shared/src/models/node.rs index 0466492c..16bb68bc 100644 --- a/crates/shared/src/models/node.rs +++ b/crates/shared/src/models/node.rs @@ -1,5 +1,7 @@ -use alloy::primitives::U256; -use anyhow::anyhow; +use crate::web3::{contracts::core::builder::Contracts, wallet::WalletProvider}; +use alloy::primitives::{Address, U256}; +use alloy::providers::Provider as _; +use anyhow::{anyhow, Context as _}; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use std::fmt; @@ -9,6 +11,7 @@ use utoipa::{openapi::Object, ToSchema}; #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Default, ToSchema)] pub struct Node { + // TODO: change all three of these from `String` to `Address` pub id: String, // the node's on-chain address. 
pub provider_address: String, @@ -549,6 +552,128 @@ pub struct NodeLocation { pub country: Option, } +#[derive(Debug, Clone)] +pub struct NodeWithMetadata { + node: Node, + is_validated: bool, + is_active: bool, + is_provider_whitelisted: bool, + is_blacklisted: bool, + latest_balance: U256, + last_updated: Option>, + created_at: Option>, + location: Option, +} + +impl NodeWithMetadata { + pub fn new( + node: Node, + is_validated: bool, + is_active: bool, + is_provider_whitelisted: bool, + is_blacklisted: bool, + latest_balance: U256, + last_updated: Option>, + created_at: Option>, + location: Option, + ) -> Self { + Self { + node, + is_validated, + is_active, + is_provider_whitelisted, + is_blacklisted, + latest_balance, + last_updated, + created_at, + location, + } + } + + pub async fn new_from_contracts( + node: Node, + provider: &WalletProvider, + contracts: &Contracts, + ) -> anyhow::Result { + let provider_address = + Address::from_str(&node.provider_address).context("invalid provider address")?; + let node_address = Address::from_str(&node.id).context("invalid node address")?; + let latest_balance = provider + .get_balance(node_address) + .await + .context("failed to get node balance")?; + + let node_info = contracts + .compute_registry + .get_node(provider_address, node_address) + .await + .context("failed to get node info from compute registry")?; + + let provider_info = contracts + .compute_registry + .get_provider(provider_address) + .await + .map_err(|e| anyhow!("failed to get provider info from compute registry: {e}"))?; + + let (is_active, is_validated) = node_info; + let is_provider_whitelisted = provider_info.is_whitelisted; + + let is_blacklisted = contracts + .compute_pool + .is_node_blacklisted(node.compute_pool_id, node_address) + .await + .map_err(|e| anyhow!("failed to check if node is blacklisted: {e}"))?; + + Ok(Self { + node, + is_validated, + is_active, + is_provider_whitelisted, + is_blacklisted, + latest_balance, + last_updated: None, // 
TODO + created_at: None, // TODO + location: None, // TODO + }) + } + + pub fn node(&self) -> &Node { + &self.node + } + + pub fn is_validated(&self) -> bool { + self.is_validated + } + + pub fn is_active(&self) -> bool { + self.is_active + } + + pub fn is_provider_whitelisted(&self) -> bool { + self.is_provider_whitelisted + } + + pub fn is_blacklisted(&self) -> bool { + self.is_blacklisted + } + + pub fn latest_balance(&self) -> U256 { + self.latest_balance + } + + pub fn last_updated(&self) -> Option> { + self.last_updated + } + + pub fn created_at(&self) -> Option> { + self.created_at + } + + pub fn location(&self) -> Option<&NodeLocation> { + self.location.as_ref() + } +} + #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Default, ToSchema)] pub struct DiscoveryNode { #[serde(flatten)] diff --git a/crates/validator/src/lib.rs b/crates/validator/src/lib.rs index 9fac5ce8..e80f711c 100644 --- a/crates/validator/src/lib.rs +++ b/crates/validator/src/lib.rs @@ -1,12 +1,15 @@ mod metrics; mod p2p; mod store; +mod validator; mod validators; pub use metrics::export_metrics; pub use metrics::MetricsContext; pub use p2p::Service as P2PService; pub use store::redis::RedisStore; +pub use validator::Validator; +pub use validator::ValidatorHealth; pub use validators::hardware::HardwareValidator; pub use validators::synthetic_data::types::InvalidationType; pub use validators::synthetic_data::SyntheticDataValidator; diff --git a/crates/validator/src/main.rs b/crates/validator/src/main.rs index c7126e11..b02d870f 100644 --- a/crates/validator/src/main.rs +++ b/crates/validator/src/main.rs @@ -1,126 +1,27 @@ use actix_web::{web, App, HttpRequest, HttpResponse, HttpServer, Responder}; use alloy::primitives::utils::Unit; -use alloy::primitives::{Address, U256}; -use anyhow::{bail, Context as _, Result}; +use alloy::primitives::U256; use clap::Parser; -use log::{debug, LevelFilter}; -use log::{error, info, warn}; +use log::LevelFilter; +use log::{error, info}; use 
serde_json::json; use shared::models::api::ApiResponse; -use shared::models::node::DiscoveryNode; use shared::security::api_key_middleware::ApiKeyMiddleware; -use shared::security::request_signer::sign_request_with_nonce; use shared::utils::google_cloud::GcsStorageProvider; use shared::web3::contracts::core::builder::ContractBuilder; use shared::web3::wallet::Wallet; -use std::collections::HashSet; use std::str::FromStr; -use std::sync::atomic::{AtomicI64, Ordering}; use std::sync::Arc; -use std::time::Duration; -use std::time::{Instant, SystemTime, UNIX_EPOCH}; +use std::time::{SystemTime, UNIX_EPOCH}; use tokio::signal::unix::{signal, SignalKind}; use tokio_util::sync::CancellationToken; use url::Url; use validator::{ export_metrics, HardwareValidator, InvalidationType, MetricsContext, P2PService, RedisStore, - SyntheticDataValidator, + SyntheticDataValidator, Validator, }; -// Track the last time the validation loop ran -static LAST_VALIDATION_TIMESTAMP: AtomicI64 = AtomicI64::new(0); -// Maximum allowed time between validation loops (2 minutes) -const MAX_VALIDATION_INTERVAL_SECS: i64 = 120; -// Track the last loop duration in milliseconds -static LAST_LOOP_DURATION_MS: AtomicI64 = AtomicI64::new(0); - -async fn get_rejections( - req: HttpRequest, - validator: web::Data>>, -) -> impl Responder { - match validator.as_ref() { - Some(validator) => { - // Parse query parameters - let query = req.query_string(); - let limit = parse_limit_param(query).unwrap_or(100); // Default limit of 100 - - let result = if limit > 0 && limit < 1000 { - // Use the optimized recent rejections method for reasonable limits - validator.get_recent_rejections(limit as isize).await - } else { - // Fallback to all rejections (but warn about potential performance impact) - if limit >= 1000 { - info!("Large limit requested ({limit}), this may impact performance"); - } - validator.get_all_rejections().await - }; - - match result { - Ok(rejections) => HttpResponse::Ok().json(ApiResponse { - 
success: true, - data: rejections, - }), - Err(e) => { - error!("Failed to get rejections: {e}"); - HttpResponse::InternalServerError().json(ApiResponse { - success: false, - data: format!("Failed to get rejections: {e}"), - }) - } - } - } - None => HttpResponse::ServiceUnavailable().json(ApiResponse { - success: false, - data: "Synthetic data validator not available", - }), - } -} - -fn parse_limit_param(query: &str) -> Option { - for pair in query.split('&') { - if let Some((key, value)) = pair.split_once('=') { - if key == "limit" { - return value.parse::().ok(); - } - } - } - None -} - -async fn health_check() -> impl Responder { - let now = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() as i64; - let last_validation = LAST_VALIDATION_TIMESTAMP.load(Ordering::Relaxed); - let last_duration_ms = LAST_LOOP_DURATION_MS.load(Ordering::Relaxed); - - if last_validation == 0 { - // Validation hasn't run yet, but we're still starting up - return HttpResponse::Ok().json(json!({ - "status": "starting", - "message": "Validation loop hasn't started yet" - })); - } - - let elapsed = now - last_validation; - - if elapsed > MAX_VALIDATION_INTERVAL_SECS { - return HttpResponse::ServiceUnavailable().json(json!({ - "status": "error", - "message": format!("Validation loop hasn't run in {} seconds (max allowed: {})", elapsed, MAX_VALIDATION_INTERVAL_SECS), - "last_loop_duration_ms": last_duration_ms - })); - } - - HttpResponse::Ok().json(json!({ - "status": "ok", - "last_validation_seconds_ago": elapsed, - "last_loop_duration_ms": last_duration_ms - })) -} - #[derive(Parser)] struct Args { /// RPC URL @@ -235,33 +136,9 @@ async fn main() -> anyhow::Result<()> { .init(); let cancellation_token = CancellationToken::new(); - let mut sigterm = signal(SignalKind::terminate())?; - let mut sigint = signal(SignalKind::interrupt())?; - let mut sighup = signal(SignalKind::hangup())?; - let mut sigquit = signal(SignalKind::quit())?; - let signal_token = 
cancellation_token.clone(); - let cancellation_token_clone = cancellation_token.clone(); - tokio::spawn(async move { - tokio::select! { - _ = sigterm.recv() => { - log::info!("Received termination signal"); - } - _ = sigint.recv() => { - log::info!("Received interrupt signal"); - } - _ = sighup.recv() => { - log::info!("Received hangup signal"); - } - _ = sigquit.recv() => { - log::info!("Received quit signal"); - } - } - signal_token.cancel(); - }); let private_key_validator = args.validator_key; let rpc_url: Url = args.rpc_url.parse().unwrap(); - let discovery_urls = args.discovery_urls; let redis_store = RedisStore::new(&args.redis_url); @@ -279,11 +156,9 @@ async fn main() -> anyhow::Result<()> { .with_stake_manager(); let contracts = contract_builder.build_partial().unwrap(); - let metrics_ctx = MetricsContext::new(validator_wallet.address().to_string(), args.pool_id.clone()); - // Initialize P2P client if enabled let keypair = p2p::Keypair::generate_ed25519(); let bootnodes: Vec = args .bootnodes @@ -391,7 +266,7 @@ async fn main() -> anyhow::Result<()> { penalty, storage_provider, redis_store, - cancellation_token, + cancellation_token.clone(), args.toploc_work_validation_interval, args.toploc_work_validation_unknown_status_expiry_seconds, args.toploc_grace_interval, @@ -419,15 +294,34 @@ async fn main() -> anyhow::Result<()> { None }; + let (validator, validator_health) = match Validator::new( + cancellation_token.clone(), + validator_wallet.provider(), + contracts, + hardware_validator, + synthetic_validator.clone(), + kademlia_action_tx, + args.disable_hardware_validation, + metrics_ctx, + ) { + Ok(v) => v, + Err(e) => { + error!("Failed to create validator: {e}"); + std::process::exit(1); + } + }; + // Start HTTP server with access to the validator - let validator_for_server = synthetic_validator.clone(); tokio::spawn(async move { let key = std::env::var("VALIDATOR_API_KEY").unwrap_or_default(); let api_key_middleware = 
Arc::new(ApiKeyMiddleware::new(key)); if let Err(e) = HttpServer::new(move || { App::new() - .app_data(web::Data::new(validator_for_server.clone())) + .app_data(web::Data::new(( + synthetic_validator.clone(), + validator_health.clone(), + ))) .route("/health", web::get().to(health_check)) .route( "/rejections", @@ -459,301 +353,136 @@ async fn main() -> anyhow::Result<()> { } }); - loop { - if cancellation_token_clone.is_cancelled() { - log::info!("Validation loop is stopping due to cancellation signal"); - break; - } - - // Start timing the loop - let loop_start = Instant::now(); + tokio::task::spawn(validator.run()); - // Update the last validation timestamp - let now = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() as i64; - LAST_VALIDATION_TIMESTAMP.store(now, Ordering::Relaxed); + let mut sigterm = signal(SignalKind::terminate())?; + let mut sigint = signal(SignalKind::interrupt())?; + let mut sighup = signal(SignalKind::hangup())?; + let mut sigquit = signal(SignalKind::quit())?; - if let Some(validator) = synthetic_validator.clone() { - if let Err(e) = validator.validate_work().await { - error!("Failed to validate work: {e}"); - } + tokio::select! 
{ + _ = sigterm.recv() => { + log::info!("Received termination signal"); } + _ = sigint.recv() => { + log::info!("Received interrupt signal"); + } + _ = sighup.recv() => { + log::info!("Received hangup signal"); + } + _ = sigquit.recv() => { + log::info!("Received quit signal"); + } + } + cancellation_token.cancel(); - if !args.disable_hardware_validation { - // async fn _fetch_nodes_from_discovery_url( - // discovery_url: &str, - // validator_wallet: &Wallet, - // ) -> Result> { - // let address = validator_wallet - // .wallet - // .default_signer() - // .address() - // .to_string(); - - // let discovery_route = "/api/validator"; - // let signature = sign_request_with_nonce(discovery_route, validator_wallet, None) - // .await - // .map_err(|e| anyhow::anyhow!("{}", e))?; - - // let mut headers = reqwest::header::HeaderMap::new(); - // headers.insert( - // "x-address", - // reqwest::header::HeaderValue::from_str(&address) - // .context("Failed to create address header")?, - // ); - // headers.insert( - // "x-signature", - // reqwest::header::HeaderValue::from_str(&signature.signature) - // .context("Failed to create signature header")?, - // ); - - // debug!("Fetching nodes from: {discovery_url}{discovery_route}"); - // let response = reqwest::Client::new() - // .get(format!("{discovery_url}{discovery_route}")) - // .query(&[("nonce", signature.nonce)]) - // .headers(headers) - // .timeout(Duration::from_secs(10)) - // .send() - // .await - // .context("Failed to fetch nodes")?; - - // let response_text = response - // .text() - // .await - // .context("Failed to get response text")?; - - // let parsed_response: ApiResponse> = - // serde_json::from_str(&response_text).context("Failed to parse response")?; - - // if !parsed_response.success { - // error!("Failed to fetch nodes from {discovery_url}: {parsed_response:?}"); - // return Ok(vec![]); - // } - - // Ok(parsed_response.data) - // } - - let nodes = get_worker_nodes_from_dht(kademlia_action_tx.clone()) - 
.await - .unwrap_or_else(|e| { - error!("Failed to fetch nodes from DHT: {e}"); - vec![] - }); - - let nodes = match async { - let mut all_nodes = Vec::new(); - let mut any_success = false; - - for discovery_url in &discovery_urls { - match _fetch_nodes_from_discovery_url(discovery_url, &validator_wallet).await { - Ok(nodes) => { - debug!( - "Successfully fetched {} nodes from {}", - nodes.len(), - discovery_url - ); - all_nodes.extend(nodes); - any_success = true; - } - Err(e) => { - error!("Failed to fetch nodes from {discovery_url}: {e:#}"); - } - } - } - - if !any_success { - error!("Failed to fetch nodes from all discovery services"); - return Ok::, anyhow::Error>(vec![]); - } + // TODO: handle spawn handles here - // Remove duplicates based on node ID - let mut unique_nodes = Vec::new(); - let mut seen_ids = std::collections::HashSet::new(); - for node in all_nodes { - if seen_ids.insert(node.node.id.clone()) { - unique_nodes.push(node); - } - } + Ok(()) +} - debug!( - "Total unique nodes after deduplication: {}", - unique_nodes.len() - ); - Ok(unique_nodes) - } - .await - { - Ok(n) => n, - Err(e) => { - error!("Error in node fetching loop: {e:#}"); - std::thread::sleep(std::time::Duration::from_secs(10)); - continue; - } - }; +async fn health_check( + _: HttpRequest, + state: web::Data< + Option<( + SyntheticDataValidator, + Arc>, + )>, + >, +) -> impl Responder { + // Maximum allowed time between validation loops (2 minutes) + const MAX_VALIDATION_INTERVAL_SECS: u64 = 120; - // Ensure nodes have enough stake - let mut nodes_with_enough_stake = Vec::new(); - let Some(stake_manager) = contracts.stake_manager.as_ref() else { - error!("Stake manager contract not initialized"); - continue; - }; + let Some(state) = state.get_ref() else { + return HttpResponse::ServiceUnavailable().json(json!({ + "status": "error", + "message": "Validator not initialized" + })); + }; - let mut provider_stake_cache: std::collections::HashMap = - std::collections::HashMap::new(); - 
for node in nodes { - let provider_address_str = &node.node.provider_address; - let provider_address = match Address::from_str(provider_address_str) { - Ok(address) => address, - Err(e) => { - error!("Failed to parse provider address {provider_address_str}: {e}"); - continue; - } - }; + let validator_health = state.1.lock().await; - let (stake, required_stake) = - if let Some(&cached_info) = provider_stake_cache.get(provider_address_str) { - cached_info - } else { - let stake = stake_manager - .get_stake(provider_address) - .await - .unwrap_or_default(); - let total_compute = contracts - .compute_registry - .get_provider_total_compute(provider_address) - .await - .unwrap_or_default(); - let required_stake = stake_manager - .calculate_stake(U256::from(0), total_compute) - .await - .unwrap_or_default(); - - provider_stake_cache - .insert(provider_address_str.clone(), (stake, required_stake)); - (stake, required_stake) - }; - - if stake >= required_stake { - nodes_with_enough_stake.push(node); - } else { - info!( - "Node {} has insufficient stake: {} (required: {})", - node.node.id, - stake / Unit::ETHER.wei(), - required_stake / Unit::ETHER.wei() - ); - } - } + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(); - if let Err(e) = hardware_validator - .validate_nodes(nodes_with_enough_stake) - .await - { - error!("Error validating nodes: {e:#}"); - } - } + if validator_health.last_validation_timestamp() == 0 { + // Validation hasn't run yet, but we're still starting up + return HttpResponse::Ok().json(json!({ + "status": "starting", + "message": "Validation loop hasn't started yet" + })); + } - // Calculate and store loop duration - let loop_duration = loop_start.elapsed(); - let loop_duration_ms = loop_duration.as_millis() as i64; - LAST_LOOP_DURATION_MS.store(loop_duration_ms, Ordering::Relaxed); + let elapsed = now - validator_health.last_validation_timestamp(); - 
metrics_ctx.record_validation_loop_duration(loop_duration.as_secs_f64()); - info!("Validation loop completed in {loop_duration_ms}ms"); - tokio::time::sleep(std::time::Duration::from_secs(5)).await; + if elapsed > MAX_VALIDATION_INTERVAL_SECS { + return HttpResponse::ServiceUnavailable().json(json!({ + "status": "error", + "message": format!("Validation loop hasn't run in {} seconds (max allowed: {})", elapsed, MAX_VALIDATION_INTERVAL_SECS), + "last_loop_duration_ms": validator_health.last_loop_duration_ms(), + })); } - Ok(()) + + HttpResponse::Ok().json(json!({ + "status": "ok", + "last_validation_seconds_ago": elapsed, + "last_loop_duration_ms": validator_health.last_loop_duration_ms(), + })) } -async fn get_worker_nodes_from_dht( - kademlia_action_tx: tokio::sync::mpsc::Sender, -) -> Result, anyhow::Error> { - let (kad_action, mut result_rx) = - p2p::KademliaAction::GetProviders(p2p::WORKER_DHT_KEY.as_bytes().to_vec()) - .into_kademlia_action_with_channel(); - if let Err(e) = kademlia_action_tx.send(kad_action).await { - bail!("failed to send Kademlia action: {e}"); - } +async fn get_rejections( + req: HttpRequest, + validator: web::Data>>, +) -> impl Responder { + match validator.as_ref() { + Some(validator) => { + // Parse query parameters + let query = req.query_string(); + let limit = parse_limit_param(query).unwrap_or(100); // Default limit of 100 - info!("🔄 Fetching worker nodes from DHT..."); - let mut workers = HashSet::new(); - while let Some(result) = result_rx.recv().await { - match result { - Ok(res) => { - match res { - p2p::KademliaQueryResult::GetProviders(res) => match res { - Ok(res) => match res { - p2p::KademliaGetProvidersOk::FoundProviders { key: _, providers } => { - workers.extend(providers.into_iter()); - } - _ => {} - }, - Err(e) => { - bail!("failed to get providers from DHT: {e}"); - } - }, - _ => { - // this case should never happen - bail!("unexpected Kademlia query result: {res:?}"); - } + let result = if limit > 0 && limit < 1000 { 
+ // Use the optimized recent rejections method for reasonable limits + validator.get_recent_rejections(limit as isize).await + } else { + // Fallback to all rejections (but warn about potential performance impact) + if limit >= 1000 { + info!("Large limit requested ({limit}), this may impact performance"); } - } - Err(e) => { - bail!("kademlia action failed: {e}"); - } - } - } - - let mut nodes = Vec::new(); - for peer_id in workers { - let record_key = format!("{}:{}", p2p::WORKER_DHT_KEY, peer_id); - let (kad_action, mut result_rx) = - p2p::KademliaAction::GetRecord(record_key.as_bytes().to_vec()) - .into_kademlia_action_with_channel(); - if let Err(e) = kademlia_action_tx.send(kad_action).await { - bail!("failed to send Kademlia action: {e}"); - } + validator.get_all_rejections().await + }; - while let Some(result) = result_rx.recv().await { match result { - Ok(res) => { - match res { - p2p::KademliaQueryResult::GetRecord(res) => match res { - Ok(res) => match res { - p2p::KademliaGetRecordOk::FoundRecord(record) => { - match serde_json::from_slice::( - &record.record.value, - ) { - Ok(node) => { - nodes.push(node); - } - Err(e) => { - warn!("failed to deserialize node record: {e}"); - } - } - } - _ => {} - }, - Err(e) => { - warn!("failed to get record from DHT: {e}"); - } - }, - _ => { - // this case should never happen - bail!("unexpected Kademlia query result: {res:?}"); - } - } - } + Ok(rejections) => HttpResponse::Ok().json(ApiResponse { + success: true, + data: rejections, + }), Err(e) => { - warn!("Kademlia action failed: {e}"); + error!("Failed to get rejections: {e}"); + HttpResponse::InternalServerError().json(ApiResponse { + success: false, + data: format!("Failed to get rejections: {e}"), + }) } } } + None => HttpResponse::ServiceUnavailable().json(ApiResponse { + success: false, + data: "Synthetic data validator not available", + }), } +} - Ok(nodes) +fn parse_limit_param(query: &str) -> Option { + for pair in query.split('&') { + if let 
Some((key, value)) = pair.split_once('=') { + if key == "limit" { + return value.parse::().ok(); + } + } + } + None } #[cfg(test)] diff --git a/crates/validator/src/validator.rs b/crates/validator/src/validator.rs new file mode 100644 index 00000000..a09680d7 --- /dev/null +++ b/crates/validator/src/validator.rs @@ -0,0 +1,308 @@ +use crate::{HardwareValidator, MetricsContext, SyntheticDataValidator}; +use alloy::primitives::{utils::Unit, Address, U256}; +use anyhow::{bail, Result}; +use futures::stream::FuturesUnordered; +use futures::StreamExt as _; +use log::{error, info, warn}; +use shared::models::node::NodeWithMetadata; +use shared::web3::contracts::core::builder::Contracts; +use shared::web3::wallet::WalletProvider; +use std::collections::HashSet; +use std::str::FromStr as _; +use std::sync::Arc; +use std::time::{Instant, SystemTime, UNIX_EPOCH}; +use tokio::sync::Mutex; + +pub struct ValidatorHealth { + last_validation_timestamp: u64, + last_loop_duration_ms: u64, +} + +impl ValidatorHealth { + fn new() -> Self { + Self { + last_validation_timestamp: 0, + last_loop_duration_ms: 0, + } + } + + fn update(&mut self, timestamp: u64, duration_ms: u64) { + self.last_validation_timestamp = timestamp; + self.last_loop_duration_ms = duration_ms; + } + + pub fn last_validation_timestamp(&self) -> u64 { + self.last_validation_timestamp + } + + pub fn last_loop_duration_ms(&self) -> u64 { + self.last_loop_duration_ms + } +} + +pub struct Validator { + synthetic_validator: Option>, // TOOD: does this need to be optional? 
+ provider: WalletProvider, + contracts: Contracts, + hardware_validator: HardwareValidator, + cancellation_token: tokio_util::sync::CancellationToken, + kademlia_action_tx: tokio::sync::mpsc::Sender, + disable_hardware_validation: bool, + metrics_ctx: MetricsContext, + validator_health: Arc>, +} + +impl Validator { + pub fn new( + cancellation_token: tokio_util::sync::CancellationToken, + provider: WalletProvider, + contracts: Contracts, + hardware_validator: HardwareValidator, + synthetic_validator: Option>, + kademlia_action_tx: tokio::sync::mpsc::Sender, + disable_hardware_validation: bool, + metrics_ctx: MetricsContext, + ) -> Result<(Self, Arc>)> { + if contracts.stake_manager.is_none() { + bail!("stake manager contract not initialized"); + }; + + let validator_health = Arc::new(Mutex::new(ValidatorHealth::new())); + + Ok(( + Self { + cancellation_token, + provider, + contracts, + hardware_validator, + synthetic_validator, + kademlia_action_tx, + disable_hardware_validation, + metrics_ctx, + validator_health: validator_health.clone(), + }, + validator_health, + )) + } + + pub async fn run(self) { + let Self { + cancellation_token, + provider, + contracts, + hardware_validator, + synthetic_validator, + kademlia_action_tx, + disable_hardware_validation, + metrics_ctx, + validator_health, + } = self; + + let stake_manager = contracts + .stake_manager + .as_ref() + .expect("stake manager contract must be initialized"); + + loop { + if cancellation_token.is_cancelled() { + info!("Validation loop is stopping due to cancellation signal"); + break; + } + + // Start timing the loop + let loop_start = Instant::now(); + + // Update the last validation timestamp + let last_validation_timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("current time must be after unix epoch") + .as_secs(); + + if let Some(validator) = synthetic_validator.clone() { + if let Err(e) = validator.validate_work().await { + error!("Failed to validate work: {e}"); + } + } + + if 
!disable_hardware_validation { + let nodes = get_worker_nodes_from_dht(kademlia_action_tx.clone()) + .await + .unwrap_or_else(|e| { + error!("Failed to fetch nodes from DHT: {e}"); + vec![] + }); + + if nodes.is_empty() { + info!("No worker nodes found in DHT, skipping hardware validation"); + tokio::time::sleep(std::time::Duration::from_secs(5)).await; + continue; + } + + let futures = FuturesUnordered::new(); + for node in nodes { + futures.push(NodeWithMetadata::new_from_contracts( + node, &provider, &contracts, + )); + } + let nodes = futures + .collect::>() + .await + .into_iter() + .filter_map(Result::ok) + .collect::>(); + if nodes.is_empty() { + info!("No valid nodes found for hardware validation"); + tokio::time::sleep(std::time::Duration::from_secs(5)).await; + continue; + } + + // Ensure nodes have enough stake + let mut nodes_with_enough_stake = Vec::new(); + let mut provider_stake_cache: std::collections::HashMap = + std::collections::HashMap::new(); + + for node in nodes { + let provider_address = Address::from_str(&node.node().provider_address).expect("provider address must be valid, as it was checked in `NodeWithMetadata::new`"); + + let (stake, required_stake) = + if let Some(&cached_info) = provider_stake_cache.get(&provider_address) { + cached_info + } else { + let stake = stake_manager + .get_stake(provider_address) + .await + .unwrap_or_default(); + let total_compute = contracts + .compute_registry + .get_provider_total_compute(provider_address) + .await + .unwrap_or_default(); + let required_stake = stake_manager + .calculate_stake(U256::from(0), total_compute) + .await + .unwrap_or_default(); + + provider_stake_cache.insert(provider_address, (stake, required_stake)); + (stake, required_stake) + }; + + if stake >= required_stake { + nodes_with_enough_stake.push(node); + } else { + info!( + "Node {} has insufficient stake: {} (required: {})", + node.node().id, + stake / Unit::ETHER.wei(), + required_stake / Unit::ETHER.wei() + ); + } + } + + if 
let Err(e) = hardware_validator + .validate_nodes(nodes_with_enough_stake) + .await + { + error!("Error validating nodes: {e:#}"); + } + } + + // Calculate and store loop duration + let last_loop_duration_ms = loop_start.elapsed().as_millis(); + metrics_ctx.record_validation_loop_duration(loop_start.elapsed().as_secs_f64()); + info!("Validation loop completed in {last_loop_duration_ms}ms"); + + let mut validator_health = validator_health.lock().await; + validator_health.update(last_validation_timestamp, last_loop_duration_ms as u64); + tokio::time::sleep(std::time::Duration::from_secs(5)).await; + } + } +} + +async fn get_worker_nodes_from_dht( + kademlia_action_tx: tokio::sync::mpsc::Sender, +) -> Result, anyhow::Error> { + let (kad_action, mut result_rx) = + p2p::KademliaAction::GetProviders(p2p::WORKER_DHT_KEY.as_bytes().to_vec()) + .into_kademlia_action_with_channel(); + if let Err(e) = kademlia_action_tx.send(kad_action).await { + bail!("failed to send Kademlia action: {e}"); + } + + info!("🔄 Fetching worker nodes from DHT..."); + let mut workers = HashSet::new(); + while let Some(result) = result_rx.recv().await { + match result { + Ok(res) => { + match res { + p2p::KademliaQueryResult::GetProviders(res) => match res { + Ok(res) => match res { + p2p::KademliaGetProvidersOk::FoundProviders { key: _, providers } => { + workers.extend(providers.into_iter()); + } + _ => {} + }, + Err(e) => { + bail!("failed to get providers from DHT: {e}"); + } + }, + _ => { + // this case should never happen + bail!("unexpected Kademlia query result: {res:?}"); + } + } + } + Err(e) => { + bail!("kademlia action failed: {e}"); + } + } + } + + let mut nodes = Vec::new(); + for peer_id in workers { + let record_key = format!("{}:{}", p2p::WORKER_DHT_KEY, peer_id); + let (kad_action, mut result_rx) = + p2p::KademliaAction::GetRecord(record_key.as_bytes().to_vec()) + .into_kademlia_action_with_channel(); + if let Err(e) = kademlia_action_tx.send(kad_action).await { + bail!("failed to 
send Kademlia action: {e}"); + } + + while let Some(result) = result_rx.recv().await { + match result { + Ok(res) => { + match res { + p2p::KademliaQueryResult::GetRecord(res) => match res { + Ok(res) => match res { + p2p::KademliaGetRecordOk::FoundRecord(record) => { + match serde_json::from_slice::( + &record.record.value, + ) { + Ok(node) => { + nodes.push(node); + } + Err(e) => { + warn!("failed to deserialize node record: {e}"); + } + } + } + _ => {} + }, + Err(e) => { + warn!("failed to get record from DHT: {e}"); + } + }, + _ => { + // this case should never happen + bail!("unexpected Kademlia query result: {res:?}"); + } + } + } + Err(e) => { + warn!("Kademlia action failed: {e}"); + } + } + } + } + + Ok(nodes) +} diff --git a/crates/validator/src/validators/hardware.rs b/crates/validator/src/validators/hardware.rs index 877861da..8bdc8e30 100644 --- a/crates/validator/src/validators/hardware.rs +++ b/crates/validator/src/validators/hardware.rs @@ -1,9 +1,9 @@ use alloy::primitives::Address; -use anyhow::bail; -use anyhow::Result; +use anyhow::{bail, Context as _, Result}; use log::{debug, error, info}; use shared::{ - models::node::DiscoveryNode, + models::node::Node, + models::node::NodeWithMetadata, web3::{contracts::core::builder::Contracts, wallet::WalletProvider}, }; @@ -31,43 +31,32 @@ impl HardwareValidator { } } - async fn validate_node(&self, node: DiscoveryNode) -> Result<()> { - let node_address = match node.id.trim_start_matches("0x").parse::
() { - Ok(addr) => addr, - Err(e) => { - bail!("failed to parse node address: {e:?}"); - } - }; - - let provider_address = match node + async fn validate_node(&self, node: &Node) -> Result<()> { + let node_address = node + .id + .trim_start_matches("0x") + .parse::
() + .context("failed to parse node address")?; + let provider_address = node .provider_address .trim_start_matches("0x") .parse::
() - { - Ok(addr) => addr, - Err(e) => { - bail!("failed to parse provider address: {e:?}"); - } - }; + .context("failed to parse provider address")?; - // Perform hardware challenge let hardware_challenge = HardwareChallenge::new(self.challenge_tx.clone()); - let challenge_result = hardware_challenge.challenge_node(&node).await; + let challenge_result = hardware_challenge.challenge_node(node).await; if let Err(e) = challenge_result { bail!("failed to challenge node: {e:?}"); } - debug!("Sending validation transaction for node {}", node.id); - if let Err(e) = self .contracts .prime_network .validate_node(provider_address, node_address) .await { - error!("Failed to validate node: {e}"); - return Err(anyhow::anyhow!("Failed to validate node: {}", e)); + bail!("failed to validate node: {e}"); } // Small delay to ensure nonce incrementation @@ -77,20 +66,17 @@ impl HardwareValidator { Ok(()) } - pub async fn validate_nodes(&self, nodes: Vec) -> Result<()> { - let non_validated: Vec<_> = nodes.into_iter().filter(|n| !n.is_validated).collect(); + pub(crate) async fn validate_nodes(&self, nodes: Vec) -> Result<()> { + let non_validated: Vec<_> = nodes.into_iter().filter(|n| !n.is_validated()).collect(); debug!("Non validated nodes: {non_validated:?}"); info!("Starting validation for {} nodes", non_validated.len()); // Process non validated nodes sequentially as simple fix // to avoid nonce conflicts for now. 
Will sophisticate this in the future for node in non_validated { - let node_id = node.id.clone(); - match self.validate_node(node).await { - Ok(_) => (), - Err(e) => { - error!("Failed to validate node {node_id}: {e}"); - } + let node_id = node.node().id.clone(); + if let Err(e) = self.validate_node(node.node()).await { + error!("Failed to validate node {node_id}: {e}"); } } Ok(()) @@ -100,7 +86,8 @@ impl HardwareValidator { #[cfg(test)] mod tests { use super::*; - use shared::models::node::Node; + use alloy::primitives::U256; + use shared::models::node::{Node, NodeWithMetadata}; use shared::web3::contracts::core::builder::ContractBuilder; use shared::web3::wallet::Wallet; use std::sync::Arc; @@ -123,9 +110,8 @@ mod tests { let (tx, _rx) = tokio::sync::mpsc::channel(100); let validator = HardwareValidator::new(contracts, tx); - let fake_discovery_node1 = DiscoveryNode { - is_validated: false, - node: Node { + let node1 = NodeWithMetadata::new( + Node { ip_address: "192.168.1.1".to_string(), port: 8080, compute_pool_id: 1, @@ -133,15 +119,18 @@ mod tests { provider_address: Address::ZERO.to_string(), ..Default::default() }, - is_active: true, - is_provider_whitelisted: true, - is_blacklisted: false, - ..Default::default() - }; - - let fake_discovery_node2 = DiscoveryNode { - is_validated: false, - node: Node { + false, // is_validated + true, // is_active + true, // is_provider_whitelisted + false, // is_blacklisted + U256::ZERO, + None, + None, + None, + ); + + let node2 = NodeWithMetadata::new( + Node { ip_address: "192.168.1.2".to_string(), port: 8080, compute_pool_id: 1, @@ -149,20 +138,22 @@ mod tests { provider_address: Address::ZERO.to_string(), ..Default::default() }, - is_active: true, - is_provider_whitelisted: true, - is_blacklisted: false, - ..Default::default() - }; - - let nodes = vec![fake_discovery_node1, fake_discovery_node2]; + false, // is_validated + true, // is_active + true, // is_provider_whitelisted + false, // is_blacklisted + U256::ZERO, + 
None, + None, + None, + ); + + let nodes = vec![node1, node2]; let start_time = std::time::Instant::now(); let result = validator.validate_nodes(nodes).await; let elapsed = start_time.elapsed(); assert!(elapsed < std::time::Duration::from_secs(11)); - println!("Validation took: {:?}", elapsed); - assert!(result.is_ok()); } } diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index 6970355d..6a57676b 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ b/crates/validator/src/validators/hardware_challenge.rs @@ -2,7 +2,7 @@ use alloy::primitives::Address; use anyhow::{bail, Context as _, Result}; use log::{error, info}; use rand::{rng, Rng}; -use shared::models::node::DiscoveryNode; +use shared::models::node::Node; use std::str::FromStr; use crate::p2p::HardwareChallengeRequest; @@ -16,16 +16,14 @@ impl HardwareChallenge { Self { challenge_tx } } - pub(crate) async fn challenge_node(&self, node: &DiscoveryNode) -> Result<()> { + pub(crate) async fn challenge_node(&self, node: &Node) -> Result<()> { // Check if node has P2P ID and addresses let p2p_id = node - .node .worker_p2p_id .clone() .ok_or_else(|| anyhow::anyhow!("Node {} does not have P2P ID", node.id))?; let p2p_addresses = node - .node .worker_p2p_addresses .clone() .ok_or_else(|| anyhow::anyhow!("Node {} does not have P2P addresses", node.id))?; @@ -42,9 +40,7 @@ impl HardwareChallenge { let mut challenge_with_timestamp = challenge_matrix.clone(); challenge_with_timestamp.timestamp = Some(current_time); - let node_address = Address::from_str(&node.node.id) - .map_err(|e| anyhow::anyhow!("Failed to parse node address {}: {}", node.node.id, e))?; - + let node_address = Address::from_str(&node.id).context("failed to parse node address")?; let (response_tx, response_rx) = tokio::sync::oneshot::channel(); let hardware_challenge = HardwareChallengeRequest { worker_wallet_address: node_address, From 
7e0493db91dc276961ca2fa8abb1b25cb5a923de Mon Sep 17 00:00:00 2001 From: elizabeth Date: Tue, 15 Jul 2025 13:14:33 -0400 Subject: [PATCH 03/14] clean up validation loop --- crates/p2p/src/behaviour.rs | 2 + crates/validator/src/validator.rs | 226 +++++++++++--------- crates/validator/src/validators/hardware.rs | 1 + 3 files changed, 132 insertions(+), 97 deletions(-) diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index c458f1cb..021c0176 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -163,6 +163,8 @@ impl BehaviourEvent { BehaviourEvent::Identify(_event) => {} BehaviourEvent::Kademlia(event) => { match event { + // TODO: also handle InboundRequest::AddProvider and InboundRequest::PutRecord, + // as these are new workers joining the network kad::Event::OutboundQueryProgressed { id, result, diff --git a/crates/validator/src/validator.rs b/crates/validator/src/validator.rs index a09680d7..2325e2d2 100644 --- a/crates/validator/src/validator.rs +++ b/crates/validator/src/validator.rs @@ -1,6 +1,6 @@ use crate::{HardwareValidator, MetricsContext, SyntheticDataValidator}; use alloy::primitives::{utils::Unit, Address, U256}; -use anyhow::{bail, Result}; +use anyhow::{bail, Context as _, Result}; use futures::stream::FuturesUnordered; use futures::StreamExt as _; use log::{error, info, warn}; @@ -104,118 +104,150 @@ impl Validator { .expect("stake manager contract must be initialized"); loop { - if cancellation_token.is_cancelled() { - info!("Validation loop is stopping due to cancellation signal"); - break; + let sleep = tokio::time::sleep(std::time::Duration::from_secs(5)); + tokio::select! 
{ + _ = cancellation_token.cancelled() => { + info!("Validator is stopping due to cancellation signal"); + break; + } + _ = sleep => { + info!("Validator is starting validation loop"); + if let Err(e) = perform_validation( + synthetic_validator.clone(), + provider.clone(), + contracts.clone(), + hardware_validator.clone(), + stake_manager.clone(), + kademlia_action_tx.clone(), + disable_hardware_validation, + metrics_ctx.clone(), + validator_health.clone(), + ).await { + error!("Validation loop failed: {e:#}"); + } else { + info!("Validation loop completed successfully"); + } + } } + } + } +} - // Start timing the loop - let loop_start = Instant::now(); - - // Update the last validation timestamp - let last_validation_timestamp = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("current time must be after unix epoch") - .as_secs(); +async fn perform_validation( + synthetic_validator: Option>, + provider: WalletProvider, + contracts: Contracts, + hardware_validator: HardwareValidator, + stake_manager: shared::web3::contracts::implementations::stake_manager::StakeManagerContract< + WalletProvider, + >, + kademlia_action_tx: tokio::sync::mpsc::Sender, + disable_hardware_validation: bool, + metrics_ctx: MetricsContext, + validator_health: Arc>, +) -> Result<()> { + // Start timing the loop + let loop_start = Instant::now(); - if let Some(validator) = synthetic_validator.clone() { - if let Err(e) = validator.validate_work().await { - error!("Failed to validate work: {e}"); - } - } + // Update the last validation timestamp + let last_validation_timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("current time must be after unix epoch") + .as_secs(); - if !disable_hardware_validation { - let nodes = get_worker_nodes_from_dht(kademlia_action_tx.clone()) - .await - .unwrap_or_else(|e| { - error!("Failed to fetch nodes from DHT: {e}"); - vec![] - }); + if let Some(validator) = synthetic_validator.clone() { + if let Err(e) = 
validator.validate_work().await { + error!("Failed to validate work: {e}"); + } + } - if nodes.is_empty() { - info!("No worker nodes found in DHT, skipping hardware validation"); - tokio::time::sleep(std::time::Duration::from_secs(5)).await; - continue; - } + if !disable_hardware_validation { + let nodes = get_worker_nodes_from_dht(kademlia_action_tx.clone()) + .await + .context("failed to fetch nodes from DHT")?; - let futures = FuturesUnordered::new(); - for node in nodes { - futures.push(NodeWithMetadata::new_from_contracts( - node, &provider, &contracts, - )); - } - let nodes = futures - .collect::>() - .await - .into_iter() - .filter_map(Result::ok) - .collect::>(); - if nodes.is_empty() { - info!("No valid nodes found for hardware validation"); - tokio::time::sleep(std::time::Duration::from_secs(5)).await; - continue; - } + if nodes.is_empty() { + info!("No worker nodes found in DHT, skipping hardware validation"); + return Ok(()); + } - // Ensure nodes have enough stake - let mut nodes_with_enough_stake = Vec::new(); - let mut provider_stake_cache: std::collections::HashMap = - std::collections::HashMap::new(); + let futures = FuturesUnordered::new(); + for node in nodes { + futures.push(NodeWithMetadata::new_from_contracts( + node, &provider, &contracts, + )); + } + let nodes = futures + .collect::>() + .await + .into_iter() + .filter_map(Result::ok) + .collect::>(); + if nodes.is_empty() { + info!("No valid nodes found for hardware validation"); + return Ok(()); + } - for node in nodes { - let provider_address = Address::from_str(&node.node().provider_address).expect("provider address must be valid, as it was checked in `NodeWithMetadata::new`"); + // Ensure nodes have enough stake + let mut nodes_with_enough_stake = Vec::new(); + let mut provider_stake_cache: std::collections::HashMap = + std::collections::HashMap::new(); - let (stake, required_stake) = - if let Some(&cached_info) = provider_stake_cache.get(&provider_address) { - cached_info - } else { - 
let stake = stake_manager - .get_stake(provider_address) - .await - .unwrap_or_default(); - let total_compute = contracts - .compute_registry - .get_provider_total_compute(provider_address) - .await - .unwrap_or_default(); - let required_stake = stake_manager - .calculate_stake(U256::from(0), total_compute) - .await - .unwrap_or_default(); + for node in nodes { + let provider_address = Address::from_str(&node.node().provider_address).expect( + "provider address must be valid, as it was checked in `NodeWithMetadata::new`", + ); - provider_stake_cache.insert(provider_address, (stake, required_stake)); - (stake, required_stake) - }; + let (stake, required_stake) = + if let Some(&cached_info) = provider_stake_cache.get(&provider_address) { + cached_info + } else { + let stake = stake_manager + .get_stake(provider_address) + .await + .unwrap_or_default(); + let total_compute = contracts + .compute_registry + .get_provider_total_compute(provider_address) + .await + .unwrap_or_default(); + let required_stake = stake_manager + .calculate_stake(U256::from(0), total_compute) + .await + .unwrap_or_default(); - if stake >= required_stake { - nodes_with_enough_stake.push(node); - } else { - info!( - "Node {} has insufficient stake: {} (required: {})", - node.node().id, - stake / Unit::ETHER.wei(), - required_stake / Unit::ETHER.wei() - ); - } - } + provider_stake_cache.insert(provider_address, (stake, required_stake)); + (stake, required_stake) + }; - if let Err(e) = hardware_validator - .validate_nodes(nodes_with_enough_stake) - .await - { - error!("Error validating nodes: {e:#}"); - } + if stake >= required_stake { + nodes_with_enough_stake.push(node); + } else { + info!( + "Node {} has insufficient stake: {} (required: {})", + node.node().id, + stake / Unit::ETHER.wei(), + required_stake / Unit::ETHER.wei() + ); } + } - // Calculate and store loop duration - let last_loop_duration_ms = loop_start.elapsed().as_millis(); - 
metrics_ctx.record_validation_loop_duration(loop_start.elapsed().as_secs_f64()); - info!("Validation loop completed in {last_loop_duration_ms}ms"); - - let mut validator_health = validator_health.lock().await; - validator_health.update(last_validation_timestamp, last_loop_duration_ms as u64); - tokio::time::sleep(std::time::Duration::from_secs(5)).await; + if let Err(e) = hardware_validator + .validate_nodes(nodes_with_enough_stake) + .await + { + error!("Error validating nodes: {e:#}"); } } + + // Calculate and store loop duration + let last_loop_duration_ms = loop_start.elapsed().as_millis(); + metrics_ctx.record_validation_loop_duration(loop_start.elapsed().as_secs_f64()); + info!("Validation loop completed in {last_loop_duration_ms}ms"); + + let mut validator_health = validator_health.lock().await; + validator_health.update(last_validation_timestamp, last_loop_duration_ms as u64); + Ok(()) } async fn get_worker_nodes_from_dht( diff --git a/crates/validator/src/validators/hardware.rs b/crates/validator/src/validators/hardware.rs index 8bdc8e30..08adcf42 100644 --- a/crates/validator/src/validators/hardware.rs +++ b/crates/validator/src/validators/hardware.rs @@ -15,6 +15,7 @@ use crate::validators::hardware_challenge::HardwareChallenge; /// NOTE: This is a temporary implementation that will be replaced with a proper /// hardware validator in the near future. The current implementation only performs /// basic matrix multiplication challenges and does not verify actual hardware specs. 
+#[derive(Clone)] pub struct HardwareValidator { contracts: Contracts, challenge_tx: tokio::sync::mpsc::Sender, From 1f2374c0c50fc8f2dce97a5b9f8612dfa0c2c7ed Mon Sep 17 00:00:00 2001 From: elizabeth Date: Tue, 15 Jul 2025 18:30:18 -0400 Subject: [PATCH 04/14] wip put record --- Makefile | 6 +-- crates/p2p/src/behaviour.rs | 30 ++++++++++--- crates/p2p/src/lib.rs | 70 ++++++++++++++++++++++++------- crates/validator/src/main.rs | 7 ++-- crates/validator/src/validator.rs | 8 ++-- crates/worker/src/cli/command.rs | 52 ++--------------------- 6 files changed, 95 insertions(+), 78 deletions(-) diff --git a/Makefile b/Makefile index b75ea346..1e3fb9b8 100644 --- a/Makefile +++ b/Makefile @@ -121,18 +121,18 @@ watch-discovery: watch-worker: set -a; source ${ENV_FILE}; set +a; \ - cargo watch -w crates/worker/src -x "run --bin worker -- run --bootnodes $${ORCHESTRATOR_P2P_ADDRESS} --port 8091 --discovery-url $${DISCOVERY_URLS:-$${DISCOVERY_URL:-http://localhost:8089}} --compute-pool-id $$WORKER_COMPUTE_POOL_ID --skip-system-checks $${LOKI_URL:+--loki-url $${LOKI_URL}} --log-level $${LOG_LEVEL:-info}" + cargo watch -w crates/worker/src -x "run --bin worker -- run --bootnodes $${ORCHESTRATOR_P2P_ADDRESS} --port 8091 --compute-pool-id $$WORKER_COMPUTE_POOL_ID --skip-system-checks $${LOKI_URL:+--loki-url $${LOKI_URL}} --log-level $${LOG_LEVEL:-info}" watch-worker-two: set -a; source ${ENV_FILE}; set +a; \ - cargo watch -w crates/worker/src -x "run --bin worker -- run --port 8092 --discovery-url $${DISCOVERY_URLS:-$${DISCOVERY_URL:-http://localhost:8089}} --private-key-node $${PRIVATE_KEY_NODE_2} --private-key-provider $${PRIVATE_KEY_PROVIDER} --compute-pool-id $$WORKER_COMPUTE_POOL_ID --skip-system-checks $${LOKI_URL:+--loki-url $${LOKI_URL}} --log-level $${LOG_LEVEL:-info} --disable-state-storing --no-auto-recover" + cargo watch -w crates/worker/src -x "run --bin worker -- run --port 8092 --private-key-node $${PRIVATE_KEY_NODE_2} --private-key-provider 
$${PRIVATE_KEY_PROVIDER} --compute-pool-id $$WORKER_COMPUTE_POOL_ID --skip-system-checks $${LOKI_URL:+--loki-url $${LOKI_URL}} --log-level $${LOG_LEVEL:-info} --disable-state-storing --no-auto-recover" watch-check: cargo watch -w crates/worker/src -x "run --bin worker -- check" watch-validator: set -a; source ${ENV_FILE}; set +a; \ - cargo watch -w crates/validator/src -x "run --bin validator -- --bootnodes $${ORCHESTRATOR_P2P_ADDRESS} --validator-key $${PRIVATE_KEY_VALIDATOR} --rpc-url $${RPC_URL} --discovery-urls $${DISCOVERY_URLS:-$${DISCOVERY_URL:-http://localhost:8089}} --pool-id $${WORKER_COMPUTE_POOL_ID} $${BUCKET_NAME:+--bucket-name $${BUCKET_NAME}} -l $${LOG_LEVEL:-info} --toploc-grace-interval $${TOPLOC_GRACE_INTERVAL:-30} --incomplete-group-grace-period-minutes $${INCOMPLETE_GROUP_GRACE_PERIOD_MINUTES:-1} --use-grouping" + cargo watch -w crates/validator/src -x "run --bin validator -- --bootnodes $${ORCHESTRATOR_P2P_ADDRESS} --validator-key $${PRIVATE_KEY_VALIDATOR} --rpc-url $${RPC_URL} --pool-id $${WORKER_COMPUTE_POOL_ID} $${BUCKET_NAME:+--bucket-name $${BUCKET_NAME}} -l $${LOG_LEVEL:-info} --toploc-grace-interval $${TOPLOC_GRACE_INTERVAL:-30} --incomplete-group-grace-period-minutes $${INCOMPLETE_GROUP_GRACE_PERIOD_MINUTES:-1} --use-grouping" watch-orchestrator: set -a; source ${ENV_FILE}; set +a; \ diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index 021c0176..14deaceb 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -11,10 +11,13 @@ use libp2p::mdns; use libp2p::ping; use libp2p::request_response; use libp2p::swarm::NetworkBehaviour; -use log::debug; +use libp2p::Multiaddr; +use libp2p::PeerId; +use log::{debug, info}; use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; +use std::vec; use tokio::sync::Mutex; use crate::discovery::OngoingKademliaQuery; @@ -119,7 +122,12 @@ impl Behaviour { let mdns = mdns::tokio::Behaviour::new(mdns::Config::default(), peer_id) 
.context("failed to create mDNS behaviour")?; - let kademlia = kad::Behaviour::new(peer_id, MemoryStore::new(peer_id)); + let mut kad_config = kad::Config::new(kad::PROTOCOL_NAME); + // TODO: by default this is 20, however on a local test network we won't have 20 nodes. + kad_config + .set_replication_factor(1usize.try_into().expect("can convert 1 to NonZeroUsize")); + kad_config.set_provider_publication_interval(Some(Duration::from_secs(10))); + let kademlia = kad::Behaviour::with_config(peer_id, MemoryStore::new(peer_id), kad_config); let identify = identify::Behaviour::new( identify::Config::new(PRIME_STREAM_PROTOCOL.to_string(), keypair.public()) @@ -157,12 +165,17 @@ impl BehaviourEvent { self, message_tx: tokio::sync::mpsc::Sender, ongoing_kademlia_queries: Arc>>, - ) { + ) -> Vec<(PeerId, Multiaddr)> { match self { BehaviourEvent::Autonat(_event) => {} BehaviourEvent::Identify(_event) => {} BehaviourEvent::Kademlia(event) => { match event { + kad::Event::RoutingUpdated { + peer, addresses, .. 
+ } => { + log::info!("kademlia routing updated for peer {peer:?} with addresses {addresses:?}"); + } // TODO: also handle InboundRequest::AddProvider and InboundRequest::PutRecord, // as these are new workers joining the network kad::Event::OutboundQueryProgressed { @@ -171,7 +184,7 @@ impl BehaviourEvent { stats: _, step, } => { - debug!("kademlia query {id:?} progressed with step {step:?} and result {result:?}"); + info!("kademlia query {id:?} progressed with step {step:?} and result {result:?}"); let mut ongoing_queries = ongoing_kademlia_queries.lock().await; if let Some(query) = ongoing_queries.get_mut(&id) { @@ -185,7 +198,12 @@ impl BehaviourEvent { _ => {} } } - BehaviourEvent::Mdns(_event) => {} + BehaviourEvent::Mdns(event) => match event { + mdns::Event::Discovered(peers) => { + return peers; + } + _ => {} + }, BehaviourEvent::Ping(_event) => {} BehaviourEvent::RequestResponse(event) => match event { request_response::Event::Message { peer, message } => { @@ -217,5 +235,7 @@ impl BehaviourEvent { } }, } + + vec![] } } diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 92b78ad3..db63368d 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -8,8 +8,7 @@ use libp2p::yamux; use libp2p::Swarm; use libp2p::SwarmBuilder; use libp2p::{identity, Transport}; -use log::debug; -use log::warn; +use log::{debug, info, warn}; use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; @@ -108,30 +107,68 @@ impl Node { continue; }; + swarm + .behaviour_mut() + .kademlia() + .add_address(&peer_id, multiaddr.clone()); + + log::info!("dialing bootnode {peer_id} at {multiaddr}"); + match swarm.dial(multiaddr.clone()) { - Ok(()) => { - swarm - .behaviour_mut() - .kademlia() - .add_address(&peer_id, multiaddr); - } + Ok(()) => {} Err(e) => { debug!("failed to dial bootnode {multiaddr}: {e:?}"); } } } + // this will only error if we have no known peers + let (bootstrap_result_tx, mut bootstrap_result_rx) = 
tokio::sync::mpsc::channel(100); + match swarm.behaviour_mut().kademlia().bootstrap() { + Ok(query_id) => { + let mut ongoing_kademlia_queries = ongoing_kademlia_queries.lock().await; + ongoing_kademlia_queries.insert( + query_id, + OngoingKademliaQuery { + result_tx: bootstrap_result_tx, + }, + ); + } + Err(e) => { + warn!("failed to bootstrap kademlia: {e:?}"); + } + }; + + let connected_peers_check_interval = Duration::from_secs(5); + loop { + let sleep = tokio::time::sleep(connected_peers_check_interval); tokio::select! { biased; + _ = sleep => { + let peer_count = swarm.connected_peers().count(); + info!("connected peers: {peer_count}"); + } _ = cancellation_token.cancelled() => { debug!("cancellation token triggered, shutting down node"); break Ok(()); } + Some(res) = bootstrap_result_rx.recv() => { + match res { + Ok(libp2p::kad::QueryResult::Bootstrap(_)) => { + log::info!("kademlia bootstrap progressed successfully"); + } + Ok(res) => { + warn!("kademlia bootstrap query returned unexpected result: {res:?}"); + } + Err(e) => { + warn!("kademlia bootstrap query failed: {e:?}"); + } + } + } Some(message) = outgoing_message_rx.recv() => { match message { OutgoingMessage::Request((peer, addrs, request)) => { - // TODO: if we're not connected to the peer, we should dial it for addr in addrs { swarm.add_peer_address(peer, addr); } @@ -166,7 +203,7 @@ impl Node { peer_id, .. 
} => { - debug!("connection established with peer {peer_id}"); + log::info!("connection established with peer {peer_id}"); } SwarmEvent::ConnectionClosed { peer_id, @@ -175,7 +212,14 @@ impl Node { } => { debug!("connection closed with peer {peer_id}: {cause:?}"); } - SwarmEvent::Behaviour(event) => event.handle(incoming_message_tx.clone(), ongoing_kademlia_queries.clone()).await, + SwarmEvent::Behaviour(event) => { + let discovered_peers = event.handle(incoming_message_tx.clone(), ongoing_kademlia_queries.clone()).await; + for (peer_id, addr) in discovered_peers { + log::info!("discovered peer {peer_id} at {addr}"); + swarm.add_peer_address(peer_id, addr.clone()); + swarm.behaviour_mut().kademlia().add_address(&peer_id, addr.clone()); + } + } _ => continue, } }, @@ -311,10 +355,6 @@ impl NodeBuilder { } = self; let keypair = keypair.unwrap_or(identity::Keypair::generate_ed25519()); - println!( - "keypair: {}", - hex::encode(keypair.clone().try_into_ed25519().unwrap().to_bytes()) - ); let peer_id = keypair.public().to_peer_id(); let transport = create_transport(&keypair)?; diff --git a/crates/validator/src/main.rs b/crates/validator/src/main.rs index b02d870f..1c1c9b9c 100644 --- a/crates/validator/src/main.rs +++ b/crates/validator/src/main.rs @@ -32,10 +32,9 @@ struct Args { #[arg(short = 'k', long)] validator_key: String, - /// Discovery URLs (comma-separated) - #[arg(long, default_value = "http://localhost:8089", value_delimiter = ',')] - discovery_urls: Vec, - + // /// Discovery URLs (comma-separated) + // #[arg(long, default_value = "http://localhost:8089", value_delimiter = ',')] + // discovery_urls: Vec, /// Ability to disable hardware validation #[arg(long, default_value = "false")] disable_hardware_validation: bool, diff --git a/crates/validator/src/validator.rs b/crates/validator/src/validator.rs index 2325e2d2..7dd8b48f 100644 --- a/crates/validator/src/validator.rs +++ b/crates/validator/src/validator.rs @@ -103,8 +103,10 @@ impl Validator { .as_ref() 
.expect("stake manager contract must be initialized"); + let sleep_duration = std::time::Duration::from_secs(5); + loop { - let sleep = tokio::time::sleep(std::time::Duration::from_secs(5)); + let sleep = tokio::time::sleep(sleep_duration); tokio::select! { _ = cancellation_token.cancelled() => { info!("Validator is stopping due to cancellation signal"); @@ -124,8 +126,6 @@ impl Validator { validator_health.clone(), ).await { error!("Validation loop failed: {e:#}"); - } else { - info!("Validation loop completed successfully"); } } } @@ -289,6 +289,8 @@ async fn get_worker_nodes_from_dht( } } + info!("got {} worker nodes from DHT", workers.len()); + let mut nodes = Vec::new(); for peer_id in workers { let record_key = format!("{}:{}", p2p::WORKER_DHT_KEY, peer_id); diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index e8062adf..9b036465 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -786,6 +786,10 @@ pub async fn execute_command( Console::success(&format!("P2P service started with ID: {peer_id}")); + // TODO: sleep so that dht is bootstrapped before publishing; + // should update p2p service to expose this. 
+ tokio::time::sleep(Duration::from_secs(30)).await; + let record_key = format!("{}:{}", p2p::WORKER_DHT_KEY, peer_id); let (kad_action, mut result_rx) = KademliaAction::PutRecord { key: record_key.as_bytes().to_vec(), @@ -807,7 +811,6 @@ pub async fn execute_command( } Err(e) => { error!("❌ Failed to put record in DHT: {e}"); - std::process::exit(1); } }, _ => { @@ -861,53 +864,6 @@ pub async fn execute_command( } } - // let mut attempts = 0; - // let max_attempts = 100; - // while attempts < max_attempts { - // Console::title("📦 Uploading discovery info"); - // match discovery_service.upload_discovery_info(&node_config).await { - // Ok(_) => break, - // Err(e) => { - // attempts += 1; - // let error_msg = e.to_string(); - - // // Check if this is a Cloudflare block - // if error_msg.contains("403 Forbidden") - // && (error_msg.contains("Cloudflare") - // || error_msg.contains("Sorry, you have been blocked") - // || error_msg.contains("Attention Required!")) - // { - // error!( - // "Attempt {attempts}: ❌ Discovery service blocked by Cloudflare protection. 
This may indicate:" - // ); - // error!(" • Your IP address has been flagged by Cloudflare security"); - // error!(" • Too many requests from your location"); - // error!(" • Network configuration issues"); - // error!(" • Discovery service may be under DDoS protection"); - // error!( - // "Please contact support or try from a different network/IP address" - // ); - // } else { - // error!("Attempt {attempts}: ❌ Failed to upload discovery info: {e}"); - // } - - // if attempts >= max_attempts { - // if error_msg.contains("403 Forbidden") - // && (error_msg.contains("Cloudflare") - // || error_msg.contains("Sorry, you have been blocked")) - // { - // error!("❌ Unable to reach discovery service due to Cloudflare blocking after {max_attempts} attempts"); - // error!("This is likely a network/IP issue rather than a worker configuration problem"); - // } - // std::process::exit(1); - // } - // } - // } - // tokio::time::sleep(tokio::time::Duration::from_secs(10)).await; - // } - - Console::success("Discovery info uploaded"); - Console::section("Starting Worker with Task Bridge"); // Start monitoring compute node status on chain From 990d70cbd9447de5cee4f63fd4618ddde4490351 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Tue, 15 Jul 2025 20:45:47 -0400 Subject: [PATCH 05/14] impl basic bootnode; set dht to server, provider discovery working --- Cargo.lock | 14 ++++ Cargo.toml | 3 +- Makefile | 52 +++++++++------ crates/bootnode/Cargo.toml | 18 +++++ crates/bootnode/src/main.rs | 97 +++++++++++++++++++++++++++ crates/orchestrator/src/main.rs | 101 ++++++++++++++++------------- crates/orchestrator/src/p2p/mod.rs | 5 +- crates/p2p/src/behaviour.rs | 23 +++++-- crates/p2p/src/discovery.rs | 5 ++ crates/p2p/src/lib.rs | 82 +++++++++++++++++++++-- crates/validator/src/main.rs | 5 -- crates/validator/src/validator.rs | 6 +- crates/worker/src/cli/command.rs | 4 +- crates/worker/src/lib.rs | 2 +- crates/worker/src/utils/logging.rs | 2 - 15 files changed, 328 insertions(+), 91 
deletions(-) create mode 100644 crates/bootnode/Cargo.toml create mode 100644 crates/bootnode/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 998525cd..cbddf9ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1723,6 +1723,20 @@ dependencies = [ "serde_with", ] +[[package]] +name = "bootnode" +version = "0.3.11" +dependencies = [ + "anyhow", + "clap", + "hex", + "p2p", + "tokio", + "tokio-util", + "tracing", + "tracing-subscriber", +] + [[package]] name = "bounded-integer" version = "0.5.8" diff --git a/Cargo.toml b/Cargo.toml index 1bc9e2ac..0aed3ebf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,8 @@ members = [ "crates/shared", "crates/orchestrator", "crates/p2p", - "crates/dev-utils", + "crates/dev-utils", + "crates/bootnode", ] resolver = "2" diff --git a/Makefile b/Makefile index 1e3fb9b8..9e8887e3 100644 --- a/Makefile +++ b/Makefile @@ -76,19 +76,19 @@ up: @tmux set -t prime-dev pane-border-format " #{pane_title} " @# Start Worker pane first (pane 0) @tmux select-pane -t prime-dev:services.0 -T "Worker" - # @# Discovery pane (pane 1) - # @tmux split-window -h -t prime-dev:services - # @tmux select-pane -t prime-dev:services.1 -T "Discovery" - # @tmux send-keys -t prime-dev:services.1 'make watch-discovery' C-m - @# Orchestrator pane (pane 1) + @# Bootnode pane (pane 1) @tmux split-window -h -t prime-dev:services - @tmux select-pane -t prime-dev:services.1 -T "Orchestrator" - @tmux send-keys -t prime-dev:services.1 'make watch-orchestrator' C-m - @tmux select-layout -t prime-dev:services even-horizontal - @# Validator pane (pane 2) + @tmux select-pane -t prime-dev:services.1 -T "Bootnode" + @tmux send-keys -t prime-dev:services.1 'make run-bootnode' C-m + @# Orchestrator pane (pane 2) @tmux split-window -h -t prime-dev:services.1 - @tmux select-pane -t prime-dev:services.2 -T "Validator" - @tmux send-keys -t prime-dev:services.2 'make watch-validator' C-m + @tmux select-pane -t prime-dev:services.2 -T "Orchestrator" + @tmux send-keys -t 
prime-dev:services.2 'make watch-orchestrator' C-m + @tmux select-layout -t prime-dev:services even-horizontal + @# Validator pane (pane 3) + @tmux split-window -h -t prime-dev:services.2 + @tmux select-pane -t prime-dev:services.3 -T "Validator" + @tmux send-keys -t prime-dev:services.3 'make watch-validator' C-m @# Create background window for docker logs @tmux new-window -t prime-dev -n background @tmux send-keys -t prime-dev:background 'docker compose logs -f reth redis' C-m @@ -105,8 +105,8 @@ down: @pkill -f "target/debug/worker" 2>/dev/null || true @pkill -f "target/debug/orchestrator" 2>/dev/null || true @pkill -f "target/debug/validator" 2>/dev/null || true - @pkill -f "target/debug/discovery" 2>/dev/null || true - @pkill -9 -f "cargo run --bin discovery" 2>/dev/null || true + @pkill -f "target/debug/bootnode" 2>/dev/null || true + @pkill -9 -f "cargo run --bin bootnode" 2>/dev/null || true @pkill -9 -f "cargo watch" 2>/dev/null || true # Whitelist provider @@ -115,28 +115,40 @@ whitelist-provider: set -a; source ${ENV_FILE}; set +a; \ cargo run -p dev-utils --example whitelist_provider -- --provider-address $${PROVIDER_ADDRESS} --key $${PRIVATE_KEY_VALIDATOR} --rpc-url $${RPC_URL} -watch-discovery: - set -a; source .env; set +a; \ - cargo watch -w crates/discovery/src -x "run --bin discovery -- --rpc-url $${RPC_URL} --max-nodes-per-ip $${MAX_NODES_PER_IP:-2} $${LOCATION_SERVICE_URL:+--location-service-url $${LOCATION_SERVICE_URL}} $${LOCATION_SERVICE_API_KEY:+--location-service-api-key $${LOCATION_SERVICE_API_KEY}}" +# watch-discovery: +# set -a; source .env; set +a; \ +# cargo watch -w crates/discovery/src -x "run --bin discovery -- --rpc-url $${RPC_URL} --max-nodes-per-ip $${MAX_NODES_PER_IP:-2} $${LOCATION_SERVICE_URL:+--location-service-url $${LOCATION_SERVICE_URL}} $${LOCATION_SERVICE_API_KEY:+--location-service-api-key $${LOCATION_SERVICE_API_KEY}}" watch-worker: set -a; source ${ENV_FILE}; set +a; \ - cargo watch -w crates/worker/src -x "run --bin 
worker -- run --bootnodes $${ORCHESTRATOR_P2P_ADDRESS} --port 8091 --compute-pool-id $$WORKER_COMPUTE_POOL_ID --skip-system-checks $${LOKI_URL:+--loki-url $${LOKI_URL}} --log-level $${LOG_LEVEL:-info}" + cargo watch -w crates/worker/src -x "run --bin worker -- run --bootnodes $${BOOTNODE_P2P_ADDRESS} --port 8091 \ + --compute-pool-id $$WORKER_COMPUTE_POOL_ID --skip-system-checks $${LOKI_URL:+--loki-url $${LOKI_URL}} --log-level $${LOG_LEVEL:-info}" watch-worker-two: set -a; source ${ENV_FILE}; set +a; \ - cargo watch -w crates/worker/src -x "run --bin worker -- run --port 8092 --private-key-node $${PRIVATE_KEY_NODE_2} --private-key-provider $${PRIVATE_KEY_PROVIDER} --compute-pool-id $$WORKER_COMPUTE_POOL_ID --skip-system-checks $${LOKI_URL:+--loki-url $${LOKI_URL}} --log-level $${LOG_LEVEL:-info} --disable-state-storing --no-auto-recover" + cargo watch -w crates/worker/src -x "run --bin worker -- run --port 8092 --private-key-node $${PRIVATE_KEY_NODE_2} \ + --private-key-provider $${PRIVATE_KEY_PROVIDER} --compute-pool-id $$WORKER_COMPUTE_POOL_ID --skip-system-checks $${LOKI_URL:+--loki-url $${LOKI_URL}} \ + --log-level $${LOG_LEVEL:-info} --disable-state-storing --no-auto-recover" watch-check: cargo watch -w crates/worker/src -x "run --bin worker -- check" watch-validator: set -a; source ${ENV_FILE}; set +a; \ - cargo watch -w crates/validator/src -x "run --bin validator -- --bootnodes $${ORCHESTRATOR_P2P_ADDRESS} --validator-key $${PRIVATE_KEY_VALIDATOR} --rpc-url $${RPC_URL} --pool-id $${WORKER_COMPUTE_POOL_ID} $${BUCKET_NAME:+--bucket-name $${BUCKET_NAME}} -l $${LOG_LEVEL:-info} --toploc-grace-interval $${TOPLOC_GRACE_INTERVAL:-30} --incomplete-group-grace-period-minutes $${INCOMPLETE_GROUP_GRACE_PERIOD_MINUTES:-1} --use-grouping" + cargo watch -w crates/validator/src -x "run --bin validator -- --bootnodes $${BOOTNODE_P2P_ADDRESS} --validator-key $${PRIVATE_KEY_VALIDATOR} \ + --rpc-url $${RPC_URL} --pool-id $${WORKER_COMPUTE_POOL_ID} 
$${BUCKET_NAME:+--bucket-name $${BUCKET_NAME}} -l $${LOG_LEVEL:-info} \ + --toploc-grace-interval $${TOPLOC_GRACE_INTERVAL:-30} --incomplete-group-grace-period-minutes $${INCOMPLETE_GROUP_GRACE_PERIOD_MINUTES:-1} --use-grouping" watch-orchestrator: set -a; source ${ENV_FILE}; set +a; \ - cargo watch -w crates/orchestrator/src -x "run --bin orchestrator -- -r $$RPC_URL -k $$POOL_OWNER_PRIVATE_KEY -d 0 -p 8090 -i 10 -u http://localhost:8090 --libp2p-private-key $${ORCHESTRATOR_LIBP2P_PRIVATE_KEY} --discovery-urls $${DISCOVERY_URLS:-$${DISCOVERY_URL:-http://localhost:8089}} --compute-pool-id $$WORKER_COMPUTE_POOL_ID $${BUCKET_NAME:+--bucket-name $$BUCKET_NAME} -l $${LOG_LEVEL:-info} --hourly-s3-upload-limit $${HOURLY_S3_LIMIT:-3} --max-healthy-nodes-with-same-endpoint $${MAX_HEALTHY_NODES_WITH_SAME_ENDPOINT:-2}" + cargo watch -w crates/orchestrator/src -x "run --bin orchestrator -- --bootnodes $${BOOTNODE_P2P_ADDRESS} -r $$RPC_URL -k $$POOL_OWNER_PRIVATE_KEY \ + -d 0 -p 8090 -i 10 -u http://localhost:8090 \ + --compute-pool-id $$WORKER_COMPUTE_POOL_ID $${BUCKET_NAME:+--bucket-name $$BUCKET_NAME} -l $${LOG_LEVEL:-info} \ + --hourly-s3-upload-limit $${HOURLY_S3_LIMIT:-3} --max-healthy-nodes-with-same-endpoint $${MAX_HEALTHY_NODES_WITH_SAME_ENDPOINT:-2}" + +run-bootnode: + set -a; source ${ENV_FILE}; set +a; \ + cargo run --bin bootnode -- --libp2p-private-key $${BOOTNODE_LIBP2P_PRIVATE_KEY} build-worker: cargo build --release --bin worker diff --git a/crates/bootnode/Cargo.toml b/crates/bootnode/Cargo.toml new file mode 100644 index 00000000..a7124269 --- /dev/null +++ b/crates/bootnode/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "bootnode" +version.workspace = true +edition.workspace = true + +[dependencies] +p2p = {workspace = true} + +anyhow = {workspace = true} +clap = { workspace = true } +hex = { workspace = true } +tokio = { workspace = true } +tokio-util = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { version = "0.3.19", 
features = ["env-filter"] } + +[lints] +workspace = true diff --git a/crates/bootnode/src/main.rs b/crates/bootnode/src/main.rs new file mode 100644 index 00000000..12ffd22d --- /dev/null +++ b/crates/bootnode/src/main.rs @@ -0,0 +1,97 @@ +use clap::Parser; +use tokio::signal::unix::{signal, SignalKind}; +use tracing::Level; +use tracing_subscriber::{fmt, layer::SubscriberExt as _, util::SubscriberInitExt}; + +#[derive(Parser)] +struct Config { + /// Hex-encoded libp2p private key + #[clap(long)] + libp2p_private_key: String, + + /// Libp2p port + #[clap(long, default_value = "4005")] + libp2p_port: u16, + + /// Log level + #[arg(short = 'l', long, default_value = "info")] + log_level: String, +} + +#[tokio::main] +async fn main() { + let config = Config::parse(); + + let log_level = match config.log_level.as_str() { + "error" => Level::ERROR, + "warn" => Level::WARN, + "info" => Level::INFO, + "debug" => Level::DEBUG, + _ => { + eprintln!("invalid log level: {}", config.log_level); + std::process::exit(1); + } + }; + + let env_filter = + tracing_subscriber::filter::EnvFilter::from_default_env().add_directive(log_level.into()); + + tracing_subscriber::registry() + .with(fmt::layer()) + .with(env_filter) + .init(); + + let cancellation_token = tokio_util::sync::CancellationToken::new(); + + let mut bytes = hex::decode(config.libp2p_private_key.trim()).unwrap_or_else(|_| { + eprintln!("invalid hex-encoded libp2p private key"); + std::process::exit(1); + }); + let keypair = p2p::Keypair::ed25519_from_bytes(&mut bytes).unwrap_or_else(|e| { + eprintln!("failed to create ed25519 keypair from provided private key: {e}"); + std::process::exit(1); + }); + + let node = match p2p::NodeBuilder::new() + .with_keypair(keypair) + .with_port(config.libp2p_port) + .with_cancellation_token(cancellation_token.clone()) + .try_build() + { + Ok(res) => res.0, + Err(e) => { + eprintln!("failed to create p2p node: {e}"); + std::process::exit(1); + } + }; + + tokio::spawn({ + let 
cancellation_token = cancellation_token.clone(); + async move { + let mut sigint = signal(SignalKind::interrupt()).expect( + "setting a SIGINT listener should always work on unix; is this running on unix?", + ); + let mut sigterm = signal(SignalKind::terminate()).expect( + "setting a SIGTERM listener should always work on unix; is this running on unix?", + ); + loop { + tokio::select! { + _ = sigint.recv() => { + tracing::info!("received SIGINT"); + cancellation_token.cancel(); + } + _ = sigterm.recv() => { + tracing::info!("received SIGTERM"); + cancellation_token.cancel(); + } + } + } + } + }); + + tokio::task::spawn(node.run()); + + tracing::info!("Bootnode started with libp2p port {}", config.libp2p_port); + + cancellation_token.cancelled().await; +} diff --git a/crates/orchestrator/src/main.rs b/crates/orchestrator/src/main.rs index 686392f7..3e78ce4e 100644 --- a/crates/orchestrator/src/main.rs +++ b/crates/orchestrator/src/main.rs @@ -13,10 +13,10 @@ use tokio_util::sync::CancellationToken; use url::Url; use orchestrator::{ - start_server, DiscoveryMonitor, LoopHeartbeats, MetricsContext, MetricsSyncService, - MetricsWebhookSender, NodeGroupConfiguration, NodeGroupsPlugin, NodeInviter, NodeStatusUpdater, - P2PService, RedisStore, Scheduler, SchedulerPlugin, ServerMode, StatusUpdatePlugin, - StoreContext, WebhookConfig, WebhookPlugin, + start_server, LoopHeartbeats, MetricsContext, MetricsSyncService, MetricsWebhookSender, + NodeGroupConfiguration, NodeGroupsPlugin, NodeInviter, NodeStatusUpdater, P2PService, + RedisStore, Scheduler, SchedulerPlugin, ServerMode, StatusUpdatePlugin, StoreContext, + WebhookConfig, WebhookPlugin, }; #[derive(Parser)] @@ -61,10 +61,6 @@ struct Args { #[arg(short = 's', long, default_value = "redis://localhost:6380")] redis_store_url: String, - /// Discovery URLs (comma-separated) - #[arg(long, default_value = "http://localhost:8089", value_delimiter = ',')] - discovery_urls: Vec, - /// Admin api key #[arg(short = 'a', long, 
default_value = "admin")] admin_api_key: String, @@ -97,10 +93,10 @@ struct Args { #[arg(long, default_value = "4004")] libp2p_port: u16, - /// Hex-encoded libp2p private key. - /// A new key is generated if this is not provided. - #[arg(long)] - libp2p_private_key: Option, + /// Comma-separated list of libp2p bootnode multiaddresses + /// Example: `/ip4/104.131.131.82/tcp/4001/p2p/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ,/ip4/104.131.131.82/udp/4001/quic-v1/p2p/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ` + #[arg(long, default_value = "")] + bootnodes: String, } #[tokio::main] @@ -154,11 +150,28 @@ async fn main() -> Result<()> { let store_context = Arc::new(StoreContext::new(store.clone())); let keypair = p2p::Keypair::generate_ed25519(); + let bootnodes: Vec = args + .bootnodes + .split(',') + .filter_map(|addr| match addr.to_string().try_into() { + Ok(multiaddr) => Some(multiaddr), + Err(e) => { + error!("Invalid bootnode address '{addr}': {e}"); + None + } + }) + .collect(); + if bootnodes.is_empty() { + error!("No valid bootnodes provided. 
Please provide at least one valid bootnode address."); + std::process::exit(1); + } + let cancellation_token = CancellationToken::new(); - let (p2p_service, invite_tx, get_task_logs_tx, restart_task_tx) = { + let (p2p_service, invite_tx, get_task_logs_tx, restart_task_tx, kademlia_action_tx) = { match P2PService::new( keypair, args.libp2p_port, + bootnodes, cancellation_token.clone(), wallet.clone(), ) { @@ -293,37 +306,37 @@ async fn main() -> Result<()> { }); } - // Create status_update_plugins for discovery monitor - let mut discovery_status_update_plugins: Vec = vec![]; - - // Add webhook plugins to discovery status update plugins - for plugin in &webhook_plugins { - discovery_status_update_plugins.push(plugin.into()); - } - - // Add node groups plugin if available - if let Some(group_plugin) = node_groups_plugin.clone() { - discovery_status_update_plugins.push(group_plugin.into()); - } - - let discovery_store_context = store_context.clone(); - let discovery_heartbeats = heartbeats.clone(); - tasks.spawn({ - let wallet = wallet.clone(); - async move { - let monitor = DiscoveryMonitor::new( - wallet, - compute_pool_id, - args.discovery_refresh_interval, - args.discovery_urls, - discovery_store_context.clone(), - discovery_heartbeats.clone(), - args.max_healthy_nodes_with_same_endpoint, - discovery_status_update_plugins, - ); - monitor.run().await - } - }); + // // Create status_update_plugins for discovery monitor + // let mut discovery_status_update_plugins: Vec = vec![]; + + // // Add webhook plugins to discovery status update plugins + // for plugin in &webhook_plugins { + // discovery_status_update_plugins.push(plugin.into()); + // } + + // // Add node groups plugin if available + // if let Some(group_plugin) = node_groups_plugin.clone() { + // discovery_status_update_plugins.push(group_plugin.into()); + // } + + // let discovery_store_context = store_context.clone(); + // let discovery_heartbeats = heartbeats.clone(); + // tasks.spawn({ + // let wallet = 
wallet.clone(); + // async move { + // let monitor = DiscoveryMonitor::new( + // wallet, + // compute_pool_id, + // args.discovery_refresh_interval, + // args.discovery_urls, + // discovery_store_context.clone(), + // discovery_heartbeats.clone(), + // args.max_healthy_nodes_with_same_endpoint, + // discovery_status_update_plugins, + // ); + // monitor.run().await + // } + // }); let inviter_store_context = store_context.clone(); let inviter_heartbeats = heartbeats.clone(); diff --git a/crates/orchestrator/src/p2p/mod.rs b/crates/orchestrator/src/p2p/mod.rs index 836eaca9..7bc4f85c 100644 --- a/crates/orchestrator/src/p2p/mod.rs +++ b/crates/orchestrator/src/p2p/mod.rs @@ -21,6 +21,7 @@ impl Service { pub fn new( keypair: Keypair, port: u16, + bootnodes: Vec, cancellation_token: CancellationToken, wallet: Wallet, ) -> Result<( @@ -28,6 +29,7 @@ impl Service { Sender, Sender, Sender, + Sender, )> { let (invite_tx, invite_rx) = tokio::sync::mpsc::channel(100); let (get_task_logs_tx, get_task_logs_rx) = tokio::sync::mpsc::channel(100); @@ -35,7 +37,7 @@ impl Service { let (inner, outgoing_message_tx, kademlia_action_tx) = P2PService::new( keypair, port, - vec![], + bootnodes, cancellation_token.clone(), wallet, Protocols::new() @@ -56,6 +58,7 @@ impl Service { invite_tx, get_task_logs_tx, restart_task_tx, + kademlia_action_tx, )) } diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index 14deaceb..b3cdeb86 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -124,10 +124,13 @@ impl Behaviour { .context("failed to create mDNS behaviour")?; let mut kad_config = kad::Config::new(kad::PROTOCOL_NAME); // TODO: by default this is 20, however on a local test network we won't have 20 nodes. + // make this configurable? 
kad_config .set_replication_factor(1usize.try_into().expect("can convert 1 to NonZeroUsize")); - kad_config.set_provider_publication_interval(Some(Duration::from_secs(10))); - let kademlia = kad::Behaviour::with_config(peer_id, MemoryStore::new(peer_id), kad_config); + kad_config.set_provider_publication_interval(Some(Duration::from_secs(30))); + let mut kademlia = + kad::Behaviour::with_config(peer_id, MemoryStore::new(peer_id), kad_config); + kademlia.set_mode(Some(kad::Mode::Server)); let identify = identify::Behaviour::new( identify::Config::new(PRIME_STREAM_PROTOCOL.to_string(), keypair.public()) @@ -168,13 +171,23 @@ impl BehaviourEvent { ) -> Vec<(PeerId, Multiaddr)> { match self { BehaviourEvent::Autonat(_event) => {} - BehaviourEvent::Identify(_event) => {} + BehaviourEvent::Identify(event) => match event { + identify::Event::Received { peer_id, info, .. } => { + let addrs = info + .listen_addrs + .into_iter() + .map(|addr| (peer_id, addr)) + .collect::>(); + return addrs; + } + _ => {} + }, BehaviourEvent::Kademlia(event) => { match event { kad::Event::RoutingUpdated { peer, addresses, .. 
} => { - log::info!("kademlia routing updated for peer {peer:?} with addresses {addresses:?}"); + info!("kademlia routing updated for peer {peer:?} with addresses {addresses:?}"); } // TODO: also handle InboundRequest::AddProvider and InboundRequest::PutRecord, // as these are new workers joining the network @@ -184,7 +197,7 @@ impl BehaviourEvent { stats: _, step, } => { - info!("kademlia query {id:?} progressed with step {step:?} and result {result:?}"); + debug!("kademlia query {id:?} progressed with step {step:?} and result {result:?}"); let mut ongoing_queries = ongoing_kademlia_queries.lock().await; if let Some(query) = ongoing_queries.get_mut(&id) { diff --git a/crates/p2p/src/discovery.rs b/crates/p2p/src/discovery.rs index 10d33b29..790b0052 100644 --- a/crates/p2p/src/discovery.rs +++ b/crates/p2p/src/discovery.rs @@ -1,12 +1,17 @@ use anyhow::{Context as _, Result}; use libp2p::kad::QueryResult; use libp2p::kad::{self, store::RecordStore, QueryId, Quorum}; +use libp2p::PeerId; use std::collections::HashMap; use std::sync::Arc; use tokio::sync::Mutex; pub const WORKER_DHT_KEY: &str = "prime-worker/1.0.0"; +pub fn worker_dht_key_with_peer_id(peer_id: &PeerId) -> String { + format!("{WORKER_DHT_KEY}/{peer_id}") +} + pub struct KademliaActionWithChannel { kad_action: KademliaAction, result_tx: tokio::sync::mpsc::Sender>, diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index db63368d..48a5e8b0 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -8,11 +8,11 @@ use libp2p::yamux; use libp2p::Swarm; use libp2p::SwarmBuilder; use libp2p::{identity, Transport}; -use log::{debug, info, warn}; use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; use tokio::sync::Mutex; +use tracing::{debug, info, warn}; mod behaviour; mod discovery; @@ -112,7 +112,7 @@ impl Node { .kademlia() .add_address(&peer_id, multiaddr.clone()); - log::info!("dialing bootnode {peer_id} at {multiaddr}"); + log::debug!("dialing bootnode {peer_id} 
at {multiaddr}"); match swarm.dial(multiaddr.clone()) { Ok(()) => {} @@ -139,7 +139,7 @@ impl Node { } }; - let connected_peers_check_interval = Duration::from_secs(5); + let connected_peers_check_interval = Duration::from_secs(60); loop { let sleep = tokio::time::sleep(connected_peers_check_interval); @@ -156,7 +156,7 @@ impl Node { Some(res) = bootstrap_result_rx.recv() => { match res { Ok(libp2p::kad::QueryResult::Bootstrap(_)) => { - log::info!("kademlia bootstrap progressed successfully"); + debug!("kademlia bootstrap progressed successfully"); } Ok(res) => { warn!("kademlia bootstrap query returned unexpected result: {res:?}"); @@ -203,7 +203,7 @@ impl Node { peer_id, .. } => { - log::info!("connection established with peer {peer_id}"); + debug!("connection established with peer {peer_id}"); } SwarmEvent::ConnectionClosed { peer_id, @@ -215,7 +215,6 @@ impl Node { SwarmEvent::Behaviour(event) => { let discovered_peers = event.handle(incoming_message_tx.clone(), ongoing_kademlia_queries.clone()).await; for (peer_id, addr) in discovered_peers { - log::info!("discovered peer {peer_id} at {addr}"); swarm.add_peer_address(peer_id, addr.clone()); swarm.behaviour_mut().kademlia().add_address(&peer_id, addr.clone()); } @@ -421,8 +420,13 @@ fn create_transport( #[cfg(test)] mod test { + use libp2p::kad; + use libp2p::kad::GetProvidersOk; + use std::collections::HashSet; + use super::NodeBuilder; use crate::message; + use crate::KademliaAction; #[tokio::test] async fn two_nodes_can_connect_and_do_request_response() { @@ -430,7 +434,6 @@ mod test { NodeBuilder::new().with_get_task_logs().try_build().unwrap(); let node1_peer_id = node1.peer_id(); - println!("{:?}", node1.multiaddrs()); let (node2, mut incoming_message_rx2, outgoing_message_tx2, _) = NodeBuilder::new() .with_get_task_logs() .with_bootnodes(node1.multiaddrs()) @@ -482,4 +485,69 @@ mod test { }; assert_eq!(logs, "logs"); } + + #[tokio::test] + async fn kademlia_get_providers_ok() { + let (node1, _, _, _) = 
NodeBuilder::new().with_get_task_logs().try_build().unwrap(); + + let (node2, _, _, kademlia_action_tx_2) = NodeBuilder::new() + .with_get_task_logs() + .with_bootnodes(node1.multiaddrs()) + .try_build() + .unwrap(); + + let (node3, _, _, kademlia_action_tx_3) = NodeBuilder::new() + .with_get_task_logs() + .with_bootnodes(node1.multiaddrs()) + .try_build() + .unwrap(); + let node3_peer_id = node3.peer_id(); + + tokio::spawn(async move { node1.run().await }); + tokio::spawn(async move { node2.run().await }); + tokio::spawn(async move { node3.run().await }); + + tokio::time::sleep(std::time::Duration::from_secs(5)).await; + + let test_key = b"test_key".to_vec(); + let action = KademliaAction::StartProviding(test_key.clone()); + let (action, mut rx) = action.into_kademlia_action_with_channel(); + kademlia_action_tx_3.send(action).await.unwrap(); + + let result = rx.recv().await.unwrap().unwrap(); + let kad::QueryResult::StartProviding(res) = result else { + panic!("expected a QueryResult::StartProviding response"); + }; + res.unwrap(); + tokio::time::sleep(std::time::Duration::from_secs(5)).await; + + let action = KademliaAction::GetProviders(test_key.clone()); + let (action, mut rx) = action.into_kademlia_action_with_channel(); + kademlia_action_tx_2.send(action).await.unwrap(); + + let mut providers_set: HashSet = HashSet::new(); + while let Some(res) = rx.recv().await { + match res { + Ok(kad::QueryResult::GetProviders(res)) => { + let ok = res.unwrap(); + match ok { + GetProvidersOk::FoundProviders { key, providers } => { + assert_eq!(key, test_key.clone().into()); + providers_set.insert(providers.iter().map(|p| p.to_string()).collect()); + } + _ => {} + } + } + Ok(_) => panic!("expected a QueryResult::GetProviders response"), + Err(e) => panic!("unexpected error: {e}"), + } + } + assert!(!providers_set.is_empty(), "expected at least one provider"); + assert!( + providers_set + .iter() + .any(|s| s.contains(&node3_peer_id.to_string())), + "expected node3 to be a 
provider" + ); + } } diff --git a/crates/validator/src/main.rs b/crates/validator/src/main.rs index 1c1c9b9c..e36407e0 100644 --- a/crates/validator/src/main.rs +++ b/crates/validator/src/main.rs @@ -106,11 +106,6 @@ struct Args { /// Example: `/ip4/104.131.131.82/tcp/4001/p2p/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ,/ip4/104.131.131.82/udp/4001/quic-v1/p2p/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ` #[arg(long, default_value = "")] bootnodes: String, - - /// Path to the libp2p private key file which contains the hex-encoded private key - /// A new key is generated if this is not provided - #[arg(long)] - libp2p_private_key_file: Option, } #[tokio::main] diff --git a/crates/validator/src/validator.rs b/crates/validator/src/validator.rs index 7dd8b48f..fd06aa08 100644 --- a/crates/validator/src/validator.rs +++ b/crates/validator/src/validator.rs @@ -289,11 +289,11 @@ async fn get_worker_nodes_from_dht( } } - info!("got {} worker nodes from DHT", workers.len()); + log::debug!("got {} worker nodes from DHT", workers.len()); let mut nodes = Vec::new(); for peer_id in workers { - let record_key = format!("{}:{}", p2p::WORKER_DHT_KEY, peer_id); + let record_key = p2p::worker_dht_key_with_peer_id(&peer_id); let (kad_action, mut result_rx) = p2p::KademliaAction::GetRecord(record_key.as_bytes().to_vec()) .into_kademlia_action_with_channel(); @@ -332,7 +332,7 @@ async fn get_worker_nodes_from_dht( } } Err(e) => { - warn!("Kademlia action failed: {e}"); + warn!("kademlia action failed: {e}"); } } } diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 9b036465..9c34ef1d 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -788,9 +788,9 @@ pub async fn execute_command( // TODO: sleep so that dht is bootstrapped before publishing; // should update p2p service to expose this. 
- tokio::time::sleep(Duration::from_secs(30)).await; + tokio::time::sleep(Duration::from_secs(1)).await; - let record_key = format!("{}:{}", p2p::WORKER_DHT_KEY, peer_id); + let record_key = p2p::worker_dht_key_with_peer_id(&peer_id); let (kad_action, mut result_rx) = KademliaAction::PutRecord { key: record_key.as_bytes().to_vec(), value: serde_json::to_vec(&node_config).unwrap(), diff --git a/crates/worker/src/lib.rs b/crates/worker/src/lib.rs index 8fe8e1ea..91d08e91 100644 --- a/crates/worker/src/lib.rs +++ b/crates/worker/src/lib.rs @@ -5,7 +5,7 @@ mod docker; mod metrics; mod operations; mod p2p; -mod services; +// mod services; mod state; mod utils; diff --git a/crates/worker/src/utils/logging.rs b/crates/worker/src/utils/logging.rs index 312d565c..ed50024c 100644 --- a/crates/worker/src/utils/logging.rs +++ b/crates/worker/src/utils/logging.rs @@ -75,8 +75,6 @@ pub fn setup_logging(cli: Option<&Cli>) -> Result<(), Box Date: Wed, 16 Jul 2025 15:11:39 -0400 Subject: [PATCH 06/14] update orchestrator to use dht for discovery --- .env.example | 4 +- crates/orchestrator/src/api/routes/groups.rs | 24 +- crates/orchestrator/src/api/routes/nodes.rs | 29 +- crates/orchestrator/src/api/routes/storage.rs | 4 +- crates/orchestrator/src/discovery/monitor.rs | 827 +++++++++--------- crates/orchestrator/src/models/node.rs | 25 +- crates/orchestrator/src/node/invite.rs | 7 +- .../src/plugins/node_groups/mod.rs | 1 - .../src/plugins/node_groups/scheduler_impl.rs | 2 +- .../src/plugins/node_groups/tests.rs | 2 +- .../src/store/domains/node_store.rs | 14 +- crates/p2p/src/behaviour.rs | 4 +- crates/shared/src/models/node.rs | 16 +- crates/shared/src/web3/mod.rs | 2 + .../src/validators/hardware_challenge.rs | 8 +- crates/worker/src/cli/command.rs | 6 +- 16 files changed, 469 insertions(+), 506 deletions(-) diff --git a/.env.example b/.env.example index 0b7c4e08..0a604ff7 100644 --- a/.env.example +++ b/.env.example @@ -14,8 +14,8 @@ 
WORK_VALIDATION_CONTRACT=0x0B306BF915C4d645ff596e518fAf3F9669b97016 # Discovery # --------- DISCOVERY_URLS=http://localhost:8089 -ORCHESTRATOR_P2P_ADDRESS=/ip4/127.0.0.1/tcp/4004/p2p/12D3KooWJj3haDEzxGSbGSAvXCiE9pDYC9xHDdtQe8B2donhfwXL -ORCHESTRATOR_LIBP2P_PRIVATE_KEY="d0884c9823a0a2c846dbf5e71853bc5f80b2ec5d2de46532cdbe8ab46f020836845c655bb6fb3fd7f45d09a9ab687656606e8e2a841bf0f9cb376c618e6a3887" +BOOTNODE_P2P_ADDRESS=/ip4/127.0.0.1/tcp/4005/p2p/12D3KooWJj3haDEzxGSbGSAvXCiE9pDYC9xHDdtQe8B2donhfwXL +BOOTNODE_LIBP2P_PRIVATE_KEY="d0884c9823a0a2c846dbf5e71853bc5f80b2ec5d2de46532cdbe8ab46f020836" # Accounts (Anvil Test Accounts - DO NOT USE IN PRODUCTION) # --------------------------------------------------------- diff --git a/crates/orchestrator/src/api/routes/groups.rs b/crates/orchestrator/src/api/routes/groups.rs index 414f524a..7af66d60 100644 --- a/crates/orchestrator/src/api/routes/groups.rs +++ b/crates/orchestrator/src/api/routes/groups.rs @@ -237,25 +237,21 @@ async fn fetch_node_logs_p2p( match node { Some(node) => { // Check if node has P2P information - let (worker_p2p_id, worker_p2p_addresses) = - match (&node.worker_p2p_id, &node.worker_p2p_addresses) { - (Some(p2p_id), Some(p2p_addrs)) if !p2p_addrs.is_empty() => (p2p_id, p2p_addrs), - _ => { - error!("Node {node_address} does not have P2P information"); - return json!({ - "success": false, - "error": "Node does not have P2P information", - "status": node.status.to_string() - }); - } - }; + let Some(p2p_addresses) = node.worker_p2p_addresses else { + error!("Node {node_address} does not have P2P addresses"); + return json!({ + "success": false, + "error": "Node does not have P2P addresses", + "status": node.status.to_string() + }); + }; // Send P2P request for task logs let (response_tx, response_rx) = tokio::sync::oneshot::channel(); let get_task_logs_request = crate::p2p::GetTaskLogsRequest { worker_wallet_address: node_address, - worker_p2p_id: worker_p2p_id.clone(), - worker_addresses: 
worker_p2p_addresses.clone(), + worker_p2p_id: node.p2p_id.clone(), + worker_addresses: p2p_addresses, response_tx, }; if let Err(e) = app_state.get_task_logs_tx.send(get_task_logs_request).await { diff --git a/crates/orchestrator/src/api/routes/nodes.rs b/crates/orchestrator/src/api/routes/nodes.rs index 9debddde..4b7a25fb 100644 --- a/crates/orchestrator/src/api/routes/nodes.rs +++ b/crates/orchestrator/src/api/routes/nodes.rs @@ -148,27 +148,22 @@ async fn restart_node_task(node_id: web::Path, app_state: Data } }; - if node.worker_p2p_id.is_none() || node.worker_p2p_addresses.is_none() { + if node.worker_p2p_addresses.is_none() { return HttpResponse::BadRequest().json(json!({ "success": false, - "error": "Node does not have p2p information" + "error": "Node does not have p2p addresses" })); } - let p2p_id = node - .worker_p2p_id - .as_ref() - .expect("worker_p2p_id should be present"); let p2p_addresses = node .worker_p2p_addresses - .as_ref() .expect("worker_p2p_addresses should be present"); let (response_tx, response_rx) = tokio::sync::oneshot::channel(); let restart_task_request = crate::p2p::RestartTaskRequest { worker_wallet_address: node.address, - worker_p2p_id: p2p_id.clone(), - worker_addresses: p2p_addresses.clone(), + worker_p2p_id: node.p2p_id, + worker_addresses: p2p_addresses, response_tx, }; if let Err(e) = app_state.restart_task_tx.send(restart_task_request).await { @@ -231,20 +226,14 @@ async fn get_node_logs(node_id: web::Path, app_state: Data) -> } }; - if node.worker_p2p_id.is_none() || node.worker_p2p_addresses.is_none() { + if node.worker_p2p_addresses.is_none() { return HttpResponse::BadRequest().json(json!({ "success": false, - "error": "Node does not have p2p information" + "error": "Node does not have p2p addresses" })); } - let Some(p2p_id) = node.worker_p2p_id.as_ref() else { - return HttpResponse::BadRequest().json(json!({ - "success": false, - "error": "Node does not have worker p2p id" - })); - }; - let Some(p2p_addresses) = 
node.worker_p2p_addresses.as_ref() else { + let Some(p2p_addresses) = node.worker_p2p_addresses else { return HttpResponse::BadRequest().json(json!({ "success": false, "error": "Node does not have worker p2p addresses" @@ -254,8 +243,8 @@ async fn get_node_logs(node_id: web::Path, app_state: Data) -> let (response_tx, response_rx) = tokio::sync::oneshot::channel(); let get_task_logs_request = crate::p2p::GetTaskLogsRequest { worker_wallet_address: node.address, - worker_p2p_id: p2p_id.clone(), - worker_addresses: p2p_addresses.clone(), + worker_p2p_id: node.p2p_id, + worker_addresses: p2p_addresses, response_tx, }; if let Err(e) = app_state.get_task_logs_tx.send(get_task_logs_request).await { diff --git a/crates/orchestrator/src/api/routes/storage.rs b/crates/orchestrator/src/api/routes/storage.rs index 57a30748..aa8a3cc5 100644 --- a/crates/orchestrator/src/api/routes/storage.rs +++ b/crates/orchestrator/src/api/routes/storage.rs @@ -538,7 +538,7 @@ mod tests { address: Address::ZERO, ip_address: "127.0.0.1".to_string(), port: 8080, - p2p_id: Some("test_p2p_id".to_string()), + p2p_id: "test_p2p_id".to_string(), status: NodeStatus::Healthy, ..Default::default() }; @@ -722,7 +722,7 @@ mod tests { address: Address::ZERO, ip_address: "127.0.0.1".to_string(), port: 8080, - p2p_id: Some("test_p2p_id".to_string()), + p2p_id: "test_p2p_id".to_string(), status: NodeStatus::Healthy, ..Default::default() }; diff --git a/crates/orchestrator/src/discovery/monitor.rs b/crates/orchestrator/src/discovery/monitor.rs index d1ea3133..1d8645cd 100644 --- a/crates/orchestrator/src/discovery/monitor.rs +++ b/crates/orchestrator/src/discovery/monitor.rs @@ -5,55 +5,85 @@ use crate::store::core::StoreContext; use crate::utils::loop_heartbeats::LoopHeartbeats; use alloy::primitives::Address; use alloy::primitives::U256; -use anyhow::Error; -use anyhow::Result; +use anyhow::{bail, Context as _, Error, Result}; use chrono::Utc; -use log::{error, info}; -use shared::models::api::ApiResponse; 
-use shared::models::node::DiscoveryNode; -use shared::security::request_signer::sign_request_with_nonce; -use shared::web3::wallet::Wallet; +use log::{error, info, warn}; +use shared::models::node::NodeWithMetadata; use std::sync::Arc; use std::time::Duration; +use tokio::sync::mpsc::Sender; use tokio::time::interval; -pub struct DiscoveryMonitor { - coordinator_wallet: Wallet, +struct NodeFetcher { compute_pool_id: u32, - interval_s: u64, - discovery_urls: Vec, - store_context: Arc, - heartbeats: Arc, - http_client: reqwest::Client, - max_healthy_nodes_with_same_endpoint: u32, - status_change_handlers: Vec, + kademlia_action_tx: Sender, + provider: alloy::providers::RootProvider, + contracts: shared::web3::Contracts, } -impl DiscoveryMonitor { - #[allow(clippy::too_many_arguments)] - pub fn new( - coordinator_wallet: Wallet, - compute_pool_id: u32, - interval_s: u64, - discovery_urls: Vec, - store_context: Arc, - heartbeats: Arc, - max_healthy_nodes_with_same_endpoint: u32, - status_change_handlers: Vec, - ) -> Self { - Self { - coordinator_wallet, - compute_pool_id, - interval_s, - discovery_urls, - store_context, - heartbeats, - http_client: reqwest::Client::new(), - max_healthy_nodes_with_same_endpoint, - status_change_handlers, +impl NodeFetcher { + async fn get_nodes(&self) -> Result> { + use futures::stream::FuturesUnordered; + use futures::StreamExt as _; + + // TODO: this actually needs to fetch for compute pool ID only (`self.compute_pool_id`) + let nodes = get_worker_nodes_from_dht(self.kademlia_action_tx.clone()) + .await + .context("failed to get worker nodes from DHT")?; + if nodes.is_empty() { + return Ok(vec![]); + } + + // remove duplicates based on node ID + let mut unique_nodes = Vec::new(); + let mut seen_ids = std::collections::HashSet::new(); + for node in nodes { + if seen_ids.insert(node.id.clone()) && node.compute_pool_id == self.compute_pool_id { + unique_nodes.push(node); + } + } + + info!( + "total unique nodes after deduplication: {}", 
+ unique_nodes.len() + ); + + let futures = FuturesUnordered::new(); + for node in unique_nodes { + futures.push(NodeWithMetadata::new_from_contracts( + node, + &self.provider, + &self.contracts, + )); } + let nodes = futures + .collect::>() + .await + .into_iter() + .filter_map(Result::ok) + .collect::>(); + if nodes.is_empty() { + return Ok(vec![]); + } + + // for node in &nodes { + // if let Err(e) = self.perform_node_updates(node).await { + // error!( + // "failed to perform update for node with id {}: {e}", + // node.node().id + // ); + // } + // } + + Ok(nodes) } +} +struct Updater { + store_context: Arc, + status_change_handlers: Vec, +} +impl Updater { async fn handle_status_change(&self, node: &OrchestratorNode, old_status: NodeStatus) { for handler in &self.status_change_handlers { if let Err(e) = handler.handle_status_change(node, &old_status).await { @@ -70,7 +100,7 @@ impl DiscoveryMonitor { // Get the current node to know the old status let old_status = match self.store_context.node_store.get_node(node_address).await? 
{ Some(node) => node.status, - None => return Err(anyhow::anyhow!("Node not found: {}", node_address)), + None => bail!("node not found: {}", node_address), }; // Update the status in the store @@ -87,215 +117,76 @@ impl DiscoveryMonitor { Ok(()) } - pub async fn run(&self) -> Result<(), Error> { - let mut interval = interval(Duration::from_secs(self.interval_s)); - - loop { - interval.tick().await; - match self.get_nodes().await { - Ok(nodes) => { - info!( - "Successfully synced {} nodes from discovery service", - nodes.len() - ); - } - Err(e) => { - error!("Error syncing nodes from discovery service: {e}"); - } - } - self.heartbeats.update_monitor(); - } - } - async fn fetch_nodes_from_single_discovery( - &self, - discovery_url: &str, - ) -> Result, Error> { - let discovery_route = format!("/api/pool/{}", self.compute_pool_id); - let address = self.coordinator_wallet.address().to_string(); - - let signature = - match sign_request_with_nonce(&discovery_route, &self.coordinator_wallet, None).await { - Ok(sig) => sig, - Err(e) => { - error!("Failed to sign discovery request: {e}"); - return Ok(Vec::new()); - } - }; - - let mut headers = reqwest::header::HeaderMap::new(); - headers.insert( - "x-address", - reqwest::header::HeaderValue::from_str(&address)?, - ); - headers.insert( - "x-signature", - reqwest::header::HeaderValue::from_str(&signature.signature)?, - ); - - let response = match self - .http_client - .get(format!("{discovery_url}{discovery_route}")) - .query(&[("nonce", signature.nonce)]) - .headers(headers) - .send() - .await - { - Ok(resp) => resp, - Err(e) => { - error!("Failed to fetch nodes from discovery service {discovery_url}: {e}"); - return Ok(Vec::new()); - } - }; - - let response_text = match response.text().await { - Ok(text) => text, - Err(e) => { - error!("Failed to read discovery response from {discovery_url}: {e}"); - return Ok(Vec::new()); - } - }; - - let parsed_response: ApiResponse> = - match serde_json::from_str(&response_text) { - 
Ok(resp) => resp, - Err(e) => { - error!("Failed to parse discovery response from {discovery_url}: {e}"); - return Ok(Vec::new()); - } - }; - - let nodes = parsed_response.data; - let nodes = nodes - .into_iter() - .filter(|node| node.is_validated) - .collect::>(); - - Ok(nodes) - } - - pub async fn fetch_nodes_from_discovery(&self) -> Result, Error> { - let mut all_nodes = Vec::new(); - let mut any_success = false; - - for discovery_url in &self.discovery_urls { - match self.fetch_nodes_from_single_discovery(discovery_url).await { - Ok(nodes) => { - info!( - "Successfully fetched {} nodes from {}", - nodes.len(), - discovery_url - ); - all_nodes.extend(nodes); - any_success = true; - } - Err(e) => { - error!("Failed to fetch nodes from {discovery_url}: {e}"); - } - } - } - - if !any_success { - error!("Failed to fetch nodes from all discovery services"); - return Ok(Vec::new()); - } - - // Remove duplicates based on node ID - let mut unique_nodes = Vec::new(); - let mut seen_ids = std::collections::HashSet::new(); - for node in all_nodes { - if seen_ids.insert(node.node.id.clone()) { - unique_nodes.push(node); - } - } - - info!( - "Total unique nodes after deduplication: {}", - unique_nodes.len() - ); - Ok(unique_nodes) - } - - async fn count_healthy_nodes_with_same_endpoint( + async fn count_healthy_nodes_with_same_peer_id( &self, node_address: Address, - ip_address: &str, - port: u16, - ) -> Result { + peer_id: &p2p::PeerId, + ) -> Result { let nodes = self.store_context.node_store.get_nodes().await?; Ok(nodes .iter() .filter(|other_node| { other_node.address != node_address - && other_node.ip_address == ip_address - && other_node.port == port + && other_node.p2p_id == peer_id.to_string() && other_node.status == NodeStatus::Healthy }) .count() as u32) } - async fn sync_single_node_with_discovery( - &self, - discovery_node: &DiscoveryNode, - ) -> Result<(), Error> { - let node_address = discovery_node.node.id.parse::
()?; + async fn perform_node_updates(&self, node: &NodeWithMetadata) -> Result<()> { + let node_address = node.node().id.parse::
()?; - // Check if there's any healthy node with the same IP and port - let count_healthy_nodes_with_same_endpoint = self - .count_healthy_nodes_with_same_endpoint( + // Check if there's any healthy node with the same peer ID + // TODO: can this case still happen? i think so if there's stale provider records in the dht + let healthy_nodes_with_same_peer_id = self + .count_healthy_nodes_with_same_peer_id( node_address, - &discovery_node.node.ip_address, - discovery_node.node.port, + &node.node().worker_p2p_id.parse::()?, ) - .await?; + .await + .context("failed to count healthy nodes with same peer ID")?; match self.store_context.node_store.get_node(&node_address).await { Ok(Some(existing_node)) => { // If there's a healthy node with same IP and port, and this node isn't healthy, mark it dead - if count_healthy_nodes_with_same_endpoint > 0 + if healthy_nodes_with_same_peer_id > 0 && existing_node.status != NodeStatus::Healthy { info!( - "Node {} shares endpoint {}:{} with a healthy node, marking as dead", - node_address, discovery_node.node.ip_address, discovery_node.node.port + "Node {} shares peer ID {} with a healthy node, marking as dead", + node_address, + node.node().worker_p2p_id ); - if let Err(e) = self - .update_node_status(&node_address, NodeStatus::Dead) + self.update_node_status(&node_address, NodeStatus::Dead) .await - { - error!("Error updating node status: {e}"); - } + .context("failed to update node status to Dead")?; return Ok(()); } - if discovery_node.is_validated && !discovery_node.is_provider_whitelisted { + if node.is_validated() && !node.is_provider_whitelisted() { info!( "Node {node_address} is validated but not provider whitelisted, marking as ejected" ); - if let Err(e) = self - .update_node_status(&node_address, NodeStatus::Ejected) + self.update_node_status(&node_address, NodeStatus::Ejected) .await - { - error!("Error updating node status: {e}"); - } + .context("failed to update node status to Ejected")?; } // If a node is already in 
ejected state (and hence cannot recover) but the provider // gets whitelisted, we need to mark it as dead so it can actually recover again - if discovery_node.is_validated - && discovery_node.is_provider_whitelisted + if node.is_validated() + && node.is_provider_whitelisted() && existing_node.status == NodeStatus::Ejected { info!( "Node {node_address} is validated and provider whitelisted. Local store status was ejected, marking as dead so node can recover" ); - if let Err(e) = self - .update_node_status(&node_address, NodeStatus::Dead) + self.update_node_status(&node_address, NodeStatus::Dead) .await - { - error!("Error updating node status: {e}"); - } + .context("failed to update node status to Dead")?; } - if !discovery_node.is_active && existing_node.status == NodeStatus::Healthy { + + if !node.is_active() && existing_node.status == NodeStatus::Healthy { // Node is active False but we have it in store and it is healthy // This means that the node likely got kicked by e.g. the validator // Add a grace period check to avoid immediately marking nodes that just became healthy @@ -313,18 +204,14 @@ impl DiscoveryMonitor { info!( "Node {node_address} is no longer active on chain, marking as ejected" ); - if !discovery_node.is_provider_whitelisted { - if let Err(e) = self - .update_node_status(&node_address, NodeStatus::Ejected) + if !node.is_provider_whitelisted() { + self.update_node_status(&node_address, NodeStatus::Ejected) .await - { - error!("Error updating node status: {e}"); - } - } else if let Err(e) = self - .update_node_status(&node_address, NodeStatus::Dead) - .await - { - error!("Error updating node status: {e}"); + .context("failed to update node status to Ejected")?; + } else { + self.update_node_status(&node_address, NodeStatus::Dead) + .await + .context("failed to update node status to Dead")?; } } else { info!( @@ -333,127 +220,272 @@ impl DiscoveryMonitor { } } - if existing_node.ip_address != discovery_node.node.ip_address { + if 
existing_node.ip_address != node.node().ip_address { info!( "Node {} IP changed from {} to {}", - node_address, existing_node.ip_address, discovery_node.node.ip_address + node_address, + existing_node.ip_address, + node.node().ip_address ); - let mut node = existing_node.clone(); - node.ip_address = discovery_node.node.ip_address.clone(); - let _ = self.store_context.node_store.add_node(node.clone()).await; + let mut existing_node = existing_node.clone(); + existing_node.ip_address = node.node().ip_address.clone(); + self.store_context + .node_store + .add_node(existing_node) + .await + .context("failed to update node IP address")?; } - if existing_node.location.is_none() && discovery_node.location.is_some() { + + if existing_node.location.is_none() && node.location().is_some() { info!( "Node {} location changed from None to {:?}", - node_address, discovery_node.location + node_address, + node.location() ); - if let Some(location) = &discovery_node.location { - let _ = self - .store_context + if let Some(location) = node.location() { + self.store_context .node_store .update_node_location(&node_address, location) - .await; + .await + .context("failed to update node location")?; } } if existing_node.status == NodeStatus::Dead { - if let (Some(last_change), Some(last_updated)) = ( - existing_node.last_status_change, - discovery_node.last_updated, - ) { + if let (Some(last_change), Some(last_updated)) = + (existing_node.last_status_change, node.last_updated()) + { if last_change < last_updated { info!("Node {node_address} is dead but has been updated on discovery, marking as discovered"); - if existing_node.compute_specs != discovery_node.compute_specs { + if existing_node.compute_specs != node.node().compute_specs { info!( "Node {node_address} compute specs changed, marking as discovered" ); let mut node = existing_node.clone(); - node.compute_specs = discovery_node.compute_specs.clone(); - let _ = self.store_context.node_store.add_node(node.clone()).await; + 
node.compute_specs = node.compute_specs.clone(); + self.store_context + .node_store + .add_node(node.clone()) + .await + .context("failed to update node compute specs")?; } - if let Err(e) = self - .update_node_status(&node_address, NodeStatus::Discovered) + self.update_node_status(&node_address, NodeStatus::Discovered) .await - { - error!("Error updating node status: {e}"); - } + .context("failed to update node status to Discovered")?; } } } - if let Some(balance) = discovery_node.latest_balance { - if balance == U256::ZERO { - info!("Node {node_address} has zero balance, marking as low balance"); - if let Err(e) = self - .update_node_status(&node_address, NodeStatus::LowBalance) - .await - { - error!("Error updating node status: {e}"); - } - } + if node.latest_balance() == U256::ZERO { + info!("Node {node_address} has zero balance, marking as low balance"); + self.update_node_status(&node_address, NodeStatus::LowBalance) + .await + .context("failed to update node status to LowBalance")?; } } Ok(None) => { - // Don't add new node if there's already a healthy node with same IP and port - if count_healthy_nodes_with_same_endpoint - >= self.max_healthy_nodes_with_same_endpoint - { + // Don't add new node if there's already a healthy node with same peer ID + if healthy_nodes_with_same_peer_id > 0 { info!( - "Skipping new node {} as endpoint {}:{} is already used by a healthy node", - node_address, discovery_node.node.ip_address, discovery_node.node.port + "Skipping new node {} as peer ID {} is already used by a healthy node", + node_address, + node.node().worker_p2p_id ); return Ok(()); } info!("Discovered new validated node: {node_address}"); - let mut node = OrchestratorNode::from(discovery_node.clone()); + let mut node = OrchestratorNode::from(node); node.first_seen = Some(Utc::now()); - let _ = self.store_context.node_store.add_node(node.clone()).await; + self.store_context + .node_store + .add_node(node) + .await + .context("failed to add node to store")?; } 
Err(e) => { - error!("Error syncing node with discovery: {e}"); - return Err(e); + return Err(e.context("failed to get node from store")); } } Ok(()) } +} + +pub struct DiscoveryMonitor { + interval_s: u64, + heartbeats: Arc, + updater: Updater, + node_fetcher: NodeFetcher, +} + +impl DiscoveryMonitor { + #[allow(clippy::too_many_arguments)] + pub fn new( + compute_pool_id: u32, + interval_s: u64, + store_context: Arc, + heartbeats: Arc, + status_change_handlers: Vec, + kademlia_action_tx: Sender, + provider: alloy::providers::RootProvider, + contracts: shared::web3::Contracts, + ) -> Self { + Self { + interval_s, + heartbeats, + updater: Updater { + store_context, + status_change_handlers, + }, + node_fetcher: NodeFetcher { + compute_pool_id, + kademlia_action_tx, + provider, + contracts, + }, + } + } + + pub async fn run(self) { + let Self { + interval_s, + heartbeats, + updater, + node_fetcher, + } = self; + + let mut interval = interval(Duration::from_secs(interval_s)); + + loop { + interval.tick().await; + match node_fetcher.get_nodes().await { + Ok(nodes) => { + for node in &nodes { + if let Err(e) = updater.perform_node_updates(node).await { + error!( + "failed to perform update for node with id {}: {e}", + node.node().id + ); + } + } + + info!("Successfully synced {} nodes from discovery", nodes.len()); + } + Err(e) => { + error!("Error syncing nodes from discovery: {e}"); + } + } + heartbeats.update_monitor(); + } + } +} - async fn get_nodes(&self) -> Result, Error> { - let discovery_nodes = self.fetch_nodes_from_discovery().await?; +async fn get_worker_nodes_from_dht( + kademlia_action_tx: tokio::sync::mpsc::Sender, +) -> Result, anyhow::Error> { + let (kad_action, mut result_rx) = + p2p::KademliaAction::GetProviders(p2p::WORKER_DHT_KEY.as_bytes().to_vec()) + .into_kademlia_action_with_channel(); + if let Err(e) = kademlia_action_tx.send(kad_action).await { + bail!("failed to send Kademlia action: {e}"); + } - for discovery_node in &discovery_nodes { - if 
let Err(e) = self.sync_single_node_with_discovery(discovery_node).await { - error!("Error syncing node with discovery: {e}"); + info!("🔄 Fetching worker nodes from DHT..."); + let mut workers = std::collections::HashSet::new(); + while let Some(result) = result_rx.recv().await { + match result { + Ok(res) => { + match res { + p2p::KademliaQueryResult::GetProviders(res) => match res { + Ok(res) => match res { + p2p::KademliaGetProvidersOk::FoundProviders { key: _, providers } => { + workers.extend(providers.into_iter()); + } + _ => {} + }, + Err(e) => { + bail!("failed to get providers from DHT: {e}"); + } + }, + _ => { + // this case should never happen + bail!("unexpected Kademlia query result: {res:?}"); + } + } + } + Err(e) => { + bail!("kademlia action failed: {e}"); } } + } - Ok(discovery_nodes - .into_iter() - .map(OrchestratorNode::from) - .collect()) + log::debug!("got {} worker nodes from DHT", workers.len()); + + let mut nodes = Vec::new(); + for peer_id in workers { + let record_key = p2p::worker_dht_key_with_peer_id(&peer_id); + let (kad_action, mut result_rx) = + p2p::KademliaAction::GetRecord(record_key.as_bytes().to_vec()) + .into_kademlia_action_with_channel(); + if let Err(e) = kademlia_action_tx.send(kad_action).await { + bail!("failed to send Kademlia action: {e}"); + } + + while let Some(result) = result_rx.recv().await { + match result { + Ok(res) => { + match res { + p2p::KademliaQueryResult::GetRecord(res) => match res { + Ok(res) => match res { + p2p::KademliaGetRecordOk::FoundRecord(record) => { + match serde_json::from_slice::( + &record.record.value, + ) { + Ok(node) => { + nodes.push(node); + } + Err(e) => { + warn!("failed to deserialize node record: {e}"); + } + } + } + _ => {} + }, + Err(e) => { + warn!("failed to get record from DHT: {e}"); + } + }, + _ => { + // this case should never happen + bail!("unexpected Kademlia query result: {res:?}"); + } + } + } + Err(e) => { + warn!("kademlia action failed: {e}"); + } + } + } } + + 
Ok(nodes) } #[cfg(test)] mod tests { use alloy::primitives::Address; use shared::models::node::{ComputeSpecs, Node}; - use url::Url; use super::*; use crate::models::node::NodeStatus; use crate::store::core::{RedisStore, StoreContext}; - use crate::ServerMode; #[tokio::test] - async fn test_sync_single_node_with_discovery() { + async fn perform_node_updates_ok() { let node_address = "0x1234567890123456789012345678901234567890"; - let discovery_node = DiscoveryNode { - is_validated: true, - is_provider_whitelisted: true, - is_active: false, - node: Node { + let node = NodeWithMetadata::new( + Node { id: node_address.to_string(), provider_address: node_address.to_string(), ip_address: "127.0.0.1".to_string(), @@ -464,15 +496,22 @@ mod tests { storage_gb: Some(10), ..Default::default() }), + worker_p2p_id: "12D3KooWJj3haDEzxGSbGSAvXCiE9pDYC9xHDdtQe8B2donhfwXL".to_string(), ..Default::default() }, - is_blacklisted: false, - ..Default::default() - }; + true, + false, + true, + false, + alloy::primitives::U256::from(1000), + None, + None, + None, + ); - let mut orchestrator_node = OrchestratorNode::from(discovery_node.clone()); + let mut orchestrator_node = OrchestratorNode::from(&node); orchestrator_node.status = NodeStatus::Ejected; - orchestrator_node.address = discovery_node.node.id.parse::
().unwrap(); + orchestrator_node.address = node_address.parse::
().unwrap(); orchestrator_node.first_seen = Some(Utc::now()); orchestrator_node.compute_specs = Some(ComputeSpecs { gpu: None, @@ -495,35 +534,18 @@ mod tests { .expect("Redis should be flushed"); let store_context = Arc::new(StoreContext::new(store.clone())); - let discovery_store_context = store_context.clone(); - - let _ = store_context + store_context .node_store .add_node(orchestrator_node.clone()) - .await; - - let fake_wallet = Wallet::new( - "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97", - Url::parse("http://localhost:8545").unwrap(), - ) - .unwrap(); - - let mode = ServerMode::Full; - - let discovery_monitor = DiscoveryMonitor::new( - fake_wallet, - 1, - 10, - vec!["http://localhost:8080".to_string()], - discovery_store_context, - Arc::new(LoopHeartbeats::new(&mode)), - 1, - vec![], - ); + .await + .unwrap(); - let store_context_clone = store_context.clone(); + let updater = Updater { + store_context: store_context.clone(), + status_change_handlers: vec![], + }; - let node_from_store = store_context_clone + let node_from_store = store_context .node_store .get_node(&orchestrator_node.address) .await @@ -533,10 +555,7 @@ mod tests { assert_eq!(node.status, NodeStatus::Ejected); } - discovery_monitor - .sync_single_node_with_discovery(&discovery_node) - .await - .unwrap(); + updater.perform_node_updates(&node).await.unwrap(); let node_after_sync = &store_context .node_store @@ -550,23 +569,27 @@ mod tests { } #[tokio::test] - async fn test_first_seen_timestamp_set_on_new_node() { + async fn first_seen_timestamp_set_on_new_node() { let node_address = "0x2234567890123456789012345678901234567890"; - let discovery_node = DiscoveryNode { - is_validated: true, - is_provider_whitelisted: true, - is_active: true, - node: Node { + let node = NodeWithMetadata::new( + Node { id: node_address.to_string(), provider_address: node_address.to_string(), ip_address: "192.168.1.100".to_string(), port: 8080, compute_pool_id: 1, + worker_p2p_id: 
"12D3KooWJj3haDEzxGSbGSAvXCiE9pDYC9xHDdtQe8B2donhfwXL".to_string(), ..Default::default() }, - is_blacklisted: false, - ..Default::default() - }; + true, + true, + true, + false, + alloy::primitives::U256::from(1000), + None, + None, + None, + ); let store = Arc::new(RedisStore::new_test()); let mut con = store @@ -582,40 +605,22 @@ mod tests { .expect("Redis should be flushed"); let store_context = Arc::new(StoreContext::new(store.clone())); - - let fake_wallet = Wallet::new( - "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97", - Url::parse("http://localhost:8545").unwrap(), - ) - .unwrap(); - - let mode = ServerMode::Full; - - let discovery_monitor = DiscoveryMonitor::new( - fake_wallet, - 1, - 10, - vec!["http://localhost:8080".to_string()], - store_context.clone(), - Arc::new(LoopHeartbeats::new(&mode)), - 1, - vec![], - ); + let updater = Updater { + store_context: store_context.clone(), + status_change_handlers: vec![], + }; let time_before = Utc::now(); // Sync a new node that doesn't exist in the store - discovery_monitor - .sync_single_node_with_discovery(&discovery_node) - .await - .unwrap(); + updater.perform_node_updates(&node).await.unwrap(); let time_after = Utc::now(); // Verify the node was added with first_seen timestamp let node_from_store = store_context .node_store - .get_node(&discovery_node.node.id.parse::
().unwrap()) + .get_node(&node_address.parse::
().unwrap()) .await .unwrap(); @@ -638,32 +643,33 @@ mod tests { tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; // Update discovery data to simulate a change (e.g., IP address change) - let updated_discovery_node = DiscoveryNode { - is_validated: true, - is_provider_whitelisted: true, - is_active: true, - node: Node { + let updated_node = NodeWithMetadata::new( + Node { id: node_address.to_string(), provider_address: node_address.to_string(), ip_address: "192.168.1.101".to_string(), // Changed IP port: 8080, compute_pool_id: 1, + worker_p2p_id: "12D3KooWJj3haDEzxGSbGSAvXCiE9pDYC9xHDdtQe8B2donhfwXL".to_string(), ..Default::default() }, - is_blacklisted: false, - ..Default::default() - }; + true, + true, + true, + false, + alloy::primitives::U256::from(1000), + None, + None, + None, + ); // Sync the node again - discovery_monitor - .sync_single_node_with_discovery(&updated_discovery_node) - .await - .unwrap(); + updater.perform_node_updates(&updated_node).await.unwrap(); // Verify the node was updated but first_seen is preserved let node_after_resync = store_context .node_store - .get_node(&discovery_node.node.id.parse::
().unwrap()) + .get_node(&node_address.parse::
().unwrap()) .await .unwrap() .unwrap(); @@ -679,7 +685,7 @@ mod tests { } #[tokio::test] - async fn test_sync_node_with_same_endpoint() { + async fn sync_node_with_same_peer_id() { let store = Arc::new(RedisStore::new_test()); let mut con = store .client @@ -697,11 +703,8 @@ mod tests { // Create first node (will be healthy) let node1_address = "0x1234567890123456789012345678901234567890"; - let node1 = DiscoveryNode { - is_validated: true, - is_provider_whitelisted: true, - is_active: true, - node: Node { + let node1 = NodeWithMetadata::new( + Node { id: node1_address.to_string(), provider_address: node1_address.to_string(), ip_address: "127.0.0.1".to_string(), @@ -712,50 +715,62 @@ mod tests { storage_gb: Some(10), ..Default::default() }), + worker_p2p_id: "12D3KooWJj3haDEzxGSbGSAvXCiE9pDYC9xHDdtQe8B2donhfwXL".to_string(), ..Default::default() }, - is_blacklisted: false, - ..Default::default() - }; + true, + true, + true, + false, + alloy::primitives::U256::from(1000), + None, + None, + None, + ); - let mut orchestrator_node1 = OrchestratorNode::from(node1.clone()); + let mut orchestrator_node1 = OrchestratorNode::from(&node1); orchestrator_node1.status = NodeStatus::Healthy; - orchestrator_node1.address = node1.node.id.parse::
().unwrap(); + orchestrator_node1.address = node1_address.parse::
().unwrap(); let _ = store_context .node_store .add_node(orchestrator_node1.clone()) .await; - // Create second node with same IP and port + // Create second node with same peer ID let node2_address = "0x2234567890123456789012345678901234567890"; - let mut node2 = node1.clone(); - node2.node.id = node2_address.to_string(); - node2.node.provider_address = node2_address.to_string(); - - let fake_wallet = Wallet::new( - "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97", - Url::parse("http://localhost:8545").unwrap(), - ) - .unwrap(); - - let mode = ServerMode::Full; - let discovery_monitor = DiscoveryMonitor::new( - fake_wallet, - 1, - 10, - vec!["http://localhost:8080".to_string()], - store_context.clone(), - Arc::new(LoopHeartbeats::new(&mode)), - 1, - vec![], + let node2 = NodeWithMetadata::new( + Node { + id: node2_address.to_string(), + provider_address: node2_address.to_string(), + ip_address: "127.0.0.1".to_string(), + port: 8080, + compute_pool_id: 1, + compute_specs: Some(ComputeSpecs { + ram_mb: Some(1024), + storage_gb: Some(10), + ..Default::default() + }), + worker_p2p_id: "12D3KooWJj3haDEzxGSbGSAvXCiE9pDYC9xHDdtQe8B2donhfwXL".to_string(), + ..Default::default() + }, + true, + true, + true, + false, + alloy::primitives::U256::from(1000), + None, + None, + None, ); + let updater = Updater { + store_context: store_context.clone(), + status_change_handlers: vec![], + }; + // Try to sync the second node - discovery_monitor - .sync_single_node_with_discovery(&node2) - .await - .unwrap(); + updater.perform_node_updates(&node2).await.unwrap(); // Verify second node was not added let node2_result = store_context @@ -765,31 +780,7 @@ mod tests { .unwrap(); assert!( node2_result.is_none(), - "Node with same endpoint should not be added" - ); - - // Create third node with same IP but different port (should be allowed) - let node3_address = "0x3234567890123456789012345678901234567890"; - let mut node3 = node1.clone(); - node3.node.id = 
node3_address.to_string(); - node3.node.provider_address = node3_address.to_string(); - node3.node.port = 8081; // Different port - - // Try to sync the third node - discovery_monitor - .sync_single_node_with_discovery(&node3) - .await - .unwrap(); - - // Verify third node was added (different port) - let node3_result = store_context - .node_store - .get_node(&node3_address.parse::
().unwrap()) - .await - .unwrap(); - assert!( - node3_result.is_some(), - "Node with different port should be added" + "Node with same peer ID should not be added" ); } } diff --git a/crates/orchestrator/src/models/node.rs b/crates/orchestrator/src/models/node.rs index bf9b4b95..bffb3433 100644 --- a/crates/orchestrator/src/models/node.rs +++ b/crates/orchestrator/src/models/node.rs @@ -2,7 +2,7 @@ use alloy::primitives::Address; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use shared::models::heartbeat::TaskDetails; -use shared::models::node::{ComputeSpecs, DiscoveryNode, NodeLocation}; +use shared::models::node::{ComputeSpecs, NodeLocation, NodeWithMetadata}; use shared::models::task::TaskState; use std::fmt::{self, Display}; use utoipa::ToSchema; @@ -21,7 +21,7 @@ pub struct OrchestratorNode { #[serde(default)] pub task_details: Option, pub version: Option, - pub p2p_id: Option, + pub p2p_id: String, pub last_status_change: Option>, #[serde(default)] pub first_seen: Option>, @@ -29,8 +29,6 @@ pub struct OrchestratorNode { #[serde(default)] pub compute_specs: Option, #[serde(default)] - pub worker_p2p_id: Option, - #[serde(default)] pub worker_p2p_addresses: Option>, #[serde(default)] pub location: Option, @@ -43,24 +41,23 @@ where serializer.serialize_str(&address.to_string()) } -impl From for OrchestratorNode { - fn from(discovery_node: DiscoveryNode) -> Self { +impl From<&NodeWithMetadata> for OrchestratorNode { + fn from(other: &NodeWithMetadata) -> Self { Self { - address: discovery_node.id.parse().unwrap(), - ip_address: discovery_node.ip_address.clone(), - port: discovery_node.port, + address: other.node().id.parse().unwrap(), + ip_address: other.node().ip_address.clone(), + port: other.node().port, status: NodeStatus::Discovered, task_id: None, task_state: None, version: None, - p2p_id: None, + p2p_id: other.node().worker_p2p_id.clone(), last_status_change: None, first_seen: None, task_details: None, - compute_specs: 
discovery_node.compute_specs.clone(), - worker_p2p_id: discovery_node.worker_p2p_id.clone(), - worker_p2p_addresses: discovery_node.worker_p2p_addresses.clone(), - location: discovery_node.location.clone(), + compute_specs: other.node().compute_specs.clone(), + worker_p2p_addresses: other.node().worker_p2p_addresses.clone(), + location: other.location().cloned(), } } } diff --git a/crates/orchestrator/src/node/invite.rs b/crates/orchestrator/src/node/invite.rs index 8391d047..63403c44 100644 --- a/crates/orchestrator/src/node/invite.rs +++ b/crates/orchestrator/src/node/invite.rs @@ -115,10 +115,9 @@ impl NodeInviter { } async fn send_invite(&self, node: &OrchestratorNode) -> Result<(), anyhow::Error> { - if node.worker_p2p_id.is_none() || node.worker_p2p_addresses.is_none() { + if node.worker_p2p_addresses.is_none() { return Err(anyhow::anyhow!("Node does not have p2p information")); } - let p2p_id = node.worker_p2p_id.as_ref().unwrap(); let p2p_addresses = node.worker_p2p_addresses.as_ref().unwrap(); // Generate random nonce and expiration @@ -145,12 +144,12 @@ impl NodeInviter { nonce, }; - info!("Sending invite to node: {p2p_id}"); + info!("Sending invite to node: {}", node.p2p_id); let (response_tx, response_rx) = tokio::sync::oneshot::channel(); let invite = InviteRequestWithMetadata { worker_wallet_address: node.address, - worker_p2p_id: p2p_id.clone(), + worker_p2p_id: node.p2p_id.clone(), worker_addresses: p2p_addresses.clone(), invite: payload, response_tx, diff --git a/crates/orchestrator/src/plugins/node_groups/mod.rs b/crates/orchestrator/src/plugins/node_groups/mod.rs index 205f1576..0812f4dd 100644 --- a/crates/orchestrator/src/plugins/node_groups/mod.rs +++ b/crates/orchestrator/src/plugins/node_groups/mod.rs @@ -492,7 +492,6 @@ impl NodeGroupsPlugin { let mut healthy_nodes = nodes .iter() .filter(|node| node.status == NodeStatus::Healthy) - .filter(|node| node.p2p_id.is_some()) .filter(|node| !assigned_nodes.contains_key(&node.address.to_string())) 
.collect::>(); info!( diff --git a/crates/orchestrator/src/plugins/node_groups/scheduler_impl.rs b/crates/orchestrator/src/plugins/node_groups/scheduler_impl.rs index 3aaa27c2..180bdbfc 100644 --- a/crates/orchestrator/src/plugins/node_groups/scheduler_impl.rs +++ b/crates/orchestrator/src/plugins/node_groups/scheduler_impl.rs @@ -122,7 +122,7 @@ impl NodeGroupsPlugin { .get_node(&Address::from_str(next_node_addr).unwrap()) .await { - next_node.p2p_id.unwrap_or_default() + next_node.p2p_id } else { String::new() }; diff --git a/crates/orchestrator/src/plugins/node_groups/tests.rs b/crates/orchestrator/src/plugins/node_groups/tests.rs index a7d73b36..89902066 100644 --- a/crates/orchestrator/src/plugins/node_groups/tests.rs +++ b/crates/orchestrator/src/plugins/node_groups/tests.rs @@ -38,7 +38,7 @@ fn create_test_node( ip_address, port: 8080, status, - p2p_id: Some("test_p2p_id".to_string()), + p2p_id: "test_p2p_id".to_string(), compute_specs, ..Default::default() } diff --git a/crates/orchestrator/src/store/domains/node_store.rs b/crates/orchestrator/src/store/domains/node_store.rs index d9b83381..0874c64c 100644 --- a/crates/orchestrator/src/store/domains/node_store.rs +++ b/crates/orchestrator/src/store/domains/node_store.rs @@ -51,9 +51,7 @@ impl NodeStore { if let Some(version) = &node.version { fields.push(("version".to_string(), version.clone())); } - if let Some(p2p_id) = &node.p2p_id { - fields.push(("p2p_id".to_string(), p2p_id.clone())); - } + fields.push(("p2p_id".to_string(), node.p2p_id.clone())); if let Some(last_status_change) = &node.last_status_change { fields.push(( "last_status_change".to_string(), @@ -68,9 +66,6 @@ impl NodeStore { .map_err(|e| anyhow::anyhow!("Failed to serialize compute_specs: {}", e))?; fields.push(("compute_specs".to_string(), compute_specs_json)); } - if let Some(worker_p2p_id) = &node.worker_p2p_id { - fields.push(("worker_p2p_id".to_string(), worker_p2p_id.clone())); - } if let Some(worker_p2p_addresses) = 
&node.worker_p2p_addresses { let worker_p2p_addresses_json = serde_json::to_string(worker_p2p_addresses) .map_err(|e| anyhow::anyhow!("Failed to serialize worker_p2p_addresses: {}", e))?; @@ -121,7 +116,10 @@ impl NodeStore { .get("task_details") .and_then(|s| serde_json::from_str(s).ok()); let version = fields.get("version").cloned(); - let p2p_id = fields.get("p2p_id").cloned(); + let p2p_id = fields + .get("p2p_id") + .ok_or_else(|| anyhow::anyhow!("Missing p2p_id field"))? + .clone(); let last_status_change = fields .get("last_status_change") .and_then(|s| DateTime::parse_from_rfc3339(s).ok()) @@ -133,7 +131,6 @@ impl NodeStore { let compute_specs = fields .get("compute_specs") .and_then(|s| serde_json::from_str(s).ok()); - let worker_p2p_id = fields.get("worker_p2p_id").cloned(); let worker_p2p_addresses = fields .get("worker_p2p_addresses") .and_then(|s| serde_json::from_str(s).ok()); @@ -154,7 +151,6 @@ impl NodeStore { last_status_change, first_seen, compute_specs, - worker_p2p_id, worker_p2p_addresses, location, }) diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index b3cdeb86..bcb1fc4d 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -13,7 +13,7 @@ use libp2p::request_response; use libp2p::swarm::NetworkBehaviour; use libp2p::Multiaddr; use libp2p::PeerId; -use log::{debug, info}; +use log::debug; use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; @@ -187,7 +187,7 @@ impl BehaviourEvent { kad::Event::RoutingUpdated { peer, addresses, .. 
} => { - info!("kademlia routing updated for peer {peer:?} with addresses {addresses:?}"); + debug!("kademlia routing updated for peer {peer:?} with addresses {addresses:?}"); } // TODO: also handle InboundRequest::AddProvider and InboundRequest::PutRecord, // as these are new workers joining the network diff --git a/crates/shared/src/models/node.rs b/crates/shared/src/models/node.rs index 16bb68bc..f5fec414 100644 --- a/crates/shared/src/models/node.rs +++ b/crates/shared/src/models/node.rs @@ -1,6 +1,6 @@ -use crate::web3::{contracts::core::builder::Contracts, wallet::WalletProvider}; +use crate::web3::contracts::core::builder::Contracts; use alloy::primitives::{Address, U256}; -use alloy::providers::Provider as _; +use alloy_provider::Provider; use anyhow::{anyhow, Context as _}; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; @@ -19,10 +19,9 @@ pub struct Node { pub port: u16, pub compute_pool_id: u32, pub compute_specs: Option, + pub worker_p2p_id: String, // TODO: change to p2p::PeerId #[serde(skip_serializing_if = "Option::is_none")] - pub worker_p2p_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub worker_p2p_addresses: Option>, + pub worker_p2p_addresses: Option>, // TODO: change to p2p::Multiaddr } #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, ToSchema)] @@ -590,10 +589,10 @@ impl NodeWithMetadata { } } - pub async fn new_from_contracts( + pub async fn new_from_contracts( node: Node, - provider: &WalletProvider, - contracts: &Contracts, + provider: &P, + contracts: &Contracts

, ) -> anyhow::Result { let provider_address = Address::from_str(&node.provider_address).context("invalid provider address")?; @@ -674,6 +673,7 @@ impl NodeWithMetadata { } } +// TODO: delete #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Default, ToSchema)] pub struct DiscoveryNode { #[serde(flatten)] diff --git a/crates/shared/src/web3/mod.rs b/crates/shared/src/web3/mod.rs index ba58b0bd..6383d272 100644 --- a/crates/shared/src/web3/mod.rs +++ b/crates/shared/src/web3/mod.rs @@ -1,2 +1,4 @@ pub mod contracts; pub mod wallet; + +pub use contracts::core::builder::Contracts; diff --git a/crates/validator/src/validators/hardware_challenge.rs b/crates/validator/src/validators/hardware_challenge.rs index 6a57676b..f72ae669 100644 --- a/crates/validator/src/validators/hardware_challenge.rs +++ b/crates/validator/src/validators/hardware_challenge.rs @@ -17,12 +17,6 @@ impl HardwareChallenge { } pub(crate) async fn challenge_node(&self, node: &Node) -> Result<()> { - // Check if node has P2P ID and addresses - let p2p_id = node - .worker_p2p_id - .clone() - .ok_or_else(|| anyhow::anyhow!("Node {} does not have P2P ID", node.id))?; - let p2p_addresses = node .worker_p2p_addresses .clone() @@ -44,7 +38,7 @@ impl HardwareChallenge { let (response_tx, response_rx) = tokio::sync::oneshot::channel(); let hardware_challenge = HardwareChallengeRequest { worker_wallet_address: node_address, - worker_p2p_id: p2p_id, + worker_p2p_id: node.worker_p2p_id.clone(), worker_addresses: p2p_addresses, challenge: challenge_with_timestamp, response_tx, diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 9c34ef1d..26c40b20 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -355,7 +355,7 @@ pub async fn execute_command( .to_string(), compute_specs: None, compute_pool_id: *compute_pool_id, - worker_p2p_id: None, + worker_p2p_id: state.get_p2p_id().to_string(), worker_p2p_addresses: None, }; @@ -771,7 +771,7 @@ 
pub async fn execute_command( }; let peer_id = p2p_service.peer_id(); - node_config.worker_p2p_id = Some(peer_id.to_string()); + node_config.worker_p2p_id = p2p_service.peer_id().to_string(); let external_p2p_address = format!("/ip4/{}/tcp/{}", node_config.ip_address, *libp2p_port); node_config.worker_p2p_addresses = Some( @@ -910,7 +910,7 @@ pub async fn execute_command( compute_specs: None, provider_address: String::new(), compute_pool_id: 0, - worker_p2p_id: None, + worker_p2p_id: "empty".to_string(), // TODO: this should be a different type, as peer id is not needed for this code path worker_p2p_addresses: None, }; From 1ae81e63c4b4c9a8811b0e376ffdca04a0992cf8 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 16 Jul 2025 15:18:08 -0400 Subject: [PATCH 07/14] start discovery monitor in orchestrator again --- crates/orchestrator/src/main.rs | 77 ++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 34 deletions(-) diff --git a/crates/orchestrator/src/main.rs b/crates/orchestrator/src/main.rs index 3e78ce4e..d2621d03 100644 --- a/crates/orchestrator/src/main.rs +++ b/crates/orchestrator/src/main.rs @@ -1,5 +1,7 @@ +use alloy::providers::Provider; use anyhow::Result; use clap::Parser; +use futures::FutureExt; use log::debug; use log::error; use log::info; @@ -188,7 +190,15 @@ async fn main() -> Result<()> { tokio::task::spawn(p2p_service.run()); - let contracts = ContractBuilder::new(wallet.provider()) + let contracts = ContractBuilder::new(wallet.provider().root().clone()) + .with_compute_registry() + .with_ai_token() + .with_prime_network() + .with_compute_pool() + .build() + .unwrap(); + + let contracts_with_wallet = ContractBuilder::new(wallet.provider()) .with_compute_registry() .with_ai_token() .with_prime_network() @@ -306,37 +316,36 @@ async fn main() -> Result<()> { }); } - // // Create status_update_plugins for discovery monitor - // let mut discovery_status_update_plugins: Vec = vec![]; - - // // Add webhook plugins to discovery status 
update plugins - // for plugin in &webhook_plugins { - // discovery_status_update_plugins.push(plugin.into()); - // } - - // // Add node groups plugin if available - // if let Some(group_plugin) = node_groups_plugin.clone() { - // discovery_status_update_plugins.push(group_plugin.into()); - // } - - // let discovery_store_context = store_context.clone(); - // let discovery_heartbeats = heartbeats.clone(); - // tasks.spawn({ - // let wallet = wallet.clone(); - // async move { - // let monitor = DiscoveryMonitor::new( - // wallet, - // compute_pool_id, - // args.discovery_refresh_interval, - // args.discovery_urls, - // discovery_store_context.clone(), - // discovery_heartbeats.clone(), - // args.max_healthy_nodes_with_same_endpoint, - // discovery_status_update_plugins, - // ); - // monitor.run().await - // } - // }); + // Create status_update_plugins for discovery monitor + let mut discovery_status_update_plugins: Vec = vec![]; + + // Add webhook plugins to discovery status update plugins + for plugin in &webhook_plugins { + discovery_status_update_plugins.push(plugin.into()); + } + + // Add node groups plugin if available + if let Some(group_plugin) = node_groups_plugin.clone() { + discovery_status_update_plugins.push(group_plugin.into()); + } + + let discovery_store_context = store_context.clone(); + let discovery_heartbeats = heartbeats.clone(); + let monitor = orchestrator::DiscoveryMonitor::new( + compute_pool_id, + args.discovery_refresh_interval, + discovery_store_context.clone(), + discovery_heartbeats.clone(), + discovery_status_update_plugins, + kademlia_action_tx, + wallet.provider().root().clone(), + contracts.clone(), + ); + + tasks.spawn( + // TODO: refactor task handling + monitor.run().map(|_| Ok(())), + ); let inviter_store_context = store_context.clone(); let inviter_heartbeats = heartbeats.clone(); @@ -381,7 +390,7 @@ async fn main() -> Result<()> { let status_update_heartbeats = heartbeats.clone(); let status_update_metrics = 
metrics_context.clone(); tasks.spawn({ - let contracts = contracts.clone(); + let contracts = contracts_with_wallet.clone(); async move { let status_updater = NodeStatusUpdater::new( status_update_store_context.clone(), @@ -429,7 +438,7 @@ async fn main() -> Result<()> { heartbeats.clone(), store.clone(), args.hourly_s3_upload_limit, - Some(contracts), + Some(contracts_with_wallet), compute_pool_id, server_mode, scheduler, From 7ce274a609f3516b82f0dda8dc8f198bf7505df3 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 16 Jul 2025 16:00:51 -0400 Subject: [PATCH 08/14] cleanup, remove discovery crate --- Cargo.lock | 1296 +----------------- Cargo.toml | 17 +- crates/bootnode/Cargo.toml | 2 +- crates/discovery/Cargo.toml | 25 - crates/discovery/Dockerfile | 30 - crates/discovery/src/api/mod.rs | 2 - crates/discovery/src/api/routes/get_nodes.rs | 432 ------ crates/discovery/src/api/routes/mod.rs | 2 - crates/discovery/src/api/routes/node.rs | 772 ----------- crates/discovery/src/api/server.rs | 164 --- crates/discovery/src/chainsync/mod.rs | 3 - crates/discovery/src/chainsync/sync.rs | 222 --- crates/discovery/src/lib.rs | 12 - crates/discovery/src/location_enrichment.rs | 119 -- crates/discovery/src/location_service.rs | 80 -- crates/discovery/src/main.rs | 174 --- crates/discovery/src/store/mod.rs | 2 - crates/discovery/src/store/node_store.rs | 173 --- crates/discovery/src/store/redis.rs | 72 - crates/orchestrator/Cargo.toml | 2 +- crates/orchestrator/src/discovery/monitor.rs | 94 +- crates/p2p/src/behaviour.rs | 14 +- crates/p2p/src/discovery.rs | 2 +- crates/p2p/src/lib.rs | 1 + crates/shared/Cargo.toml | 2 - crates/shared/src/models/mod.rs | 3 + crates/shared/src/models/node.rs | 74 +- crates/shared/src/p2p/discovery.rs | 98 ++ crates/shared/src/p2p/mod.rs | 2 + crates/shared/src/p2p/service.rs | 2 + crates/shared/src/web3/mod.rs | 1 + crates/validator/src/main.rs | 47 +- crates/validator/src/validator.rs | 103 +- crates/worker/Cargo.toml | 2 +- 
crates/worker/src/p2p/mod.rs | 2 + 35 files changed, 177 insertions(+), 3871 deletions(-) delete mode 100644 crates/discovery/Cargo.toml delete mode 100644 crates/discovery/Dockerfile delete mode 100644 crates/discovery/src/api/mod.rs delete mode 100644 crates/discovery/src/api/routes/get_nodes.rs delete mode 100644 crates/discovery/src/api/routes/mod.rs delete mode 100644 crates/discovery/src/api/routes/node.rs delete mode 100644 crates/discovery/src/api/server.rs delete mode 100644 crates/discovery/src/chainsync/mod.rs delete mode 100644 crates/discovery/src/chainsync/sync.rs delete mode 100644 crates/discovery/src/lib.rs delete mode 100644 crates/discovery/src/location_enrichment.rs delete mode 100644 crates/discovery/src/location_service.rs delete mode 100644 crates/discovery/src/main.rs delete mode 100644 crates/discovery/src/store/mod.rs delete mode 100644 crates/discovery/src/store/node_store.rs delete mode 100644 crates/discovery/src/store/redis.rs create mode 100644 crates/shared/src/p2p/discovery.rs diff --git a/Cargo.lock b/Cargo.lock index cbddf9ff..de650dfb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -221,7 +221,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" dependencies = [ - "bytes", "crypto-common", "generic-array", ] @@ -314,7 +313,7 @@ checksum = "7734aecfc58a597dde036e4c5cace2ae43e2f8bf3d406b022a1ef34da178dd49" dependencies = [ "alloy-primitives", "num_enum", - "strum 0.27.1", + "strum", ] [[package]] @@ -784,7 +783,7 @@ dependencies = [ "derive_more 2.0.1", "rand 0.8.5", "serde", - "strum 0.27.1", + "strum", ] [[package]] @@ -1020,7 +1019,7 @@ dependencies = [ "rustls", "serde_json", "tokio", - "tokio-tungstenite 0.26.2", + "tokio-tungstenite", "tracing", "ws_stream_wasm", ] @@ -1474,17 +1473,6 @@ dependencies = [ "fs_extra", ] -[[package]] -name = "backon" -version = "1.5.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd0b50b1b78dbadd44ab18b3c794e496f3a139abb9fbc27d9c94c4eebbb96496" -dependencies = [ - "fastrand 2.3.0", - "gloo-timers 0.3.0", - "tokio", -] - [[package]] name = "backtrace" version = "0.3.74" @@ -1737,12 +1725,6 @@ dependencies = [ "tracing-subscriber", ] -[[package]] -name = "bounded-integer" -version = "0.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "102dbef1187b1893e6dfe05a774e79fd52265f49f214f6879c8ff49f52c8188b" - [[package]] name = "brotli" version = "7.0.0" @@ -2005,12 +1987,6 @@ dependencies = [ "cc", ] -[[package]] -name = "cobs" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" - [[package]] name = "colorchoice" version = "1.0.3" @@ -2128,16 +2104,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "cordyceps" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0392f465ceba1713d30708f61c160ebf4dc1cf86bb166039d16b11ad4f3b5b6" -dependencies = [ - "loom", - "tracing", -] - [[package]] name = "core-foundation" version = "0.9.4" @@ -2196,21 +2162,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "critical-section" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b" - -[[package]] -name = "crossbeam-channel" -version = "0.5.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -2265,38 +2216,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "crypto_box" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"16182b4f39a82ec8a6851155cc4c0cda3065bb1db33651726a29e1951de0f009" -dependencies = [ - "aead", - "chacha20", - "crypto_secretbox", - "curve25519-dalek", - "salsa20", - "serdect", - "subtle", - "zeroize", -] - -[[package]] -name = "crypto_secretbox" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d6cf87adf719ddf43a805e92c6870a531aedda35ff640442cbaf8674e141e1" -dependencies = [ - "aead", - "chacha20", - "cipher", - "generic-array", - "poly1305", - "salsa20", - "subtle", - "zeroize", -] - [[package]] name = "ctr" version = "0.9.2" @@ -2317,9 +2236,7 @@ dependencies = [ "curve25519-dalek-derive", "digest 0.10.7", "fiat-crypto", - "rand_core 0.6.4", "rustc_version 0.4.1", - "serde", "subtle", "zeroize", ] @@ -2417,7 +2334,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" dependencies = [ "const-oid", - "der_derive", "pem-rfc7468", "zeroize", ] @@ -2436,17 +2352,6 @@ dependencies = [ "rusticata-macros", ] -[[package]] -name = "der_derive" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8034092389675178f570469e6c3b0465d3d30b4505c294a6550db47f3c17ad18" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - [[package]] name = "deranged" version = "0.4.0" @@ -2568,12 +2473,6 @@ dependencies = [ "url", ] -[[package]] -name = "diatomic-waker" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab03c107fafeb3ee9f5925686dbb7a73bc76e3932abb0d2b365cb64b169cf04c" - [[package]] name = "digest" version = "0.9.0" @@ -2616,28 +2515,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "discovery" -version = "0.3.11" -dependencies = [ - "actix-web", - "alloy", - "anyhow", - "clap", - "env_logger", - "futures", - "log", - "redis", - "redis-test", - "reqwest", - "serde", - "serde_json", - "shared", - "tokio", - 
"tokio-util", - "url", -] - [[package]] name = "displaydoc" version = "0.2.5" @@ -2649,32 +2526,12 @@ dependencies = [ "syn 2.0.101", ] -[[package]] -name = "dlopen2" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b4f5f101177ff01b8ec4ecc81eead416a8aa42819a2869311b3420fa114ffa" -dependencies = [ - "libc", - "once_cell", - "winapi", -] - [[package]] name = "doctest-file" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aac81fa3e28d21450aa4d2ac065992ba96a1d7303efbce51a95f4fd175b67562" -[[package]] -name = "document-features" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95249b50c6c185bee49034bcb378a49dc2b5dff0be90ff6616d31d64febab05d" -dependencies = [ - "litrs", -] - [[package]] name = "dtoa" version = "1.0.10" @@ -2715,7 +2572,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53" dependencies = [ "pkcs8", - "serde", "signature", ] @@ -2727,7 +2583,6 @@ checksum = "4a3daa8e81a3963a60642bcc1f90a670680bd4a77535faa384e9d1c79d620871" dependencies = [ "curve25519-dalek", "ed25519", - "rand_core 0.6.4", "serde", "sha2", "subtle", @@ -2764,18 +2619,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "embedded-io" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" - -[[package]] -name = "embedded-io" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" - [[package]] name = "encode_unicode" version = "1.0.0" @@ -2803,26 +2646,6 @@ dependencies = [ "syn 2.0.101", ] -[[package]] -name = "enumflags2" -version = "0.7.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ba2f4b465f5318854c6f8dd686ede6c0a9dc67d4b1ac241cf0eb51521a309147" -dependencies = [ - "enumflags2_derive", -] - -[[package]] -name = "enumflags2_derive" -version = "0.7.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc4caf64a58d7a6d65ab00639b046ff54399a39f5f2554728895ace4b297cd79" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - [[package]] name = "env_filter" version = "0.1.3" @@ -2852,21 +2675,6 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" -[[package]] -name = "erased-serde" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c138974f9d5e7fe373eb04df7cae98833802ae4b11c24ac7039a21d5af4b26c" -dependencies = [ - "serde", -] - -[[package]] -name = "erased_set" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a02a5d186d7bf1cb21f1f95e1a9cfa5c1f2dcd803a47aad454423ceec13525c5" - [[package]] name = "errno" version = "0.3.11" @@ -2908,12 +2716,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "fallible-iterator" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" - [[package]] name = "fastrand" version = "1.9.0" @@ -3002,18 +2804,6 @@ dependencies = [ "miniz_oxide", ] -[[package]] -name = "flume" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" -dependencies = [ - "futures-core", - "futures-sink", - "nanorand", - "spin 0.9.8", -] - [[package]] name = "fnv" version = "1.0.7" @@ -3097,19 +2887,6 @@ dependencies = [ "futures-util", ] -[[package]] -name = "futures-buffered" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fe940397c8b744b9c2c974791c2c08bca2c3242ce0290393249e98f215a00472" -dependencies = [ - "cordyceps", - "diatomic-waker", - "futures-core", - "pin-project-lite", - "spin 0.9.8", -] - [[package]] name = "futures-channel" version = "0.3.31" @@ -3165,10 +2942,7 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f5edaec856126859abb19ed65f39e90fea3a9574b9707f13539acf4abf7eb532" dependencies = [ - "fastrand 2.3.0", "futures-core", - "futures-io", - "parking", "pin-project-lite", ] @@ -3234,7 +3008,7 @@ version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" dependencies = [ - "gloo-timers 0.2.6", + "gloo-timers", "send_wrapper 0.4.0", ] @@ -3262,19 +3036,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42012b0f064e01aa58b545fe3727f90f7dd4020f4a3ea735b50344965f5a57e9" -[[package]] -name = "generator" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc6bd114ceda131d3b1d665eba35788690ad37f5916457286b32ab6fd3c438dd" -dependencies = [ - "cfg-if", - "libc", - "log", - "rustversion", - "windows 0.58.0", -] - [[package]] name = "generic-array" version = "0.14.7" @@ -3347,18 +3108,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "gloo-timers" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" -dependencies = [ - "futures-channel", - "futures-core", - "js-sys", - "wasm-bindgen", -] - [[package]] name = "google-cloud-auth" version = "0.17.2" @@ -3594,31 +3343,6 @@ dependencies = [ "url", ] -[[package]] -name = "hickory-proto" -version = "0.25.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8a6fe56c0038198998a6f217ca4e7ef3a5e51f46163bd6dd60b5c71ca6c6502" -dependencies = [ - 
"async-trait", - "cfg-if", - "data-encoding", - "enum-as-inner", - "futures-channel", - "futures-io", - "futures-util", - "idna", - "ipnet", - "once_cell", - "rand 0.9.1", - "ring 0.17.14", - "thiserror 2.0.12", - "tinyvec", - "tokio", - "tracing", - "url", -] - [[package]] name = "hickory-resolver" version = "0.24.4" @@ -3627,7 +3351,7 @@ checksum = "cbb117a1ca520e111743ab2f6688eddee69db4e0ea242545a604dce8a66fd22e" dependencies = [ "cfg-if", "futures-util", - "hickory-proto 0.24.4", + "hickory-proto", "ipconfig", "lru-cache", "once_cell", @@ -3640,27 +3364,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "hickory-resolver" -version = "0.25.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc62a9a99b0bfb44d2ab95a7208ac952d31060efc16241c87eaf36406fecf87a" -dependencies = [ - "cfg-if", - "futures-util", - "hickory-proto 0.25.2", - "ipconfig", - "moka", - "once_cell", - "parking_lot 0.12.3", - "rand 0.9.1", - "resolv-conf", - "smallvec", - "thiserror 2.0.12", - "tokio", - "tracing", -] - [[package]] name = "hkdf" version = "0.12.4" @@ -3679,22 +3382,6 @@ dependencies = [ "digest 0.10.7", ] -[[package]] -name = "hmac-sha1" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b05da5b9e5d4720bfb691eebb2b9d42da3570745da71eac8a1f5bb7e59aab88" -dependencies = [ - "hmac", - "sha1", -] - -[[package]] -name = "hmac-sha256" -version = "1.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a8575493d277c9092b988c780c94737fb9fd8651a1001e16bee3eccfc1baedb" - [[package]] name = "home" version = "0.5.11" @@ -3716,12 +3403,6 @@ dependencies = [ "windows 0.57.0", ] -[[package]] -name = "hostname-validator" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f558a64ac9af88b5ba400d99b579451af0d39c6d360980045b91aac966d705e2" - [[package]] name = "http" version = "0.2.12" @@ -4126,10 +3807,10 @@ dependencies = [ "ipnet", "log", 
"netlink-packet-core", - "netlink-packet-route 0.17.1", + "netlink-packet-route", "netlink-proto", "netlink-sys", - "rtnetlink 0.13.1", + "rtnetlink", "system-configuration", "tokio", "windows 0.52.0", @@ -4155,31 +3836,10 @@ dependencies = [ ] [[package]] -name = "igd-next" -version = "0.15.1" +name = "impl-codec" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76b0d7d4541def58a37bf8efc559683f21edce7c82f0d866c93ac21f7e098f93" -dependencies = [ - "async-trait", - "attohttpc", - "bytes", - "futures", - "http 1.3.1", - "http-body-util", - "hyper 1.6.0", - "hyper-util", - "log", - "rand 0.8.5", - "tokio", - "url", - "xmltree", -] - -[[package]] -name = "impl-codec" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba6a270039626615617f3f36d15fc827041df3b78c439da2cadfa47455a77f2f" +checksum = "ba6a270039626615617f3f36d15fc827041df3b78c439da2cadfa47455a77f2f" dependencies = [ "parity-scale-codec", ] @@ -4306,193 +3966,6 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" -[[package]] -name = "iroh" -version = "0.34.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37432887a6836e7a832fccb121b5f0ee6cd953c506f99b0278bdbedf8dee0e88" -dependencies = [ - "aead", - "anyhow", - "atomic-waker", - "backon", - "bytes", - "cfg_aliases", - "concurrent-queue", - "crypto_box", - "data-encoding", - "der", - "derive_more 1.0.0", - "ed25519-dalek", - "futures-util", - "hickory-resolver 0.25.2", - "http 1.3.1", - "igd-next 0.15.1", - "instant", - "iroh-base", - "iroh-metrics", - "iroh-quinn", - "iroh-quinn-proto", - "iroh-quinn-udp", - "iroh-relay", - "n0-future", - "netdev", - "netwatch", - "pin-project", - "pkarr", - "portmapper", - "rand 0.8.5", - "rcgen 0.13.2", - "reqwest", - "ring 0.17.14", - "rustls", - "rustls-webpki 0.102.8", - "serde", - 
"smallvec", - "strum 0.26.3", - "stun-rs", - "surge-ping", - "thiserror 2.0.12", - "time", - "tokio", - "tokio-stream", - "tokio-util", - "tracing", - "url", - "wasm-bindgen-futures", - "webpki-roots 0.26.9", - "x509-parser", - "z32", -] - -[[package]] -name = "iroh-base" -version = "0.34.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cd952d9e25e521d6aeb5b79f2fe32a0245da36aae3569e50f6010b38a5f0923" -dependencies = [ - "curve25519-dalek", - "data-encoding", - "derive_more 1.0.0", - "ed25519-dalek", - "rand_core 0.6.4", - "serde", - "thiserror 2.0.12", - "url", -] - -[[package]] -name = "iroh-metrics" -version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0f7cd1ffe3b152a5f4f4c1880e01e07d96001f20e02cc143cb7842987c616b3" -dependencies = [ - "erased_set", - "prometheus-client", - "serde", - "struct_iterable", - "thiserror 2.0.12", - "tracing", -] - -[[package]] -name = "iroh-quinn" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76c6245c9ed906506ab9185e8d7f64857129aee4f935e899f398a3bd3b70338d" -dependencies = [ - "bytes", - "cfg_aliases", - "iroh-quinn-proto", - "iroh-quinn-udp", - "pin-project-lite", - "rustc-hash 2.1.1", - "rustls", - "socket2", - "thiserror 2.0.12", - "tokio", - "tracing", - "web-time", -] - -[[package]] -name = "iroh-quinn-proto" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "929d5d8fa77d5c304d3ee7cae9aede31f13908bd049f9de8c7c0094ad6f7c535" -dependencies = [ - "bytes", - "getrandom 0.2.16", - "rand 0.8.5", - "ring 0.17.14", - "rustc-hash 2.1.1", - "rustls", - "rustls-pki-types", - "slab", - "thiserror 2.0.12", - "tinyvec", - "tracing", - "web-time", -] - -[[package]] -name = "iroh-quinn-udp" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c53afaa1049f7c83ea1331f5ebb9e6ebc5fdd69c468b7a22dd598b02c9bcc973" -dependencies = 
[ - "cfg_aliases", - "libc", - "once_cell", - "socket2", - "tracing", - "windows-sys 0.59.0", -] - -[[package]] -name = "iroh-relay" -version = "0.34.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40d2d7b50d999922791c6c14c25e13f55711e182618cb387bafa0896ffe0b930" -dependencies = [ - "anyhow", - "bytes", - "cfg_aliases", - "data-encoding", - "derive_more 1.0.0", - "hickory-resolver 0.25.2", - "http 1.3.1", - "http-body-util", - "hyper 1.6.0", - "hyper-util", - "iroh-base", - "iroh-metrics", - "iroh-quinn", - "iroh-quinn-proto", - "lru 0.12.5", - "n0-future", - "num_enum", - "pin-project", - "pkarr", - "postcard", - "rand 0.8.5", - "reqwest", - "rustls", - "rustls-webpki 0.102.8", - "serde", - "strum 0.26.3", - "stun-rs", - "thiserror 2.0.12", - "tokio", - "tokio-rustls", - "tokio-tungstenite-wasm", - "tokio-util", - "tracing", - "url", - "webpki-roots 0.26.9", - "z32", -] - [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -4819,7 +4292,7 @@ checksum = "97f37f30d5c7275db282ecd86e54f29dd2176bd3ac656f06abf43bedb21eb8bd" dependencies = [ "async-trait", "futures", - "hickory-resolver 0.24.4", + "hickory-resolver", "libp2p-core", "libp2p-identity", "parking_lot 0.12.3", @@ -4944,7 +4417,7 @@ checksum = "14b8546b6644032565eb29046b42744aee1e9f261ed99671b2c93fb140dba417" dependencies = [ "data-encoding", "futures", - "hickory-proto 0.24.4", + "hickory-proto", "if-watch", "libp2p-core", "libp2p-identity", @@ -5217,7 +4690,7 @@ checksum = "01bf2d1b772bd3abca049214a3304615e6a36fa6ffc742bdd1ba774486200b8f" dependencies = [ "futures", "futures-timer", - "igd-next 0.14.3", + "igd-next", "libp2p-core", "libp2p-swarm", "tokio", @@ -5344,12 +4817,6 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856" -[[package]] -name = "litrs" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5" - [[package]] name = "local-channel" version = "0.1.5" @@ -5393,19 +4860,6 @@ dependencies = [ "prost-types", ] -[[package]] -name = "loom" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" -dependencies = [ - "cfg-if", - "generator", - "scoped-tls", - "tracing", - "tracing-subscriber", -] - [[package]] name = "lru" version = "0.12.5" @@ -5482,12 +4936,6 @@ dependencies = [ "digest 0.10.7", ] -[[package]] -name = "md5" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" - [[package]] name = "memchr" version = "2.7.4" @@ -5570,25 +5018,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "moka" -version = "0.12.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9321642ca94a4282428e6ea4af8cc2ca4eac48ac7a6a4ea8f33f76d0ce70926" -dependencies = [ - "crossbeam-channel", - "crossbeam-epoch", - "crossbeam-utils", - "loom", - "parking_lot 0.12.3", - "portable-atomic", - "rustc_version 0.4.1", - "smallvec", - "tagptr", - "thiserror 1.0.69", - "uuid", -] - [[package]] name = "multiaddr" version = "0.18.2" @@ -5687,27 +5116,6 @@ dependencies = [ "unsigned-varint 0.7.2", ] -[[package]] -name = "n0-future" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb0e5d99e681ab3c938842b96fcb41bf8a7bb4bfdb11ccbd653a7e83e06c794" -dependencies = [ - "cfg_aliases", - "derive_more 1.0.0", - "futures-buffered", - "futures-lite 2.6.0", - "futures-util", - "js-sys", - "pin-project", - "send_wrapper 0.6.0", - "tokio", - "tokio-util", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-time", -] - [[package]] name = "nalgebra" version = "0.33.2" @@ -5735,15 +5143,6 @@ dependencies = [ "syn 2.0.101", ] -[[package]] -name = 
"nanorand" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" -dependencies = [ - "getrandom 0.2.16", -] - [[package]] name = "native-tls" version = "0.2.14" @@ -5761,23 +5160,6 @@ dependencies = [ "tempfile", ] -[[package]] -name = "netdev" -version = "0.31.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f901362e84cd407be6f8cd9d3a46bccf09136b095792785401ea7d283c79b91d" -dependencies = [ - "dlopen2", - "ipnet", - "libc", - "netlink-packet-core", - "netlink-packet-route 0.17.1", - "netlink-sys", - "once_cell", - "system-configuration", - "windows-sys 0.52.0", -] - [[package]] name = "netlink-packet-core" version = "0.7.0" @@ -5803,20 +5185,6 @@ dependencies = [ "netlink-packet-utils", ] -[[package]] -name = "netlink-packet-route" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74c171cd77b4ee8c7708da746ce392440cb7bcf618d122ec9ecc607b12938bf4" -dependencies = [ - "anyhow", - "byteorder", - "libc", - "log", - "netlink-packet-core", - "netlink-packet-utils", -] - [[package]] name = "netlink-packet-utils" version = "0.5.2" @@ -5856,39 +5224,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "netwatch" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7879c2cfdf30d92f2be89efa3169b3d78107e3ab7f7b9a37157782569314e1" -dependencies = [ - "atomic-waker", - "bytes", - "cfg_aliases", - "derive_more 1.0.0", - "iroh-quinn-udp", - "js-sys", - "libc", - "n0-future", - "netdev", - "netlink-packet-core", - "netlink-packet-route 0.19.0", - "netlink-sys", - "rtnetlink 0.13.1", - "rtnetlink 0.14.1", - "serde", - "socket2", - "thiserror 2.0.12", - "time", - "tokio", - "tokio-util", - "tracing", - "web-sys", - "windows 0.59.0", - "windows-result 0.3.2", - "wmi", -] - [[package]] name = "nix" version = "0.26.4" @@ -5902,17 +5237,6 @@ 
dependencies = [ "pin-utils", ] -[[package]] -name = "nix" -version = "0.27.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" -dependencies = [ - "bitflags 2.9.0", - "cfg-if", - "libc", -] - [[package]] name = "nix" version = "0.29.0" @@ -5925,12 +5249,6 @@ dependencies = [ "libc", ] -[[package]] -name = "no-std-net" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43794a0ace135be66a25d3ae77d41b91615fb68ae937f904090203e81f755b65" - [[package]] name = "nohash-hasher" version = "0.2.0" @@ -6046,7 +5364,6 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56" dependencies = [ - "proc-macro-crate", "proc-macro2", "quote", "syn 2.0.101", @@ -6111,10 +5428,6 @@ name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" -dependencies = [ - "critical-section", - "portable-atomic", -] [[package]] name = "opaque-debug" @@ -6364,40 +5677,6 @@ dependencies = [ "ucd-trie", ] -[[package]] -name = "pest_derive" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d725d9cfd79e87dccc9341a2ef39d1b6f6353d68c4b33c177febbe1a402c97c5" -dependencies = [ - "pest", - "pest_generator", -] - -[[package]] -name = "pest_generator" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db7d01726be8ab66ab32f9df467ae8b1148906685bbe75c82d1e65d7f5b3f841" -dependencies = [ - "pest", - "pest_meta", - "proc-macro2", - "quote", - "syn 2.0.101", -] - -[[package]] -name = "pest_meta" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9f832470494906d1fca5329f8ab5791cc60beb230c74815dff541cbd2b5ca0" 
-dependencies = [ - "once_cell", - "pest", - "sha2", -] - [[package]] name = "pharos" version = "0.5.3" @@ -6440,30 +5719,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" -[[package]] -name = "pkarr" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92eff194c72f00f3076855b413ad2d940e3a6e307fa697e5c7733e738341aed4" -dependencies = [ - "bytes", - "document-features", - "ed25519-dalek", - "flume", - "futures", - "js-sys", - "lru 0.12.5", - "self_cell", - "simple-dns", - "thiserror 2.0.12", - "tracing", - "ureq", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "z32", -] - [[package]] name = "pkcs8" version = "0.10.2" @@ -6480,48 +5735,6 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" -[[package]] -name = "pnet_base" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe4cf6fb3ab38b68d01ab2aea03ed3d1132b4868fa4e06285f29f16da01c5f4c" -dependencies = [ - "no-std-net", -] - -[[package]] -name = "pnet_macros" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "688b17499eee04a0408aca0aa5cba5fc86401d7216de8a63fdf7a4c227871804" -dependencies = [ - "proc-macro2", - "quote", - "regex", - "syn 2.0.101", -] - -[[package]] -name = "pnet_macros_support" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eea925b72f4bd37f8eab0f221bbe4c78b63498350c983ffa9dd4bcde7e030f56" -dependencies = [ - "pnet_base", -] - -[[package]] -name = "pnet_packet" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9a005825396b7fe7a38a8e288dbc342d5034dac80c15212436424fef8ea90ba" -dependencies = [ - "glob", - "pnet_base", - 
"pnet_macros", - "pnet_macros_support", -] - [[package]] name = "pollable-map" version = "0.1.7" @@ -6585,58 +5798,6 @@ dependencies = [ "portable-atomic", ] -[[package]] -name = "portmapper" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "247dcb75747c53cc433d6d8963a064187eec4a676ba13ea33143f1c9100e754f" -dependencies = [ - "base64 0.22.1", - "bytes", - "derive_more 1.0.0", - "futures-lite 2.6.0", - "futures-util", - "igd-next 0.15.1", - "iroh-metrics", - "libc", - "netwatch", - "num_enum", - "rand 0.8.5", - "serde", - "smallvec", - "socket2", - "thiserror 2.0.12", - "time", - "tokio", - "tokio-util", - "tracing", - "url", -] - -[[package]] -name = "postcard" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "170a2601f67cc9dba8edd8c4870b15f71a6a2dc196daec8c83f72b59dff628a8" -dependencies = [ - "cobs", - "embedded-io 0.4.0", - "embedded-io 0.6.1", - "postcard-derive", - "serde", -] - -[[package]] -name = "postcard-derive" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0239fa9c1d225d4b7eb69925c25c5e082307a141e470573fbbe3a817ce6a7a37" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "powerfmt" version = "0.2.0" @@ -6652,40 +5813,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "precis-core" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c2e7b31f132e0c6f8682cfb7bf4a5340dbe925b7986618d0826a56dfe0c8e56" -dependencies = [ - "precis-tools", - "ucd-parse", - "unicode-normalization", -] - -[[package]] -name = "precis-profiles" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc4f67f78f50388f03494794766ba824a704db16fb5d400fe8d545fa7bc0d3f1" -dependencies = [ - "lazy_static", - "precis-core", - "precis-tools", - "unicode-normalization", -] - -[[package]] -name = "precis-tools" -version 
= "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6cc1eb2d5887ac7bfd2c0b745764db89edb84b856e4214e204ef48ef96d10c4a" -dependencies = [ - "lazy_static", - "regex", - "ucd-parse", -] - [[package]] name = "prettyplease" version = "0.2.32" @@ -6988,16 +6115,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "quoted-string-parser" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dc75379cdb451d001f1cb667a9f74e8b355e9df84cc5193513cbe62b96fc5e9" -dependencies = [ - "pest", - "pest_derive", -] - [[package]] name = "r-efi" version = "5.2.0" @@ -7354,7 +6471,7 @@ dependencies = [ "cc", "libc", "once_cell", - "spin 0.5.2", + "spin", "untrusted 0.7.1", "web-sys", "winapi", @@ -7411,7 +6528,7 @@ dependencies = [ "futures", "log", "netlink-packet-core", - "netlink-packet-route 0.17.1", + "netlink-packet-route", "netlink-packet-utils", "netlink-proto", "netlink-sys", @@ -7420,24 +6537,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "rtnetlink" -version = "0.14.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b684475344d8df1859ddb2d395dd3dac4f8f3422a1aa0725993cb375fc5caba5" -dependencies = [ - "futures", - "log", - "netlink-packet-core", - "netlink-packet-route 0.19.0", - "netlink-packet-utils", - "netlink-proto", - "netlink-sys", - "nix 0.27.1", - "thiserror 1.0.69", - "tokio", -] - [[package]] name = "ruint" version = "1.14.0" @@ -7525,7 +6624,7 @@ dependencies = [ "futures-timeout", "futures-timer", "getrandom 0.2.16", - "hickory-resolver 0.24.4", + "hickory-resolver", "hkdf", "idb", "indexmap 2.9.0", @@ -7730,17 +6829,6 @@ dependencies = [ "untrusted 0.9.0", ] -[[package]] -name = "rustls-webpki" -version = "0.102.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" -dependencies = [ - "ring 0.17.14", - "rustls-pki-types", - "untrusted 0.9.0", -] - 
[[package]] name = "rustls-webpki" version = "0.103.1" @@ -7789,21 +6877,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] -name = "safe_arch" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96b02de82ddbe1b636e6170c21be622223aea188ef2e139be0a5b219ec215323" -dependencies = [ - "bytemuck", -] - -[[package]] -name = "salsa20" -version = "0.10.2" +name = "safe_arch" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" +checksum = "96b02de82ddbe1b636e6170c21be622223aea188ef2e139be0a5b219ec215323" dependencies = [ - "cipher", + "bytemuck", ] [[package]] @@ -7824,12 +6903,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "scoped-tls" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" - [[package]] name = "scopeguard" version = "1.2.0" @@ -7895,12 +6968,6 @@ dependencies = [ "libc", ] -[[package]] -name = "self_cell" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f7d95a54511e0c7be3f51e8867aa8cf35148d7b9445d44de2f943e2b206e749" - [[package]] name = "semver" version = "0.11.0" @@ -8173,9 +7240,7 @@ dependencies = [ "futures-util", "google-cloud-storage", "hex", - "iroh", "log", - "nalgebra", "p2p", "rand 0.8.5", "rand 0.9.1", @@ -8241,15 +7306,6 @@ version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" -[[package]] -name = "simple-dns" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee851d0e5e7af3721faea1843e8015e820a234f81fda3dea9247e15bac9a86a" -dependencies = [ - "bitflags 
2.9.0", -] - [[package]] name = "simple_asn1" version = "0.5.4" @@ -8357,15 +7413,6 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" -dependencies = [ - "lock_api", -] - [[package]] name = "spki" version = "0.7.3" @@ -8416,64 +7463,13 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" -[[package]] -name = "struct_iterable" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "849a064c6470a650b72e41fa6c057879b68f804d113af92900f27574828e7712" -dependencies = [ - "struct_iterable_derive", - "struct_iterable_internal", -] - -[[package]] -name = "struct_iterable_derive" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb939ce88a43ea4e9d012f2f6b4cc789deb2db9d47bad697952a85d6978662c" -dependencies = [ - "erased-serde", - "proc-macro2", - "quote", - "struct_iterable_internal", - "syn 2.0.101", -] - -[[package]] -name = "struct_iterable_internal" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9426b2a0c03e6cc2ea8dbc0168dbbf943f88755e409fb91bcb8f6a268305f4a" - -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" -dependencies = [ - "strum_macros 0.26.4", -] - [[package]] name = "strum" version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32" dependencies = [ - "strum_macros 
0.27.1", -] - -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.101", + "strum_macros", ] [[package]] @@ -8508,52 +7504,12 @@ dependencies = [ "webrtc-util", ] -[[package]] -name = "stun-rs" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb921f10397d5669e1af6455e9e2d367bf1f9cebcd6b1dd1dc50e19f6a9ac2ac" -dependencies = [ - "base64 0.22.1", - "bounded-integer", - "byteorder", - "crc", - "enumflags2", - "fallible-iterator", - "hmac-sha1", - "hmac-sha256", - "hostname-validator", - "lazy_static", - "md5", - "paste", - "precis-core", - "precis-profiles", - "quoted-string-parser", - "rand 0.9.1", -] - [[package]] name = "subtle" version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" -[[package]] -name = "surge-ping" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fda78103d8016bb25c331ddc54af634e801806463682cc3e549d335df644d95" -dependencies = [ - "hex", - "parking_lot 0.12.3", - "pnet_packet", - "rand 0.9.1", - "socket2", - "thiserror 1.0.69", - "tokio", - "tracing", -] - [[package]] name = "syn" version = "1.0.109" @@ -8644,12 +7600,6 @@ dependencies = [ "libc", ] -[[package]] -name = "tagptr" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" - [[package]] name = "tap" version = "1.0.1" @@ -8736,7 +7686,6 @@ checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" dependencies = [ "deranged", "itoa", - "js-sys", "num-conv", "powerfmt", "serde", @@ -8855,18 +7804,6 @@ dependencies = [ "tokio-util", ] 
-[[package]] -name = "tokio-tungstenite" -version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9" -dependencies = [ - "futures-util", - "log", - "tokio", - "tungstenite 0.24.0", -] - [[package]] name = "tokio-tungstenite" version = "0.26.2" @@ -8879,28 +7816,10 @@ dependencies = [ "rustls-pki-types", "tokio", "tokio-rustls", - "tungstenite 0.26.2", + "tungstenite", "webpki-roots 0.26.9", ] -[[package]] -name = "tokio-tungstenite-wasm" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e21a5c399399c3db9f08d8297ac12b500e86bca82e930253fdc62eaf9c0de6ae" -dependencies = [ - "futures-channel", - "futures-util", - "http 1.3.1", - "httparse", - "js-sys", - "thiserror 1.0.69", - "tokio", - "tokio-tungstenite 0.24.0", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "tokio-util" version = "0.7.15" @@ -9073,24 +7992,6 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "tungstenite" -version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a" -dependencies = [ - "byteorder", - "bytes", - "data-encoding", - "http 1.3.1", - "httparse", - "log", - "rand 0.8.5", - "sha1", - "thiserror 1.0.69", - "utf-8", -] - [[package]] name = "tungstenite" version = "0.26.2" @@ -9116,15 +8017,6 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" -[[package]] -name = "ucd-parse" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06ff81122fcbf4df4c1660b15f7e3336058e7aec14437c9f85c6b31a0f279b9" -dependencies = [ - "regex-lite", -] - [[package]] 
name = "ucd-trie" version = "0.1.7" @@ -9161,15 +8053,6 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" -[[package]] -name = "unicode-normalization" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" -dependencies = [ - "tinyvec", -] - [[package]] name = "unicode-segmentation" version = "1.12.0" @@ -9226,21 +8109,6 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" -[[package]] -name = "ureq" -version = "2.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" -dependencies = [ - "base64 0.22.1", - "log", - "once_cell", - "rustls", - "rustls-pki-types", - "url", - "webpki-roots 0.26.9", -] - [[package]] name = "url" version = "2.5.4" @@ -9250,7 +8118,6 @@ dependencies = [ "form_urlencoded", "idna", "percent-encoding", - "serde", ] [[package]] @@ -9697,26 +8564,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows" -version = "0.58.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" -dependencies = [ - "windows-core 0.58.0", - "windows-targets 0.52.6", -] - -[[package]] -name = "windows" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f919aee0a93304be7f62e8e5027811bbba96bcb1de84d6618be56e43f8a32a1" -dependencies = [ - "windows-core 0.59.0", - "windows-targets 0.53.0", -] - [[package]] name = "windows-core" version = "0.52.0" @@ -9738,32 +8585,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-core" -version = "0.58.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" -dependencies = [ - "windows-implement 0.58.0", - "windows-interface 0.58.0", - "windows-result 0.2.0", - "windows-strings 0.1.0", - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-core" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "810ce18ed2112484b0d4e15d022e5f598113e220c53e373fb31e67e21670c1ce" -dependencies = [ - "windows-implement 0.59.0", - "windows-interface 0.59.1", - "windows-result 0.3.2", - "windows-strings 0.3.1", - "windows-targets 0.53.0", -] - [[package]] name = "windows-core" version = "0.61.0" @@ -9788,28 +8609,6 @@ dependencies = [ "syn 2.0.101", ] -[[package]] -name = "windows-implement" -version = "0.58.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - -[[package]] -name = "windows-implement" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83577b051e2f49a058c308f17f273b570a6a758386fc291b5f6a934dd84e48c1" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - [[package]] name = "windows-implement" version = "0.60.0" @@ -9832,17 +8631,6 @@ dependencies = [ "syn 2.0.101", ] -[[package]] -name = "windows-interface" -version = "0.58.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.101", -] - [[package]] name = "windows-interface" version = "0.59.1" @@ -9880,15 +8668,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-result" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" -dependencies = [ - "windows-targets 0.52.6", -] - [[package]] name = "windows-result" version = "0.3.2" @@ -9898,16 +8677,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-strings" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" -dependencies = [ - "windows-result 0.2.0", - "windows-targets 0.52.6", -] - [[package]] name = "windows-strings" version = "0.3.1" @@ -10166,21 +8935,6 @@ dependencies = [ "bitflags 2.9.0", ] -[[package]] -name = "wmi" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7787dacdd8e71cbc104658aade4009300777f9b5fda6a75f19145fedb8a18e71" -dependencies = [ - "chrono", - "futures", - "log", - "serde", - "thiserror 2.0.12", - "windows 0.59.0", - "windows-core 0.59.0", -] - [[package]] name = "worker" version = "0.3.11" @@ -10392,12 +9146,6 @@ dependencies = [ "synstructure", ] -[[package]] -name = "z32" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2164e798d9e3d84ee2c91139ace54638059a3b23e361f5c11781c2c6459bde0f" - [[package]] name = "zerocopy" version = "0.8.25" diff --git a/Cargo.toml b/Cargo.toml index 0aed3ebf..7aaa6304 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,13 +1,12 @@ [workspace] members = [ - "crates/discovery", - "crates/worker", - "crates/validator", - "crates/shared", + "crates/bootnode", + "crates/dev-utils", "crates/orchestrator", "crates/p2p", - "crates/dev-utils", - "crates/bootnode", + "crates/shared", + "crates/worker", + "crates/validator", ] resolver = "2" @@ -18,7 +17,7 @@ p2p = { path = "crates/p2p" } actix-web = "4.9.0" clap = { version = "4.5.27", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] } -tokio = { version = "1.43.0", features = ["full", "macros"] } +tokio = { version = "1.43.0" } 
uuid = { version = "1.12.1", features = ["v4", "serde"] } log = { version = "0.4.26" } env_logger = { version = "0.11.6" } @@ -30,7 +29,6 @@ serde_json = "1.0.137" reqwest = "0.12.12" hex = "0.4.3" anyhow = "1.0.95" -toml = "0.8.20" tokio-util = "0.7.13" futures = "0.3.31" chrono = "0.4.40" @@ -40,13 +38,12 @@ redis = "0.28.1" redis-test = "0.8.0" stun = "0.7.0" mockito = "1.7.0" -iroh = "0.34.1" rand_v8 = { package = "rand", version = "0.8.5", features = ["std"] } -rand_core_v6 = { package = "rand_core", version = "0.6.4", features = ["std"] } ipld-core = "0.4" rust-ipfs = "0.14" cid = "0.11" tracing = "0.1.41" +tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } [workspace.package] version = "0.3.11" diff --git a/crates/bootnode/Cargo.toml b/crates/bootnode/Cargo.toml index a7124269..14095161 100644 --- a/crates/bootnode/Cargo.toml +++ b/crates/bootnode/Cargo.toml @@ -12,7 +12,7 @@ hex = { workspace = true } tokio = { workspace = true } tokio-util = { workspace = true } tracing = { workspace = true } -tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } +tracing-subscriber = { workspace = true, features = ["env-filter"] } [lints] workspace = true diff --git a/crates/discovery/Cargo.toml b/crates/discovery/Cargo.toml deleted file mode 100644 index 567d4845..00000000 --- a/crates/discovery/Cargo.toml +++ /dev/null @@ -1,25 +0,0 @@ -[package] -name = "discovery" -version.workspace = true -edition.workspace = true - -[lints] -workspace = true - -[dependencies] -actix-web = { workspace = true } -alloy = { workspace = true } -anyhow = { workspace = true } -clap = { workspace = true } -env_logger = { workspace = true } -futures = { workspace = true } -log = { workspace = true } -redis = { workspace = true, features = ["tokio-comp"] } -redis-test = { workspace = true } -reqwest = { workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } -shared = { workspace = true } -tokio = { workspace = true } 
-tokio-util = { workspace = true } -url = { workspace = true } diff --git a/crates/discovery/Dockerfile b/crates/discovery/Dockerfile deleted file mode 100644 index 1b9ceec2..00000000 --- a/crates/discovery/Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -FROM ubuntu:22.04 - -RUN apt-get update && apt-get install -y ca-certificates && rm -rf /var/lib/apt/lists/* -COPY release-artifacts/discovery-linux-x86_64 /usr/local/bin/discovery -RUN chmod +x /usr/local/bin/discovery - -ENV RPC_URL="http://localhost:8545" -ENV PLATFORM_API_KEY="prime" -ENV REDIS_URL="redis://localhost:6380" -ENV PORT="8089" -ENV MAX_NODES_PER_IP="3" -ENV MODE="full" -ENV LOCATION_SERVICE_URL="" -ENV LOCATION_SERVICE_API_KEY="" - -RUN echo '#!/bin/sh\n\ -exec /usr/local/bin/discovery \ ---rpc-url "$RPC_URL" \ ---platform-api-key "$PLATFORM_API_KEY" \ ---redis-url "$REDIS_URL" \ ---port "$PORT" \ ---max-nodes-per-ip "$MAX_NODES_PER_IP" \ ---mode "$MODE" \ -$([ ! -z "$LOCATION_SERVICE_URL" ] && echo "--location-service-url $LOCATION_SERVICE_URL") \ -$([ ! 
-z "$LOCATION_SERVICE_API_KEY" ] && echo "--location-service-api-key $LOCATION_SERVICE_API_KEY") \ -"$@"' > /entrypoint.sh && \ -chmod +x /entrypoint.sh - -EXPOSE 8089 -ENTRYPOINT ["/entrypoint.sh"] \ No newline at end of file diff --git a/crates/discovery/src/api/mod.rs b/crates/discovery/src/api/mod.rs deleted file mode 100644 index 3c271c34..00000000 --- a/crates/discovery/src/api/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub(crate) mod routes; -pub(crate) mod server; diff --git a/crates/discovery/src/api/routes/get_nodes.rs b/crates/discovery/src/api/routes/get_nodes.rs deleted file mode 100644 index acf21ccb..00000000 --- a/crates/discovery/src/api/routes/get_nodes.rs +++ /dev/null @@ -1,432 +0,0 @@ -use crate::api::server::AppState; -use actix_web::{ - web::Data, - web::{self}, - HttpResponse, -}; -use alloy::primitives::U256; -use shared::models::api::ApiResponse; -use shared::models::node::DiscoveryNode; - -pub(crate) async fn get_nodes(data: Data) -> HttpResponse { - let nodes = data.node_store.get_nodes().await; - match nodes { - Ok(nodes) => { - let response = ApiResponse::new(true, nodes); - HttpResponse::Ok().json(response) - } - Err(_) => HttpResponse::InternalServerError() - .json(ApiResponse::new(false, "Internal server error")), - } -} - -fn filter_nodes_for_pool(nodes: Vec, pool_id: u32) -> Vec { - let nodes_for_pool: Vec = nodes - .iter() - .filter(|node| node.compute_pool_id == pool_id) - .cloned() - .collect(); - - // Filter out nodes with IPs that are currently active in another pool - let filtered: Vec = nodes_for_pool - .iter() - .filter(|node| { - // Check if there's any other node with the same IP address in a different pool that is active - !nodes.iter().any(|other| { - other.ip_address == node.ip_address - && other.compute_pool_id != node.compute_pool_id - && other.is_active - }) - }) - .cloned() - .collect(); - filtered -} - -pub(crate) async fn get_nodes_for_pool( - data: Data, - pool_id: web::Path, - req: actix_web::HttpRequest, -) -> 
HttpResponse { - let nodes = data.node_store.get_nodes().await; - match nodes { - Ok(nodes) => { - let id_clone = pool_id.clone(); - let pool_contract_id: U256 = match id_clone.parse::() { - Ok(id) => id, - Err(_) => { - return HttpResponse::BadRequest() - .json(ApiResponse::new(false, "Invalid pool ID format")); - } - }; - let pool_id: u32 = match pool_id.parse() { - Ok(id) => id, - Err(_) => { - return HttpResponse::BadRequest() - .json(ApiResponse::new(false, "Invalid pool ID format")); - } - }; - - match data.contracts.clone() { - Some(contracts) => { - let Ok(pool_info) = - contracts.compute_pool.get_pool_info(pool_contract_id).await - else { - return HttpResponse::NotFound() - .json(ApiResponse::new(false, "Pool not found")); - }; - let owner = pool_info.creator; - let manager = pool_info.compute_manager_key; - let address_str = match req.headers().get("x-address") { - Some(address) => match address.to_str() { - Ok(addr) => addr.to_string(), - Err(_) => { - return HttpResponse::BadRequest().json(ApiResponse::new( - false, - "Invalid x-address header - parsing issue", - )) - } - }, - None => { - return HttpResponse::BadRequest() - .json(ApiResponse::new(false, "Missing x-address header")) - } - }; - - // Normalize the address strings for comparison - let owner_str = owner.to_string().to_lowercase(); - let manager_str = manager.to_string().to_lowercase(); - let address_str_normalized = address_str.to_lowercase(); - - if address_str_normalized != owner_str && address_str_normalized != manager_str - { - return HttpResponse::BadRequest().json(ApiResponse::new( - false, - "Invalid x-address header - not owner or manager", - )); - } - } - None => { - return HttpResponse::BadRequest() - .json(ApiResponse::new(false, "No contracts found")) - } - } - - let nodes_for_pool: Vec = filter_nodes_for_pool(nodes, pool_id); - let response = ApiResponse::new(true, nodes_for_pool); - HttpResponse::Ok().json(response) - } - Err(_) => HttpResponse::InternalServerError() - 
.json(ApiResponse::new(false, "Internal server error")), - } -} - -pub(crate) async fn get_node_by_subkey( - node_id: web::Path, - data: Data, -) -> HttpResponse { - let node = data.node_store.get_node_by_id(&node_id.to_string()).await; - - match node { - Ok(Some(node)) => HttpResponse::Ok().json(ApiResponse::new(true, node)), - Ok(None) => HttpResponse::NotFound().json(ApiResponse::new(false, "Node not found")), - Err(_) => HttpResponse::InternalServerError() - .json(ApiResponse::new(false, "Internal server error")), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::store::node_store::NodeStore; - use crate::store::redis::RedisStore; - use actix_web::test; - use actix_web::web::get; - use actix_web::App; - use shared::models::node::DiscoveryNode; - use shared::models::node::Node; - use std::sync::Arc; - use std::thread; - use std::time::Duration; - use std::time::SystemTime; - use tokio::sync::Mutex; - - #[actix_web::test] - async fn test_get_nodes() { - let app_state = AppState { - node_store: Arc::new(NodeStore::new(RedisStore::new_test())), - contracts: None, - last_chain_sync: Arc::new(Mutex::new(None::)), - max_nodes_per_ip: 1, - chain_sync_enabled: true, - }; - let app = test::init_service( - App::new() - .app_data(Data::new(app_state.clone())) - .route("/nodes", get().to(get_nodes)), - ) - .await; - - let sample_node = Node { - id: "0x32A8dFdA26948728e5351e61d62C190510CF1C88".to_string(), - provider_address: "0x32A8dFdA26948728e5351e61d62C190510CF1C88".to_string(), - ip_address: "127.0.0.1".to_string(), - port: 8080, - compute_pool_id: 0, - ..Default::default() - }; - match app_state.node_store.register_node(sample_node).await { - Ok(_) => (), - Err(_) => { - panic!("Error registering node"); - } - } - - let req = test::TestRequest::get().uri("/nodes").to_request(); - let resp = test::call_service(&app, req).await; - assert!(resp.status().is_success()); - let body = test::read_body(resp).await; - let api_response: ApiResponse> = match 
serde_json::from_slice(&body) { - Ok(response) => response, - Err(_) => panic!("Failed to deserialize response"), - }; - assert!(api_response.success); - assert_eq!(api_response.data.len(), 1); - } - - #[actix_web::test] - async fn test_nodes_sorted_by_newest_first() { - let app_state = AppState { - node_store: Arc::new(NodeStore::new(RedisStore::new_test())), - contracts: None, - last_chain_sync: Arc::new(Mutex::new(None::)), - max_nodes_per_ip: 1, - chain_sync_enabled: true, - }; - let app = test::init_service( - App::new() - .app_data(Data::new(app_state.clone())) - .route("/nodes", get().to(get_nodes)), - ) - .await; - - // Register older node first - let older_node = Node { - id: "0x32A8dFdA26948728e5351e61d62C190510CF1C88".to_string(), - provider_address: "0x32A8dFdA26948728e5351e61d62C190510CF1C88".to_string(), - ip_address: "127.0.0.1".to_string(), - port: 8080, - compute_pool_id: 0, - ..Default::default() - }; - match app_state.node_store.register_node(older_node).await { - Ok(_) => (), - Err(_) => { - panic!("Error registering node"); - } - } - - // Wait a moment to ensure timestamps are different - thread::sleep(Duration::from_millis(100)); - - // Register newer node - let newer_node = Node { - id: "0x45B8dFdA26948728e5351e61d62C190510CF1C99".to_string(), - provider_address: "0x45B8dFdA26948728e5351e61d62C190510CF1C99".to_string(), - ip_address: "127.0.0.2".to_string(), - port: 8081, - compute_pool_id: 0, - ..Default::default() - }; - match app_state.node_store.register_node(newer_node).await { - Ok(_) => (), - Err(_) => { - panic!("Error registering node"); - } - } - - let req = test::TestRequest::get().uri("/nodes").to_request(); - let resp = test::call_service(&app, req).await; - assert!(resp.status().is_success()); - - let body = test::read_body(resp).await; - let api_response: ApiResponse> = match serde_json::from_slice(&body) { - Ok(response) => response, - Err(_) => panic!("Failed to deserialize response"), - }; - - assert!(api_response.success); 
- assert_eq!(api_response.data.len(), 2); - - // Verify the newer node is first in the list - assert_eq!( - api_response.data[0].id, - "0x45B8dFdA26948728e5351e61d62C190510CF1C99" - ); - assert_eq!( - api_response.data[1].id, - "0x32A8dFdA26948728e5351e61d62C190510CF1C88" - ); - } - - #[actix_web::test] - async fn test_filter_nodes_for_pool() { - // Create test nodes for different pools - let mut nodes = vec![ - DiscoveryNode { - node: Node { - id: "0x1111".to_string(), - provider_address: "0x1111".to_string(), - ip_address: "192.168.1.1".to_string(), - port: 8080, - compute_pool_id: 1, - ..Default::default() - }, - is_validated: true, - is_provider_whitelisted: true, - is_active: true, - is_blacklisted: false, - ..Default::default() - }, - DiscoveryNode { - node: Node { - id: "0x2222".to_string(), - provider_address: "0x2222".to_string(), - ip_address: "192.168.1.2".to_string(), - port: 8080, - compute_pool_id: 1, - ..Default::default() - }, - is_validated: true, - is_provider_whitelisted: true, - is_active: false, - is_blacklisted: false, - ..Default::default() - }, - ]; - - // Pool 2 nodes - nodes.push(DiscoveryNode { - node: Node { - id: "0x3333".to_string(), - provider_address: "0x3333".to_string(), - ip_address: "192.168.1.3".to_string(), - port: 8080, - compute_pool_id: 2, - ..Default::default() - }, - is_validated: true, - is_provider_whitelisted: true, - is_active: true, - is_blacklisted: false, - ..Default::default() - }); - - // Node with same IP in different pools (active in pool 3) - nodes.push(DiscoveryNode { - node: Node { - id: "0x4444".to_string(), - provider_address: "0x4444".to_string(), - ip_address: "192.168.1.4".to_string(), - port: 8080, - compute_pool_id: 3, - ..Default::default() - }, - is_validated: true, - is_provider_whitelisted: true, - is_active: true, - is_blacklisted: false, - ..Default::default() - }); - - // This node should be filtered out because it shares IP with an active node in pool 3 - nodes.push(DiscoveryNode { - node: Node 
{ - id: "0x5555".to_string(), - provider_address: "0x5555".to_string(), - ip_address: "192.168.1.4".to_string(), - port: 8081, - compute_pool_id: 1, - ..Default::default() - }, - is_validated: true, - is_provider_whitelisted: true, - is_active: false, - is_blacklisted: false, - ..Default::default() - }); - - // Test filtering for pool 1 - let filtered_nodes = filter_nodes_for_pool(nodes.clone(), 1); - - // Should have 2 nodes from pool 1, but one is filtered out due to IP conflict - assert_eq!(filtered_nodes.len(), 2); - assert!(filtered_nodes.iter().any(|n| n.id == "0x1111")); - assert!(filtered_nodes.iter().any(|n| n.id == "0x2222")); - assert!(!filtered_nodes.iter().any(|n| n.id == "0x5555")); - - // Test filtering for pool 2 - let filtered_nodes = filter_nodes_for_pool(nodes.clone(), 2); - assert_eq!(filtered_nodes.len(), 1); - assert!(filtered_nodes.iter().any(|n| n.id == "0x3333")); - - // Test filtering for pool 3 - let filtered_nodes = filter_nodes_for_pool(nodes.clone(), 3); - assert_eq!(filtered_nodes.len(), 1); - assert!(filtered_nodes.iter().any(|n| n.id == "0x4444")); - - // Test filtering for non-existent pool - let filtered_nodes = filter_nodes_for_pool(nodes.clone(), 99); - assert_eq!(filtered_nodes.len(), 0); - } - - #[actix_web::test] - async fn test_filter_nodes_for_pool_with_inactive_nodes() { - let nodes = vec![ - // Inactive node in pool 1 - DiscoveryNode { - node: Node { - id: "0x1111".to_string(), - provider_address: "0x1111".to_string(), - ip_address: "192.168.1.1".to_string(), - port: 8080, - compute_pool_id: 1, - ..Default::default() - }, - is_validated: true, - is_provider_whitelisted: true, - is_active: false, - is_blacklisted: false, - ..Default::default() - }, - // Inactive node in pool 2 with same IP - DiscoveryNode { - node: Node { - id: "0x2222".to_string(), - provider_address: "0x2222".to_string(), - ip_address: "192.168.1.1".to_string(), - port: 8080, - compute_pool_id: 2, - ..Default::default() - }, - is_validated: true, - 
is_provider_whitelisted: true, - is_active: false, - is_blacklisted: false, - ..Default::default() - }, - ]; - - // This should be included in pool 2 results since the conflicting node in pool 1 - // doesn't affect it (the filter only excludes nodes when there's an active node - // with the same IP in a different pool) - let filtered_nodes = filter_nodes_for_pool(nodes.clone(), 2); - assert_eq!(filtered_nodes.len(), 1); - assert!(filtered_nodes.iter().any(|n| n.id == "0x2222")); - - // The pool 1 node should be included in pool 1 results - let filtered_nodes = filter_nodes_for_pool(nodes.clone(), 1); - assert_eq!(filtered_nodes.len(), 1); - assert!(filtered_nodes.iter().any(|n| n.id == "0x1111")); - } -} diff --git a/crates/discovery/src/api/routes/mod.rs b/crates/discovery/src/api/routes/mod.rs deleted file mode 100644 index c25479e5..00000000 --- a/crates/discovery/src/api/routes/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub(crate) mod get_nodes; -pub(crate) mod node; diff --git a/crates/discovery/src/api/routes/node.rs b/crates/discovery/src/api/routes/node.rs deleted file mode 100644 index b2cf780f..00000000 --- a/crates/discovery/src/api/routes/node.rs +++ /dev/null @@ -1,772 +0,0 @@ -use crate::api::server::AppState; -use actix_web::{ - web::{self, put, Data}, - HttpResponse, Scope, -}; -use alloy::primitives::U256; -use log::warn; -use shared::models::api::ApiResponse; -use shared::models::node::{ComputeRequirements, Node}; -use std::str::FromStr; - -pub(crate) async fn register_node( - node: web::Json, - data: Data, - req: actix_web::HttpRequest, -) -> HttpResponse { - // Check for the x-address header - let address_str = match req.headers().get("x-address") { - Some(address) => match address.to_str() { - Ok(addr) => addr.to_string(), - Err(_) => { - return HttpResponse::BadRequest() - .json(ApiResponse::new(false, "Invalid x-address header")) - } - }, - None => { - return HttpResponse::BadRequest() - .json(ApiResponse::new(false, "Missing x-address header")) - 
} - }; - - if address_str != node.id { - return HttpResponse::BadRequest() - .json(ApiResponse::new(false, "Invalid x-address header")); - } - - let update_node = node.clone(); - let existing_node = data.node_store.get_node(update_node.id.clone()).await; - if let Ok(Some(existing_node)) = existing_node { - // Node already exists - check if it's active in a pool - if existing_node.is_active { - if existing_node.node == update_node { - log::info!("Node {} is already active in a pool", update_node.id); - return HttpResponse::Ok() - .json(ApiResponse::new(true, "Node registered successfully")); - } - // Temp. adjustment: The gpu object has changed and includes a vec of indices now. - // This now causes the discovery svc to reject nodes that have just updated their software. - // This is a temporary fix to ensure the node is accepted even though the indices are different. - let mut existing_clone = existing_node.node.clone(); - existing_clone.worker_p2p_id = update_node.worker_p2p_id.clone(); - existing_clone.worker_p2p_addresses = update_node.worker_p2p_addresses.clone(); - match &update_node.compute_specs { - Some(compute_specs) => { - if let Some(ref mut existing_compute_specs) = existing_clone.compute_specs { - match &compute_specs.gpu { - Some(gpu_specs) => { - existing_compute_specs.gpu = Some(gpu_specs.clone()); - existing_compute_specs.storage_gb = compute_specs.storage_gb; - existing_compute_specs.storage_path = - compute_specs.storage_path.clone(); - } - None => { - existing_compute_specs.gpu = None; - } - } - } - } - None => { - existing_clone.compute_specs = None; - } - } - - if existing_clone == update_node { - log::info!("Node {} is already active in a pool", update_node.id); - return HttpResponse::Ok() - .json(ApiResponse::new(true, "Node registered successfully")); - } - - warn!( - "Node {} tried to change discovery but is already active in a pool", - update_node.id - ); - // Node is currently active in pool - cannot be updated - // Did the user actually 
change node information? - return HttpResponse::BadRequest().json(ApiResponse::new( - false, - "Node is currently active in pool - cannot be updated", - )); - } - } - - let active_nodes_count = data - .node_store - .count_active_nodes_by_ip(update_node.ip_address.clone()) - .await; - - if let Ok(count) = active_nodes_count { - let existing_node_by_ip = data - .node_store - .get_active_node_by_ip(update_node.ip_address.clone()) - .await; - - let is_existing_node = existing_node_by_ip - .map(|result| { - result - .map(|node| node.id == update_node.id) - .unwrap_or(false) - }) - .unwrap_or(false); - - let effective_count = if is_existing_node { count - 1 } else { count }; - - if effective_count >= data.max_nodes_per_ip { - warn!( - "Node {} registration would exceed IP limit. Current active nodes on IP {}: {}, max allowed: {}", - update_node.id, update_node.ip_address, count, data.max_nodes_per_ip - ); - return HttpResponse::BadRequest().json(ApiResponse::new( - false, - &format!( - "IP address {} already has {} active nodes (max allowed: {})", - update_node.ip_address, count, data.max_nodes_per_ip - ), - )); - } - } - - if let Some(contracts) = data.contracts.clone() { - let Ok(provider_address) = node.provider_address.parse() else { - return HttpResponse::BadRequest() - .json(ApiResponse::new(false, "Invalid provider address format")); - }; - - let Ok(node_id) = node.id.parse() else { - return HttpResponse::BadRequest() - .json(ApiResponse::new(false, "Invalid node ID format")); - }; - - if contracts - .compute_registry - .get_node(provider_address, node_id) - .await - .is_err() - { - return HttpResponse::BadRequest().json(ApiResponse::new( - false, - "Node not found in compute registry", - )); - } - - // Check if node meets the pool's compute requirements - match contracts - .compute_pool - .get_pool_info(U256::from(node.compute_pool_id)) - .await - { - Ok(pool_info) => { - if let Ok(required_specs) = ComputeRequirements::from_str(&pool_info.pool_data_uri) - { - if 
let Some(ref compute_specs) = node.compute_specs { - if !compute_specs.meets(&required_specs) { - log::info!( - "Node {} does not meet compute requirements for pool {}", - node.id, - node.compute_pool_id - ); - return HttpResponse::BadRequest().json(ApiResponse::new( - false, - "Node does not meet the compute requirements for this pool", - )); - } - } else { - log::info!("Node specs not provided for node {}", node.id); - return HttpResponse::BadRequest().json(ApiResponse::new( - false, - "Cannot verify compute requirements: node specs not provided", - )); - } - } else { - log::info!( - "Could not parse compute requirements from pool data URI: {}", - &pool_info.pool_data_uri - ); - } - } - Err(e) => { - log::info!( - "Failed to get pool information for pool ID {}: {:?}", - node.compute_pool_id, - e - ); - return HttpResponse::BadRequest() - .json(ApiResponse::new(false, "Failed to get pool information")); - } - } - } - - let node_store = data.node_store.clone(); - - match node_store.register_node(node.into_inner()).await { - Ok(_) => HttpResponse::Ok().json(ApiResponse::new(true, "Node registered successfully")), - Err(_) => HttpResponse::InternalServerError() - .json(ApiResponse::new(false, "Internal server error")), - } -} - -pub(crate) fn node_routes() -> Scope { - web::scope("/api/nodes").route("", put().to(register_node)) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::store::node_store::NodeStore; - use crate::store::redis::RedisStore; - use actix_web::http::StatusCode; - use actix_web::test; - use actix_web::App; - use shared::models::node::{ComputeSpecs, CpuSpecs, DiscoveryNode, GpuSpecs}; - use shared::security::auth_signature_middleware::{ValidateSignature, ValidatorState}; - use shared::security::request_signer::sign_request_with_nonce; - use shared::web3::wallet::Wallet; - use std::sync::Arc; - use std::time::SystemTime; - use tokio::sync::Mutex; - use url::Url; - - #[actix_web::test] - async fn test_register_node() { - let node = Node { - 
id: "0x32A8dFdA26948728e5351e61d62C190510CF1C88".to_string(), - provider_address: "0x32A8dFdA26948728e5351e61d62C190510CF1C88".to_string(), - ip_address: "127.0.0.1".to_string(), - port: 8089, - compute_pool_id: 0, - ..Default::default() - }; - - let app_state = AppState { - node_store: Arc::new(NodeStore::new(RedisStore::new_test())), - contracts: None, - last_chain_sync: Arc::new(Mutex::new(None::)), - max_nodes_per_ip: 1, - chain_sync_enabled: true, - }; - - let app = test::init_service( - App::new() - .app_data(Data::new(app_state.clone())) - .route("/nodes", put().to(register_node)), - ) - .await; - - let json = serde_json::to_value(node.clone()).unwrap(); - let req = test::TestRequest::put() - .uri("/nodes") - .set_json(json) - .insert_header(("x-address", "wrong_address")) // Set header to an incorrect address - .to_request(); - let resp = test::call_service(&app, req).await; - assert_eq!(resp.status(), StatusCode::BAD_REQUEST); // Expecting a Bad Request response - - let body: ApiResponse = test::read_body_json(resp).await; - assert!(!body.success); - assert_eq!(body.data, "Invalid x-address header"); // Expecting the appropriate error message - } - - #[actix_web::test] - async fn test_register_node_already_validated() { - let private_key = "0000000000000000000000000000000000000000000000000000000000000001"; - let node = Node { - id: "0x7E5F4552091A69125d5DfCb7b8C2659029395Bdf".to_string(), - provider_address: "0x32A8dFdA26948728e5351e61d62C190510CF1C88".to_string(), - ip_address: "127.0.0.1".to_string(), - port: 8089, - compute_pool_id: 0, - compute_specs: Some(ComputeSpecs { - gpu: Some(GpuSpecs { - count: Some(4), - model: Some("A100".to_string()), - memory_mb: Some(40000), - indices: Some(vec![0, 1, 2, 3]), - }), - cpu: Some(CpuSpecs { - cores: Some(16), - model: None, - }), - ram_mb: Some(64000), - storage_gb: Some(500), - ..Default::default() - }), - ..Default::default() - }; - - let node_clone_for_recall = node.clone(); - - let app_state = AppState { 
- node_store: Arc::new(NodeStore::new(RedisStore::new_test())), - contracts: None, - last_chain_sync: Arc::new(Mutex::new(None::)), - max_nodes_per_ip: 1, - chain_sync_enabled: true, - }; - - let validate_signatures = - Arc::new(ValidatorState::new(vec![]).with_validator(move |_| true)); - let app = test::init_service( - App::new() - .app_data(Data::new(app_state.clone())) - .route("/nodes", put().to(register_node)) - .wrap(ValidateSignature::new(validate_signatures.clone())), - ) - .await; - - let json = serde_json::to_value(node.clone()).unwrap(); - let signed_request = sign_request_with_nonce( - "/nodes", - &Wallet::new(private_key, Url::parse("http://localhost:8080").unwrap()).unwrap(), - Some(&json), - ) - .await - .unwrap(); - - let req = test::TestRequest::put() - .uri("/nodes") - .set_json(signed_request.data.as_ref().unwrap()) - .insert_header(("x-address", node.id.clone())) - .insert_header(("x-signature", signed_request.signature)) - .to_request(); - - let resp = test::call_service(&app, req).await; - assert_eq!(resp.status(), StatusCode::OK); - - let body: ApiResponse = test::read_body_json(resp).await; - assert!(body.success); - assert_eq!(body.data, "Node registered successfully"); - - let nodes = app_state.node_store.get_nodes().await; - match nodes { - Ok(nodes) => { - assert_eq!(nodes.len(), 1); - assert_eq!(nodes[0].id, node.id); - } - Err(_) => { - unreachable!("Error getting nodes"); - } - } - let validated = DiscoveryNode { - node, - is_validated: true, - is_active: true, - is_provider_whitelisted: false, - is_blacklisted: false, - last_updated: None, - created_at: None, - location: None, - latest_balance: None, - }; - - match app_state.node_store.update_node(validated).await { - Ok(_) => (), - Err(_) => { - unreachable!("Error updating node"); - } - } - - let nodes = app_state.node_store.get_nodes().await; - match nodes { - Ok(nodes) => { - assert_eq!(nodes.len(), 1); - assert_eq!(nodes[0].id, node_clone_for_recall.id); - 
assert!(nodes[0].is_validated); - assert!(nodes[0].is_active); - } - Err(_) => { - unreachable!("Error getting nodes"); - } - } - - let json = serde_json::to_value(node_clone_for_recall.clone()).unwrap(); - let signed_request = sign_request_with_nonce( - "/nodes", - &Wallet::new(private_key, Url::parse("http://localhost:8080").unwrap()).unwrap(), - Some(&json), - ) - .await - .unwrap(); - - let req = test::TestRequest::put() - .uri("/nodes") - .set_json(signed_request.data.as_ref().unwrap()) - .insert_header(("x-address", node_clone_for_recall.id.clone())) - .insert_header(("x-signature", signed_request.signature)) - .to_request(); - - let resp = test::call_service(&app, req).await; - assert_eq!(resp.status(), StatusCode::OK); - - let nodes = app_state.node_store.get_nodes().await; - match nodes { - Ok(nodes) => { - assert_eq!(nodes.len(), 1); - assert_eq!(nodes[0].id, node_clone_for_recall.id); - assert!(nodes[0].is_validated); - assert!(nodes[0].is_active); - } - Err(_) => { - unreachable!("Error getting nodes"); - } - } - } - - #[actix_web::test] - async fn test_register_node_with_correct_signature() { - let private_key = "0000000000000000000000000000000000000000000000000000000000000001"; - let node = Node { - id: "0x7E5F4552091A69125d5DfCb7b8C2659029395Bdf".to_string(), - provider_address: "0x32A8dFdA26948728e5351e61d62C190510CF1C88".to_string(), - ip_address: "127.0.0.1".to_string(), - port: 8089, - compute_pool_id: 0, - ..Default::default() - }; - - let app_state = AppState { - node_store: Arc::new(NodeStore::new(RedisStore::new_test())), - contracts: None, - last_chain_sync: Arc::new(Mutex::new(None::)), - max_nodes_per_ip: 1, - chain_sync_enabled: true, - }; - - let validate_signatures = - Arc::new(ValidatorState::new(vec![]).with_validator(move |_| true)); - let app = test::init_service( - App::new() - .app_data(Data::new(app_state.clone())) - .route("/nodes", put().to(register_node)) - .wrap(ValidateSignature::new(validate_signatures.clone())), - ) - 
.await; - - let json = serde_json::to_value(node.clone()).unwrap(); - let signed_request = sign_request_with_nonce( - "/nodes", - &Wallet::new(private_key, Url::parse("http://localhost:8080").unwrap()).unwrap(), - Some(&json), - ) - .await - .unwrap(); - - let req = test::TestRequest::put() - .uri("/nodes") - .set_json(signed_request.data.as_ref().unwrap()) - .insert_header(("x-address", node.id.clone())) - .insert_header(("x-signature", signed_request.signature)) - .to_request(); - - let resp = test::call_service(&app, req).await; - assert_eq!(resp.status(), StatusCode::OK); - - let body: ApiResponse = test::read_body_json(resp).await; - assert!(body.success); - assert_eq!(body.data, "Node registered successfully"); - - let nodes = app_state.node_store.get_nodes().await; - let nodes = match nodes { - Ok(nodes) => nodes, - Err(_) => { - panic!("Error getting nodes"); - } - }; - assert_eq!(nodes.len(), 1); - assert_eq!(nodes[0].id, node.id); - assert_eq!(nodes[0].last_updated, None); - assert_ne!(nodes[0].created_at, None); - } - - #[actix_web::test] - async fn test_register_node_with_incorrect_signature() { - let private_key = "0000000000000000000000000000000000000000000000000000000000000001"; - let node = Node { - id: "0x7E5F4552091A69125d5DfCb7b8C2659029395Bdd".to_string(), - provider_address: "0x32A8dFdA26948728e5351e61d62C190510CF1C88".to_string(), - ip_address: "127.0.0.1".to_string(), - port: 8089, - compute_pool_id: 0, - ..Default::default() - }; - - let app_state = AppState { - node_store: Arc::new(NodeStore::new(RedisStore::new_test())), - contracts: None, - last_chain_sync: Arc::new(Mutex::new(None::)), - max_nodes_per_ip: 1, - chain_sync_enabled: true, - }; - - let validate_signatures = - Arc::new(ValidatorState::new(vec![]).with_validator(move |_| true)); - let app = test::init_service( - App::new() - .app_data(Data::new(app_state.clone())) - .route("/nodes", put().to(register_node)) - .wrap(ValidateSignature::new(validate_signatures.clone())), - ) - 
.await; - - let json = serde_json::to_value(node.clone()).unwrap(); - let signed_request = sign_request_with_nonce( - "/nodes", - &Wallet::new(private_key, Url::parse("http://localhost:8080").unwrap()).unwrap(), - Some(&json), - ) - .await - .unwrap(); - - let req = test::TestRequest::put() - .uri("/nodes") - .set_json(signed_request.data.as_ref().unwrap()) - .insert_header(("x-address", "0x7E5F4552091A69125d5DfCb7b8C2659029395Bdf")) - .insert_header(("x-signature", signed_request.signature)) - .to_request(); - - let resp = test::call_service(&app, req).await; - assert_eq!(resp.status(), StatusCode::BAD_REQUEST); - } - - #[actix_web::test] - async fn test_register_node_already_active_in_pool() { - let private_key = "0000000000000000000000000000000000000000000000000000000000000001"; - let mut node = Node { - id: "0x7E5F4552091A69125d5DfCb7b8C2659029395Bdf".to_string(), - provider_address: "0x32A8dFdA26948728e5351e61d62C190510CF1C88".to_string(), - ip_address: "127.0.0.1".to_string(), - port: 8089, - compute_pool_id: 0, - compute_specs: Some(ComputeSpecs { - gpu: Some(GpuSpecs { - count: Some(4), - model: Some("A100".to_string()), - memory_mb: Some(40000), - indices: None, - }), - cpu: Some(CpuSpecs { - cores: Some(16), - model: None, - }), - ram_mb: Some(64000), - storage_gb: Some(500), - ..Default::default() - }), - ..Default::default() - }; - - let app_state = AppState { - node_store: Arc::new(NodeStore::new(RedisStore::new_test())), - contracts: None, - last_chain_sync: Arc::new(Mutex::new(None::)), - max_nodes_per_ip: 1, - chain_sync_enabled: true, - }; - - app_state - .node_store - .register_node(node.clone()) - .await - .unwrap(); - - node.compute_specs.as_mut().unwrap().storage_gb = Some(300); - node.compute_specs - .as_mut() - .unwrap() - .gpu - .as_mut() - .unwrap() - .indices = Some(vec![0, 1, 2, 3]); - - let validate_signatures = - Arc::new(ValidatorState::new(vec![]).with_validator(move |_| true)); - let app = test::init_service( - App::new() - 
.app_data(Data::new(app_state.clone())) - .route("/nodes", put().to(register_node)) - .wrap(ValidateSignature::new(validate_signatures.clone())), - ) - .await; - - let json = serde_json::to_value(node.clone()).unwrap(); - let signed_request = sign_request_with_nonce( - "/nodes", - &Wallet::new(private_key, Url::parse("http://localhost:8080").unwrap()).unwrap(), - Some(&json), - ) - .await - .unwrap(); - - let req = test::TestRequest::put() - .uri("/nodes") - .set_json(signed_request.data.as_ref().unwrap()) - .insert_header(("x-address", node.id.clone())) - .insert_header(("x-signature", signed_request.signature)) - .to_request(); - - let resp = test::call_service(&app, req).await; - assert_eq!(resp.status(), StatusCode::OK); - - let body: ApiResponse = test::read_body_json(resp).await; - assert!(body.success); - assert_eq!(body.data, "Node registered successfully"); - - let nodes = app_state.node_store.get_nodes().await; - let nodes = match nodes { - Ok(nodes) => nodes, - Err(_) => { - panic!("Error getting nodes"); - } - }; - assert_eq!(nodes.len(), 1); - assert_eq!(nodes[0].id, node.id); - } - - #[actix_web::test] - async fn test_register_node_with_max_nodes_per_ip() { - let private_key = "0000000000000000000000000000000000000000000000000000000000000001"; - let private_key_2 = "0000000000000000000000000000000000000000000000000000000000000002"; - let private_key_3 = "0000000000000000000000000000000000000000000000000000000000000003"; - - let node1 = Node { - id: "0x7E5F4552091A69125d5DfCb7b8C2659029395Bdf".to_string(), - provider_address: "0x32A8dFdA26948728e5351e61d62C190510CF1C88".to_string(), - ip_address: "127.0.0.1".to_string(), - port: 8089, - compute_pool_id: 0, - ..Default::default() - }; - - let node2 = Node { - id: "0x2546BcD3c84621e976D8185a91A922aE77ECEc30".to_string(), - provider_address: "0x2546BcD3c84621e976D8185a91A922aE77ECEc30".to_string(), - ip_address: "127.0.0.1".to_string(), - port: 8090, - compute_pool_id: 0, - ..Default::default() - }; - - 
let node3 = Node { - id: "0x3C44CdDdB6a900fa2b585dd299e03d12FA4293BC".to_string(), - provider_address: "0x3C44CdDdB6a900fa2b585dd299e03d12FA4293BC".to_string(), - ip_address: "127.0.0.1".to_string(), - port: 8091, - compute_pool_id: 0, - ..Default::default() - }; - - let app_state = AppState { - node_store: Arc::new(NodeStore::new(RedisStore::new_test())), - contracts: None, - last_chain_sync: Arc::new(Mutex::new(None::)), - max_nodes_per_ip: 2, - chain_sync_enabled: true, - }; - - let app = test::init_service( - App::new() - .app_data(Data::new(app_state.clone())) - .route("/nodes", put().to(register_node)), - ) - .await; - - // Register first node - should succeed - let json1 = serde_json::to_value(node1.clone()).unwrap(); - let signature1 = sign_request_with_nonce( - "/nodes", - &Wallet::new(private_key, Url::parse("http://localhost:8080").unwrap()).unwrap(), - Some(&json1), - ) - .await - .unwrap(); - - let req1 = test::TestRequest::put() - .uri("/nodes") - .set_json(signature1.data) - .insert_header(("x-address", node1.id.clone())) - .insert_header(("x-signature", signature1.signature)) - .to_request(); - - let resp1 = test::call_service(&app, req1).await; - assert_eq!(resp1.status(), StatusCode::OK); - - // Try to register same node again - should succeed (update) - let json1_duplicate = serde_json::to_value(node1.clone()).unwrap(); - let signature1_duplicate = sign_request_with_nonce( - "/nodes", - &Wallet::new(private_key, Url::parse("http://localhost:8080").unwrap()).unwrap(), - Some(&json1_duplicate), - ) - .await - .unwrap(); - - let req1_duplicate = test::TestRequest::put() - .uri("/nodes") - .set_json(signature1_duplicate.data) - .insert_header(("x-address", node1.id.clone())) - .insert_header(("x-signature", signature1_duplicate.signature)) - .to_request(); - - let resp1_duplicate = test::call_service(&app, req1_duplicate).await; - assert_eq!(resp1_duplicate.status(), StatusCode::OK); - - // Register second node with different ID - should succeed - 
let json2 = serde_json::to_value(node2.clone()).unwrap(); - let signature2 = sign_request_with_nonce( - "/nodes", - &Wallet::new(private_key_2, Url::parse("http://localhost:8080").unwrap()).unwrap(), - Some(&json2), - ) - .await - .unwrap(); - - let req2 = test::TestRequest::put() - .uri("/nodes") - .set_json(signature2.data) - .insert_header(("x-address", node2.id.clone())) - .insert_header(("x-signature", signature2.signature)) - .to_request(); - - let resp2 = test::call_service(&app, req2).await; - assert_eq!(resp2.status(), StatusCode::OK); - - // Make node 1 and two active - let mut node1_active = DiscoveryNode::from(node1.clone()); - node1_active.is_active = true; - app_state - .node_store - .update_node(node1_active) - .await - .unwrap(); - let mut node2_active = DiscoveryNode::from(node2.clone()); - node2_active.is_active = true; - app_state - .node_store - .update_node(node2_active) - .await - .unwrap(); - - // Register third node - should fail (exceeds max_nodes_per_ip) - let json3 = serde_json::to_value(node3.clone()).unwrap(); - let signature3 = sign_request_with_nonce( - "/nodes", - &Wallet::new(private_key_3, Url::parse("http://localhost:8080").unwrap()).unwrap(), - Some(&json3), - ) - .await - .unwrap(); - - let req3 = test::TestRequest::put() - .uri("/nodes") - .set_json(signature3.data) - .insert_header(("x-address", node3.id.clone())) - .insert_header(("x-signature", signature3.signature)) - .to_request(); - - let resp3 = test::call_service(&app, req3).await; - assert_eq!(resp3.status(), StatusCode::BAD_REQUEST); - - // Verify only 2 nodes are registered - let nodes = app_state.node_store.get_nodes().await.unwrap(); - assert_eq!(nodes.len(), 2); - } -} diff --git a/crates/discovery/src/api/server.rs b/crates/discovery/src/api/server.rs deleted file mode 100644 index 13c5303a..00000000 --- a/crates/discovery/src/api/server.rs +++ /dev/null @@ -1,164 +0,0 @@ -use crate::api::routes::get_nodes::{get_node_by_subkey, get_nodes, get_nodes_for_pool}; 
-use crate::api::routes::node::node_routes; -use crate::store::node_store::NodeStore; -use crate::store::redis::RedisStore; -use actix_web::middleware::{Compress, NormalizePath, TrailingSlash}; -use actix_web::HttpResponse; -use actix_web::{ - middleware, - web::Data, - web::{self, get}, - App, HttpServer, -}; -use alloy::providers::RootProvider; -use log::{error, info, warn}; -use serde_json::json; -use shared::security::api_key_middleware::ApiKeyMiddleware; -use shared::security::auth_signature_middleware::{ValidateSignature, ValidatorState}; -use shared::web3::contracts::core::builder::Contracts; -use std::sync::Arc; -use std::time::{Duration, SystemTime}; -use tokio::sync::Mutex; - -#[derive(Clone)] -pub(crate) struct AppState { - pub node_store: Arc, - pub contracts: Option>, - pub last_chain_sync: Arc>>, - pub max_nodes_per_ip: u32, - pub chain_sync_enabled: bool, -} - -async fn health_check(app_state: web::Data) -> HttpResponse { - if app_state.chain_sync_enabled { - let sync_status = { - let last_sync_guard = app_state.last_chain_sync.lock().await; - match *last_sync_guard { - Some(last_sync) => { - if let Ok(elapsed) = last_sync.elapsed() { - if elapsed > Duration::from_secs(60) { - warn!( - "Health check: Chain sync is delayed. Last sync was {} seconds ago", - elapsed.as_secs() - ); - Some(elapsed) - } else { - None - } - } else { - warn!("Health check: Unable to determine elapsed time since last sync"); - Some(Duration::from_secs(u64::MAX)) - } - } - None => { - warn!("Health check: Chain sync has not occurred yet"); - Some(Duration::from_secs(u64::MAX)) - } - } - }; - - if let Some(elapsed) = sync_status { - return HttpResponse::ServiceUnavailable().json(json!({ - "status": "error", - "service": "discovery", - "message": format!("Chain sync is delayed. 
Last sync was {} seconds ago", elapsed.as_secs()) - })); - } - } - - HttpResponse::Ok().json(json!({ - "status": "ok", - "service": "discovery", - "chain_sync_enabled": app_state.chain_sync_enabled - })) -} - -#[allow(clippy::too_many_arguments)] -pub async fn start_server( - host: &str, - port: u16, - node_store: Arc, - redis_store: Arc, - contracts: Contracts, - platform_api_key: String, - last_chain_sync: Arc>>, - max_nodes_per_ip: u32, - chain_sync_enabled: bool, -) -> std::io::Result<()> { - info!("Starting server at http://{host}:{port}"); - - let validators = match contracts.prime_network.get_validator_role().await { - Ok(validators) => validators, - Err(e) => { - error!("❌ Failed to get validator role: {e}"); - std::process::exit(1); - } - }; - - let app_state = AppState { - node_store, - contracts: Some(contracts), - last_chain_sync, - max_nodes_per_ip, - chain_sync_enabled, - }; - - let validator_validator = Arc::new( - ValidatorState::new(validators) - .with_redis(redis_store.client.clone()) - .await - .map_err(|e| { - std::io::Error::other(format!("Failed to initialize Redis connection pool: {e}")) - })?, - ); - let validate_signatures = Arc::new( - ValidatorState::new(vec![]) - .with_redis(redis_store.client.clone()) - .await - .map_err(|e| { - std::io::Error::other(format!("Failed to initialize Redis connection pool: {e}")) - })? 
- .with_validator(move |_| true), - ); - let api_key_middleware = Arc::new(ApiKeyMiddleware::new(platform_api_key)); - - HttpServer::new(move || { - App::new() - .wrap(middleware::Logger::default()) - .wrap(Compress::default()) - .wrap(NormalizePath::new(TrailingSlash::Trim)) - .app_data(Data::new(app_state.clone())) - .app_data(web::PayloadConfig::default().limit(2_097_152)) - .route("/health", web::get().to(health_check)) - .service( - web::scope("/api/platform") - .wrap(api_key_middleware.clone()) - .route("", get().to(get_nodes)), - ) - .service( - web::scope("/api/nodes/{node_id}") - .wrap(api_key_middleware.clone()) - .route("", get().to(get_node_by_subkey)), - ) - .service( - web::scope("/api/validator") - .wrap(ValidateSignature::new(validator_validator.clone())) - .route("", web::get().to(get_nodes)), - ) - .service( - web::scope("/api/pool/{pool_id}") - .wrap(ValidateSignature::new(validate_signatures.clone())) - .route("", get().to(get_nodes_for_pool)), - ) - .service(node_routes().wrap(ValidateSignature::new(validate_signatures.clone()))) - .default_service(web::route().to(|| async { - HttpResponse::NotFound().json(json!({ - "success": false, - "error": "Resource not found" - })) - })) - }) - .bind((host, port))? 
- .run() - .await -} diff --git a/crates/discovery/src/chainsync/mod.rs b/crates/discovery/src/chainsync/mod.rs deleted file mode 100644 index b017b71e..00000000 --- a/crates/discovery/src/chainsync/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -mod sync; - -pub use sync::ChainSync; diff --git a/crates/discovery/src/chainsync/sync.rs b/crates/discovery/src/chainsync/sync.rs deleted file mode 100644 index 1120d3cb..00000000 --- a/crates/discovery/src/chainsync/sync.rs +++ /dev/null @@ -1,222 +0,0 @@ -use crate::store::node_store::NodeStore; -use alloy::primitives::Address; -use alloy::providers::Provider as _; -use alloy::providers::RootProvider; -use anyhow::Error; -use futures::stream::{self, StreamExt}; -use log::{debug, error, info, warn}; -use shared::models::node::DiscoveryNode; -use shared::web3::contracts::core::builder::Contracts; -use std::str::FromStr; -use std::sync::Arc; -use std::time::{Duration, SystemTime}; -use tokio::sync::Mutex; -use tokio_util::sync::CancellationToken; - -const MAX_CONCURRENT_SYNCS: usize = 50; - -pub struct ChainSync { - pub node_store: Arc, - cancel_token: CancellationToken, - chain_sync_interval: Duration, - provider: RootProvider, - contracts: Contracts, - last_chain_sync: Arc>>, -} - -impl ChainSync { - pub fn new( - node_store: Arc, - cancellation_token: CancellationToken, - chain_sync_interval: Duration, - provider: RootProvider, - contracts: Contracts, - last_chain_sync: Arc>>, - ) -> Self { - Self { - node_store, - cancel_token: cancellation_token, - chain_sync_interval, - provider, - contracts, - last_chain_sync, - } - } - - pub fn run(self) -> Result<(), Error> { - let ChainSync { - node_store, - cancel_token, - chain_sync_interval, - last_chain_sync, - provider, - contracts, - } = self; - - tokio::spawn(async move { - let mut interval = tokio::time::interval(chain_sync_interval); - info!( - "Chain sync started with {} second interval", - chain_sync_interval.as_secs() - ); - - loop { - tokio::select! 
{ - _ = interval.tick() => { - let sync_start = SystemTime::now(); - info!("Starting chain sync cycle"); - - let nodes = node_store.get_nodes().await; - match nodes { - Ok(nodes) => { - let total_nodes = nodes.len(); - info!("Syncing {total_nodes} nodes"); - - // Process nodes in parallel with concurrency limit - let results: Vec> = stream::iter(nodes) - .map(|node| { - let node_store = node_store.clone(); - let provider = provider.clone(); - let contracts = contracts.clone(); - async move { - sync_single_node(node_store, provider, contracts, node).await - } - }) - .buffer_unordered(MAX_CONCURRENT_SYNCS) - .collect() - .await; - - // Count successes and failures - let mut success_count = 0; - let mut failure_count = 0; - for result in results { - match result { - Ok(_) => success_count += 1, - Err(e) => { - failure_count += 1; - warn!("Node sync failed: {e}"); - } - } - } - - // Update the last chain sync time - let mut last_sync = last_chain_sync.lock().await; - *last_sync = Some(SystemTime::now()); - - let sync_duration = SystemTime::now() - .duration_since(sync_start) - .unwrap_or_default(); - - info!( - "Chain sync completed in {:.2}s: {} successful, {} failed out of {} total nodes", - sync_duration.as_secs_f64(), - success_count, - failure_count, - total_nodes - ); - } - Err(e) => { - error!("Error getting nodes from store: {e}"); - } - } - } - _ = cancel_token.cancelled() => { - info!("Chain sync cancelled, shutting down"); - break; - } - } - } - info!("Chain sync task ended"); - }); - Ok(()) - } -} - -async fn sync_single_node( - node_store: Arc, - provider: RootProvider, - contracts: Contracts, - node: DiscoveryNode, -) -> Result<(), Error> { - let mut n = node.clone(); - - // Safely parse provider_address and node_address - let provider_address = Address::from_str(&node.provider_address).map_err(|e| { - error!( - "Failed to parse provider address '{}': {}", - node.provider_address, e - ); - anyhow::anyhow!("Invalid provider address") - })?; - - let 
node_address = Address::from_str(&node.id).map_err(|e| { - error!("Failed to parse node address '{}': {}", node.id, e); - anyhow::anyhow!("Invalid node address") - })?; - - let balance = provider.get_balance(node_address).await.map_err(|e| { - error!("Error retrieving balance for node {node_address}: {e}"); - anyhow::anyhow!("Failed to retrieve node balance") - })?; - n.latest_balance = Some(balance); - - let node_info = contracts - .compute_registry - .get_node(provider_address, node_address) - .await - .map_err(|e| { - error!( - "Error retrieving node info for provider {provider_address} and node {node_address}: {e}" - ); - anyhow::anyhow!("Failed to retrieve node info") - })?; - - let provider_info = contracts - .compute_registry - .get_provider(provider_address) - .await - .map_err(|e| { - error!("Error retrieving provider info for {provider_address}: {e}"); - anyhow::anyhow!("Failed to retrieve provider info") - })?; - - let (is_active, is_validated) = node_info; - n.is_active = is_active; - n.is_validated = is_validated; - n.is_provider_whitelisted = provider_info.is_whitelisted; - - // Handle potential errors from async calls - let is_blacklisted = contracts - .compute_pool - .is_node_blacklisted(node.node.compute_pool_id, node_address) - .await - .map_err(|e| { - error!( - "Error checking if node {} is blacklisted in pool {}: {}", - node_address, node.node.compute_pool_id, e - ); - anyhow::anyhow!("Failed to check blacklist status") - })?; - n.is_blacklisted = is_blacklisted; - - // Only update if the node has changed - if n.is_active != node.is_active - || n.is_validated != node.is_validated - || n.is_provider_whitelisted != node.is_provider_whitelisted - || n.is_blacklisted != node.is_blacklisted - { - match node_store.update_node(n).await { - Ok(_) => { - debug!("Successfully updated node {}", node.id); - Ok(()) - } - Err(e) => { - error!("Error updating node {}: {}", node.id, e); - Err(anyhow::anyhow!("Failed to update node: {}", e)) - } - } - } else { 
- debug!("Node {} unchanged, skipping update", node.id); - Ok(()) - } -} diff --git a/crates/discovery/src/lib.rs b/crates/discovery/src/lib.rs deleted file mode 100644 index b41bf29f..00000000 --- a/crates/discovery/src/lib.rs +++ /dev/null @@ -1,12 +0,0 @@ -mod api; -mod chainsync; -mod location_enrichment; -mod location_service; -mod store; - -pub use api::server::start_server; -pub use chainsync::ChainSync; -pub use location_enrichment::LocationEnrichmentService; -pub use location_service::LocationService; -pub use store::node_store::NodeStore; -pub use store::redis::RedisStore; diff --git a/crates/discovery/src/location_enrichment.rs b/crates/discovery/src/location_enrichment.rs deleted file mode 100644 index 8810e1c3..00000000 --- a/crates/discovery/src/location_enrichment.rs +++ /dev/null @@ -1,119 +0,0 @@ -use crate::location_service::LocationService; -use crate::store::node_store::NodeStore; -use anyhow::Result; -use log::{error, info, warn}; -use redis::AsyncCommands; -use std::sync::Arc; -use std::time::Duration; -use tokio::time::interval; - -const LOCATION_RETRY_KEY: &str = "location:retries:"; -const MAX_RETRIES: u32 = 3; -const BATCH_SIZE: usize = 10; - -pub struct LocationEnrichmentService { - node_store: Arc, - location_service: Arc, - redis_client: redis::Client, -} - -impl LocationEnrichmentService { - pub fn new( - node_store: Arc, - location_service: Arc, - redis_url: &str, - ) -> Result { - let redis_client = redis::Client::open(redis_url)?; - Ok(Self { - node_store, - location_service, - redis_client, - }) - } - - pub async fn run(&self, interval_seconds: u64) -> Result<()> { - let mut interval = interval(Duration::from_secs(interval_seconds)); - - loop { - interval.tick().await; - - if let Err(e) = self.enrich_nodes_without_location().await { - error!("Location enrichment cycle failed: {e}"); - } - } - } - - async fn enrich_nodes_without_location(&self) -> Result<()> { - let nodes = self.node_store.get_nodes().await?; - let mut conn = 
self.redis_client.get_multiplexed_async_connection().await?; - - let nodes_without_location: Vec<_> = nodes - .into_iter() - .filter(|node| node.location.is_none()) - .collect(); - - if nodes_without_location.is_empty() { - return Ok(()); - } - - info!( - "Found {} nodes without location data", - nodes_without_location.len() - ); - - // Process in batches to respect rate limits - for chunk in nodes_without_location.chunks(BATCH_SIZE) { - for node in chunk { - let retry_key = format!("{}{}", LOCATION_RETRY_KEY, node.id); - let retries: u32 = conn.get(&retry_key).await.unwrap_or(0); - - if retries >= MAX_RETRIES { - continue; // Skip nodes that have exceeded retry limit - } - - match self.location_service.get_location(&node.ip_address).await { - Ok(Some(location)) => { - info!( - "Successfully fetched location for node {}: {:?}", - node.id, location - ); - - let mut updated_node = node.clone(); - updated_node.location = Some(location); - - if let Err(e) = self.node_store.update_node(updated_node).await { - error!("Failed to update node {} with location: {}", node.id, e); - } else { - let _: () = conn.del(&retry_key).await?; - } - } - Ok(None) => { - // Location service is disabled - break; - } - Err(e) => { - warn!( - "Failed to fetch location for node {} (attempt {}/{}): {}", - node.id, - retries + 1, - MAX_RETRIES, - e - ); - - // Increment retry counter - let _: () = conn.set_ex(&retry_key, retries + 1, 86400).await?; - // Expire after 24h - } - } - - // Rate limiting - wait between requests - tokio::time::sleep(Duration::from_millis(100)).await; - } - - // Longer wait between batches - tokio::time::sleep(Duration::from_secs(1)).await; - } - - Ok(()) - } -} diff --git a/crates/discovery/src/location_service.rs b/crates/discovery/src/location_service.rs deleted file mode 100644 index 51a9bd3d..00000000 --- a/crates/discovery/src/location_service.rs +++ /dev/null @@ -1,80 +0,0 @@ -use anyhow::{Context, Result}; -use reqwest::Client; -use serde::{Deserialize, 
Serialize}; -use shared::models::node::NodeLocation; -use std::time::Duration; - -#[derive(Debug, Deserialize, Serialize)] -struct IpApiResponse { - ip: String, - city: Option, - region: Option, - country: Option, - #[serde(default)] - latitude: f64, - #[serde(default)] - longitude: f64, -} - -pub struct LocationService { - client: Client, - base_url: String, - enabled: bool, - api_key: String, -} - -impl LocationService { - pub fn new(base_url: Option, api_key: Option) -> Self { - let enabled = base_url.is_some(); - let base_url = base_url.unwrap_or_else(|| "https://ipapi.co".to_string()); - let api_key = api_key.unwrap_or_default(); - let client = Client::builder() - .timeout(Duration::from_secs(5)) - .build() - .expect("Failed to build HTTP client"); - - Self { - client, - base_url, - enabled, - api_key, - } - } - - pub async fn get_location(&self, ip_address: &str) -> Result> { - if !self.enabled { - return Ok(None); - } - - let url = format!( - "{}/{}/json/?key={}", - self.base_url, ip_address, self.api_key - ); - - let response = self - .client - .get(&url) - .send() - .await - .context("Failed to send request to location service")?; - - let api_response: IpApiResponse = response - .json() - .await - .context("Failed to parse location service response")?; - - Ok(Some(NodeLocation { - latitude: api_response.latitude, - longitude: api_response.longitude, - city: api_response.city, - region: api_response.region, - country: api_response.country, - })) - } -} - -impl Default for LocationService { - fn default() -> Self { - Self::new(None, None) - } -} diff --git a/crates/discovery/src/main.rs b/crates/discovery/src/main.rs deleted file mode 100644 index 142816a5..00000000 --- a/crates/discovery/src/main.rs +++ /dev/null @@ -1,174 +0,0 @@ -use alloy::providers::RootProvider; -use anyhow::Result; -use clap::Parser; -use log::LevelFilter; -use log::{error, info}; -use shared::web3::contracts::core::builder::ContractBuilder; -use std::sync::Arc; -use 
std::time::Duration; -use tokio::sync::Mutex; -use tokio_util::sync::CancellationToken; - -use discovery::{ - start_server, ChainSync, LocationEnrichmentService, LocationService, NodeStore, RedisStore, -}; - -#[derive(Debug, Clone, Copy, PartialEq)] -enum ServiceMode { - Api, - Processor, - Full, -} - -impl std::str::FromStr for ServiceMode { - type Err = String; - - fn from_str(s: &str) -> Result { - match s.to_lowercase().as_str() { - "api" => Ok(ServiceMode::Api), - "processor" => Ok(ServiceMode::Processor), - "full" => Ok(ServiceMode::Full), - _ => Err(format!( - "Invalid mode: {s}. Use 'api', 'processor', or 'full'" - )), - } - } -} - -#[derive(Parser)] -struct Args { - /// RPC URL - #[arg(short = 'r', long, default_value = "http://localhost:8545")] - rpc_url: String, - - /// Platform API key - #[arg(short = 'p', long, default_value = "prime")] - platform_api_key: String, - - /// Redis URL - #[arg(long, default_value = "redis://localhost:6380")] - redis_url: String, - - /// Port - #[arg(short = 'P', long, default_value = "8089")] - port: u16, - - /// Maximum number of nodes allowed per IP address (active state) - #[arg(long, default_value = "1")] - max_nodes_per_ip: u32, - - /// Service mode: api, processor, or full - #[arg(short = 'm', long, default_value = "full")] - mode: ServiceMode, - - /// Location service URL (e.g., https://ipapi.co). If not provided, location services are disabled. 
- #[arg(long)] - location_service_url: Option, - - /// Location service API key - #[arg(long)] - location_service_api_key: Option, -} - -#[tokio::main] -async fn main() -> Result<()> { - env_logger::Builder::new() - .filter_level(LevelFilter::Info) - .format_timestamp(None) - .init(); - - let args = Args::parse(); - - let redis_store = Arc::new(RedisStore::new(&args.redis_url)); - let node_store = Arc::new(NodeStore::new(redis_store.as_ref().clone())); - let Ok(endpoint) = args.rpc_url.parse() else { - return Err(anyhow::anyhow!("invalid RPC URL: {}", args.rpc_url)); - }; - - let provider = RootProvider::new_http(endpoint); - let contracts = ContractBuilder::new(provider.clone()) - .with_compute_registry() - .with_ai_token() - .with_prime_network() - .with_compute_pool() - .with_stake_manager() - .build() - .unwrap(); - - let cancellation_token = CancellationToken::new(); - let last_chain_sync = Arc::new(Mutex::new(None::)); - - info!("Starting discovery service in {:?} mode", args.mode); - - match args.mode { - ServiceMode::Processor | ServiceMode::Full => { - let chain_sync = ChainSync::new( - node_store.clone(), - cancellation_token.clone(), - Duration::from_secs(10), - provider, - contracts.clone(), - last_chain_sync.clone(), - ); - chain_sync.run()?; - - // Start location enrichment service if enabled - if let Some(location_url) = args.location_service_url.clone() { - let location_service = Arc::new(LocationService::new( - Some(location_url), - args.location_service_api_key.clone(), - )); - let location_enrichment = LocationEnrichmentService::new( - node_store.clone(), - location_service, - &args.redis_url, - )?; - - info!("Starting location enrichment service"); - tokio::spawn(async move { - if let Err(e) = location_enrichment.run(30).await { - error!("Location enrichment service failed: {e}"); - } - }); - } - - if let Err(err) = start_server( - "0.0.0.0", - args.port, - node_store, - redis_store, - contracts, - args.platform_api_key, - last_chain_sync, - 
args.max_nodes_per_ip, - true, - ) - .await - { - error!("❌ Failed to start server: {err}"); - } - - tokio::signal::ctrl_c().await?; - cancellation_token.cancel(); - } - ServiceMode::Api => { - if let Err(err) = start_server( - "0.0.0.0", - args.port, - node_store, - redis_store, - contracts, - args.platform_api_key, - last_chain_sync, - args.max_nodes_per_ip, - false, - ) - .await - { - error!("❌ Failed to start server: {err}"); - } - } - } - - Ok(()) -} diff --git a/crates/discovery/src/store/mod.rs b/crates/discovery/src/store/mod.rs deleted file mode 100644 index f44edc35..00000000 --- a/crates/discovery/src/store/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub(crate) mod node_store; -pub(crate) mod redis; diff --git a/crates/discovery/src/store/node_store.rs b/crates/discovery/src/store/node_store.rs deleted file mode 100644 index 0b6c271f..00000000 --- a/crates/discovery/src/store/node_store.rs +++ /dev/null @@ -1,173 +0,0 @@ -use crate::store::redis::RedisStore; -use anyhow::Error; -use log::error; -use redis::AsyncCommands; -use shared::models::node::{DiscoveryNode, Node}; - -pub struct NodeStore { - redis_store: RedisStore, -} - -impl NodeStore { - pub fn new(redis_store: RedisStore) -> Self { - Self { redis_store } - } - - async fn get_connection(&self) -> Result { - self.redis_store - .client - .get_multiplexed_async_connection() - .await - } - - pub async fn get_node(&self, address: String) -> Result, Error> { - let key = format!("node:{address}"); - let mut con = self.get_connection().await?; - let node: Option = con.get(&key).await?; - let node = match node { - Some(node) => serde_json::from_str(&node), - None => Ok(None), - }?; - Ok(node) - } - - pub async fn get_active_node_by_ip(&self, ip: String) -> Result, Error> { - let mut con = self.get_connection().await?; - let node_ids: Vec = con.smembers("node:ids").await?; - - if node_ids.is_empty() { - return Ok(None); - } - - let node_keys: Vec = node_ids.iter().map(|id| format!("node:{id}")).collect(); - let 
serialized_nodes: Vec = - redis::pipe().get(&node_keys).query_async(&mut con).await?; - - for serialized_node in serialized_nodes { - let deserialized_node: DiscoveryNode = serde_json::from_str(&serialized_node)?; - if deserialized_node.ip_address == ip && deserialized_node.is_active { - return Ok(Some(deserialized_node)); - } - } - Ok(None) - } - - pub async fn count_active_nodes_by_ip(&self, ip: String) -> Result { - let mut con = self.get_connection().await?; - let node_ids: Vec = con.smembers("node:ids").await?; - - if node_ids.is_empty() { - return Ok(0); - } - - let node_keys: Vec = node_ids.iter().map(|id| format!("node:{id}")).collect(); - - let mut count = 0; - for key in node_keys { - let serialized_node: Option = con.get(&key).await?; - if let Some(serialized_node) = serialized_node { - let deserialized_node: DiscoveryNode = serde_json::from_str(&serialized_node)?; - if deserialized_node.ip_address == ip && deserialized_node.is_active { - count += 1; - } - } - } - Ok(count) - } - - pub async fn register_node(&self, node: Node) -> Result<(), Error> { - let address = node.id.clone(); - let key = format!("node:{address}"); - - let mut con = self.get_connection().await?; - - if con.exists(&key).await? 
{ - let existing_node = self.get_node(address.clone()).await?; - if let Some(existing_node) = existing_node { - let updated_node = existing_node.with_updated_node(node); - self.update_node(updated_node).await?; - } - } else { - let discovery_node = DiscoveryNode::from(node); - let serialized_node = serde_json::to_string(&discovery_node)?; - - let _: () = redis::pipe() - .atomic() - .set(&key, serialized_node) - .sadd("node:ids", &address) - .query_async(&mut con) - .await?; - } - Ok(()) - } - - pub async fn update_node(&self, node: DiscoveryNode) -> Result<(), Error> { - let mut con = self.get_connection().await?; - let address = node.id.clone(); - let key = format!("node:{address}"); - let serialized_node = serde_json::to_string(&node)?; - - let _: () = redis::pipe() - .atomic() - .set(&key, serialized_node) - .sadd("node:ids", &address) - .query_async(&mut con) - .await?; - - Ok(()) - } - - pub async fn get_nodes(&self) -> Result, Error> { - let mut con = self.get_connection().await?; - let node_ids: Vec = con.smembers("node:ids").await?; - - if node_ids.is_empty() { - return Ok(Vec::new()); - } - - let node_keys: Vec = node_ids.iter().map(|id| format!("node:{id}")).collect(); - - let mut pipe = redis::pipe(); - for key in &node_keys { - pipe.get(key); - } - - let serialized_nodes: Result, redis::RedisError> = - pipe.query_async(&mut con).await; - - let serialized_nodes = match serialized_nodes { - Ok(nodes) => nodes, - Err(e) => { - error!("Error querying nodes from Redis: {e}"); - return Err(e.into()); - } - }; - - let nodes_vec: Result, _> = serialized_nodes - .into_iter() - .map(|serialized_node| serde_json::from_str(&serialized_node)) - .collect(); - let mut nodes_vec = nodes_vec?; - - nodes_vec.sort_by(|a, b| { - let a_time = a.last_updated.or(a.created_at); - let b_time = b.last_updated.or(b.created_at); - b_time.cmp(&a_time) - }); - Ok(nodes_vec) - } - - pub async fn get_node_by_id(&self, node_id: &str) -> Result, Error> { - let mut con = 
self.get_connection().await?; - let key = format!("node:{node_id}"); - - let serialized_node: Option = con.get(&key).await?; - - let serialized_node = match serialized_node { - Some(node_str) => serde_json::from_str(&node_str), - None => Ok(None), - }?; - - Ok(serialized_node) - } -} diff --git a/crates/discovery/src/store/redis.rs b/crates/discovery/src/store/redis.rs deleted file mode 100644 index 508815c2..00000000 --- a/crates/discovery/src/store/redis.rs +++ /dev/null @@ -1,72 +0,0 @@ -#[cfg(test)] -use log::debug; -use log::info; -use redis::Client; -#[cfg(test)] -use redis_test::server::RedisServer; -#[cfg(test)] -use std::sync::Arc; -#[cfg(test)] -use std::thread; -#[cfg(test)] -use std::time::Duration; -#[derive(Clone)] -pub struct RedisStore { - pub client: Client, - #[allow(dead_code)] - #[cfg(test)] - server: Arc, -} - -impl RedisStore { - pub fn new(redis_url: &str) -> Self { - match Client::open(redis_url) { - Ok(client) => { - info!("Successfully connected to Redis at {redis_url}"); - Self { - client, - #[cfg(test)] - server: Arc::new(RedisServer::new()), - } - } - Err(e) => { - panic!("Redis connection error: {e}"); - } - } - } - - #[cfg(test)] - pub fn new_test() -> Self { - let server = RedisServer::new(); - - // Get the server address - let (host, port) = match server.client_addr() { - redis::ConnectionAddr::Tcp(host, port) => (host.clone(), *port), - _ => panic!("Expected TCP connection"), - }; - - let redis_url = format!("redis://{}:{}", host, port); - debug!("Starting test Redis server at {}", redis_url); - - // Add a small delay to ensure server is ready - thread::sleep(Duration::from_millis(100)); - - // Try to connect with retry logic - let client = loop { - if let Ok(client) = Client::open(redis_url.clone()) { - // Verify connection works - if let Ok(mut conn) = client.get_connection() { - if redis::cmd("PING").query::(&mut conn).is_ok() { - break client; - } - } - } - thread::sleep(Duration::from_millis(100)); - }; - - Self { - client, - 
server: Arc::new(server), - } - } -} diff --git a/crates/orchestrator/Cargo.toml b/crates/orchestrator/Cargo.toml index ce733ee6..b0d7da88 100644 --- a/crates/orchestrator/Cargo.toml +++ b/crates/orchestrator/Cargo.toml @@ -20,7 +20,6 @@ futures = { workspace = true } hex = { workspace = true } log = { workspace = true } redis = { workspace = true, features = ["tokio-comp"] } -redis-test = { workspace = true } reqwest = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } @@ -39,3 +38,4 @@ utoipa-swagger-ui = { version = "9.0.2", features = ["actix-web", "debug-embed", [dev-dependencies] mockito = { workspace = true } +redis-test = { workspace = true } diff --git a/crates/orchestrator/src/discovery/monitor.rs b/crates/orchestrator/src/discovery/monitor.rs index 1d8645cd..9909a639 100644 --- a/crates/orchestrator/src/discovery/monitor.rs +++ b/crates/orchestrator/src/discovery/monitor.rs @@ -7,8 +7,9 @@ use alloy::primitives::Address; use alloy::primitives::U256; use anyhow::{bail, Context as _, Error, Result}; use chrono::Utc; -use log::{error, info, warn}; +use log::{error, info}; use shared::models::node::NodeWithMetadata; +use shared::p2p::get_worker_nodes_from_dht; use std::sync::Arc; use std::time::Duration; use tokio::sync::mpsc::Sender; @@ -381,97 +382,6 @@ impl DiscoveryMonitor { } } -async fn get_worker_nodes_from_dht( - kademlia_action_tx: tokio::sync::mpsc::Sender, -) -> Result, anyhow::Error> { - let (kad_action, mut result_rx) = - p2p::KademliaAction::GetProviders(p2p::WORKER_DHT_KEY.as_bytes().to_vec()) - .into_kademlia_action_with_channel(); - if let Err(e) = kademlia_action_tx.send(kad_action).await { - bail!("failed to send Kademlia action: {e}"); - } - - info!("🔄 Fetching worker nodes from DHT..."); - let mut workers = std::collections::HashSet::new(); - while let Some(result) = result_rx.recv().await { - match result { - Ok(res) => { - match res { - p2p::KademliaQueryResult::GetProviders(res) => match res { - 
Ok(res) => match res { - p2p::KademliaGetProvidersOk::FoundProviders { key: _, providers } => { - workers.extend(providers.into_iter()); - } - _ => {} - }, - Err(e) => { - bail!("failed to get providers from DHT: {e}"); - } - }, - _ => { - // this case should never happen - bail!("unexpected Kademlia query result: {res:?}"); - } - } - } - Err(e) => { - bail!("kademlia action failed: {e}"); - } - } - } - - log::debug!("got {} worker nodes from DHT", workers.len()); - - let mut nodes = Vec::new(); - for peer_id in workers { - let record_key = p2p::worker_dht_key_with_peer_id(&peer_id); - let (kad_action, mut result_rx) = - p2p::KademliaAction::GetRecord(record_key.as_bytes().to_vec()) - .into_kademlia_action_with_channel(); - if let Err(e) = kademlia_action_tx.send(kad_action).await { - bail!("failed to send Kademlia action: {e}"); - } - - while let Some(result) = result_rx.recv().await { - match result { - Ok(res) => { - match res { - p2p::KademliaQueryResult::GetRecord(res) => match res { - Ok(res) => match res { - p2p::KademliaGetRecordOk::FoundRecord(record) => { - match serde_json::from_slice::( - &record.record.value, - ) { - Ok(node) => { - nodes.push(node); - } - Err(e) => { - warn!("failed to deserialize node record: {e}"); - } - } - } - _ => {} - }, - Err(e) => { - warn!("failed to get record from DHT: {e}"); - } - }, - _ => { - // this case should never happen - bail!("unexpected Kademlia query result: {res:?}"); - } - } - } - Err(e) => { - warn!("kademlia action failed: {e}"); - } - } - } - } - - Ok(nodes) -} - #[cfg(test)] mod tests { use alloy::primitives::Address; diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index bcb1fc4d..075dd935 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -171,8 +171,8 @@ impl BehaviourEvent { ) -> Vec<(PeerId, Multiaddr)> { match self { BehaviourEvent::Autonat(_event) => {} - BehaviourEvent::Identify(event) => match event { - identify::Event::Received { peer_id, info, .. 
} => { + BehaviourEvent::Identify(event) => { + if let identify::Event::Received { peer_id, info, .. } = event { let addrs = info .listen_addrs .into_iter() @@ -180,8 +180,7 @@ impl BehaviourEvent { .collect::>(); return addrs; } - _ => {} - }, + } BehaviourEvent::Kademlia(event) => { match event { kad::Event::RoutingUpdated { @@ -211,12 +210,11 @@ impl BehaviourEvent { _ => {} } } - BehaviourEvent::Mdns(event) => match event { - mdns::Event::Discovered(peers) => { + BehaviourEvent::Mdns(event) => { + if let mdns::Event::Discovered(peers) = event { return peers; } - _ => {} - }, + } BehaviourEvent::Ping(_event) => {} BehaviourEvent::RequestResponse(event) => match event { request_response::Event::Message { peer, message } => { diff --git a/crates/p2p/src/discovery.rs b/crates/p2p/src/discovery.rs index 790b0052..96be5b5c 100644 --- a/crates/p2p/src/discovery.rs +++ b/crates/p2p/src/discovery.rs @@ -74,7 +74,7 @@ pub(crate) async fn handle_kademlia_action( .put_record( kad::Record { key: key.into(), - value: value.into(), + value, publisher: None, expires: None, }, diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 48a5e8b0..ad5a2503 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -335,6 +335,7 @@ impl NodeBuilder { self } + #[allow(clippy::type_complexity)] pub fn try_build( self, ) -> Result<( diff --git a/crates/shared/Cargo.toml b/crates/shared/Cargo.toml index 4d3a8760..c4edb52d 100644 --- a/crates/shared/Cargo.toml +++ b/crates/shared/Cargo.toml @@ -30,7 +30,6 @@ uuid = { workspace = true } redis = { workspace = true, features = ["aio", "tokio-comp"] } dashmap = "6.1.0" anyhow = { workspace = true } -nalgebra = { workspace = true } log = { workspace = true } rand = "0.9.0" google-cloud-storage = "0.24.0" @@ -38,7 +37,6 @@ base64 = "0.22.1" chrono = { workspace = true, features = ["serde"] } async-trait = "0.1.88" regex = "1.11.1" -iroh = { workspace = true } rand_v8 = { workspace = true } subtle = "2.6.1" utoipa = { version = 
"5.3.0", features = ["actix_extras", "chrono", "uuid"] } diff --git a/crates/shared/src/models/mod.rs b/crates/shared/src/models/mod.rs index dea669b3..0d525946 100644 --- a/crates/shared/src/models/mod.rs +++ b/crates/shared/src/models/mod.rs @@ -4,3 +4,6 @@ pub mod metric; pub mod node; pub mod storage; pub mod task; + +pub use node::Node; +pub use node::NodeWithMetadata; diff --git a/crates/shared/src/models/node.rs b/crates/shared/src/models/node.rs index f5fec414..142399bf 100644 --- a/crates/shared/src/models/node.rs +++ b/crates/shared/src/models/node.rs @@ -5,9 +5,8 @@ use anyhow::{anyhow, Context as _}; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use std::fmt; -use std::ops::Deref; use std::str::FromStr; -use utoipa::{openapi::Object, ToSchema}; +use utoipa::ToSchema; #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Default, ToSchema)] pub struct Node { @@ -565,6 +564,7 @@ pub struct NodeWithMetadata { } impl NodeWithMetadata { + #[allow(clippy::too_many_arguments)] pub fn new( node: Node, is_validated: bool, @@ -673,76 +673,6 @@ impl NodeWithMetadata { } } -// TODO: delete -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Default, ToSchema)] -pub struct DiscoveryNode { - #[serde(flatten)] - pub node: Node, - pub is_validated: bool, - pub is_active: bool, - #[serde(default)] - pub is_provider_whitelisted: bool, - #[serde(default)] - pub is_blacklisted: bool, - #[serde(default)] - pub last_updated: Option>, - #[serde(default)] - pub created_at: Option>, - #[serde(default)] - pub location: Option, - #[schema(schema_with = u256_schema)] - pub latest_balance: Option, -} - -fn u256_schema() -> Object { - utoipa::openapi::ObjectBuilder::new() - .schema_type(utoipa::openapi::schema::Type::String) - .description(Some("A U256 value represented as a decimal string")) - .examples(Some(serde_json::json!("1000000000000000000"))) - .build() -} - -impl DiscoveryNode { - pub fn with_updated_node(&self, new_node: Node) -> Self { - 
DiscoveryNode { - node: new_node, - is_validated: self.is_validated, - is_active: self.is_active, - is_provider_whitelisted: self.is_provider_whitelisted, - is_blacklisted: self.is_blacklisted, - last_updated: Some(Utc::now()), - created_at: self.created_at, - location: self.location.clone(), - latest_balance: self.latest_balance, - } - } -} - -impl Deref for DiscoveryNode { - type Target = Node; - - fn deref(&self) -> &Self::Target { - &self.node - } -} - -impl From for DiscoveryNode { - fn from(node: Node) -> Self { - DiscoveryNode { - node, - is_validated: false, // Default values for new discovery nodes - is_active: false, - is_provider_whitelisted: false, - is_blacklisted: false, - last_updated: None, - created_at: Some(Utc::now()), - location: None, - latest_balance: None, - } - } -} - -// --- Tests --- #[cfg(test)] mod tests { use super::*; diff --git a/crates/shared/src/p2p/discovery.rs b/crates/shared/src/p2p/discovery.rs new file mode 100644 index 00000000..5da42135 --- /dev/null +++ b/crates/shared/src/p2p/discovery.rs @@ -0,0 +1,98 @@ +use anyhow::{bail, Result}; +use log::{info, warn}; + +/// Given a kademlia action channel backed by a `p2p::Service`, +/// perform a DHT lookup for all nodes which claim to be a worker using `GetProviders` for `p2p::WORKER_DHT_KEY`. +/// then, for each of these nodes, query the DHT for their record (which stores their node information) using `GetRecord`. 
+pub async fn get_worker_nodes_from_dht( + kademlia_action_tx: tokio::sync::mpsc::Sender, +) -> Result> { + let (kad_action, mut result_rx) = + p2p::KademliaAction::GetProviders(p2p::WORKER_DHT_KEY.as_bytes().to_vec()) + .into_kademlia_action_with_channel(); + if let Err(e) = kademlia_action_tx.send(kad_action).await { + bail!("failed to send Kademlia action: {e}"); + } + + info!("🔄 Fetching worker nodes from DHT..."); + let mut workers = std::collections::HashSet::new(); + while let Some(result) = result_rx.recv().await { + match result { + Ok(res) => { + match res { + p2p::KademliaQueryResult::GetProviders(res) => match res { + Ok(res) => { + if let p2p::KademliaGetProvidersOk::FoundProviders { + key: _, + providers, + } = res + { + workers.extend(providers.into_iter()); + } + } + Err(e) => { + bail!("failed to get providers from DHT: {e}"); + } + }, + _ => { + // this case should never happen + bail!("unexpected Kademlia query result: {res:?}"); + } + } + } + Err(e) => { + bail!("kademlia action failed: {e}"); + } + } + } + + log::debug!("got {} worker nodes from DHT", workers.len()); + + let mut nodes = Vec::new(); + for peer_id in workers { + let record_key = p2p::worker_dht_key_with_peer_id(&peer_id); + let (kad_action, mut result_rx) = + p2p::KademliaAction::GetRecord(record_key.as_bytes().to_vec()) + .into_kademlia_action_with_channel(); + if let Err(e) = kademlia_action_tx.send(kad_action).await { + bail!("failed to send Kademlia action: {e}"); + } + + while let Some(result) = result_rx.recv().await { + match result { + Ok(res) => { + match res { + p2p::KademliaQueryResult::GetRecord(res) => match res { + Ok(res) => { + if let p2p::KademliaGetRecordOk::FoundRecord(record) = res { + match serde_json::from_slice::( + &record.record.value, + ) { + Ok(node) => { + nodes.push(node); + } + Err(e) => { + warn!("failed to deserialize node record: {e}"); + } + } + } + } + Err(e) => { + warn!("failed to get record from DHT: {e}"); + } + }, + _ => { + // this case 
should never happen + bail!("unexpected Kademlia query result: {res:?}"); + } + } + } + Err(e) => { + warn!("kademlia action failed: {e}"); + } + } + } + } + + Ok(nodes) +} diff --git a/crates/shared/src/p2p/mod.rs b/crates/shared/src/p2p/mod.rs index 9d0e4016..f468ac3c 100644 --- a/crates/shared/src/p2p/mod.rs +++ b/crates/shared/src/p2p/mod.rs @@ -1,3 +1,5 @@ +mod discovery; mod service; +pub use discovery::get_worker_nodes_from_dht; pub use service::*; diff --git a/crates/shared/src/p2p/service.rs b/crates/shared/src/p2p/service.rs index cff13592..4143ce48 100644 --- a/crates/shared/src/p2p/service.rs +++ b/crates/shared/src/p2p/service.rs @@ -34,6 +34,7 @@ pub struct Service { } impl Service { + #[allow(clippy::type_complexity)] pub fn new( keypair: p2p::Keypair, port: u16, @@ -115,6 +116,7 @@ impl Service { } } +#[allow(clippy::type_complexity)] fn build_p2p_node( keypair: p2p::Keypair, port: u16, diff --git a/crates/shared/src/web3/mod.rs b/crates/shared/src/web3/mod.rs index 6383d272..89e07030 100644 --- a/crates/shared/src/web3/mod.rs +++ b/crates/shared/src/web3/mod.rs @@ -2,3 +2,4 @@ pub mod contracts; pub mod wallet; pub use contracts::core::builder::Contracts; +pub use wallet::{Wallet, WalletProvider}; diff --git a/crates/validator/src/main.rs b/crates/validator/src/main.rs index e36407e0..58d3dcdf 100644 --- a/crates/validator/src/main.rs +++ b/crates/validator/src/main.rs @@ -312,10 +312,10 @@ async fn main() -> anyhow::Result<()> { if let Err(e) = HttpServer::new(move || { App::new() - .app_data(web::Data::new(( - synthetic_validator.clone(), - validator_health.clone(), - ))) + .app_data(web::Data::new(State { + synthetic_validator: synthetic_validator.clone(), + validator_health: validator_health.clone(), + })) .route("/health", web::get().to(health_check)) .route( "/rejections", @@ -375,26 +375,16 @@ async fn main() -> anyhow::Result<()> { Ok(()) } -async fn health_check( - _: HttpRequest, - state: web::Data< - Option<( - SyntheticDataValidator, - 
Arc>, - )>, - >, -) -> impl Responder { +struct State { + synthetic_validator: Option>, + validator_health: Arc>, +} + +async fn health_check(_: HttpRequest, state: web::Data) -> impl Responder { // Maximum allowed time between validation loops (2 minutes) const MAX_VALIDATION_INTERVAL_SECS: u64 = 120; - let Some(state) = state.get_ref() else { - return HttpResponse::ServiceUnavailable().json(json!({ - "status": "error", - "message": "Validator not initialized" - })); - }; - - let validator_health = state.1.lock().await; + let validator_health = state.validator_health.lock().await; let now = SystemTime::now() .duration_since(UNIX_EPOCH) @@ -426,25 +416,24 @@ async fn health_check( })) } -async fn get_rejections( - req: HttpRequest, - validator: web::Data>>, -) -> impl Responder { - match validator.as_ref() { - Some(validator) => { +async fn get_rejections(req: HttpRequest, state: web::Data) -> impl Responder { + match state.synthetic_validator.as_ref() { + Some(synthetic_validator) => { // Parse query parameters let query = req.query_string(); let limit = parse_limit_param(query).unwrap_or(100); // Default limit of 100 let result = if limit > 0 && limit < 1000 { // Use the optimized recent rejections method for reasonable limits - validator.get_recent_rejections(limit as isize).await + synthetic_validator + .get_recent_rejections(limit as isize) + .await } else { // Fallback to all rejections (but warn about potential performance impact) if limit >= 1000 { info!("Large limit requested ({limit}), this may impact performance"); } - validator.get_all_rejections().await + synthetic_validator.get_all_rejections().await }; match result { diff --git a/crates/validator/src/validator.rs b/crates/validator/src/validator.rs index fd06aa08..45902bff 100644 --- a/crates/validator/src/validator.rs +++ b/crates/validator/src/validator.rs @@ -3,11 +3,11 @@ use alloy::primitives::{utils::Unit, Address, U256}; use anyhow::{bail, Context as _, Result}; use 
futures::stream::FuturesUnordered; use futures::StreamExt as _; -use log::{error, info, warn}; -use shared::models::node::NodeWithMetadata; -use shared::web3::contracts::core::builder::Contracts; -use shared::web3::wallet::WalletProvider; -use std::collections::HashSet; +use log::{error, info}; +use shared::models::NodeWithMetadata; +use shared::p2p::get_worker_nodes_from_dht; +use shared::web3::Contracts; +use shared::web3::WalletProvider; use std::str::FromStr as _; use std::sync::Arc; use std::time::{Instant, SystemTime, UNIX_EPOCH}; @@ -53,6 +53,7 @@ pub struct Validator { } impl Validator { + #[allow(clippy::too_many_arguments)] pub fn new( cancellation_token: tokio_util::sync::CancellationToken, provider: WalletProvider, @@ -133,6 +134,7 @@ impl Validator { } } +#[allow(clippy::too_many_arguments)] async fn perform_validation( synthetic_validator: Option>, provider: WalletProvider, @@ -249,94 +251,3 @@ async fn perform_validation( validator_health.update(last_validation_timestamp, last_loop_duration_ms as u64); Ok(()) } - -async fn get_worker_nodes_from_dht( - kademlia_action_tx: tokio::sync::mpsc::Sender, -) -> Result, anyhow::Error> { - let (kad_action, mut result_rx) = - p2p::KademliaAction::GetProviders(p2p::WORKER_DHT_KEY.as_bytes().to_vec()) - .into_kademlia_action_with_channel(); - if let Err(e) = kademlia_action_tx.send(kad_action).await { - bail!("failed to send Kademlia action: {e}"); - } - - info!("🔄 Fetching worker nodes from DHT..."); - let mut workers = HashSet::new(); - while let Some(result) = result_rx.recv().await { - match result { - Ok(res) => { - match res { - p2p::KademliaQueryResult::GetProviders(res) => match res { - Ok(res) => match res { - p2p::KademliaGetProvidersOk::FoundProviders { key: _, providers } => { - workers.extend(providers.into_iter()); - } - _ => {} - }, - Err(e) => { - bail!("failed to get providers from DHT: {e}"); - } - }, - _ => { - // this case should never happen - bail!("unexpected Kademlia query result: 
{res:?}"); - } - } - } - Err(e) => { - bail!("kademlia action failed: {e}"); - } - } - } - - log::debug!("got {} worker nodes from DHT", workers.len()); - - let mut nodes = Vec::new(); - for peer_id in workers { - let record_key = p2p::worker_dht_key_with_peer_id(&peer_id); - let (kad_action, mut result_rx) = - p2p::KademliaAction::GetRecord(record_key.as_bytes().to_vec()) - .into_kademlia_action_with_channel(); - if let Err(e) = kademlia_action_tx.send(kad_action).await { - bail!("failed to send Kademlia action: {e}"); - } - - while let Some(result) = result_rx.recv().await { - match result { - Ok(res) => { - match res { - p2p::KademliaQueryResult::GetRecord(res) => match res { - Ok(res) => match res { - p2p::KademliaGetRecordOk::FoundRecord(record) => { - match serde_json::from_slice::( - &record.record.value, - ) { - Ok(node) => { - nodes.push(node); - } - Err(e) => { - warn!("failed to deserialize node record: {e}"); - } - } - } - _ => {} - }, - Err(e) => { - warn!("failed to get record from DHT: {e}"); - } - }, - _ => { - // this case should never happen - bail!("unexpected Kademlia query result: {res:?}"); - } - } - } - Err(e) => { - warn!("kademlia action failed: {e}"); - } - } - } - } - - Ok(nodes) -} diff --git a/crates/worker/Cargo.toml b/crates/worker/Cargo.toml index eb041cad..8a7d8711 100644 --- a/crates/worker/Cargo.toml +++ b/crates/worker/Cargo.toml @@ -49,7 +49,7 @@ rand = "0.9.0" tempfile = "3.14.0" tracing-loki = "0.2.6" tracing = { workspace = true } -tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } +tracing-subscriber = { workspace = true, features = ["env-filter"] } time = "0.3.41" tokio-stream = { version = "0.1.17", features = ["net"] } homedir = "0.3" diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index 0eadd191..d2e1ea03 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -112,6 +112,8 @@ impl Service { } } +// TODO: refactor all these channels into a `P2PHandle` 
struct or similar +#[allow(clippy::type_complexity)] fn build_p2p_node( keypair: p2p::Keypair, port: u16, From 3869b17690fe2241b5fd506990876458aa9fdf3c Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 16 Jul 2025 16:15:11 -0400 Subject: [PATCH 09/14] update dockerfile, charts, workflows for bootnode --- .env.example | 1 - .github/workflows/dev-release.yml | 16 +++++----- .github/workflows/prod-release.yml | 18 +++++------ Makefile | 4 --- README.md | 4 +-- SECURITY.md | 2 +- crates/orchestrator/Dockerfile | 6 ++-- crates/orchestrator/src/main.rs | 4 --- crates/validator/Cargo.toml | 2 +- crates/validator/Dockerfile | 6 ++-- crates/validator/src/main.rs | 7 ---- crates/worker/src/cli/command.rs | 9 ------ .../.gitignore | 0 deployment/k8s/bootnode-chart/Chart.yaml | 4 +++ .../templates/_helpers.tpl | 10 +++--- .../templates/api-backend-config.yaml | 4 +-- .../templates/api-deployment.yaml | 0 .../templates/api-hpa.yaml | 6 ++-- .../templates/api-pdb.yaml | 6 ++-- .../bootnode-chart/templates/api-service.yaml | 32 +++++++++++++++++++ .../templates/processor-backend-config.yaml | 4 +-- .../templates/processor-deployment.yaml | 16 ++++++++++ .../templates/processor-service.yaml | 16 ++++++++++ .../templates/redis-deployment.yaml | 14 ++++---- .../templates/redis-pvc.yaml | 4 +-- .../templates/redis-service.yaml | 6 ++-- .../templates/secret.yaml | 4 +-- .../values.example.yaml | 10 +++--- deployment/k8s/discovery-chart/Chart.yaml | 4 --- .../templates/api-service.yaml | 32 ------------------- .../templates/processor-deployment.yaml | 16 ---------- .../templates/processor-service.yaml | 16 ---------- .../orchestrator-chart/values.example.yaml | 2 +- .../k8s/validator-chart/values.example.yaml | 2 +- docker-compose.yml | 21 ++++++------ docs/development-setup.md | 4 +-- 36 files changed, 142 insertions(+), 170 deletions(-) rename deployment/k8s/{discovery-chart => bootnode-chart}/.gitignore (100%) create mode 100644 deployment/k8s/bootnode-chart/Chart.yaml rename 
deployment/k8s/{discovery-chart => bootnode-chart}/templates/_helpers.tpl (88%) rename deployment/k8s/{discovery-chart => bootnode-chart}/templates/api-backend-config.yaml (68%) rename deployment/k8s/{discovery-chart => bootnode-chart}/templates/api-deployment.yaml (100%) rename deployment/k8s/{discovery-chart => bootnode-chart}/templates/api-hpa.yaml (85%) rename deployment/k8s/{discovery-chart => bootnode-chart}/templates/api-pdb.yaml (55%) create mode 100644 deployment/k8s/bootnode-chart/templates/api-service.yaml rename deployment/k8s/{discovery-chart => bootnode-chart}/templates/processor-backend-config.yaml (66%) create mode 100644 deployment/k8s/bootnode-chart/templates/processor-deployment.yaml create mode 100644 deployment/k8s/bootnode-chart/templates/processor-service.yaml rename deployment/k8s/{discovery-chart => bootnode-chart}/templates/redis-deployment.yaml (62%) rename deployment/k8s/{discovery-chart => bootnode-chart}/templates/redis-pvc.yaml (66%) rename deployment/k8s/{discovery-chart => bootnode-chart}/templates/redis-service.yaml (52%) rename deployment/k8s/{discovery-chart => bootnode-chart}/templates/secret.yaml (68%) rename deployment/k8s/{discovery-chart => bootnode-chart}/values.example.yaml (77%) delete mode 100644 deployment/k8s/discovery-chart/Chart.yaml delete mode 100644 deployment/k8s/discovery-chart/templates/api-service.yaml delete mode 100644 deployment/k8s/discovery-chart/templates/processor-deployment.yaml delete mode 100644 deployment/k8s/discovery-chart/templates/processor-service.yaml diff --git a/.env.example b/.env.example index 0a604ff7..bdecfd2d 100644 --- a/.env.example +++ b/.env.example @@ -13,7 +13,6 @@ WORK_VALIDATION_CONTRACT=0x0B306BF915C4d645ff596e518fAf3F9669b97016 # Discovery # --------- -DISCOVERY_URLS=http://localhost:8089 BOOTNODE_P2P_ADDRESS=/ip4/127.0.0.1/tcp/4005/p2p/12D3KooWJj3haDEzxGSbGSAvXCiE9pDYC9xHDdtQe8B2donhfwXL 
BOOTNODE_LIBP2P_PRIVATE_KEY="d0884c9823a0a2c846dbf5e71853bc5f80b2ec5d2de46532cdbe8ab46f020836" diff --git a/.github/workflows/dev-release.yml b/.github/workflows/dev-release.yml index 01bd44a9..94ac377f 100644 --- a/.github/workflows/dev-release.yml +++ b/.github/workflows/dev-release.yml @@ -73,8 +73,8 @@ jobs: if [ -f target/release/orchestrator ]; then cp target/release/orchestrator release-artifacts/orchestrator-linux-x86_64 fi - if [ -f target/release/discovery ]; then # Prepare discovery binary - cp target/release/discovery release-artifacts/discovery-linux-x86_64 + if [ -f target/release/bootnode ]; then # Prepare bootnode binary + cp target/release/bootnode release-artifacts/bootnode-linux-x86_64 fi - name: Generate checksums @@ -140,17 +140,17 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Build and push Discovery image + - name: Build and push bootnode image uses: docker/build-push-action@v4 with: context: . - file: ./crates/discovery/Dockerfile + file: ./crates/bootnode/Dockerfile push: true tags: | - ghcr.io/${{ steps.meta.outputs.repo_lower }}/discovery:dev - ghcr.io/${{ steps.meta.outputs.repo_lower }}/discovery:${{ steps.next_version.outputs.tag_name }} - us-east1-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/prime-protocol/discovery:dev - us-east1-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/prime-protocol/discovery:${{ steps.next_version.outputs.tag_name }} + ghcr.io/${{ steps.meta.outputs.repo_lower }}/bootnode:dev + ghcr.io/${{ steps.meta.outputs.repo_lower }}/bootnode:${{ steps.next_version.outputs.tag_name }} + us-east1-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/prime-protocol/bootnode:dev + us-east1-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/prime-protocol/bootnode:${{ steps.next_version.outputs.tag_name }} - name: Build and push Validator image uses: docker/build-push-action@v4 diff --git a/.github/workflows/prod-release.yml b/.github/workflows/prod-release.yml index 771a3317..d7f7a03a 100644 --- 
a/.github/workflows/prod-release.yml +++ b/.github/workflows/prod-release.yml @@ -45,7 +45,7 @@ jobs: - name: Prepare binaries run: | mkdir -p release-artifacts - for binary in worker validator orchestrator discovery; do + for binary in worker validator orchestrator bootnode; do if [ -f "target/release/$binary" ]; then cp "target/release/$binary" "release-artifacts/$binary-linux-x86_64" fi @@ -78,7 +78,7 @@ jobs: - Worker - Validator - Orchestrator - - Discovery service + - Bootnode SHA256 checksums are provided for each binary. @@ -126,17 +126,17 @@ jobs: REPO_LOWER=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]') echo "repo_lower=${REPO_LOWER}" >> $GITHUB_OUTPUT - - name: Build and push Discovery image + - name: Build and push bootnode image uses: docker/build-push-action@v4 with: context: . - file: ./crates/discovery/Dockerfile + file: ./crates/bootnode/Dockerfile push: true tags: | - ghcr.io/${{ steps.meta.outputs.repo_lower }}/discovery:latest - ghcr.io/${{ steps.meta.outputs.repo_lower }}/discovery:${{ steps.get_version.outputs.tag_name }} - us-east1-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/prime-protocol/discovery:latest - us-east1-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/prime-protocol/discovery:${{ steps.get_version.outputs.tag_name }} + ghcr.io/${{ steps.meta.outputs.repo_lower }}/bootnode:latest + ghcr.io/${{ steps.meta.outputs.repo_lower }}/bootnode:${{ steps.get_version.outputs.tag_name }} + us-east1-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/prime-protocol/bootnode:latest + us-east1-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/prime-protocol/bootnode:${{ steps.get_version.outputs.tag_name }} - name: Build and push Validator image uses: docker/build-push-action@v4 @@ -160,4 +160,4 @@ jobs: ghcr.io/${{ steps.meta.outputs.repo_lower }}/orchestrator:latest ghcr.io/${{ steps.meta.outputs.repo_lower }}/orchestrator:${{ steps.get_version.outputs.tag_name }} us-east1-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID 
}}/prime-protocol/orchestrator:latest - us-east1-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/prime-protocol/orchestrator:${{ steps.get_version.outputs.tag_name }} \ No newline at end of file + us-east1-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/prime-protocol/orchestrator:${{ steps.get_version.outputs.tag_name }} diff --git a/Makefile b/Makefile index 9e8887e3..2933dc00 100644 --- a/Makefile +++ b/Makefile @@ -115,10 +115,6 @@ whitelist-provider: set -a; source ${ENV_FILE}; set +a; \ cargo run -p dev-utils --example whitelist_provider -- --provider-address $${PROVIDER_ADDRESS} --key $${PRIVATE_KEY_VALIDATOR} --rpc-url $${RPC_URL} -# watch-discovery: -# set -a; source .env; set +a; \ -# cargo watch -w crates/discovery/src -x "run --bin discovery -- --rpc-url $${RPC_URL} --max-nodes-per-ip $${MAX_NODES_PER_IP:-2} $${LOCATION_SERVICE_URL:+--location-service-url $${LOCATION_SERVICE_URL}} $${LOCATION_SERVICE_API_KEY:+--location-service-api-key $${LOCATION_SERVICE_API_KEY}}" - watch-worker: set -a; source ${ENV_FILE}; set +a; \ cargo watch -w crates/worker/src -x "run --bin worker -- run --bootnodes $${BOOTNODE_P2P_ADDRESS} --port 8091 \ diff --git a/README.md b/README.md index 6710d4b5..322113d2 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ The Prime Protocol follows a modular architecture designed for decentralized AI ### Component Overview - **Smart Contracts**: Ethereum-based contracts manage the protocol's economic layer -- **Discovery Service**: Enables secure peer discovery and metadata sharing +- **Bootnode**: Simple bootnode for the p2p network - **Orchestrator**: Coordinates compute jobs across worker nodes - **Validator Network**: Ensures quality through random challenges - **Worker Nodes**: Execute AI workloads in secure containers @@ -87,4 +87,4 @@ curl -sSL https://raw.githubusercontent.com/PrimeIntellect-ai/protocol/develop/c We welcome contributions! Please see our [Contributing Guidelines](CONTRIBUTING.md). 
## Security -See [SECURITY.md](SECURITY.md) for security policies and reporting vulnerabilities. \ No newline at end of file +See [SECURITY.md](SECURITY.md) for security policies and reporting vulnerabilities. diff --git a/SECURITY.md b/SECURITY.md index c04fa844..5085869d 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -36,7 +36,7 @@ The following are in scope for security reports: - Prime Protocol smart contracts - Worker node software - Validator node software -- Discovery service +- Bootnode service - Orchestrator service - Protocol APIs and interfaces diff --git a/crates/orchestrator/Dockerfile b/crates/orchestrator/Dockerfile index 3d3634f4..4d523322 100644 --- a/crates/orchestrator/Dockerfile +++ b/crates/orchestrator/Dockerfile @@ -14,7 +14,7 @@ ENV URL="" ENV HOST="" ENV DISCOVERY_REFRESH_INTERVAL="10" ENV REDIS_STORE_URL="redis://localhost:6380" -ENV DISCOVERY_URLS="http://localhost:8089" +ENV BOOTNODES="/ip4/127.0.0.1/tcp/4005/p2p/12D3KooWJj3haDEzxGSbGSAvXCiE9pDYC9xHDdtQe8B2donhfwXL" ENV ADMIN_API_KEY="admin" ENV DISABLE_EJECTION="false" ENV S3_CREDENTIALS="" @@ -38,7 +38,7 @@ $([ ! -z "$URL" ] && echo "--url $URL") \ $([ ! -z "$HOST" ] && echo "--host $HOST") \ --discovery-refresh-interval "$DISCOVERY_REFRESH_INTERVAL" \ --redis-store-url "$REDIS_STORE_URL" \ ---discovery-urls "$DISCOVERY_URLS" \ +--bootnodes "$BOOTNODES" \ --admin-api-key "$ADMIN_API_KEY" \ --mode "$MODE" \ $([ "$DISABLE_EJECTION" = "true" ] && echo "--disable-ejection") \ @@ -51,4 +51,4 @@ $([ ! 
-z "$MAX_HEALTHY_NODES_WITH_SAME_ENDPOINT" ] && echo "--max-healthy-nodes- chmod +x /entrypoint.sh EXPOSE 8090 -ENTRYPOINT ["/entrypoint.sh"] \ No newline at end of file +ENTRYPOINT ["/entrypoint.sh"] diff --git a/crates/orchestrator/src/main.rs b/crates/orchestrator/src/main.rs index d2621d03..3b7e0ddf 100644 --- a/crates/orchestrator/src/main.rs +++ b/crates/orchestrator/src/main.rs @@ -115,10 +115,6 @@ async fn main() -> Result<()> { env_logger::Builder::new() .filter_level(log_level) .format_timestamp(None) - .filter_module("iroh", log::LevelFilter::Warn) - .filter_module("iroh_net", log::LevelFilter::Warn) - .filter_module("iroh_quinn", log::LevelFilter::Warn) - .filter_module("iroh_base", log::LevelFilter::Warn) .filter_module("tracing::span", log::LevelFilter::Warn) .init(); diff --git a/crates/validator/Cargo.toml b/crates/validator/Cargo.toml index 4d329921..c42b6ba0 100644 --- a/crates/validator/Cargo.toml +++ b/crates/validator/Cargo.toml @@ -24,7 +24,7 @@ redis = { workspace = true, features = ["tokio-comp"] } reqwest = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } -tokio = { workspace = true } +tokio = { workspace = true, features = [ "rt-multi-thread" ] } tokio-util = { workspace = true } url = { workspace = true } diff --git a/crates/validator/Dockerfile b/crates/validator/Dockerfile index 93d7fcdc..258f4819 100644 --- a/crates/validator/Dockerfile +++ b/crates/validator/Dockerfile @@ -6,7 +6,7 @@ RUN chmod +x /usr/local/bin/validator ENV RPC_URL="http://localhost:8545" ENV VALIDATOR_KEY="" -ENV DISCOVERY_URLS="http://localhost:8089" +ENV BOOTNODES="/ip4/127.0.0.1/tcp/4005/p2p/12D3KooWJj3haDEzxGSbGSAvXCiE9pDYC9xHDdtQe8B2donhfwXL" ENV POOL_ID="" ENV S3_CREDENTIALS="" ENV BUCKET_NAME="" @@ -30,7 +30,7 @@ RUN echo '#!/bin/sh\n\ exec /usr/local/bin/validator \ --rpc-url "$RPC_URL" \ --validator-key "$VALIDATOR_KEY" \ ---discovery-urls "$DISCOVERY_URLS" \ +--bootnodes "$BOOTNODES" \ $([ ! 
-z "$POOL_ID" ] && echo "--pool-id $POOL_ID") \ $([ ! -z "$BUCKET_NAME" ] && echo "--bucket-name $BUCKET_NAME") \ $([ ! -z "$LOG_LEVEL" ] && echo "--log-level $LOG_LEVEL") \ @@ -50,4 +50,4 @@ $([ ! -z "$WORK_UNIT_INVALIDATION_TYPE" ] && echo "--work-unit-invalidation-type chmod +x /entrypoint.sh EXPOSE 9879 -ENTRYPOINT ["/entrypoint.sh"] \ No newline at end of file +ENTRYPOINT ["/entrypoint.sh"] diff --git a/crates/validator/src/main.rs b/crates/validator/src/main.rs index 58d3dcdf..6f1704f3 100644 --- a/crates/validator/src/main.rs +++ b/crates/validator/src/main.rs @@ -32,9 +32,6 @@ struct Args { #[arg(short = 'k', long)] validator_key: String, - // /// Discovery URLs (comma-separated) - // #[arg(long, default_value = "http://localhost:8089", value_delimiter = ',')] - // discovery_urls: Vec, /// Ability to disable hardware validation #[arg(long, default_value = "false")] disable_hardware_validation: bool, @@ -121,10 +118,6 @@ async fn main() -> anyhow::Result<()> { }; env_logger::Builder::new() .filter_level(log_level) - .filter_module("iroh", log::LevelFilter::Warn) - .filter_module("iroh_net", log::LevelFilter::Warn) - .filter_module("iroh_quinn", log::LevelFilter::Warn) - .filter_module("iroh_base", log::LevelFilter::Warn) .filter_module("tracing::span", log::LevelFilter::Warn) .format_timestamp(None) .init(); diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index 26c40b20..b5902be3 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -304,16 +304,7 @@ pub async fn execute_command( compute_node_state, ); - // let discovery_urls = vec![discovery_url - // .clone() - // .unwrap_or("http://localhost:8089".to_string())]; - // let discovery_service = - // DiscoveryService::new(node_wallet_instance.clone(), discovery_urls, None); - // let discovery_state = state.clone(); - // let discovery_updater = - // DiscoveryUpdater::new(discovery_service.clone(), discovery_state.clone()); let pool_id = 
U256::from(*compute_pool_id); - let pool_info = loop { match contracts.compute_pool.get_pool_info(pool_id).await { Ok(pool) if pool.status == PoolStatus::ACTIVE => break Arc::new(pool), diff --git a/deployment/k8s/discovery-chart/.gitignore b/deployment/k8s/bootnode-chart/.gitignore similarity index 100% rename from deployment/k8s/discovery-chart/.gitignore rename to deployment/k8s/bootnode-chart/.gitignore diff --git a/deployment/k8s/bootnode-chart/Chart.yaml b/deployment/k8s/bootnode-chart/Chart.yaml new file mode 100644 index 00000000..7d5c2aac --- /dev/null +++ b/deployment/k8s/bootnode-chart/Chart.yaml @@ -0,0 +1,4 @@ +apiVersion: v2 +name: bootnode +description: A Helm chart for Prime Bootnode +version: 0.1.0 diff --git a/deployment/k8s/discovery-chart/templates/_helpers.tpl b/deployment/k8s/bootnode-chart/templates/_helpers.tpl similarity index 88% rename from deployment/k8s/discovery-chart/templates/_helpers.tpl rename to deployment/k8s/bootnode-chart/templates/_helpers.tpl index c37d6bcc..dfbe7f3f 100644 --- a/deployment/k8s/discovery-chart/templates/_helpers.tpl +++ b/deployment/k8s/bootnode-chart/templates/_helpers.tpl @@ -1,21 +1,21 @@ {{/* Helm helper templates */}} -{{- define "discovery.fullname" -}} +{{- define "bootnode.fullname" -}} {{- if .Values.fullnameOverride -}} {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} {{- else -}} -{{- printf "discovery" -}} +{{- printf "bootnode" -}} {{- end -}} {{- end -}} -{{- define "discovery.namespace" -}} +{{- define "bootnode.namespace" -}} {{- .Values.namespace | default .Release.Namespace -}} {{- end -}} -{{- define "discovery.container" -}} +{{- define "bootnode.container" -}} {{- $mode := .mode }} {{- $root := .root }} -- name: discovery +- name: bootnode image: {{ $root.Values.image }} ports: - containerPort: {{ $root.Values.port }} diff --git a/deployment/k8s/discovery-chart/templates/api-backend-config.yaml b/deployment/k8s/bootnode-chart/templates/api-backend-config.yaml similarity index 
68% rename from deployment/k8s/discovery-chart/templates/api-backend-config.yaml rename to deployment/k8s/bootnode-chart/templates/api-backend-config.yaml index eab37e65..94ce1d00 100644 --- a/deployment/k8s/discovery-chart/templates/api-backend-config.yaml +++ b/deployment/k8s/bootnode-chart/templates/api-backend-config.yaml @@ -1,8 +1,8 @@ apiVersion: cloud.google.com/v1 kind: BackendConfig metadata: - name: {{ include "discovery.fullname" . }}-api-backend-config - namespace: {{ include "discovery.namespace" . }} + name: {{ include "bootnode.fullname" . }}-api-backend-config + namespace: {{ include "bootnode.namespace" . }} spec: healthCheck: type: HTTP diff --git a/deployment/k8s/discovery-chart/templates/api-deployment.yaml b/deployment/k8s/bootnode-chart/templates/api-deployment.yaml similarity index 100% rename from deployment/k8s/discovery-chart/templates/api-deployment.yaml rename to deployment/k8s/bootnode-chart/templates/api-deployment.yaml diff --git a/deployment/k8s/discovery-chart/templates/api-hpa.yaml b/deployment/k8s/bootnode-chart/templates/api-hpa.yaml similarity index 85% rename from deployment/k8s/discovery-chart/templates/api-hpa.yaml rename to deployment/k8s/bootnode-chart/templates/api-hpa.yaml index 31520e15..3008d533 100644 --- a/deployment/k8s/discovery-chart/templates/api-hpa.yaml +++ b/deployment/k8s/bootnode-chart/templates/api-hpa.yaml @@ -2,13 +2,13 @@ apiVersion: autoscaling/v2 kind: HorizontalPodAutoscaler metadata: - name: {{ include "discovery.fullname" . }}-api - namespace: {{ include "discovery.namespace" . }} + name: {{ include "bootnode.fullname" . }}-api + namespace: {{ include "bootnode.namespace" . }} spec: scaleTargetRef: apiVersion: apps/v1 kind: Deployment - name: {{ include "discovery.fullname" . }}-api + name: {{ include "bootnode.fullname" . 
}}-api minReplicas: {{ .Values.api.hpa.minReplicas }} maxReplicas: {{ .Values.api.hpa.maxReplicas }} {{- if or .Values.api.hpa.metrics.cpu.enabled .Values.api.hpa.metrics.memory.enabled }} diff --git a/deployment/k8s/discovery-chart/templates/api-pdb.yaml b/deployment/k8s/bootnode-chart/templates/api-pdb.yaml similarity index 55% rename from deployment/k8s/discovery-chart/templates/api-pdb.yaml rename to deployment/k8s/bootnode-chart/templates/api-pdb.yaml index 98425bed..274dacb8 100644 --- a/deployment/k8s/discovery-chart/templates/api-pdb.yaml +++ b/deployment/k8s/bootnode-chart/templates/api-pdb.yaml @@ -2,11 +2,11 @@ apiVersion: policy/v1 kind: PodDisruptionBudget metadata: - name: {{ include "discovery.fullname" . }}-api - namespace: {{ include "discovery.namespace" . }} + name: {{ include "bootnode.fullname" . }}-api + namespace: {{ include "bootnode.namespace" . }} spec: maxUnavailable: {{ .Values.api.pdb.maxUnavailable }} selector: matchLabels: - app: {{ include "discovery.fullname" . }}-api + app: {{ include "bootnode.fullname" . }}-api {{- end }} diff --git a/deployment/k8s/bootnode-chart/templates/api-service.yaml b/deployment/k8s/bootnode-chart/templates/api-service.yaml new file mode 100644 index 00000000..a1e61692 --- /dev/null +++ b/deployment/k8s/bootnode-chart/templates/api-service.yaml @@ -0,0 +1,32 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "bootnode.fullname" . }} + namespace: {{ include "bootnode.namespace" . }} + labels: + app: {{ include "bootnode.fullname" . }}-api +spec: + selector: + app: {{ include "bootnode.fullname" . }}-api + ports: + - port: {{ .Values.port }} + targetPort: {{ .Values.port }} + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "bootnode.fullname" . }}-api + namespace: {{ include "bootnode.namespace" . }} + labels: + app: {{ include "bootnode.fullname" . }}-api + annotations: + cloud.google.com/backend-config: '{"default": "{{ include "bootnode.fullname" . 
}}-api-backend-config"}' +spec: + selector: + app: {{ include "bootnode.fullname" . }}-api + ports: + - port: {{ .Values.port }} + targetPort: {{ .Values.port }} + type: ClusterIP diff --git a/deployment/k8s/discovery-chart/templates/processor-backend-config.yaml b/deployment/k8s/bootnode-chart/templates/processor-backend-config.yaml similarity index 66% rename from deployment/k8s/discovery-chart/templates/processor-backend-config.yaml rename to deployment/k8s/bootnode-chart/templates/processor-backend-config.yaml index a1f68fa1..d51b5083 100644 --- a/deployment/k8s/discovery-chart/templates/processor-backend-config.yaml +++ b/deployment/k8s/bootnode-chart/templates/processor-backend-config.yaml @@ -1,8 +1,8 @@ apiVersion: cloud.google.com/v1 kind: BackendConfig metadata: - name: {{ include "discovery.fullname" . }}-processor-backend-config - namespace: {{ include "discovery.namespace" . }} + name: {{ include "bootnode.fullname" . }}-processor-backend-config + namespace: {{ include "bootnode.namespace" . }} spec: healthCheck: type: HTTP diff --git a/deployment/k8s/bootnode-chart/templates/processor-deployment.yaml b/deployment/k8s/bootnode-chart/templates/processor-deployment.yaml new file mode 100644 index 00000000..d187b0e8 --- /dev/null +++ b/deployment/k8s/bootnode-chart/templates/processor-deployment.yaml @@ -0,0 +1,16 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "bootnode.fullname" . }}-processor + namespace: {{ include "bootnode.namespace" . }} +spec: + replicas: 1 + selector: + matchLabels: + app: {{ include "bootnode.fullname" . }}-processor + template: + metadata: + labels: + app: {{ include "bootnode.fullname" . }}-processor + spec: + containers: {{- include "bootnode.container" (dict "root" . 
"mode" "processor") | nindent 6 }} diff --git a/deployment/k8s/bootnode-chart/templates/processor-service.yaml b/deployment/k8s/bootnode-chart/templates/processor-service.yaml new file mode 100644 index 00000000..ae4daae9 --- /dev/null +++ b/deployment/k8s/bootnode-chart/templates/processor-service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "bootnode.fullname" . }}-processor + namespace: {{ include "bootnode.namespace" . }} + labels: + app: {{ include "bootnode.fullname" . }}-processor + annotations: + cloud.google.com/backend-config: '{"default": "{{ include "bootnode.fullname" . }}-processor-backend-config"}' +spec: + selector: + app: {{ include "bootnode.fullname" . }}-processor + ports: + - port: {{ .Values.port }} + targetPort: {{ .Values.port }} + type: ClusterIP diff --git a/deployment/k8s/discovery-chart/templates/redis-deployment.yaml b/deployment/k8s/bootnode-chart/templates/redis-deployment.yaml similarity index 62% rename from deployment/k8s/discovery-chart/templates/redis-deployment.yaml rename to deployment/k8s/bootnode-chart/templates/redis-deployment.yaml index 9fced0dd..ca36fe48 100644 --- a/deployment/k8s/discovery-chart/templates/redis-deployment.yaml +++ b/deployment/k8s/bootnode-chart/templates/redis-deployment.yaml @@ -2,17 +2,17 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: {{ include "discovery.fullname" . }}-redis - namespace: {{ include "discovery.namespace" . }} + name: {{ include "bootnode.fullname" . }}-redis + namespace: {{ include "bootnode.namespace" . }} spec: replicas: 1 selector: matchLabels: - app: {{ include "discovery.fullname" . }}-redis + app: {{ include "bootnode.fullname" . }}-redis template: metadata: labels: - app: {{ include "discovery.fullname" . }}-redis + app: {{ include "bootnode.fullname" . }}-redis spec: containers: - name: redis @@ -27,11 +27,11 @@ spec: memory: "512Mi" cpu: "500m" volumeMounts: - - name: {{ include "discovery.fullname" . 
}}-redis-data + - name: {{ include "bootnode.fullname" . }}-redis-data mountPath: /data command: ["redis-server", "--appendonly", "yes", "--save", "60", "1"] volumes: - - name: {{ include "discovery.fullname" . }}-redis-data + - name: {{ include "bootnode.fullname" . }}-redis-data persistentVolumeClaim: - claimName: {{ include "discovery.fullname" . }}-redis-data + claimName: {{ include "bootnode.fullname" . }}-redis-data {{- end }} diff --git a/deployment/k8s/discovery-chart/templates/redis-pvc.yaml b/deployment/k8s/bootnode-chart/templates/redis-pvc.yaml similarity index 66% rename from deployment/k8s/discovery-chart/templates/redis-pvc.yaml rename to deployment/k8s/bootnode-chart/templates/redis-pvc.yaml index 11d0d58f..b65f5902 100644 --- a/deployment/k8s/discovery-chart/templates/redis-pvc.yaml +++ b/deployment/k8s/bootnode-chart/templates/redis-pvc.yaml @@ -2,8 +2,8 @@ apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: {{ include "discovery.fullname" . }}-redis-data - namespace: {{ include "discovery.namespace" . }} + name: {{ include "bootnode.fullname" . }}-redis-data + namespace: {{ include "bootnode.namespace" . }} spec: accessModes: - ReadWriteOnce diff --git a/deployment/k8s/discovery-chart/templates/redis-service.yaml b/deployment/k8s/bootnode-chart/templates/redis-service.yaml similarity index 52% rename from deployment/k8s/discovery-chart/templates/redis-service.yaml rename to deployment/k8s/bootnode-chart/templates/redis-service.yaml index 4e488095..c9281d56 100644 --- a/deployment/k8s/discovery-chart/templates/redis-service.yaml +++ b/deployment/k8s/bootnode-chart/templates/redis-service.yaml @@ -2,11 +2,11 @@ apiVersion: v1 kind: Service metadata: - name: {{ include "discovery.fullname" . }}-redis - namespace: {{ include "discovery.namespace" . }} + name: {{ include "bootnode.fullname" . }}-redis + namespace: {{ include "bootnode.namespace" . }} spec: selector: - app: {{ include "discovery.fullname" . 
}}-redis + app: {{ include "bootnode.fullname" . }}-redis ports: - port: 6379 targetPort: 6379 diff --git a/deployment/k8s/discovery-chart/templates/secret.yaml b/deployment/k8s/bootnode-chart/templates/secret.yaml similarity index 68% rename from deployment/k8s/discovery-chart/templates/secret.yaml rename to deployment/k8s/bootnode-chart/templates/secret.yaml index 79fe50b0..1a497731 100644 --- a/deployment/k8s/discovery-chart/templates/secret.yaml +++ b/deployment/k8s/bootnode-chart/templates/secret.yaml @@ -2,8 +2,8 @@ apiVersion: v1 kind: Secret metadata: - name: {{ include "discovery.fullname" . }}-secrets - namespace: {{ include "discovery.namespace" . }} + name: {{ include "bootnode.fullname" . }}-secrets + namespace: {{ include "bootnode.namespace" . }} type: Opaque data: {{- range $key, $value := .Values.secrets }} diff --git a/deployment/k8s/discovery-chart/values.example.yaml b/deployment/k8s/bootnode-chart/values.example.yaml similarity index 77% rename from deployment/k8s/discovery-chart/values.example.yaml rename to deployment/k8s/bootnode-chart/values.example.yaml index 2b23f986..2e72c88f 100644 --- a/deployment/k8s/discovery-chart/values.example.yaml +++ b/deployment/k8s/bootnode-chart/values.example.yaml @@ -1,6 +1,6 @@ -# fullnameOverride: discovery-2 +# fullnameOverride: bootnode-2 namespace: example-namespace -image: ghcr.io/primeintellect-ai/protocol/discovery:v0.3.1-beta.5 +image: ghcr.io/primeintellect-ai/protocol/bootnode:v0.3.1-beta.5 port: 8089 redis: @@ -23,7 +23,7 @@ api: enabled: false env: - REDIS_URL: "redis://discovery-redis:6379" + REDIS_URL: "redis://bootnode-redis:6379" RPC_URL: "http://reth:8545" LOG_LEVEL: "info" LOCATION_SERVICE_URL: "https://ipapi.co" @@ -32,11 +32,11 @@ env: envFromSecret: - name: PLATFORM_API_KEY secretKeyRef: - name: discovery-secret + name: bootnode-secret key: platformApiKey - name: LOCATION_SERVICE_API_KEY secretKeyRef: - name: discovery-secret + name: bootnode-secret key: locationServiceApiKey # TODO: 
fill in the secrets diff --git a/deployment/k8s/discovery-chart/Chart.yaml b/deployment/k8s/discovery-chart/Chart.yaml deleted file mode 100644 index 7f265073..00000000 --- a/deployment/k8s/discovery-chart/Chart.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: v2 -name: discovery -description: A Helm chart for Prime Discovery -version: 0.1.0 diff --git a/deployment/k8s/discovery-chart/templates/api-service.yaml b/deployment/k8s/discovery-chart/templates/api-service.yaml deleted file mode 100644 index b6f5ba5c..00000000 --- a/deployment/k8s/discovery-chart/templates/api-service.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ include "discovery.fullname" . }} - namespace: {{ include "discovery.namespace" . }} - labels: - app: {{ include "discovery.fullname" . }}-api -spec: - selector: - app: {{ include "discovery.fullname" . }}-api - ports: - - port: {{ .Values.port }} - targetPort: {{ .Values.port }} - type: ClusterIP ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ include "discovery.fullname" . }}-api - namespace: {{ include "discovery.namespace" . }} - labels: - app: {{ include "discovery.fullname" . }}-api - annotations: - cloud.google.com/backend-config: '{"default": "{{ include "discovery.fullname" . }}-api-backend-config"}' -spec: - selector: - app: {{ include "discovery.fullname" . }}-api - ports: - - port: {{ .Values.port }} - targetPort: {{ .Values.port }} - type: ClusterIP diff --git a/deployment/k8s/discovery-chart/templates/processor-deployment.yaml b/deployment/k8s/discovery-chart/templates/processor-deployment.yaml deleted file mode 100644 index 6e850c00..00000000 --- a/deployment/k8s/discovery-chart/templates/processor-deployment.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "discovery.fullname" . }}-processor - namespace: {{ include "discovery.namespace" . 
}} -spec: - replicas: 1 - selector: - matchLabels: - app: {{ include "discovery.fullname" . }}-processor - template: - metadata: - labels: - app: {{ include "discovery.fullname" . }}-processor - spec: - containers: {{- include "discovery.container" (dict "root" . "mode" "processor") | nindent 6 }} diff --git a/deployment/k8s/discovery-chart/templates/processor-service.yaml b/deployment/k8s/discovery-chart/templates/processor-service.yaml deleted file mode 100644 index 5f6f7407..00000000 --- a/deployment/k8s/discovery-chart/templates/processor-service.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{ include "discovery.fullname" . }}-processor - namespace: {{ include "discovery.namespace" . }} - labels: - app: {{ include "discovery.fullname" . }}-processor - annotations: - cloud.google.com/backend-config: '{"default": "{{ include "discovery.fullname" . }}-processor-backend-config"}' -spec: - selector: - app: {{ include "discovery.fullname" . }}-processor - ports: - - port: {{ .Values.port }} - targetPort: {{ .Values.port }} - type: ClusterIP diff --git a/deployment/k8s/orchestrator-chart/values.example.yaml b/deployment/k8s/orchestrator-chart/values.example.yaml index 96411011..2d051e03 100644 --- a/deployment/k8s/orchestrator-chart/values.example.yaml +++ b/deployment/k8s/orchestrator-chart/values.example.yaml @@ -27,7 +27,7 @@ api: env: RPC_URL: "http://your-rpc-endpoint:8545" - DISCOVERY_URLS: "http://your-discovery-service:8089" + BOOTNODES: "/ip4/127.0.0.1/tcp/4005/p2p/12D3KooWJj3haDEzxGSbGSAvXCiE9pDYC9xHDdtQe8B2donhfwXL" DISCOVERY_REFRESH_INTERVAL: "60" BUCKET_NAME: "your-bucket-name" LOG_LEVEL: "info" diff --git a/deployment/k8s/validator-chart/values.example.yaml b/deployment/k8s/validator-chart/values.example.yaml index 3fa41e5f..df476ff2 100644 --- a/deployment/k8s/validator-chart/values.example.yaml +++ b/deployment/k8s/validator-chart/values.example.yaml @@ -5,7 +5,7 @@ port: 9879 env: RPC_URL: 
"http://anvil.example-namespace:8545" - DISCOVERY_URLS: "http://discovery.example-namespace:8089" + BOOTNODES: "/ip4/127.0.0.1/tcp/4005/p2p/12D3KooWJj3haDEzxGSbGSAvXCiE9pDYC9xHDdtQe8B2donhfwXL" BUCKET_NAME: "example-development-bucket" LOG_LEVEL: "info" TOPLOC_GRACE_INTERVAL: "1" diff --git a/docker-compose.yml b/docker-compose.yml index 7230b1cc..f4ec3ac1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -67,16 +67,13 @@ services: timeout: 3s retries: 5 - discovery: - image: ghcr.io/primeintellect-ai/protocol/discovery:dev + bootnode: + image: ghcr.io/primeintellect-ai/protocol/bootnode:dev platform: linux/amd64 ports: - - "8089:8089" + - "4005:4005" environment: - - RPC_URL=http://reth:8545 - - REDIS_URL=redis://redis:6379 - - PLATFORM_API_KEY=prime - - MAX_NODES_PER_IP=3 + - LIBP2P_PRIVATE_KEY="d0884c9823a0a2c846dbf5e71853bc5f80b2ec5d2de46532cdbe8ab46f020836" networks: - prime depends_on: @@ -95,7 +92,6 @@ services: environment: - RPC_URL=http://reth:8545 - REDIS_STORE_URL=redis://redis:6379 - - DISCOVERY_URLS=http://discovery:8089 - URL=http://localhost:8090 - COORDINATOR_KEY=${POOL_OWNER_PRIVATE_KEY} - COMPUTE_POOL_ID=${WORKER_COMPUTE_POOL_ID} @@ -103,6 +99,7 @@ services: - PORT=8090 - DISCOVERY_REFRESH_INTERVAL=10 - S3_CREDENTIALS=${S3_CREDENTIALS} + - BOOTNODES="/ip4/127.0.0.1/tcp/4005/p2p/12D3KooWJj3haDEzxGSbGSAvXCiE9pDYC9xHDdtQe8B2donhfwXL" networks: - prime depends_on: @@ -110,7 +107,7 @@ services: condition: service_healthy redis: condition: service_healthy - discovery: + bootnode: condition: service_started validator: @@ -124,7 +121,6 @@ services: - prime environment: - RPC_URL=http://reth:8545 - - DISCOVERY_URLS=http://discovery:8089 - VALIDATOR_KEY=${PRIVATE_KEY_VALIDATOR} - WORK_VALIDATION_CONTRACT=${WORK_VALIDATION_CONTRACT} - POOL_ID=${WORKER_COMPUTE_POOL_ID} @@ -133,10 +129,11 @@ services: - TOPLOC_AUTH_TOKEN=${TOPLOC_AUTH_TOKEN} - TOPLOC_GRACE_INTERVAL=15 - REDIS_URL=redis://redis:6379 + - 
BOOTNODES="/ip4/127.0.0.1/tcp/4005/p2p/12D3KooWJj3haDEzxGSbGSAvXCiE9pDYC9xHDdtQe8B2donhfwXL" depends_on: reth: condition: service_healthy redis: condition: service_healthy - discovery: - condition: service_started \ No newline at end of file + bootnode: + condition: service_started diff --git a/docs/development-setup.md b/docs/development-setup.md index 0f13f41d..4244dbb7 100644 --- a/docs/development-setup.md +++ b/docs/development-setup.md @@ -93,7 +93,7 @@ make up This will start: - Local blockchain node -- Discovery service +- Bootnode - Validator node - Orchestrator service - Redis instance @@ -141,7 +141,7 @@ make down ``` ## Docker Compose Setup -You can run all supporting services (chain, validator, discovery, orchestrator) (if you only want to work on the worker sw) in docker compose. +You can run all supporting services (chain, validator, bootnode, orchestrator) (if you only want to work on the worker sw) in docker compose. 1. Start docker compose: ```bash From 7a8b8341530a9385190517b116076597f29c584c Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 16 Jul 2025 16:30:53 -0400 Subject: [PATCH 10/14] cleanup, add dockerfile to bootnode --- crates/bootnode/Dockerfile | 20 ++++++++++++++++++++ crates/orchestrator/src/discovery/monitor.rs | 6 ++++-- crates/orchestrator/src/main.rs | 4 ++-- crates/p2p/src/behaviour.rs | 4 +--- crates/p2p/src/lib.rs | 2 +- crates/shared/src/models/node.rs | 6 +++--- crates/validator/src/main.rs | 3 +-- crates/worker/src/cli/command.rs | 6 ++---- crates/worker/src/lib.rs | 1 - crates/worker/src/p2p/mod.rs | 6 +++--- 10 files changed, 37 insertions(+), 21 deletions(-) create mode 100644 crates/bootnode/Dockerfile diff --git a/crates/bootnode/Dockerfile b/crates/bootnode/Dockerfile new file mode 100644 index 00000000..72a1e044 --- /dev/null +++ b/crates/bootnode/Dockerfile @@ -0,0 +1,20 @@ +FROM ubuntu:22.04 + +RUN apt-get update && apt-get install -y ca-certificates && rm -rf /var/lib/apt/lists/* +COPY 
release-artifacts/bootnode-linux-x86_64 /usr/local/bin/bootnode +RUN chmod +x /usr/local/bin/bootnode + +ENV LIBP2P_PRIVATE_KEY="" +ENV LIBP2P_PORT="4005" +ENV LOG_LEVEL="info" + +RUN echo '#!/bin/sh\n\ +exec /usr/local/bin/bootnode \ +--libp2p-private-key "$LIBP2P_PRIVATE_KEY" \ +--libp2p-port "$LIBP2P_PORT" \ +--log-level "$LOG_LEVEL" \ +"$@"' > /entrypoint.sh && \ +chmod +x /entrypoint.sh + +EXPOSE 4005 +ENTRYPOINT ["/entrypoint.sh"] \ No newline at end of file diff --git a/crates/orchestrator/src/discovery/monitor.rs b/crates/orchestrator/src/discovery/monitor.rs index 9909a639..4d2a0296 100644 --- a/crates/orchestrator/src/discovery/monitor.rs +++ b/crates/orchestrator/src/discovery/monitor.rs @@ -27,7 +27,10 @@ impl NodeFetcher { use futures::stream::FuturesUnordered; use futures::StreamExt as _; - // TODO: this actually needs to fetch for compute pool ID only (`self.compute_pool_id`) + // TODO: this function fetches all worker nodes from the dht; however, + // we only care about workers with the same compute pool ID. + // this can be improved by having workers also advertise their compute pool ID + // when they join one, and then only performing a DHT query for that pool ID. let nodes = get_worker_nodes_from_dht(self.kademlia_action_tx.clone()) .await .context("failed to get worker nodes from DHT")?; @@ -138,7 +141,6 @@ impl Updater { let node_address = node.node().id.parse::

<Address>()?; // Check if there's any healthy node with the same peer ID - // TODO: can this case still happen? i think so if there's stale provider records in the dht let healthy_nodes_with_same_peer_id = self .count_healthy_nodes_with_same_peer_id( node_address, diff --git a/crates/orchestrator/src/main.rs b/crates/orchestrator/src/main.rs index 3b7e0ddf..3c0e654f 100644 --- a/crates/orchestrator/src/main.rs +++ b/crates/orchestrator/src/main.rs @@ -339,7 +339,7 @@ async fn main() -> Result<()> { ); tasks.spawn( - // TODO: refactor task handling + // TODO: refactor task handling (https://github.com/PrimeIntellect-ai/protocol/issues/627) monitor.run().map(|_| Ok(())), ); @@ -457,7 +457,7 @@ async fn main() -> Result<()> { } } - // TODO: use cancellation token to gracefully shutdown tasks + // TODO: use cancellation token to gracefully shutdown tasks (https://github.com/PrimeIntellect-ai/protocol/issues/627) cancellation_token.cancel(); tasks.shutdown().await; Ok(()) diff --git a/crates/p2p/src/behaviour.rs b/crates/p2p/src/behaviour.rs index 075dd935..26d3dd40 100644 --- a/crates/p2p/src/behaviour.rs +++ b/crates/p2p/src/behaviour.rs @@ -124,7 +124,7 @@ impl Behaviour { .context("failed to create mDNS behaviour")?; let mut kad_config = kad::Config::new(kad::PROTOCOL_NAME); // TODO: by default this is 20, however on a local test network we won't have 20 nodes. - // make this configurable? + // should make this configurable.
kad_config .set_replication_factor(1usize.try_into().expect("can convert 1 to NonZeroUsize")); kad_config.set_provider_publication_interval(Some(Duration::from_secs(30))); @@ -188,8 +188,6 @@ impl BehaviourEvent { } => { debug!("kademlia routing updated for peer {peer:?} with addresses {addresses:?}"); } - // TODO: also handle InboundRequest::AddProvider and InboundRequest::PutRecord, - // as these are new workers joining the network kad::Event::OutboundQueryProgressed { id, result, diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index ad5a2503..69115c69 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -445,7 +445,7 @@ mod test { tokio::spawn(async move { node1.run().await }); tokio::spawn(async move { node2.run().await }); - // TODO: implement a way to get peer count + // TODO: implement a way to get peer count (https://github.com/PrimeIntellect-ai/protocol/issues/628) tokio::time::sleep(std::time::Duration::from_secs(2)).await; // send request from node1->node2 diff --git a/crates/shared/src/models/node.rs b/crates/shared/src/models/node.rs index 142399bf..5f070305 100644 --- a/crates/shared/src/models/node.rs +++ b/crates/shared/src/models/node.rs @@ -630,9 +630,9 @@ impl NodeWithMetadata { is_provider_whitelisted, is_blacklisted, latest_balance, - last_updated: None, // TODO - created_at: None, // TODO - location: None, // TODO + last_updated: None, + created_at: None, + location: None, }) } diff --git a/crates/validator/src/main.rs b/crates/validator/src/main.rs index 6f1704f3..6c63e72a 100644 --- a/crates/validator/src/main.rs +++ b/crates/validator/src/main.rs @@ -363,8 +363,7 @@ async fn main() -> anyhow::Result<()> { } cancellation_token.cancel(); - // TODO: handle spawn handles here - + // TODO: handle spawn handles here (https://github.com/PrimeIntellect-ai/protocol/issues/627) Ok(()) } diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index b5902be3..5569f6b7 100644 --- 
a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -9,8 +9,6 @@ use crate::metrics::store::MetricsStore; use crate::operations::compute_node::ComputeNodeOperations; use crate::operations::heartbeat::service::HeartbeatService; use crate::operations::provider::ProviderOperations; -// use crate::services::discovery::DiscoveryService; -// use crate::services::discovery_updater::DiscoveryUpdater; use crate::state::system_state::SystemState; use crate::TaskHandles; use alloy::primitives::utils::format_ether; @@ -777,8 +775,8 @@ pub async fn execute_command( Console::success(&format!("P2P service started with ID: {peer_id}")); - // TODO: sleep so that dht is bootstrapped before publishing; - // should update p2p service to expose this. + // sleep so that dht is bootstrapped before publishing. + // TODO: should update p2p service to expose this better (https://github.com/PrimeIntellect-ai/protocol/issues/628) tokio::time::sleep(Duration::from_secs(1)).await; let record_key = p2p::worker_dht_key_with_peer_id(&peer_id); diff --git a/crates/worker/src/lib.rs b/crates/worker/src/lib.rs index 91d08e91..1752bd8e 100644 --- a/crates/worker/src/lib.rs +++ b/crates/worker/src/lib.rs @@ -5,7 +5,6 @@ mod docker; mod metrics; mod operations; mod p2p; -// mod services; mod state; mod utils; diff --git a/crates/worker/src/p2p/mod.rs b/crates/worker/src/p2p/mod.rs index d2e1ea03..be1bd03b 100644 --- a/crates/worker/src/p2p/mod.rs +++ b/crates/worker/src/p2p/mod.rs @@ -112,7 +112,7 @@ impl Service { } } -// TODO: refactor all these channels into a `P2PHandle` struct or similar +// TODO: refactor all these channels into a `P2PHandle` struct or similar (https://github.com/PrimeIntellect-ai/protocol/issues/628) #[allow(clippy::type_complexity)] fn build_p2p_node( keypair: p2p::Keypair, @@ -267,7 +267,7 @@ async fn handle_incoming_request( handle_restart_request(from, &context).await } p2p::Request::General(_) => { - todo!() + unimplemented!("no services use the
`General` protocol yet") } }; @@ -407,7 +407,7 @@ fn handle_incoming_response(response: p2p::Response) { tracing::error!("worker should never receive Restart responses"); } p2p::Response::General(_) => { - todo!() + tracing::error!("worker should never receive General responses"); } } } From 4facf43072bd9c7a120f9d04d4978c7560eb96af Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 16 Jul 2025 22:35:19 -0400 Subject: [PATCH 11/14] add location enrichment to orchestrator --- crates/bootnode/Cargo.toml | 2 +- crates/orchestrator/Cargo.toml | 2 +- .../orchestrator/src/api/routes/heartbeat.rs | 2 +- crates/orchestrator/src/api/routes/task.rs | 4 +- .../src/discovery/location_service.rs | 67 ++++++ crates/orchestrator/src/discovery/mod.rs | 1 + crates/orchestrator/src/discovery/monitor.rs | 197 +++++++++++++++--- crates/orchestrator/src/main.rs | 23 +- .../src/plugins/node_groups/tests.rs | 22 +- crates/orchestrator/src/scheduler/mod.rs | 4 +- crates/orchestrator/src/status_update/mod.rs | 34 +-- crates/orchestrator/src/store/core/redis.rs | 4 +- .../src/store/domains/heartbeat_store.rs | 2 +- .../src/store/domains/metrics_store.rs | 2 +- .../src/store/domains/node_store.rs | 1 + crates/orchestrator/src/store/mod.rs | 2 + crates/p2p/src/lib.rs | 9 +- crates/shared/src/models/metric.rs | 2 +- .../src/security/auth_signature_middleware.rs | 8 +- crates/shared/src/security/request_signer.rs | 2 +- crates/shared/src/utils/google_cloud.rs | 6 +- crates/shared/src/utils/mod.rs | 2 +- crates/validator/src/store/redis.rs | 4 +- .../synthetic_data/chain_operations.rs | 4 +- .../src/validators/synthetic_data/mod.rs | 15 +- .../validators/synthetic_data/tests/mod.rs | 62 +++--- .../src/validators/synthetic_data/toploc.rs | 5 +- crates/worker/Cargo.toml | 2 +- .../src/checks/hardware/interconnect.rs | 2 +- crates/worker/src/checks/stun.rs | 2 +- crates/worker/src/cli/command.rs | 2 +- crates/worker/src/docker/taskbridge/bridge.rs | 7 +- crates/worker/src/services/discovery.rs | 
113 ---------- .../worker/src/services/discovery_updater.rs | 89 -------- crates/worker/src/services/mod.rs | 2 - 35 files changed, 366 insertions(+), 341 deletions(-) create mode 100644 crates/orchestrator/src/discovery/location_service.rs delete mode 100644 crates/worker/src/services/discovery.rs delete mode 100644 crates/worker/src/services/discovery_updater.rs delete mode 100644 crates/worker/src/services/mod.rs diff --git a/crates/bootnode/Cargo.toml b/crates/bootnode/Cargo.toml index 14095161..ed5b84d3 100644 --- a/crates/bootnode/Cargo.toml +++ b/crates/bootnode/Cargo.toml @@ -9,7 +9,7 @@ p2p = {workspace = true} anyhow = {workspace = true} clap = { workspace = true } hex = { workspace = true } -tokio = { workspace = true } +tokio = { workspace = true, features = [ "rt-multi-thread" ] } tokio-util = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter"] } diff --git a/crates/orchestrator/Cargo.toml b/crates/orchestrator/Cargo.toml index b0d7da88..233988c3 100644 --- a/crates/orchestrator/Cargo.toml +++ b/crates/orchestrator/Cargo.toml @@ -23,7 +23,7 @@ redis = { workspace = true, features = ["tokio-comp"] } reqwest = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } -tokio = { workspace = true } +tokio = { workspace = true, features = [ "rt-multi-thread" ] } tokio-util = { workspace = true } url = { workspace = true } uuid = { workspace = true } diff --git a/crates/orchestrator/src/api/routes/heartbeat.rs b/crates/orchestrator/src/api/routes/heartbeat.rs index a8110e61..4d6261f9 100644 --- a/crates/orchestrator/src/api/routes/heartbeat.rs +++ b/crates/orchestrator/src/api/routes/heartbeat.rs @@ -404,7 +404,7 @@ mod tests { let task = match task.try_into() { Ok(task) => task, - Err(e) => panic!("Failed to convert TaskRequest to Task: {}", e), + Err(e) => panic!("Failed to convert TaskRequest to Task: {e}"), }; let _ = 
app_state.store_context.task_store.add_task(task).await; diff --git a/crates/orchestrator/src/api/routes/task.rs b/crates/orchestrator/src/api/routes/task.rs index 7cff4b6d..fa167dc7 100644 --- a/crates/orchestrator/src/api/routes/task.rs +++ b/crates/orchestrator/src/api/routes/task.rs @@ -315,8 +315,8 @@ mod tests { // Add tasks in sequence with delays for i in 1..=3 { let task: Task = TaskRequest { - image: format!("test{}", i), - name: format!("test{}", i), + image: format!("test{i}"), + name: format!("test{i}"), ..Default::default() } .try_into() diff --git a/crates/orchestrator/src/discovery/location_service.rs b/crates/orchestrator/src/discovery/location_service.rs new file mode 100644 index 00000000..b08d0a50 --- /dev/null +++ b/crates/orchestrator/src/discovery/location_service.rs @@ -0,0 +1,67 @@ +use anyhow::{Context as _, Result}; +use reqwest::Client; +use serde::{Deserialize, Serialize}; +use shared::models::node::NodeLocation; +use std::time::Duration; + +#[derive(Debug, Deserialize, Serialize)] +struct IpApiResponse { + ip: String, + city: Option, + region: Option, + country: Option, + #[serde(default)] + latitude: f64, + #[serde(default)] + longitude: f64, +} + +#[derive(Debug, Clone)] +pub(crate) struct LocationService { + client: Client, + base_url: String, + api_key: String, +} + +impl LocationService { + pub(crate) fn new(base_url: String, api_key: Option) -> Result { + let api_key = api_key.unwrap_or_default(); + let client = Client::builder() + .timeout(Duration::from_secs(5)) + .build() + .context("failed to build HTTP client")?; + + Ok(Self { + client, + base_url, + api_key, + }) + } + + pub(crate) async fn get_location(&self, ip_address: &str) -> Result> { + let url = format!( + "{}/{}/json/?key={}", + self.base_url, ip_address, self.api_key + ); + + let response = self + .client + .get(&url) + .send() + .await + .context("Failed to send request to location service")?; + + let api_response: IpApiResponse = response + .json() + .await + 
.context("Failed to parse location service response")?; + + Ok(Some(NodeLocation { + latitude: api_response.latitude, + longitude: api_response.longitude, + city: api_response.city, + region: api_response.region, + country: api_response.country, + })) + } +} diff --git a/crates/orchestrator/src/discovery/mod.rs b/crates/orchestrator/src/discovery/mod.rs index 108b58aa..e2897771 100644 --- a/crates/orchestrator/src/discovery/mod.rs +++ b/crates/orchestrator/src/discovery/mod.rs @@ -1 +1,2 @@ +pub(crate) mod location_service; pub(crate) mod monitor; diff --git a/crates/orchestrator/src/discovery/monitor.rs b/crates/orchestrator/src/discovery/monitor.rs index 4d2a0296..02d66eba 100644 --- a/crates/orchestrator/src/discovery/monitor.rs +++ b/crates/orchestrator/src/discovery/monitor.rs @@ -1,3 +1,4 @@ +use crate::discovery::location_service::LocationService; use crate::models::node::NodeStatus; use crate::models::node::OrchestratorNode; use crate::plugins::StatusUpdatePlugin; @@ -7,14 +8,17 @@ use alloy::primitives::Address; use alloy::primitives::U256; use anyhow::{bail, Context as _, Error, Result}; use chrono::Utc; -use log::{error, info}; +use futures::stream::FuturesUnordered; +use log::{error, info, warn}; use shared::models::node::NodeWithMetadata; use shared::p2p::get_worker_nodes_from_dht; +use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; use tokio::sync::mpsc::Sender; use tokio::time::interval; +#[derive(Clone)] struct NodeFetcher { compute_pool_id: u32, kademlia_action_tx: Sender, @@ -70,18 +74,11 @@ impl NodeFetcher { return Ok(vec![]); } - // for node in &nodes { - // if let Err(e) = self.perform_node_updates(node).await { - // error!( - // "failed to perform update for node with id {}: {e}", - // node.node().id - // ); - // } - // } - Ok(nodes) } } + +#[derive(Clone)] struct Updater { store_context: Arc, status_change_handlers: Vec, @@ -320,6 +317,7 @@ pub struct DiscoveryMonitor { heartbeats: Arc, updater: Updater, 
node_fetcher: NodeFetcher, + location_service: Option, } impl DiscoveryMonitor { @@ -333,8 +331,20 @@ impl DiscoveryMonitor { kademlia_action_tx: Sender, provider: alloy::providers::RootProvider, contracts: shared::web3::Contracts, - ) -> Self { - Self { + location_service_url: Option, + location_service_api_key: Option, + ) -> Result { + let location_service = if let Some(location_service_url) = location_service_url { + Some( + LocationService::new(location_service_url, location_service_api_key) + .context("failed to create location service")?, + ) + } else { + info!("Location service is disabled, skipping node enrichment"); + None + }; + + Ok(Self { interval_s, heartbeats, updater: Updater { @@ -347,41 +357,178 @@ impl DiscoveryMonitor { provider, contracts, }, - } + location_service, + }) } pub async fn run(self) { + use futures::StreamExt as _; + let Self { interval_s, heartbeats, updater, node_fetcher, + location_service, } = self; let mut interval = interval(Duration::from_secs(interval_s)); + let mut get_nodes_futures = FuturesUnordered::new(); + let mut node_update_futures = FuturesUnordered::new(); + let mut location_futures = FuturesUnordered::new(); + loop { - interval.tick().await; - match node_fetcher.get_nodes().await { - Ok(nodes) => { - for node in &nodes { - if let Err(e) = updater.perform_node_updates(node).await { - error!( - "failed to perform update for node with id {}: {e}", - node.node().id - ); + tokio::select! 
{ + _ = interval.tick() => { + let node_fetcher = node_fetcher.clone(); + get_nodes_futures.push(tokio::task::spawn(async move {node_fetcher.get_nodes().await})); + if let Some(location_service) = &location_service { + info!("Enriching nodes without location data"); + location_futures.push(tokio::task::spawn(enrich_nodes_without_location( + updater.store_context.node_store.clone(), + location_service.clone(), + ))); + heartbeats.update_monitor(); + } + } + Some(res) = get_nodes_futures.next() => { + match res { + Ok(Ok(nodes)) => { + if nodes.is_empty() { + info!("No nodes found in discovery"); + continue; + } + + for node in nodes { + let updater = updater.clone(); + node_update_futures.push( + tokio::task::spawn(async move { + updater.perform_node_updates(&node).await + }) + ); + } + } + Ok(Err(e)) => { + error!("Error fetching nodes from discovery: {e}"); + } + Err(e) => { + error!("Task failed while fetching nodes: {e}"); + } + } + } + Some(res) = node_update_futures.next() => { + match res { + Ok(Ok(())) => { + info!("Successfully updated nodes from discovery"); + } + Ok(Err(e)) => { + error!("Error updating nodes from discovery: {e}"); + } + Err(e) => { + error!("Task failed while updating nodes: {e}"); } } + } + Some(res) = location_futures.next() => { + match res { + Ok(Ok(())) => { + info!("Successfully enriched nodes without location data"); + } + Ok(Err(e)) => { + error!("Error enriching nodes without location data: {e}"); + } + Err(e) => { + error!("Task failed while enriching nodes: {e}"); + } + } + } + } + } + } +} + +use crate::store::NodeStore; + +async fn enrich_nodes_without_location( + node_store: Arc, + location_service: LocationService, +) -> Result<()> { + const BATCH_SIZE: usize = 10; + const MAX_RETRIES: u32 = 3; + + let nodes = node_store + .get_nodes() + .await + .context("failed to get nodes from store")?; + let nodes_without_location: Vec<_> = nodes + .into_iter() + .filter(|node| node.location.is_none()) + .collect(); + + if 
nodes_without_location.is_empty() { + return Ok(()); + } + + info!( + "Found {} nodes without location data", + nodes_without_location.len() + ); + + // Process in batches to respect rate limits + let mut retry_count: HashMap = HashMap::new(); + for chunk in nodes_without_location.chunks(BATCH_SIZE) { + for node in chunk { + let retries = retry_count.get(&node.address).unwrap_or(&0); + if *retries >= MAX_RETRIES { + continue; // Skip nodes that have exceeded retry limit + } + + match location_service.get_location(&node.ip_address).await { + Ok(Some(location)) => { + info!( + "Successfully fetched location for node {}: {:?}", + node.address, location + ); - info!("Successfully synced {} nodes from discovery", nodes.len()); + if let Err(e) = node_store + .update_node_location(&node.address, &location) + .await + { + error!( + "Failed to update node {} with location: {}", + node.address, e + ); + } + } + Ok(None) => { + // Location service is disabled + break; } Err(e) => { - error!("Error syncing nodes from discovery: {e}"); + warn!( + "Failed to fetch location for node {} (attempt {}/{}): {}", + node.address, + retries + 1, + MAX_RETRIES, + e + ); + + // Increment retry counter + let retries = retry_count.entry(node.address).or_insert(0); + *retries += 1; } } - heartbeats.update_monitor(); + + // Rate limiting - wait between requests + tokio::time::sleep(Duration::from_millis(100)).await; } + + // Longer wait between batches + tokio::time::sleep(Duration::from_secs(1)).await; } + + Ok(()) } #[cfg(test)] diff --git a/crates/orchestrator/src/main.rs b/crates/orchestrator/src/main.rs index 3c0e654f..4ea7a1bd 100644 --- a/crates/orchestrator/src/main.rs +++ b/crates/orchestrator/src/main.rs @@ -99,6 +99,14 @@ struct Args { /// Example: `/ip4/104.131.131.82/tcp/4001/p2p/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ,/ip4/104.131.131.82/udp/4001/quic-v1/p2p/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ` #[arg(long, default_value = "")] bootnodes: String, + + /// Location 
service URL (e.g., https://ipapi.co). If not provided, location services are disabled. + #[arg(long)] + location_service_url: Option, + + /// Location service API key + #[arg(long)] + location_service_api_key: Option, } #[tokio::main] @@ -327,7 +335,7 @@ async fn main() -> Result<()> { let discovery_store_context = store_context.clone(); let discovery_heartbeats = heartbeats.clone(); - let monitor = orchestrator::DiscoveryMonitor::new( + let monitor = match orchestrator::DiscoveryMonitor::new( compute_pool_id, args.discovery_refresh_interval, discovery_store_context.clone(), @@ -336,7 +344,18 @@ async fn main() -> Result<()> { kademlia_action_tx, wallet.provider().root().clone(), contracts.clone(), - ); + args.location_service_url, + args.location_service_api_key, + ) { + Ok(monitor) => { + info!("Discovery monitor initialized successfully"); + monitor + } + Err(e) => { + error!("Failed to initialize discovery monitor: {e}"); + std::process::exit(1); + } + }; tasks.spawn( // TODO: refactor task handling (https://github.com/PrimeIntellect-ai/protocol/issues/627) diff --git a/crates/orchestrator/src/plugins/node_groups/tests.rs b/crates/orchestrator/src/plugins/node_groups/tests.rs index 89902066..0f3e8433 100644 --- a/crates/orchestrator/src/plugins/node_groups/tests.rs +++ b/crates/orchestrator/src/plugins/node_groups/tests.rs @@ -277,7 +277,7 @@ async fn test_group_formation_with_multiple_configs() { let mut conn = plugin.store.client.get_connection().unwrap(); let groups: Vec = conn - .keys(format!("{}*", GROUP_KEY_PREFIX).as_str()) + .keys(format!("{GROUP_KEY_PREFIX}*").as_str()) .unwrap(); assert_eq!(groups.len(), 2); @@ -1102,7 +1102,7 @@ async fn test_node_cannot_be_in_multiple_groups() { ); // Get all group keys - let group_keys: Vec = conn.keys(format!("{}*", GROUP_KEY_PREFIX)).unwrap(); + let group_keys: Vec = conn.keys(format!("{GROUP_KEY_PREFIX}*")).unwrap(); let group_copy = group_keys.clone(); // There should be exactly one group @@ -1167,7 +1167,7 @@ 
async fn test_node_cannot_be_in_multiple_groups() { let _ = plugin.try_form_new_groups().await; // Get updated group keys - let group_keys: Vec = conn.keys(format!("{}*", GROUP_KEY_PREFIX)).unwrap(); + let group_keys: Vec = conn.keys(format!("{GROUP_KEY_PREFIX}*")).unwrap(); // There should now be exactly two groups assert_eq!( @@ -1544,7 +1544,7 @@ async fn test_task_observer() { let _ = store_context.task_store.add_task(task2.clone()).await; let _ = plugin.try_form_new_groups().await; let all_tasks = store_context.task_store.get_all_tasks().await.unwrap(); - println!("All tasks: {:?}", all_tasks); + println!("All tasks: {all_tasks:?}"); assert_eq!(all_tasks.len(), 2); assert!(all_tasks[0].id != all_tasks[1].id); let topologies = get_task_topologies(&task).unwrap(); @@ -1588,7 +1588,7 @@ async fn test_task_observer() { .unwrap(); assert!(group_3.is_some()); let all_tasks = store_context.task_store.get_all_tasks().await.unwrap(); - println!("All tasks: {:?}", all_tasks); + println!("All tasks: {all_tasks:?}"); assert_eq!(all_tasks.len(), 2); // Manually assign the first task to the group to test immediate dissolution let group_3_before = plugin @@ -1615,7 +1615,7 @@ async fn test_task_observer() { .get_node_group(&node_3.address.to_string()) .await .unwrap(); - println!("Group 3 after task deletion: {:?}", group_3); + println!("Group 3 after task deletion: {group_3:?}"); // With new behavior, group should be dissolved immediately when its assigned task is deleted assert!(group_3.is_none()); @@ -1833,7 +1833,7 @@ async fn test_group_formation_priority() { let nodes: Vec<_> = (1..=4) .map(|i| { create_test_node( - &format!("0x{}234567890123456789012345678901234567890", i), + &format!("0x{i}234567890123456789012345678901234567890"), NodeStatus::Healthy, None, ) @@ -1863,7 +1863,7 @@ async fn test_group_formation_priority() { // Verify: Should form one 3-node group + one 1-node group // NOT four 1-node groups let mut conn = 
plugin.store.client.get_connection().unwrap(); - let group_keys: Vec = conn.keys(format!("{}*", GROUP_KEY_PREFIX)).unwrap(); + let group_keys: Vec = conn.keys(format!("{GROUP_KEY_PREFIX}*")).unwrap(); assert_eq!(group_keys.len(), 2, "Should form exactly 2 groups"); // Check group compositions @@ -1944,7 +1944,7 @@ async fn test_multiple_groups_same_configuration() { let nodes: Vec<_> = (1..=6) .map(|i| { create_test_node( - &format!("0x{}234567890123456789012345678901234567890", i), + &format!("0x{i}234567890123456789012345678901234567890"), NodeStatus::Healthy, None, ) @@ -1958,7 +1958,7 @@ async fn test_multiple_groups_same_configuration() { // Verify: Should create 3 groups of 2 nodes each let mut conn = plugin.store.client.get_connection().unwrap(); - let group_keys: Vec = conn.keys(format!("{}*", GROUP_KEY_PREFIX)).unwrap(); + let group_keys: Vec = conn.keys(format!("{GROUP_KEY_PREFIX}*")).unwrap(); assert_eq!(group_keys.len(), 3, "Should form exactly 3 groups"); // Verify all groups have exactly 2 nodes and same configuration @@ -2663,7 +2663,7 @@ async fn test_no_merge_when_policy_disabled() { // Create 3 nodes let nodes: Vec<_> = (1..=3) - .map(|i| create_test_node(&format!("0x{:040x}", i), NodeStatus::Healthy, None)) + .map(|i| create_test_node(&format!("0x{i:040x}"), NodeStatus::Healthy, None)) .collect(); for node in &nodes { diff --git a/crates/orchestrator/src/scheduler/mod.rs b/crates/orchestrator/src/scheduler/mod.rs index 711f313f..d5ffa506 100644 --- a/crates/orchestrator/src/scheduler/mod.rs +++ b/crates/orchestrator/src/scheduler/mod.rs @@ -144,12 +144,12 @@ mod tests { ); assert_eq!( env_vars.get("NODE_VAR").unwrap(), - &format!("node-{}", node_address) + &format!("node-{node_address}") ); // Check cmd replacement let cmd = returned_task.cmd.unwrap(); assert_eq!(cmd[0], format!("--task={}", task.id)); - assert_eq!(cmd[1], format!("--node={}", node_address)); + assert_eq!(cmd[1], format!("--node={node_address}")); } } diff --git 
a/crates/orchestrator/src/status_update/mod.rs b/crates/orchestrator/src/status_update/mod.rs index b2738488..3100e34e 100644 --- a/crates/orchestrator/src/status_update/mod.rs +++ b/crates/orchestrator/src/status_update/mod.rs @@ -433,7 +433,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let heartbeat = HeartbeatRequest { address: node.address.to_string(), @@ -451,7 +451,7 @@ mod tests { .beat(&heartbeat) .await { - error!("Heartbeat Error: {}", e); + error!("Heartbeat Error: {e}"); } let _ = updater.process_nodes().await; @@ -510,7 +510,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let mode = ServerMode::Full; let updater = NodeStatusUpdater::new( @@ -563,7 +563,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let mode = ServerMode::Full; let updater = NodeStatusUpdater::new( @@ -623,7 +623,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } if let Err(e) = app_state .store_context @@ -631,7 +631,7 @@ mod tests { .set_unhealthy_counter(&node.address, 2) .await { - error!("Error setting unhealthy counter: {}", e); + error!("Error setting unhealthy counter: {e}"); } let mode = ServerMode::Full; @@ -687,7 +687,7 @@ mod tests { .set_unhealthy_counter(&node.address, 2) .await { - error!("Error setting unhealthy counter: {}", e); + error!("Error setting unhealthy counter: {e}"); }; let heartbeat = HeartbeatRequest { @@ -702,7 +702,7 @@ mod tests { .beat(&heartbeat) .await { - error!("Heartbeat Error: {}", e); + error!("Heartbeat Error: {e}"); } if let Err(e) = app_state .store_context @@ -710,7 +710,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let mode = ServerMode::Full; @@ -772,7 
+772,7 @@ mod tests { .set_unhealthy_counter(&node1.address, 1) .await { - error!("Error setting unhealthy counter: {}", e); + error!("Error setting unhealthy counter: {e}"); }; if let Err(e) = app_state .store_context @@ -780,7 +780,7 @@ mod tests { .add_node(node1.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let node2 = OrchestratorNode { @@ -797,7 +797,7 @@ mod tests { .add_node(node2.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let mode = ServerMode::Full; @@ -873,7 +873,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } if let Err(e) = app_state .store_context @@ -881,7 +881,7 @@ mod tests { .set_unhealthy_counter(&node.address, 2) .await { - error!("Error setting unhealthy counter: {}", e); + error!("Error setting unhealthy counter: {e}"); } let mode = ServerMode::Full; @@ -926,7 +926,7 @@ mod tests { .beat(&heartbeat) .await { - error!("Heartbeat Error: {}", e); + error!("Heartbeat Error: {e}"); } sleep(Duration::from_secs(5)).await; @@ -960,7 +960,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let mode = ServerMode::Full; let updater = NodeStatusUpdater::new( @@ -1029,7 +1029,7 @@ mod tests { .add_node(node.clone()) .await { - error!("Error adding node: {}", e); + error!("Error adding node: {e}"); } let mode = ServerMode::Full; let updater = NodeStatusUpdater::new( diff --git a/crates/orchestrator/src/store/core/redis.rs b/crates/orchestrator/src/store/core/redis.rs index 79f57ce8..3b524b33 100644 --- a/crates/orchestrator/src/store/core/redis.rs +++ b/crates/orchestrator/src/store/core/redis.rs @@ -45,8 +45,8 @@ impl RedisStore { _ => panic!("Expected TCP connection"), }; - let redis_url = format!("redis://{}:{}", host, port); - debug!("Starting test Redis server at {}", redis_url); + let redis_url = 
format!("redis://{host}:{port}"); + debug!("Starting test Redis server at {redis_url}"); // Add a small delay to ensure server is ready thread::sleep(Duration::from_millis(100)); diff --git a/crates/orchestrator/src/store/domains/heartbeat_store.rs b/crates/orchestrator/src/store/domains/heartbeat_store.rs index b2f8138a..8bb43374 100644 --- a/crates/orchestrator/src/store/domains/heartbeat_store.rs +++ b/crates/orchestrator/src/store/domains/heartbeat_store.rs @@ -80,7 +80,7 @@ impl HeartbeatStore { .get_multiplexed_async_connection() .await .map_err(|_| anyhow!("Failed to get connection"))?; - let key = format!("{}:{}", ORCHESTRATOR_UNHEALTHY_COUNTER_KEY, address); + let key = format!("{ORCHESTRATOR_UNHEALTHY_COUNTER_KEY}:{address}"); con.set(key, counter.to_string()) .await .map_err(|_| anyhow!("Failed to set value")) diff --git a/crates/orchestrator/src/store/domains/metrics_store.rs b/crates/orchestrator/src/store/domains/metrics_store.rs index 1a0d79ac..5520860a 100644 --- a/crates/orchestrator/src/store/domains/metrics_store.rs +++ b/crates/orchestrator/src/store/domains/metrics_store.rs @@ -145,7 +145,7 @@ impl MetricsStore { task_id: &str, ) -> Result> { let mut con = self.redis.client.get_multiplexed_async_connection().await?; - let pattern = format!("{}:*", ORCHESTRATOR_NODE_METRICS_STORE); + let pattern = format!("{ORCHESTRATOR_NODE_METRICS_STORE}:*"); // Scan all node keys let mut iter: redis::AsyncIter = con.scan_match(&pattern).await?; diff --git a/crates/orchestrator/src/store/domains/node_store.rs b/crates/orchestrator/src/store/domains/node_store.rs index 0874c64c..7d8fce41 100644 --- a/crates/orchestrator/src/store/domains/node_store.rs +++ b/crates/orchestrator/src/store/domains/node_store.rs @@ -23,6 +23,7 @@ impl NodeStore { pub fn new(redis: Arc) -> Self { Self { redis } } + // convert orchestrator node to redis hash fields fn node_to_hash_fields(node: &OrchestratorNode) -> Result> { let mut fields = vec![ diff --git 
a/crates/orchestrator/src/store/mod.rs b/crates/orchestrator/src/store/mod.rs index afee9f07..2654227b 100644 --- a/crates/orchestrator/src/store/mod.rs +++ b/crates/orchestrator/src/store/mod.rs @@ -1,2 +1,4 @@ pub(crate) mod core; mod domains; + +pub(crate) use domains::node_store::NodeStore; diff --git a/crates/p2p/src/lib.rs b/crates/p2p/src/lib.rs index 69115c69..056817af 100644 --- a/crates/p2p/src/lib.rs +++ b/crates/p2p/src/lib.rs @@ -531,12 +531,9 @@ mod test { match res { Ok(kad::QueryResult::GetProviders(res)) => { let ok = res.unwrap(); - match ok { - GetProvidersOk::FoundProviders { key, providers } => { - assert_eq!(key, test_key.clone().into()); - providers_set.insert(providers.iter().map(|p| p.to_string()).collect()); - } - _ => {} + if let GetProvidersOk::FoundProviders { key, providers } = ok { + assert_eq!(key, test_key.clone().into()); + providers_set.insert(providers.iter().map(|p| p.to_string()).collect()); } } Ok(_) => panic!("expected a QueryResult::GetProviders response"), diff --git a/crates/shared/src/models/metric.rs b/crates/shared/src/models/metric.rs index 47b27f24..b85c4926 100644 --- a/crates/shared/src/models/metric.rs +++ b/crates/shared/src/models/metric.rs @@ -58,7 +58,7 @@ mod tests { let invalid_values = vec![(f64::INFINITY, "infinite value"), (f64::NAN, "NaN value")]; for (value, case) in invalid_values { let entry = MetricEntry::new(key.clone(), value); - assert!(entry.is_err(), "Should fail for {}", case); + assert!(entry.is_err(), "Should fail for {case}"); } } diff --git a/crates/shared/src/security/auth_signature_middleware.rs b/crates/shared/src/security/auth_signature_middleware.rs index 1c4c1e10..427fceec 100644 --- a/crates/shared/src/security/auth_signature_middleware.rs +++ b/crates/shared/src/security/auth_signature_middleware.rs @@ -634,10 +634,10 @@ mod tests { .await; log::info!("Address: {}", wallet.wallet.default_signer().address()); - log::info!("Signature: {}", signature); - log::info!("Nonce: {}", nonce); 
+ log::info!("Signature: {signature}"); + log::info!("Nonce: {nonce}"); let req = test::TestRequest::get() - .uri(&format!("/test?nonce={}", nonce)) + .uri(&format!("/test?nonce={nonce}")) .insert_header(( "x-address", wallet.wallet.default_signer().address().to_string(), @@ -801,7 +801,7 @@ mod tests { // Create multiple addresses let addresses: Vec
= (0..5) .map(|i| { - Address::from_str(&format!("0x{}000000000000000000000000000000000000000", i)) + Address::from_str(&format!("0x{i}000000000000000000000000000000000000000")) .unwrap() }) .collect(); diff --git a/crates/shared/src/security/request_signer.rs b/crates/shared/src/security/request_signer.rs index ff3e9964..c5ea3605 100644 --- a/crates/shared/src/security/request_signer.rs +++ b/crates/shared/src/security/request_signer.rs @@ -143,7 +143,7 @@ mod tests { let signature = sign_request(endpoint, &wallet, Some(&empty_data)) .await .unwrap(); - println!("Signature: {}", signature); + println!("Signature: {signature}"); assert!(signature.starts_with("0x")); assert_eq!(signature.len(), 132); } diff --git a/crates/shared/src/utils/google_cloud.rs b/crates/shared/src/utils/google_cloud.rs index 128259eb..a069680d 100644 --- a/crates/shared/src/utils/google_cloud.rs +++ b/crates/shared/src/utils/google_cloud.rs @@ -219,15 +219,15 @@ mod tests { .generate_mapping_file(&random_sha256, "run_1/file.parquet") .await .unwrap(); - println!("mapping_content: {}", mapping_content); - println!("bucket_name: {}", bucket_name); + println!("mapping_content: {mapping_content}"); + println!("bucket_name: {bucket_name}"); let original_file_name = storage .resolve_mapping_for_sha(&random_sha256) .await .unwrap(); - println!("original_file_name: {}", original_file_name); + println!("original_file_name: {original_file_name}"); assert_eq!(original_file_name, "run_1/file.parquet"); } } diff --git a/crates/shared/src/utils/mod.rs b/crates/shared/src/utils/mod.rs index d4e3f1c9..290f1ae5 100644 --- a/crates/shared/src/utils/mod.rs +++ b/crates/shared/src/utils/mod.rs @@ -119,7 +119,7 @@ mod tests { provider.add_mapping_file("sha256", "file.txt").await; provider.add_file("file.txt", "content").await; let map_file_link = provider.resolve_mapping_for_sha("sha256").await.unwrap(); - println!("map_file_link: {}", map_file_link); + println!("map_file_link: {map_file_link}"); 
assert_eq!(map_file_link, "file.txt"); assert_eq!( diff --git a/crates/validator/src/store/redis.rs b/crates/validator/src/store/redis.rs index 508815c2..c0a0c36b 100644 --- a/crates/validator/src/store/redis.rs +++ b/crates/validator/src/store/redis.rs @@ -45,8 +45,8 @@ impl RedisStore { _ => panic!("Expected TCP connection"), }; - let redis_url = format!("redis://{}:{}", host, port); - debug!("Starting test Redis server at {}", redis_url); + let redis_url = format!("redis://{host}:{port}"); + debug!("Starting test Redis server at {redis_url}"); // Add a small delay to ensure server is ready thread::sleep(Duration::from_millis(100)); diff --git a/crates/validator/src/validators/synthetic_data/chain_operations.rs b/crates/validator/src/validators/synthetic_data/chain_operations.rs index 004c7e45..c1fbb7ac 100644 --- a/crates/validator/src/validators/synthetic_data/chain_operations.rs +++ b/crates/validator/src/validators/synthetic_data/chain_operations.rs @@ -3,7 +3,7 @@ use super::*; impl SyntheticDataValidator { #[cfg(test)] pub fn soft_invalidate_work(&self, work_key: &str) -> Result<(), Error> { - info!("Soft invalidating work: {}", work_key); + info!("Soft invalidating work: {work_key}"); if self.disable_chain_invalidation { info!("Chain invalidation is disabled, skipping work soft invalidation"); @@ -54,7 +54,7 @@ impl SyntheticDataValidator { #[cfg(test)] pub fn invalidate_work(&self, work_key: &str) -> Result<(), Error> { - info!("Invalidating work: {}", work_key); + info!("Invalidating work: {work_key}"); if let Some(metrics) = &self.metrics { metrics.record_work_key_invalidation(); diff --git a/crates/validator/src/validators/synthetic_data/mod.rs b/crates/validator/src/validators/synthetic_data/mod.rs index ce472c8b..40b3da0d 100644 --- a/crates/validator/src/validators/synthetic_data/mod.rs +++ b/crates/validator/src/validators/synthetic_data/mod.rs @@ -237,7 +237,7 @@ impl SyntheticDataValidator { let score: Option = con .zscore("incomplete_groups", 
group_key) .await - .map_err(|e| Error::msg(format!("Failed to check incomplete tracking: {}", e)))?; + .map_err(|e| Error::msg(format!("Failed to check incomplete tracking: {e}")))?; Ok(score.is_some()) } @@ -271,12 +271,11 @@ impl SyntheticDataValidator { .zadd("incomplete_groups", group_key, new_deadline) .await .map_err(|e| { - Error::msg(format!("Failed to update incomplete group deadline: {}", e)) + Error::msg(format!("Failed to update incomplete group deadline: {e}")) })?; debug!( - "Updated deadline for incomplete group {} to {} ({} minutes from now)", - group_key, new_deadline, minutes_from_now + "Updated deadline for incomplete group {group_key} to {new_deadline} ({minutes_from_now} minutes from now)" ); Ok(()) @@ -420,7 +419,7 @@ impl SyntheticDataValidator { let data: Option = con .get(key) .await - .map_err(|e| Error::msg(format!("Failed to get work validation status: {}", e)))?; + .map_err(|e| Error::msg(format!("Failed to get work validation status: {e}")))?; match data { Some(data) => { @@ -435,8 +434,7 @@ impl SyntheticDataValidator { reason: None, })), Err(e) => Err(Error::msg(format!( - "Failed to parse work validation data: {}", - e + "Failed to parse work validation data: {e}" ))), } } @@ -1576,8 +1574,7 @@ impl SyntheticDataValidator { .await { error!( - "Failed to update work validation status for {}: {}", - work_key, e + "Failed to update work validation status for {work_key}: {e}" ); } } diff --git a/crates/validator/src/validators/synthetic_data/tests/mod.rs b/crates/validator/src/validators/synthetic_data/tests/mod.rs index a589076f..48aaee85 100644 --- a/crates/validator/src/validators/synthetic_data/tests/mod.rs +++ b/crates/validator/src/validators/synthetic_data/tests/mod.rs @@ -34,7 +34,7 @@ fn setup_test_env() -> Result<(RedisStore, Contracts), Error> { "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97", url, ) - .map_err(|e| Error::msg(format!("Failed to create demo wallet: {}", e)))?; + .map_err(|e| 
Error::msg(format!("Failed to create demo wallet: {e}")))?; let contracts = ContractBuilder::new(demo_wallet.provider()) .with_compute_registry() @@ -45,7 +45,7 @@ fn setup_test_env() -> Result<(RedisStore, Contracts), Error> { .with_stake_manager() .with_synthetic_data_validator(Some(Address::ZERO)) .build() - .map_err(|e| Error::msg(format!("Failed to build contracts: {}", e)))?; + .map_err(|e| Error::msg(format!("Failed to build contracts: {e}")))?; Ok((store, contracts)) } @@ -197,8 +197,8 @@ async fn test_status_update() -> Result<(), Error> { ) .await .map_err(|e| { - error!("Failed to update work validation status: {}", e); - Error::msg(format!("Failed to update work validation status: {}", e)) + error!("Failed to update work validation status: {e}"); + Error::msg(format!("Failed to update work validation status: {e}")) })?; tokio::time::sleep(tokio::time::Duration::from_secs(1)).await; @@ -206,8 +206,8 @@ async fn test_status_update() -> Result<(), Error> { .get_work_validation_status_from_redis("0x0000000000000000000000000000000000000000") .await .map_err(|e| { - error!("Failed to get work validation status: {}", e); - Error::msg(format!("Failed to get work validation status: {}", e)) + error!("Failed to get work validation status: {e}"); + Error::msg(format!("Failed to get work validation status: {e}")) })?; assert_eq!(status, Some(ValidationResult::Accept)); Ok(()) @@ -344,20 +344,20 @@ async fn test_group_e2e_accept() -> Result<(), Error> { let mock_storage = MockStorageProvider::new(); mock_storage .add_file( - &format!("Qwen/Qwen0.6/dataset/samplingn-{}-1-0-0.parquet", GROUP_ID), + &format!("Qwen/Qwen0.6/dataset/samplingn-{GROUP_ID}-1-0-0.parquet"), "file1", ) .await; mock_storage .add_mapping_file( FILE_SHA, - &format!("Qwen/Qwen0.6/dataset/samplingn-{}-1-0-0.parquet", GROUP_ID), + &format!("Qwen/Qwen0.6/dataset/samplingn-{GROUP_ID}-1-0-0.parquet"), ) .await; server .mock( "POST", - format!("/validategroup/dataset/samplingn-{}-1-0.parquet", 
GROUP_ID).as_str(), + format!("/validategroup/dataset/samplingn-{GROUP_ID}-1-0.parquet").as_str(), ) .match_body(mockito::Matcher::Json(serde_json::json!({ "file_shas": [FILE_SHA], @@ -371,7 +371,7 @@ async fn test_group_e2e_accept() -> Result<(), Error> { server .mock( "GET", - format!("/statusgroup/dataset/samplingn-{}-1-0.parquet", GROUP_ID).as_str(), + format!("/statusgroup/dataset/samplingn-{GROUP_ID}-1-0.parquet").as_str(), ) .with_status(200) .with_body(r#"{"status": "accept", "input_flops": 1, "output_flops": 1000}"#) @@ -463,7 +463,7 @@ async fn test_group_e2e_accept() -> Result<(), Error> { metrics_2.contains("validator_work_keys_to_process{pool_id=\"0\",validator_id=\"0\"} 0") ); assert!(metrics_2.contains("toploc_config_name=\"Qwen/Qwen0.6\"")); - assert!(metrics_2.contains(&format!("validator_group_work_units_check_total{{group_id=\"{}\",pool_id=\"0\",result=\"match\",toploc_config_name=\"Qwen/Qwen0.6\",validator_id=\"0\"}} 1", GROUP_ID))); + assert!(metrics_2.contains(&format!("validator_group_work_units_check_total{{group_id=\"{GROUP_ID}\",pool_id=\"0\",result=\"match\",toploc_config_name=\"Qwen/Qwen0.6\",validator_id=\"0\"}} 1"))); Ok(()) } @@ -490,32 +490,32 @@ async fn test_group_e2e_work_unit_mismatch() -> Result<(), Error> { let mock_storage = MockStorageProvider::new(); mock_storage .add_file( - &format!("Qwen/Qwen0.6/dataset/samplingn-{}-2-0-0.parquet", GROUP_ID), + &format!("Qwen/Qwen0.6/dataset/samplingn-{GROUP_ID}-2-0-0.parquet"), "file1", ) .await; mock_storage .add_file( - &format!("Qwen/Qwen0.6/dataset/samplingn-{}-2-0-1.parquet", GROUP_ID), + &format!("Qwen/Qwen0.6/dataset/samplingn-{GROUP_ID}-2-0-1.parquet"), "file2", ) .await; mock_storage .add_mapping_file( HONEST_FILE_SHA, - &format!("Qwen/Qwen0.6/dataset/samplingn-{}-2-0-0.parquet", GROUP_ID), + &format!("Qwen/Qwen0.6/dataset/samplingn-{GROUP_ID}-2-0-0.parquet"), ) .await; mock_storage .add_mapping_file( EXCESSIVE_FILE_SHA, - 
&format!("Qwen/Qwen0.6/dataset/samplingn-{}-2-0-1.parquet", GROUP_ID), + &format!("Qwen/Qwen0.6/dataset/samplingn-{GROUP_ID}-2-0-1.parquet"), ) .await; server .mock( "POST", - format!("/validategroup/dataset/samplingn-{}-2-0.parquet", GROUP_ID).as_str(), + format!("/validategroup/dataset/samplingn-{GROUP_ID}-2-0.parquet").as_str(), ) .match_body(mockito::Matcher::Json(serde_json::json!({ "file_shas": [HONEST_FILE_SHA, EXCESSIVE_FILE_SHA], @@ -529,7 +529,7 @@ async fn test_group_e2e_work_unit_mismatch() -> Result<(), Error> { server .mock( "GET", - format!("/statusgroup/dataset/samplingn-{}-2-0.parquet", GROUP_ID).as_str(), + format!("/statusgroup/dataset/samplingn-{GROUP_ID}-2-0.parquet").as_str(), ) .with_status(200) .with_body(r#"{"status": "accept", "input_flops": 1, "output_flops": 2000}"#) @@ -636,12 +636,12 @@ async fn test_group_e2e_work_unit_mismatch() -> Result<(), Error> { assert_eq!(plan_3.group_trigger_tasks.len(), 0); assert_eq!(plan_3.group_status_check_tasks.len(), 0); let metrics_2 = export_metrics().unwrap(); - assert!(metrics_2.contains(&format!("validator_group_validations_total{{group_id=\"{}\",pool_id=\"0\",result=\"accept\",toploc_config_name=\"Qwen/Qwen0.6\",validator_id=\"0\"}} 1", GROUP_ID))); + assert!(metrics_2.contains(&format!("validator_group_validations_total{{group_id=\"{GROUP_ID}\",pool_id=\"0\",result=\"accept\",toploc_config_name=\"Qwen/Qwen0.6\",validator_id=\"0\"}} 1"))); assert!( metrics_2.contains("validator_work_keys_to_process{pool_id=\"0\",validator_id=\"0\"} 0") ); assert!(metrics_2.contains("toploc_config_name=\"Qwen/Qwen0.6\"")); - assert!(metrics_2.contains(&format!("validator_group_work_units_check_total{{group_id=\"{}\",pool_id=\"0\",result=\"mismatch\",toploc_config_name=\"Qwen/Qwen0.6\",validator_id=\"0\"}} 1", GROUP_ID))); + assert!(metrics_2.contains(&format!("validator_group_work_units_check_total{{group_id=\"{GROUP_ID}\",pool_id=\"0\",result=\"mismatch\",toploc_config_name=\"Qwen/Qwen0.6\",validator_id=\"0\"}} 
1"))); Ok(()) } @@ -734,26 +734,26 @@ async fn test_incomplete_group_recovery() -> Result<(), Error> { mock_storage .add_file( - &format!("TestModel/dataset/test-{}-2-0-0.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-2-0-0.parquet"), "file1", ) .await; mock_storage .add_file( - &format!("TestModel/dataset/test-{}-2-0-1.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-2-0-1.parquet"), "file2", ) .await; mock_storage .add_mapping_file( FILE_SHA_1, - &format!("TestModel/dataset/test-{}-2-0-0.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-2-0-0.parquet"), ) .await; mock_storage .add_mapping_file( FILE_SHA_2, - &format!("TestModel/dataset/test-{}-2-0-1.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-2-0-1.parquet"), ) .await; @@ -800,7 +800,7 @@ async fn test_incomplete_group_recovery() -> Result<(), Error> { assert!(group.is_none(), "Group should be incomplete"); // Check that the incomplete group is being tracked - let group_key = format!("group:{}:2:0", GROUP_ID); + let group_key = format!("group:{GROUP_ID}:2:0"); let is_tracked = validator .is_group_being_tracked_as_incomplete(&group_key) .await?; @@ -847,14 +847,14 @@ async fn test_expired_incomplete_group_soft_invalidation() -> Result<(), Error> mock_storage .add_file( - &format!("TestModel/dataset/test-{}-2-0-0.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-2-0-0.parquet"), "file1", ) .await; mock_storage .add_mapping_file( FILE_SHA_1, - &format!("TestModel/dataset/test-{}-2-0-0.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-2-0-0.parquet"), ) .await; @@ -902,7 +902,7 @@ async fn test_expired_incomplete_group_soft_invalidation() -> Result<(), Error> // Manually expire the incomplete group tracking by removing it and simulating expiry // In a real test, you would wait for the actual expiry, but for testing we simulate it - let group_key = format!("group:{}:2:0", GROUP_ID); + let group_key = 
format!("group:{GROUP_ID}:2:0"); validator.track_incomplete_group(&group_key).await?; // Process groups past grace period (this would normally find groups past deadline) @@ -936,7 +936,7 @@ async fn test_expired_incomplete_group_soft_invalidation() -> Result<(), Error> assert_eq!(key_status, Some(ValidationResult::IncompleteGroup)); let metrics = export_metrics().unwrap(); - assert!(metrics.contains(&format!("validator_work_keys_soft_invalidated_total{{group_key=\"group:{}:2:0\",pool_id=\"0\",validator_id=\"0\"}} 1", GROUP_ID))); + assert!(metrics.contains(&format!("validator_work_keys_soft_invalidated_total{{group_key=\"group:{GROUP_ID}:2:0\",pool_id=\"0\",validator_id=\"0\"}} 1"))); Ok(()) } @@ -952,14 +952,14 @@ async fn test_incomplete_group_status_tracking() -> Result<(), Error> { mock_storage .add_file( - &format!("TestModel/dataset/test-{}-3-0-0.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-3-0-0.parquet"), "file1", ) .await; mock_storage .add_mapping_file( FILE_SHA_1, - &format!("TestModel/dataset/test-{}-3-0-0.parquet", GROUP_ID), + &format!("TestModel/dataset/test-{GROUP_ID}-3-0-0.parquet"), ) .await; @@ -1006,7 +1006,7 @@ async fn test_incomplete_group_status_tracking() -> Result<(), Error> { // Manually process groups past grace period to simulate what would happen // after the grace period expires (we simulate this since we can't wait in tests) - let group_key = format!("group:{}:3:0", GROUP_ID); + let group_key = format!("group:{GROUP_ID}:3:0"); // Manually add the group to tracking and then process it validator.track_incomplete_group(&group_key).await?; diff --git a/crates/validator/src/validators/synthetic_data/toploc.rs b/crates/validator/src/validators/synthetic_data/toploc.rs index 33d9f57f..f5641533 100644 --- a/crates/validator/src/validators/synthetic_data/toploc.rs +++ b/crates/validator/src/validators/synthetic_data/toploc.rs @@ -689,8 +689,7 @@ mod tests { Some(expected_idx) => { assert!( matched, - "Expected file {} to 
match config {}", - test_file, expected_idx + "Expected file {test_file} to match config {expected_idx}" ); assert_eq!( matched_idx, @@ -701,7 +700,7 @@ mod tests { expected_idx ); } - None => assert!(!matched, "File {} should not match any config", test_file), + None => assert!(!matched, "File {test_file} should not match any config"), } } } diff --git a/crates/worker/Cargo.toml b/crates/worker/Cargo.toml index 8a7d8711..115fb4ba 100644 --- a/crates/worker/Cargo.toml +++ b/crates/worker/Cargo.toml @@ -28,7 +28,7 @@ rust-ipfs = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } stun = { workspace = true } -tokio = { workspace = true, features = ["full", "macros"] } +tokio = { workspace = true, features = [ "rt-multi-thread" ] } tokio-util = { workspace = true, features = ["rt"] } url = { workspace = true } uuid = { workspace = true } diff --git a/crates/worker/src/checks/hardware/interconnect.rs b/crates/worker/src/checks/hardware/interconnect.rs index 21725686..d87d1819 100644 --- a/crates/worker/src/checks/hardware/interconnect.rs +++ b/crates/worker/src/checks/hardware/interconnect.rs @@ -78,7 +78,7 @@ mod tests { #[tokio::test] async fn test_check_speeds() { let result = InterconnectCheck::check_speeds().await; - println!("Test Result: {:?}", result); + println!("Test Result: {result:?}"); // Verify the result is Ok and contains expected tuple structure assert!(result.is_ok()); diff --git a/crates/worker/src/checks/stun.rs b/crates/worker/src/checks/stun.rs index 5830b49e..734f2795 100644 --- a/crates/worker/src/checks/stun.rs +++ b/crates/worker/src/checks/stun.rs @@ -139,7 +139,7 @@ mod tests { async fn test_get_public_ip() { let stun_check = StunCheck::new(Duration::from_secs(5), 0); let public_ip = stun_check.get_public_ip().await.unwrap(); - println!("Public IP: {}", public_ip); + println!("Public IP: {public_ip}"); assert!(!public_ip.is_empty()); } } diff --git a/crates/worker/src/cli/command.rs 
b/crates/worker/src/cli/command.rs index 5569f6b7..d29e09a4 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -776,7 +776,7 @@ pub async fn execute_command( Console::success(&format!("P2P service started with ID: {peer_id}")); // sleep so that dht is bootstrapped before publishing. - // TOOD: should update p2p service to expose this better (https://github.com/PrimeIntellect-ai/protocol/issues/628) + // TODO: should update p2p service to expose this better (https://github.com/PrimeIntellect-ai/protocol/issues/628) tokio::time::sleep(Duration::from_secs(1)).await; let record_key = p2p::worker_dht_key_with_peer_id(&peer_id); diff --git a/crates/worker/src/docker/taskbridge/bridge.rs b/crates/worker/src/docker/taskbridge/bridge.rs index 4765ef06..594bc62d 100644 --- a/crates/worker/src/docker/taskbridge/bridge.rs +++ b/crates/worker/src/docker/taskbridge/bridge.rs @@ -565,7 +565,7 @@ mod tests { "test_label2": 20.0, }); let sample_metric = serde_json::to_string(&data)?; - debug!("Sending {:?}", sample_metric); + debug!("Sending {sample_metric:?}"); let msg = format!("{}{}", sample_metric, "\n"); stream.write_all(msg.as_bytes()).await?; stream.flush().await?; @@ -616,7 +616,7 @@ mod tests { "output/input_flops": 2500.0, }); let sample_metric = serde_json::to_string(&json)?; - debug!("Sending {:?}", sample_metric); + debug!("Sending {sample_metric:?}"); let msg = format!("{}{}", sample_metric, "\n"); stream.write_all(msg.as_bytes()).await?; stream.flush().await?; @@ -626,8 +626,7 @@ mod tests { let all_metrics = metrics_store.get_all_metrics().await; assert!( all_metrics.is_empty(), - "Expected metrics to be empty but found: {:?}", - all_metrics + "Expected metrics to be empty but found: {all_metrics:?}" ); bridge_handle.abort(); diff --git a/crates/worker/src/services/discovery.rs b/crates/worker/src/services/discovery.rs deleted file mode 100644 index 2088215c..00000000 --- a/crates/worker/src/services/discovery.rs +++ /dev/null @@ 
-1,113 +0,0 @@ -use anyhow::Result; -use shared::models::node::Node; -use shared::security::request_signer::sign_request_with_nonce; -use shared::web3::wallet::Wallet; - -pub(crate) struct DiscoveryService { - wallet: Wallet, - base_urls: Vec, - endpoint: String, -} - -impl DiscoveryService { - pub(crate) fn new(wallet: Wallet, base_urls: Vec, endpoint: Option) -> Self { - let urls = if base_urls.is_empty() { - vec!["http://localhost:8089".to_string()] - } else { - base_urls - }; - Self { - wallet, - base_urls: urls, - endpoint: endpoint.unwrap_or_else(|| "/api/nodes".to_string()), - } - } - - async fn upload_to_single_discovery(&self, node_config: &Node, base_url: &str) -> Result<()> { - let request_data = serde_json::to_value(node_config)?; - - let signed_request = - sign_request_with_nonce(&self.endpoint, &self.wallet, Some(&request_data)) - .await - .map_err(|e| anyhow::anyhow!("{}", e))?; - - let mut headers = reqwest::header::HeaderMap::new(); - headers.insert( - "x-address", - self.wallet - .wallet - .default_signer() - .address() - .to_string() - .parse() - .unwrap(), - ); - headers.insert("x-signature", signed_request.signature.parse().unwrap()); - let request_url = format!("{}{}", base_url, &self.endpoint); - let client = reqwest::Client::new(); - let response = client - .put(&request_url) - .headers(headers) - .json( - &signed_request - .data - .expect("Signed request data should always be present for discovery upload"), - ) - .send() - .await?; - - if !response.status().is_success() { - let status = response.status(); - let error_text = response - .text() - .await - .unwrap_or_else(|_| "No error message".to_string()); - return Err(anyhow::anyhow!( - "Error: Received response with status code {} from {}: {}", - status, - base_url, - error_text - )); - } - - Ok(()) - } - - pub(crate) async fn upload_discovery_info(&self, node_config: &Node) -> Result<()> { - let mut last_error: Option = None; - - for base_url in &self.base_urls { - match 
self.upload_to_single_discovery(node_config, base_url).await { - Ok(_) => { - // Successfully uploaded to one discovery service, return immediately - return Ok(()); - } - Err(e) => { - last_error = Some(e.to_string()); - } - } - } - - // If we reach here, all discovery services failed - if let Some(error) = last_error { - Err(anyhow::anyhow!( - "Failed to upload to all discovery services. Last error: {}", - error - )) - } else { - Err(anyhow::anyhow!( - "Failed to upload to all discovery services" - )) - } - } -} - -impl Clone for DiscoveryService { - fn clone(&self) -> Self { - Self { - wallet: self.wallet.clone(), - base_urls: self.base_urls.clone(), - endpoint: self.endpoint.clone(), - } - } -} diff --git a/crates/worker/src/services/discovery_updater.rs b/crates/worker/src/services/discovery_updater.rs deleted file mode 100644 index 148a5f10..00000000 --- a/crates/worker/src/services/discovery_updater.rs +++ /dev/null @@ -1,89 +0,0 @@ -use crate::services::discovery::DiscoveryService; -use crate::state::system_state::SystemState; -use log::{debug, error, info}; -use shared::models::node::Node; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; -use tokio::time::{sleep, Duration}; -use tokio_util::sync::CancellationToken; - -const INITIAL_UPDATE_DELAY: Duration = Duration::from_secs(120); -const UPDATE_INTERVAL: Duration = Duration::from_secs(120); - -pub(crate) struct DiscoveryUpdater { - discovery_service: Arc, - is_running: Arc, - system_state: Arc, - cancellation_token: Arc, -} - -impl DiscoveryUpdater { - pub(crate) fn new(discovery_service: DiscoveryService, system_state: Arc) -> Self { - Self { - discovery_service: Arc::new(discovery_service), - is_running: Arc::new(AtomicBool::new(false)), - system_state, - cancellation_token: Arc::new(CancellationToken::new()), - } - } - - pub(crate) fn start_auto_update(&self, node_config: Node) { - if self.is_running.load(Ordering::SeqCst) { - debug!("Auto update already running, skipping start"); - 
return; - } - - self.is_running.store(true, Ordering::SeqCst); - let is_running = self.is_running.clone(); - let discovery_service = self.discovery_service.clone(); - let system_state = self.system_state.clone(); - let cancellation_token = self.cancellation_token.clone(); - - tokio::spawn(async move { - debug!("Starting discovery info auto-update task"); - - // Initial delay before first update - tokio::select! { - _ = sleep(INITIAL_UPDATE_DELAY) => {}, - _ = cancellation_token.cancelled() => { - is_running.store(false, Ordering::SeqCst); - return; - } - } - - while is_running.load(Ordering::SeqCst) { - // Check if we're in a compute pool by checking the heartbeat endpoint - let should_update = !system_state.is_running().await; - - if should_update { - if let Err(e) = discovery_service.upload_discovery_info(&node_config).await { - error!("Failed to update discovery info: {e}"); - } else { - info!("Successfully updated discovery info"); - } - } - - // Sleep before next check, but check for cancellation - tokio::select! 
{ - _ = sleep(UPDATE_INTERVAL) => {}, - _ = cancellation_token.cancelled() => { - is_running.store(false, Ordering::SeqCst); - break; - } - } - } - debug!("Discovery info auto-update task finished"); - }); - } -} - -impl Clone for DiscoveryUpdater { - fn clone(&self) -> Self { - Self { - discovery_service: self.discovery_service.clone(), - is_running: self.is_running.clone(), - system_state: self.system_state.clone(), - cancellation_token: self.cancellation_token.clone(), - } - } -} diff --git a/crates/worker/src/services/mod.rs b/crates/worker/src/services/mod.rs deleted file mode 100644 index 82907023..00000000 --- a/crates/worker/src/services/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub(crate) mod discovery; -pub(crate) mod discovery_updater; From aa798aa864337cef92c6579d1d1e59379c5984c7 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Wed, 16 Jul 2025 22:41:41 -0400 Subject: [PATCH 12/14] fmt --- crates/orchestrator/src/plugins/node_groups/tests.rs | 4 +--- crates/shared/src/security/auth_signature_middleware.rs | 3 +-- crates/validator/src/validators/synthetic_data/mod.rs | 4 +--- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/crates/orchestrator/src/plugins/node_groups/tests.rs b/crates/orchestrator/src/plugins/node_groups/tests.rs index 0f3e8433..66565702 100644 --- a/crates/orchestrator/src/plugins/node_groups/tests.rs +++ b/crates/orchestrator/src/plugins/node_groups/tests.rs @@ -276,9 +276,7 @@ async fn test_group_formation_with_multiple_configs() { let _ = plugin.try_form_new_groups().await; let mut conn = plugin.store.client.get_connection().unwrap(); - let groups: Vec = conn - .keys(format!("{GROUP_KEY_PREFIX}*").as_str()) - .unwrap(); + let groups: Vec = conn.keys(format!("{GROUP_KEY_PREFIX}*").as_str()).unwrap(); assert_eq!(groups.len(), 2); // Verify group was created diff --git a/crates/shared/src/security/auth_signature_middleware.rs b/crates/shared/src/security/auth_signature_middleware.rs index 427fceec..8ba7767e 100644 --- 
a/crates/shared/src/security/auth_signature_middleware.rs +++ b/crates/shared/src/security/auth_signature_middleware.rs @@ -801,8 +801,7 @@ mod tests { // Create multiple addresses let addresses: Vec
= (0..5) .map(|i| { - Address::from_str(&format!("0x{i}000000000000000000000000000000000000000")) - .unwrap() + Address::from_str(&format!("0x{i}000000000000000000000000000000000000000")).unwrap() }) .collect(); diff --git a/crates/validator/src/validators/synthetic_data/mod.rs b/crates/validator/src/validators/synthetic_data/mod.rs index 40b3da0d..bf8ce6e2 100644 --- a/crates/validator/src/validators/synthetic_data/mod.rs +++ b/crates/validator/src/validators/synthetic_data/mod.rs @@ -270,9 +270,7 @@ impl SyntheticDataValidator { let _: () = con .zadd("incomplete_groups", group_key, new_deadline) .await - .map_err(|e| { - Error::msg(format!("Failed to update incomplete group deadline: {e}")) - })?; + .map_err(|e| Error::msg(format!("Failed to update incomplete group deadline: {e}")))?; debug!( "Updated deadline for incomplete group {group_key} to {new_deadline} ({minutes_from_now} minutes from now)" From 6fe25161502e62676869832e391d1623bc51d5cf Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 17 Jul 2025 12:58:32 -0400 Subject: [PATCH 13/14] cleanup --- crates/orchestrator/src/discovery/monitor.rs | 3 +-- crates/validator/src/validator.rs | 2 +- crates/worker/src/cli/command.rs | 2 -- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/crates/orchestrator/src/discovery/monitor.rs b/crates/orchestrator/src/discovery/monitor.rs index 02d66eba..54156371 100644 --- a/crates/orchestrator/src/discovery/monitor.rs +++ b/crates/orchestrator/src/discovery/monitor.rs @@ -3,6 +3,7 @@ use crate::models::node::NodeStatus; use crate::models::node::OrchestratorNode; use crate::plugins::StatusUpdatePlugin; use crate::store::core::StoreContext; +use crate::store::NodeStore; use crate::utils::loop_heartbeats::LoopHeartbeats; use alloy::primitives::Address; use alloy::primitives::U256; @@ -448,8 +449,6 @@ impl DiscoveryMonitor { } } -use crate::store::NodeStore; - async fn enrich_nodes_without_location( node_store: Arc, location_service: LocationService, diff --git 
a/crates/validator/src/validator.rs b/crates/validator/src/validator.rs index 45902bff..09532ebf 100644 --- a/crates/validator/src/validator.rs +++ b/crates/validator/src/validator.rs @@ -41,7 +41,7 @@ impl ValidatorHealth { } pub struct Validator { - synthetic_validator: Option>, // TOOD: does this need to be optional? + synthetic_validator: Option>, // TODO: does this need to be optional? provider: WalletProvider, contracts: Contracts, hardware_validator: HardwareValidator, diff --git a/crates/worker/src/cli/command.rs b/crates/worker/src/cli/command.rs index d29e09a4..9215b379 100644 --- a/crates/worker/src/cli/command.rs +++ b/crates/worker/src/cli/command.rs @@ -865,8 +865,6 @@ pub async fn execute_command( std::process::exit(1); } - // discovery_updater.start_auto_update(node_config); - if recover_last_state { info!("Recovering from previous state: {recover_last_state}"); heartbeat.activate_heartbeat_if_endpoint_exists().await; From 3dfc29042ed0516f5c2cf908b6ba4e2ef6b87a08 Mon Sep 17 00:00:00 2001 From: elizabeth Date: Thu, 17 Jul 2025 12:59:35 -0400 Subject: [PATCH 14/14] fix tokio features --- crates/dev-utils/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/dev-utils/Cargo.toml b/crates/dev-utils/Cargo.toml index 990bdd75..78410f7f 100644 --- a/crates/dev-utils/Cargo.toml +++ b/crates/dev-utils/Cargo.toml @@ -8,7 +8,7 @@ workspace = true [dependencies] shared = { workspace = true } -tokio = { workspace = true } +tokio = { workspace = true, features = [ "rt-multi-thread" ] } eyre = "0.6" clap = { workspace = true } url = { workspace = true }