Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/arcane-infra/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tokio = { version = "1", features = ["rt-multi-thread", "net", "sync", "macros"], optional = true }
tokio-tungstenite = { version = "0.21", optional = true }
tracing = "0.1"
uuid = { version = "1.0", features = ["v4"] }

[features]
Expand Down
197 changes: 181 additions & 16 deletions crates/arcane-infra/src/cluster_manager.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! ClusterManager (IN-01) — central coordinator.

use arcane_core::{
clustering_model::{ClusterInfo, PlayerInfo, WorldStateView},
clustering_model::{ClusterDecision, ClusterInfo, DecisionType, PlayerInfo, WorldStateView},
types::Vec2,
IClusteringModel, IServerPool, ServerHandle,
};
Expand All @@ -12,13 +12,42 @@ use std::collections::HashMap;
use std::sync::Arc;
use uuid::Uuid;

/// Guardrails for decision execution. All thresholds are per-cycle or per-pair.
#[derive(Clone, Debug)]
pub struct ExecutionConfig {
/// Minimum model confidence to execute a decision (0.0–1.0).
pub min_confidence: f32,
/// Ticks to suppress further merges involving the surviving cluster after a merge.
pub merge_cooldown_ticks: u32,
/// Ticks to suppress further splits involving either resulting cluster after a split.
pub split_cooldown_ticks: u32,
/// Maximum decisions executed per evaluation cycle (merge + split combined).
pub max_per_cycle: usize,
}

impl Default for ExecutionConfig {
fn default() -> Self {
Self {
min_confidence: 0.7,
merge_cooldown_ticks: 20,
split_cooldown_ticks: 30,
max_per_cycle: 3,
}
}
}

/// Central coordinator: assignments, topology, clustering model.
pub struct ClusterManager {
model: Arc<dyn IClusteringModel>,
pool: Arc<dyn IServerPool>,
spatial_index: SpatialIndex,
/// Allocated cluster servers. active_count = allocated_servers.len().
allocated_servers: Vec<ServerHandle>,
/// cluster_id → ServerHandle. One entry per live cluster server.
servers: HashMap<Uuid, ServerHandle>,
exec_config: ExecutionConfig,
/// cluster_id → remaining cooldown ticks after a merge.
merge_cooldowns: HashMap<Uuid, u32>,
/// cluster_id → remaining cooldown ticks after a split.
split_cooldowns: HashMap<Uuid, u32>,
}

impl ClusterManager {
Expand All @@ -31,7 +60,10 @@ impl ClusterManager {
model,
pool,
spatial_index,
allocated_servers: Vec::new(),
servers: HashMap::new(),
exec_config: ExecutionConfig::default(),
merge_cooldowns: HashMap::new(),
split_cooldowns: HashMap::new(),
}
}

Expand All @@ -44,6 +76,12 @@ impl ClusterManager {
)
}

/// Override the execution config (for tests or custom deployments).
pub fn with_exec_config(mut self, config: ExecutionConfig) -> Self {
self.exec_config = config;
self
}

/// Create with a named clustering model. Supported values: "rules" (default), "affinity".
/// The "affinity" variant requires the `affinity-clustering` feature flag.
pub fn with_model(model_type: &str) -> Self {
Expand Down Expand Up @@ -79,6 +117,15 @@ impl ClusterManager {
/// Run one evaluation cycle: build view from spatial snapshot, run model, apply decisions.
/// Without SpacetimeDB we allocate from pool when we have clusters (entities) and no servers yet.
pub fn run_evaluation_cycle(&mut self) -> Result<(), String> {
self.merge_cooldowns
.values_mut()
.for_each(|v| *v = v.saturating_sub(1));
self.merge_cooldowns.retain(|_, v| *v > 0);
self.split_cooldowns
.values_mut()
.for_each(|v| *v = v.saturating_sub(1));
self.split_cooldowns.retain(|_, v| *v > 0);

let snapshot = self.spatial_index.snapshot_for_view();
if snapshot.is_empty() {
return Ok(());
Expand Down Expand Up @@ -126,26 +173,144 @@ impl ClusterManager {
clusters,
players,
};
let _decisions = self.model.evaluate(&view);
// Minimal apply: if we have clusters in the world and no servers allocated, allocate one.
if !self.allocated_servers.is_empty() {
// Bootstrap: allocate a server for each cluster that doesn't have one yet.
// Key = cluster_id so execute_merge/split can look up and release by cluster_id.
let unserved: Vec<Uuid> = self
.spatial_index
.snapshot_for_view()
.into_iter()
.map(|g| g.cluster_id)
.filter(|id| !self.servers.contains_key(id))
.collect();
for cluster_id in unserved {
match self.pool.allocate() {
Ok(handle) => {
self.servers.insert(cluster_id, handle);
}
Err(e) => {
return Err(format!(
"pool allocate failed: {} - {}",
e.code as u32, e.detail
));
}
}
}

let decisions = self.model.evaluate(&view);
let max = self.exec_config.max_per_cycle;
let mut executed = 0;
for decision in decisions.iter() {
if executed >= max {
break;
}
let result = match decision.decision_type {
DecisionType::Merge => self.execute_merge(decision),
DecisionType::Split => self.execute_split(decision),
};
match result {
Ok(()) => executed += 1,
Err(e) => tracing::warn!("decision execution error: {}", e),
}
}
Ok(())
}

/// Execute a Merge decision: migrate all entities from source to target, release source server.
/// Returns Ok(()) for skipped decisions (confidence/cooldown) and Err only for malformed input.
fn execute_merge(&mut self, decision: &ClusterDecision) -> Result<(), String> {
let source = decision
.source_cluster_id
.ok_or("merge decision missing source_cluster_id")?;
let target = decision
.target_cluster_id
.ok_or("merge decision missing target_cluster_id")?;

if decision.confidence < self.exec_config.min_confidence {
return Ok(());
}
if self.merge_cooldowns.contains_key(&source)
|| self.merge_cooldowns.contains_key(&target)
{
return Ok(());
}

self.spatial_index.reassign_cluster(source, target);
if let Err(e) = self.pool.release(source) {
tracing::warn!("merge: pool.release({}) failed: {}", source, e.detail);
}
self.servers.remove(&source);
if self.exec_config.merge_cooldown_ticks > 0 {
self.merge_cooldowns
.insert(target, self.exec_config.merge_cooldown_ticks);
}
Ok(())
}

/// Execute a Split decision: allocate a new server, migrate group_b entities to it.
/// Returns Ok(()) for skipped decisions (confidence/cooldown/pool exhaustion) and Err only for
/// malformed input.
fn execute_split(&mut self, decision: &ClusterDecision) -> Result<(), String> {
let cluster = decision
.cluster_id
.ok_or("split decision missing cluster_id")?;
let group_b = decision
.split_group_b
.as_ref()
.ok_or("split decision missing split_group_b")?;

if group_b.is_empty() {
return Err("split_group_b is empty".to_string());
}
if decision
.split_group_a
.as_ref()
.map_or(true, |g| g.is_empty())
{
return Err("split_group_a is empty".to_string());
}
if decision.confidence < self.exec_config.min_confidence {
return Ok(());
}
match self.pool.allocate() {
Ok(handle) => {
self.allocated_servers.push(handle);
Ok(())
if self.split_cooldowns.contains_key(&cluster) {
return Ok(());
}

let new_handle = match self.pool.allocate() {
Ok(h) => h,
Err(e) => {
tracing::warn!("split: pool exhausted, skipping: {}", e.detail);
return Ok(());
}
Err(e) => Err(format!(
"pool allocate failed: {} - {}",
e.code as u32, e.detail
)),
};
let new_cluster_id = new_handle.server_id;
self.servers.insert(new_cluster_id, new_handle);

// Snapshot current positions before mutating the index.
let positions: HashMap<Uuid, arcane_core::Vec3> = self
.spatial_index
.snapshot_entities()
.into_iter()
.map(|(eid, _, pos)| (eid, pos))
.collect();

for &entity_id in group_b {
if let Some(&pos) = positions.get(&entity_id) {
self.spatial_index
.update_entity(entity_id, new_cluster_id, pos);
}
}

let cooldown = self.exec_config.split_cooldown_ticks;
if cooldown > 0 {
self.split_cooldowns.insert(cluster, cooldown);
self.split_cooldowns.insert(new_cluster_id, cooldown);
}
Ok(())
}

/// Current number of active clusters (for tests / metrics).
pub fn active_cluster_count(&self) -> u32 {
self.allocated_servers.len() as u32
self.servers.len() as u32
}

/// Snapshot of cluster geometry from the spatial index (for visualization / debugging).
Expand Down
2 changes: 1 addition & 1 deletion crates/arcane-infra/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pub mod ws_server;
#[cfg(feature = "cluster-ws")]
pub use arcane_core::cluster_simulation::{ClusterSimulation, ClusterTickContext, GameAction};

pub use cluster_manager::ClusterManager;
pub use cluster_manager::{ClusterManager, ExecutionConfig};
pub use cluster_server::ClusterServer;
pub use redis_channel::RedisReplicationChannel;
pub use replication_channel_manager::ReplicationChannelManager;
Expand Down
Loading
Loading