diff --git a/Cargo.lock b/Cargo.lock index 5114b3bb3ca..429f220657d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6778,6 +6778,7 @@ dependencies = [ "test-strategy", "thiserror 2.0.17", "tokio", + "trust-quorum-protocol", "tufaceous-artifact", "uuid", "vergen-gitcl", @@ -6871,6 +6872,7 @@ dependencies = [ "term 0.7.0", "thiserror 2.0.17", "tokio", + "trust-quorum-protocol", "tufaceous-artifact", "url", "usdt 0.5.0", @@ -7307,6 +7309,7 @@ dependencies = [ "omicron-uuid-kinds", "omicron-workspace-hack", "pq-sys", + "sled-agent-types", "slog", "slog-error-chain", ] @@ -7528,6 +7531,7 @@ dependencies = [ "serde", "serde_json", "serde_with", + "sled-agent-types", "sled-agent-types-versions", "sled-hardware-types", "slog", @@ -7541,6 +7545,7 @@ dependencies = [ "thiserror 2.0.17", "tokio", "tough", + "trust-quorum-protocol", "tufaceous-artifact", "unicode-width 0.1.14", "update-engine", @@ -11480,6 +11485,7 @@ dependencies = [ "oxide-tokio-rt", "qorb", "serde_json", + "sled-agent-types", "slog", "swrite", "tokio", @@ -13060,6 +13066,7 @@ dependencies = [ "omicron-workspace-hack", "oxnet", "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=3f1752e6cee9a2f8ecdce6e2ad3326781182e2d9)", + "proptest", "rcgen", "schemars 0.8.22", "serde", @@ -13071,6 +13078,7 @@ dependencies = [ "slog-error-chain", "strum 0.27.2", "swrite", + "test-strategy", "thiserror 2.0.17", "toml 0.8.23", "tufaceous-artifact", @@ -14832,6 +14840,7 @@ dependencies = [ "reconfigurator-cli", "reedline", "serde_json", + "sled-agent-types", "slog", "tabled 0.15.0", "trust-quorum-protocol", @@ -15113,6 +15122,7 @@ dependencies = [ "secrecy 0.10.3", "serde", "serde_json", + "sled-agent-types", "sled-hardware-types", "slog", "trust-quorum-protocol", diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 661e3780c98..844422bd79a 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -78,7 +78,6 @@ use 
nexus_types::internal_api::background::TufArtifactReplicationCounters; use nexus_types::internal_api::background::TufArtifactReplicationRequest; use nexus_types::internal_api::background::TufArtifactReplicationStatus; use nexus_types::internal_api::background::TufRepoPrunerStatus; -use nexus_types::inventory::BaseboardId; use omicron_uuid_kinds::BlueprintUuid; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::DemoSagaUuid; @@ -92,6 +91,7 @@ use quiesce::cmd_nexus_quiesce; use reconfigurator_config::ReconfiguratorConfigArgs; use reconfigurator_config::cmd_nexus_reconfigurator_config; use serde::Deserialize; +use sled_agent_types::sled::BaseboardId; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::BTreeSet; diff --git a/dev-tools/reconfigurator-sp-updater/Cargo.toml b/dev-tools/reconfigurator-sp-updater/Cargo.toml index 7f3008253c1..6d05b527cfd 100644 --- a/dev-tools/reconfigurator-sp-updater/Cargo.toml +++ b/dev-tools/reconfigurator-sp-updater/Cargo.toml @@ -25,6 +25,7 @@ omicron-repl-utils.workspace = true oxide-tokio-rt.workspace = true qorb.workspace = true serde_json.workspace = true +sled-agent-types.workspace = true slog.workspace = true swrite.workspace = true tokio = { workspace = true, features = [ "full" ] } diff --git a/dev-tools/reconfigurator-sp-updater/src/main.rs b/dev-tools/reconfigurator-sp-updater/src/main.rs index 0c04682eb09..3b6b6a3b46a 100644 --- a/dev-tools/reconfigurator-sp-updater/src/main.rs +++ b/dev-tools/reconfigurator-sp-updater/src/main.rs @@ -26,12 +26,12 @@ use nexus_types::deployment::PendingMgsUpdateRotDetails; use nexus_types::deployment::PendingMgsUpdateSpDetails; use nexus_types::deployment::PendingMgsUpdates; use nexus_types::internal_api::views::MgsUpdateDriverStatus; -use nexus_types::inventory::BaseboardId; use nexus_types::inventory::SpType; use omicron_common::disk::M2Slot; use omicron_repl_utils::run_repl_on_stdin; use qorb::resolver::Resolver; use 
qorb::resolvers::fixed::FixedResolver; +use sled_agent_types::sled::BaseboardId; use slog::{info, o, warn}; use std::collections::BTreeMap; use std::net::SocketAddr; diff --git a/nexus/db-model/Cargo.toml b/nexus/db-model/Cargo.toml index 6769eb15f19..b48082544b3 100644 --- a/nexus/db-model/Cargo.toml +++ b/nexus/db-model/Cargo.toml @@ -42,6 +42,7 @@ slog-error-chain.workspace = true steno.workspace = true strum.workspace = true thiserror.workspace = true +trust-quorum-protocol.workspace = true tokio.workspace = true uuid.workspace = true diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs index c5d2fedf4f3..9cccc10b1f1 100644 --- a/nexus/db-model/src/deployment.rs +++ b/nexus/db-model/src/deployment.rs @@ -57,7 +57,6 @@ use nexus_types::deployment::{ OmicronZoneExternalFloatingAddr, OmicronZoneExternalFloatingIp, OmicronZoneExternalSnatIp, }; -use nexus_types::inventory::BaseboardId; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use omicron_common::api::internal::shared::NetworkInterface; @@ -69,6 +68,7 @@ use omicron_uuid_kinds::{ PhysicalDiskKind, SledKind, SledUuid, ZpoolKind, ZpoolUuid, }; use sled_agent_types::inventory::OmicronZoneDataset; +use sled_agent_types::sled::BaseboardId; use std::net::{IpAddr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 7af0efad29b..98e70b96964 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -47,9 +47,8 @@ use nexus_db_schema::schema::{ }; use nexus_types::inventory::HostPhase1ActiveSlot; use nexus_types::inventory::{ - BaseboardId, Caboose, CockroachStatus, Collection, - InternalDnsGenerationStatus, NvmeFirmware, PowerState, RotPage, RotSlot, - TimeSync, + Caboose, CockroachStatus, Collection, InternalDnsGenerationStatus, + NvmeFirmware, PowerState, RotPage, RotSlot, TimeSync, }; use omicron_common::api::external; use 
omicron_common::api::internal::shared::NetworkInterface; @@ -94,6 +93,7 @@ use sled_agent_types::inventory::{ ConfigReconcilerInventoryResult, OmicronSledConfig, OmicronZoneConfig, OmicronZoneDataset, OmicronZoneImageSource, OmicronZoneType, }; +use sled_agent_types::sled::BaseboardId; use std::collections::BTreeSet; use std::net::{IpAddr, SocketAddrV6}; use std::time::Duration; @@ -489,8 +489,8 @@ impl<'a> From<&'a Collection> for InvCollection { } } -/// See [`nexus_types::inventory::BaseboardId`]. -#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +/// See [`sled_agent_types::sled::BaseboardId`]. +#[derive(Queryable, Insertable, Clone, Debug, Selectable, PartialEq, Eq)] #[diesel(table_name = hw_baseboard_id)] pub struct HwBaseboardId { pub id: Uuid, diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 692cf62b839..a72ea624862 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -87,6 +87,7 @@ mod silo_auth_settings; mod switch_interface; mod switch_port; mod target_release; +mod trust_quorum; mod v2p_mapping; mod vmm_state; mod webhook_delivery; @@ -262,6 +263,7 @@ pub use switch::*; pub use switch_interface::*; pub use switch_port::*; pub use target_release::*; +pub use trust_quorum::*; pub use tuf_repo::*; pub use typed_uuid::DbTypedUuid; pub use typed_uuid::to_db_typed_uuid; diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index fff38dc1eb4..d34936eb973 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. 
-pub const SCHEMA_VERSION: Version = Version::new(215, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(216, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(216, "add-trust-quorum"), KnownVersion::new(215, "support-up-to-12-disks"), KnownVersion::new(214, "separate-transit-ips-by-version"), KnownVersion::new(213, "fm-cases"), diff --git a/nexus/db-model/src/trust_quorum.rs b/nexus/db-model/src/trust_quorum.rs new file mode 100644 index 00000000000..08a38781926 --- /dev/null +++ b/nexus/db-model/src/trust_quorum.rs @@ -0,0 +1,115 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Database representations for trust quorum types + +use super::impl_enum_type; +use crate::SqlU8; +use crate::typed_uuid::DbTypedUuid; +use nexus_db_schema::schema::{ + trust_quorum_configuration, trust_quorum_member, +}; +use nexus_types::trust_quorum::{ + TrustQuorumConfigState, TrustQuorumMemberState, +}; +use omicron_uuid_kinds::RackKind; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +impl_enum_type!( + TrustQuorumConfigurationStateEnum: + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + pub enum DbTrustQuorumConfigurationState; + + // Enum values + Preparing => b"preparing" + PreparingLrtqUpgrade => b"preparing-lrtq-upgrade" + Committing => b"committing" + Committed => b"committed" + Aborted => b"aborted" +); + +impl From for TrustQuorumConfigState { + fn from(value: DbTrustQuorumConfigurationState) -> Self { + match value { + DbTrustQuorumConfigurationState::Preparing => Self::Preparing, + DbTrustQuorumConfigurationState::PreparingLrtqUpgrade => { + Self::PreparingLrtqUpgrade + } + DbTrustQuorumConfigurationState::Committing => Self::Committing, + DbTrustQuorumConfigurationState::Committed => Self::Committed, + DbTrustQuorumConfigurationState::Aborted => Self::Aborted, + } + } +} + +impl From for DbTrustQuorumConfigurationState { + fn from(value: TrustQuorumConfigState) -> Self { + match value { + TrustQuorumConfigState::Preparing => Self::Preparing, + TrustQuorumConfigState::PreparingLrtqUpgrade => { + Self::PreparingLrtqUpgrade + } + TrustQuorumConfigState::Committing => Self::Committing, + TrustQuorumConfigState::Committed => Self::Committed, + TrustQuorumConfigState::Aborted => Self::Aborted, + } + } +} + +impl_enum_type!( + TrustQuorumMemberStateEnum: + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + pub enum DbTrustQuorumMemberState; + + // Enum values + Unacked => b"unacked" + Prepared => b"prepared" + Committed => b"committed" +); + +impl From for 
TrustQuorumMemberState { + fn from(value: DbTrustQuorumMemberState) -> Self { + match value { + DbTrustQuorumMemberState::Unacked => Self::Unacked, + DbTrustQuorumMemberState::Prepared => Self::Prepared, + DbTrustQuorumMemberState::Committed => Self::Committed, + } + } +} + +impl From for DbTrustQuorumMemberState { + fn from(value: TrustQuorumMemberState) -> Self { + match value { + TrustQuorumMemberState::Unacked => Self::Unacked, + TrustQuorumMemberState::Prepared => Self::Prepared, + TrustQuorumMemberState::Committed => Self::Committed, + } + } +} + +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = trust_quorum_configuration)] +pub struct TrustQuorumConfiguration { + pub rack_id: DbTypedUuid, + pub epoch: i64, + pub state: DbTrustQuorumConfigurationState, + pub threshold: SqlU8, + pub commit_crash_tolerance: SqlU8, + pub coordinator: Uuid, + pub encrypted_rack_secrets_salt: Option, + pub encrypted_rack_secrets: Option>, +} + +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = trust_quorum_member)] +pub struct TrustQuorumMember { + pub rack_id: DbTypedUuid, + pub epoch: i64, + pub hw_baseboard_id: Uuid, + pub state: DbTrustQuorumMemberState, + pub share_digest: Option, +} diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index 2f2d43fff3f..e9b16060542 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -56,6 +56,7 @@ swrite.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["full"] } tufaceous-artifact.workspace = true +trust-quorum-protocol.workspace = true url.workspace = true usdt.workspace = true uuid.workspace = true diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 67c4e162bc5..c9c494a5624 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -91,7 +91,6 @@ use 
nexus_types::deployment::PendingMgsUpdateRotBootloaderDetails; use nexus_types::deployment::PendingMgsUpdateRotDetails; use nexus_types::deployment::PendingMgsUpdateSpDetails; use nexus_types::deployment::PendingMgsUpdates; -use nexus_types::inventory::BaseboardId; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::Generation; @@ -105,6 +104,7 @@ use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::TypedUuid; +use sled_agent_types::sled::BaseboardId; use slog::Logger; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; @@ -3097,7 +3097,6 @@ mod tests { use nexus_types::external_api::views::PhysicalDiskState; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledState; - use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Collection; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; @@ -3116,6 +3115,7 @@ mod tests { use omicron_uuid_kinds::ZpoolUuid; use pretty_assertions::assert_eq; use rand::Rng; + use sled_agent_types::sled::BaseboardId; use std::collections::BTreeSet; use std::mem; use std::net::Ipv6Addr; diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 0542f482c49..5d849235e93 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -85,7 +85,6 @@ use nexus_db_schema::enums::{ CabooseWhichEnum, InvConfigReconcilerStatusKindEnum, }; use nexus_db_schema::enums::{HwPowerStateEnum, InvZoneManifestSourceEnum}; -use nexus_types::inventory::BaseboardId; use nexus_types::inventory::CockroachStatus; use nexus_types::inventory::Collection; use nexus_types::inventory::InternalDnsGenerationStatus; @@ -116,6 +115,7 @@ use sled_agent_types::inventory::MupdateOverrideNonBootInventory; use 
sled_agent_types::inventory::OmicronSledConfig; use sled_agent_types::inventory::OrphanedDataset; use sled_agent_types::inventory::ZoneArtifactInventory; +use sled_agent_types::sled::BaseboardId; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -2744,12 +2744,11 @@ impl DataStore { public_error_from_diesel(e, ErrorHandler::Server) })?; paginator = p.found_batch(&batch, &|row| row.id); - bbs.extend(batch.into_iter().map(|bb| { - ( - bb.id, - Arc::new(nexus_types::inventory::BaseboardId::from(bb)), - ) - })); + bbs.extend( + batch + .into_iter() + .map(|bb| (bb.id, Arc::new(BaseboardId::from(bb)))), + ); } bbs @@ -4468,7 +4467,7 @@ mod test { use nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::RotPageWhich; - use nexus_types::inventory::{BaseboardId, SpType}; + use nexus_types::inventory::SpType; use omicron_common::api::external::Error; use omicron_common::disk::DatasetKind; use omicron_common::disk::DatasetName; @@ -4491,6 +4490,7 @@ mod test { ConfigReconcilerInventory, ConfigReconcilerInventoryResult, ConfigReconcilerInventoryStatus, OmicronZoneImageSource, }; + use sled_agent_types::sled::BaseboardId; use std::num::NonZeroU32; use std::time::Duration; use tufaceous_artifact::ArtifactHash; diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 88ce4cd8c44..7247dcb9d76 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -116,6 +116,7 @@ mod switch_port; mod target_release; #[cfg(test)] pub(crate) mod test_utils; +mod trust_quorum; pub mod update; mod user_data_export; mod utilization; diff --git a/nexus/db-queries/src/db/datastore/trust_quorum.rs b/nexus/db-queries/src/db/datastore/trust_quorum.rs new file mode 100644 index 00000000000..15cfcd340cb --- /dev/null +++ b/nexus/db-queries/src/db/datastore/trust_quorum.rs @@ -0,0 +1,1644 @@ +// 
This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Trust quorum related queries + +use super::DataStore; +use crate::authz; +use crate::context::OpContext; +use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::prelude::*; +use nexus_db_errors::ErrorHandler; +use nexus_db_errors::OptionalError; +use nexus_db_errors::TransactionError; +use nexus_db_errors::public_error_from_diesel; +use nexus_db_lookup::DbConnection; +use nexus_db_model::DbTrustQuorumConfigurationState; +use nexus_db_model::DbTrustQuorumMemberState; +use nexus_db_model::DbTypedUuid; +use nexus_db_model::HwBaseboardId; +use nexus_db_model::TrustQuorumConfiguration as DbTrustQuorumConfiguration; +use nexus_db_model::TrustQuorumMember as DbTrustQuorumMember; +use nexus_types::trust_quorum::TrustQuorumConfigState; +use nexus_types::trust_quorum::{TrustQuorumConfig, TrustQuorumMemberData}; +use omicron_common::api::external::Error; +use omicron_common::api::external::ListResultVec; +use omicron_common::api::external::OptionalLookupResult; +use omicron_common::bail_unless; +use omicron_uuid_kinds::RackKind; +use omicron_uuid_kinds::RackUuid; +use sled_agent_types::sled::BaseboardId; +use std::collections::{BTreeMap, BTreeSet}; +use trust_quorum_protocol::{ + EncryptedRackSecrets, Epoch, Salt, Sha3_256Digest, Threshold, +}; + +macro_rules! 
bail_txn { + ($err:ident, $($arg:tt),*) => { + return Err($err.bail( + omicron_common::api::external::Error::internal_error(&format!( + $($arg),* + )) + .into() + )); + } +} + +fn i64_to_epoch(val: i64) -> Result { + let Ok(epoch) = val.try_into() else { + return Err(Error::internal_error(&format!( + "Failed to convert i64 from database: {val} \ + into trust quorum epoch", + ))); + }; + Ok(Epoch(epoch)) +} + +fn epoch_to_i64(epoch: Epoch) -> Result { + epoch.0.try_into().map_err(|_| { + Error::internal_error(&format!( + "Failed to convert trust quorum epoch to i64 in attempt to insert \ + into database: {epoch}" + )) + }) +} + +impl DataStore { + /// Return a `RackUuid` for each trust quorum along with its latest `Epoch`. + /// + /// For now, since we do not have multirack, and we aren't sure how big + /// those clusters are going to be we return all values and don't bother + /// paginating. The current `SQL_BATCH_SIZE` is also 1000, and it's unlikely + /// that there will ever be more than 1000 racks in a single fleet, sharing + /// a single CRDB cluster. + pub async fn tq_get_all_rack_id_and_latest_epoch( + &self, + opctx: &OpContext, + ) -> Result, Error> { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + let conn = &*self.pool_connection_authorized(opctx).await?; + + use nexus_db_schema::schema::trust_quorum_configuration::dsl; + + let values: Vec<(DbTypedUuid, i64)> = + dsl::trust_quorum_configuration + .select((dsl::rack_id, dsl::epoch)) + .order_by((dsl::rack_id, dsl::epoch.desc())) + .distinct_on(dsl::rack_id) + .load_async(conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + let mut output = BTreeMap::new(); + for (rack_id, epoch) in values { + output.insert(rack_id.into(), i64_to_epoch(epoch)?); + } + + Ok(output) + } + + /// Return any active `RackUuid` for each trust quorum along with its latest + /// `Epoch`.
+ /// + /// An active trust quorum configuration is one that has not `Committed` + /// or `Aborted`, which means that nexus has more work to do on that + /// configuration. + /// + /// For now, since we do not have multirack, and we aren't sure how big + /// those clusters are going to be we return all values and don't bother + /// paginating. The current `SQL_BATCH_SIZE` is also 1000, and it's unlikely + /// that there will ever be more than 1000 racks in a single fleet, sharing + /// a single CRDB cluster. + pub async fn tq_get_all_active_rack_id_and_latest_epoch( + &self, + opctx: &OpContext, + ) -> ListResultVec<(RackUuid, Epoch)> { + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + let conn = &*self.pool_connection_authorized(opctx).await?; + + use nexus_db_schema::schema::trust_quorum_configuration::dsl; + + let values: Vec<(DbTypedUuid, i64)> = + dsl::trust_quorum_configuration + .filter(dsl::state.ne_all(vec![ + DbTrustQuorumConfigurationState::Committed, + DbTrustQuorumConfigurationState::Aborted, + ])) + .select((dsl::rack_id, dsl::epoch)) + .order_by((dsl::rack_id, dsl::epoch.desc())) + .distinct_on(dsl::rack_id) + .load_async(conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + let mut output = Vec::with_capacity(values.len()); + + for (rack_id, epoch) in values { + output.push((rack_id.into(), i64_to_epoch(epoch)?)); + } + + Ok(output) + } + + /// Get the latest trust quorum configuration from the database + pub async fn tq_get_latest_config( + &self, + opctx: &OpContext, + rack_id: RackUuid, + ) -> OptionalLookupResult { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + let conn = &*self.pool_connection_authorized(opctx).await?; + + // First, retrieve our configuration if there is one. + let Some(latest) = + Self::tq_get_latest_config_conn(opctx, conn, rack_id) + .await + .map_err(|err| err.into_public_ignore_retries())? 
+ else { + return Ok(None); + }; + + // Then get any members associated with the configuration + let members = + Self::tq_get_members_conn(opctx, conn, rack_id, latest.epoch) + .await + .map_err(|err| err.into_public_ignore_retries())?; + + let mut tq_members: BTreeMap = + BTreeMap::new(); + let mut coordinator: Option = None; + for (member, hw_baseboard_id) in members { + let digest = if let Some(digest_str) = member.share_digest { + let mut data = [0u8; 32]; + hex::decode_to_slice(&digest_str, &mut data).map_err(|e| { + Error::internal_error(&format!( + "Failed to decode share digest for trust quorum member \ + {}:{} : {e}", + hw_baseboard_id.part_number, + hw_baseboard_id.serial_number + )) + })?; + Some(Sha3_256Digest(data)) + } else { + None + }; + + // The coordinator is always a member of the group + // We pull out its `BaseboardId` here. + if latest.coordinator == hw_baseboard_id.id { + coordinator = Some(hw_baseboard_id.clone().into()); + } + tq_members.insert( + hw_baseboard_id.into(), + TrustQuorumMemberData { state: member.state.into(), digest }, + ); + } + + let salt = if let Some(salt_str) = latest.encrypted_rack_secrets_salt { + let mut data = [0u8; 32]; + hex::decode_to_slice(&salt_str, &mut data).map_err(|e| { + Error::internal_error(&format!( + "Failed to decode salt for trust quorum config: \ + rack_id: {}, epoch: {}: {e}", + latest.rack_id, latest.epoch + )) + })?; + Some(Salt(data)) + } else { + None + }; + + let encrypted_rack_secrets = if let Some(salt) = salt { + let Some(secrets) = latest.encrypted_rack_secrets else { + // This should never happen due to constraint checks + return Err(Error::internal_error(&format!( + "Salt exists, but secrets do not for trust quorum config: \ + rack_id: {}, epoch: {}", + latest.rack_id, latest.epoch + ))); + }; + Some(EncryptedRackSecrets::new(salt, secrets.into_boxed_slice())) + } else { + None + }; + + let Some(coordinator) = coordinator else { + return Err(Error::internal_error(&format!( + "Failed to
find coordinator for hw_baseboard_id: {} \ + in trust quorum config.", + latest.coordinator + ))); + }; + + Ok(Some(TrustQuorumConfig { + rack_id: latest.rack_id.into(), + epoch: i64_to_epoch(latest.epoch)?, + state: latest.state.into(), + threshold: Threshold(latest.threshold.into()), + commit_crash_tolerance: latest.commit_crash_tolerance.into(), + coordinator, + encrypted_rack_secrets, + members: tq_members, + })) + } + + /// Insert a new trust quorum configuration, but only if it is equivalent + /// to the highest epoch of the last configuration + 1. + pub async fn tq_insert_latest_config( + &self, + opctx: &OpContext, + config: TrustQuorumConfig, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let conn = &*self.pool_connection_authorized(opctx).await?; + + let err = OptionalError::new(); + + self.transaction_retry_wrapper("tq_insert_latest_config") + .transaction(&conn, |c| { + let err = err.clone(); + let config = config.clone(); + + async move { + let current = Self::tq_get_latest_epoch_in_txn( + opctx, + &c, + config.rack_id, + ) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + + // Some light sanity checking + match config.state { + TrustQuorumConfigState::Preparing + | TrustQuorumConfigState::PreparingLrtqUpgrade => {} + TrustQuorumConfigState::Committing + | TrustQuorumConfigState::Committed + | TrustQuorumConfigState::Aborted => { + let state = config.state; + bail_txn!( + err, + "Cannot insert configuration in state={:?}", + state + ); + } + } + + let is_insertable = if let Some(epoch) = current { + // Only insert if what is in the DB is immediately prior to + // this configuration. + Some(epoch) == config.epoch.previous() + } else { + // We perform an unconditional insert here since + // no existing configuration exists. However, the + // configuration to be inserted is still subject to + // some constraints. 
+ // + // If there is no existing configuration, then the epoch + // to be inserted must be either 1 or 2. It will be 1 if + // this is a new initialization and 2 if this is an LRTQ + // upgrade. Let's check both conditions here and return + // an error if unmet. + match config.state { + TrustQuorumConfigState::Preparing => { + let actual = config.epoch; + let expected = Epoch(1); + if actual != expected { + bail_txn!( + err, + "Failed to insert first TQ \ + configuration: invalid epoch for \ + state=preparing: Expected {}, Got {}", + expected, + actual + ); + } + } + TrustQuorumConfigState::PreparingLrtqUpgrade => { + let actual = config.epoch; + let expected = Epoch(2); + if actual != expected { + bail_txn!( + err, + "Failed to insert first TQ \ + configuration: invalid epoch for \ + state=preparing-lrtq-upgrade: \ + Expected {}, Got {}", + expected, + actual + ); + } + } + _ => { + // Already checked above + } + } + true + }; + + if !is_insertable { + return Err(err.bail(TransactionError::CustomError( + Error::conflict(format!( + "expected current TQ epoch for rack_id \ + {} to be {:?}, found {:?}", + config.rack_id, + config.epoch.previous(), + current + )), + ))); + } + + Self::insert_tq_config_in_txn(opctx, conn, config) + .await + .map_err(|txn_error| txn_error.into_diesel(&err)) + } + }) + .await + .map_err(|e| match err.take() { + Some(err) => err.into_public_ignore_retries(), + None => public_error_from_diesel(e, ErrorHandler::Server), + }) + } + + /// If this configuration is in the `Preparing` state, then update any + /// members to acknowledge the prepare. + /// + /// Also, update any digests or encrypted rack secrets if necessary. + /// Lastly, if enough members have acked prepares then commit the configuration.
+ pub async fn tq_update_prepare_status( + &self, + opctx: &OpContext, + config: trust_quorum_protocol::Configuration, + acked_prepares: BTreeSet, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let conn = &*self.pool_connection_authorized(opctx).await?; + + let epoch = epoch_to_i64(config.epoch)?; + let rack_id = config.rack_id; + + let err = OptionalError::new(); + + self.transaction_retry_wrapper("tq_update_prepare_status") + .transaction(&conn, |c| { + let err = err.clone(); + let config = config.clone(); + let acked_prepares = acked_prepares.clone(); + async move { + // First, retrieve our configuration if there is one. + let latest = + Self::tq_get_latest_config_conn(opctx, &c, rack_id) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + + let Some(db_config) = latest else { + bail_txn!( + err, + "No trust quorum config for rack_id {} at epoch {}", + rack_id, + epoch + ); + }; + + if db_config.epoch != epoch { + let actual = db_config.epoch; + bail_txn!( + err, + "Cannot update trust quorum config. \ + Latest epoch does not match. Expected {}, Got {}", + epoch, + actual + ); + } + + // If we aren't preparing, then ignore this call. Multiple + // Nexuses race to completion and we don't want to worry + // about overwriting commits with prepares in the `state` + // field of each member. + if db_config.state != + DbTrustQuorumConfigurationState::Preparing + && db_config.state + != DbTrustQuorumConfigurationState::PreparingLrtqUpgrade + { + let state = db_config.state; + bail_txn!( + err, + "Ignoring stale update of trust quorum prepare \ + status. 
Expected state = preparing || \ + preparing-lrtq-upgrade, Got {:?}", + state + ); + } + + // Then get any members associated with the configuration + let db_members = Self::tq_get_members_conn( + opctx, + &c, + rack_id, + db_config.epoch, + ) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + + let mut total_acks = 0; + for (mut member, hw_id) in db_members { + let baseboard_id: BaseboardId = hw_id.into(); + + // Set the share digest for the member if we just learned it + if member.share_digest.is_none() { + let Some(digest) = + config.members.get(&baseboard_id) + else { + bail_txn!( + err, + "Cannot update share digest for {}. Not a \ + member of the trust quorum configuration.", + baseboard_id + ); + }; + member.share_digest = Some(hex::encode(digest.0)); + Self::update_tq_member_share_digest_in_txn( + opctx, + conn, + member.clone(), + ) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + } + + // Set the state of this member + if acked_prepares.contains(&baseboard_id) + && member.state == DbTrustQuorumMemberState::Unacked + { + total_acks += 1; + Self::update_tq_member_state_prepared_in_txn( + opctx, + conn, + member.clone(), + ) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + } + + if member.state == DbTrustQuorumMemberState::Prepared { + total_acks += 1; + } + } + + // We only update the configuration in the database if: + // 1. This is the first time we have seen encrypted rack secrets + // 2. We are transitioning from preparing to committed state. + + // Should we write secrets? + if db_config.encrypted_rack_secrets_salt.is_none() + && config.encrypted_rack_secrets.is_some() + { + Self::update_tq_encrypted_rack_secrets_in_txn( + opctx, + conn, + db_config.rack_id, + db_config.epoch, + config.encrypted_rack_secrets.unwrap(), + ) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + } + + // Do we have enough acks to commit? 
+ if total_acks + >= (db_config.threshold.0 + + db_config.commit_crash_tolerance.0) + as usize + { + Self::update_tq_state_committing_in_txn( + opctx, + conn, + db_config.rack_id, + db_config.epoch, + ) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + } + + Ok(()) + } + }) + .await + .map_err(|e| match err.take() { + Some(err) => err.into_public_ignore_retries(), + None => public_error_from_diesel(e, ErrorHandler::Server), + }) + } + + /// If this configuration is in the `Committing` state, then update any + /// members to acknowledge their commit acknowledgements. + pub async fn tq_update_commit_status( + &self, + opctx: &OpContext, + rack_id: RackUuid, + epoch: Epoch, + acked_commits: BTreeSet, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let conn = &*self.pool_connection_authorized(opctx).await?; + + let epoch = epoch_to_i64(epoch)?; + + let err = OptionalError::new(); + + self.transaction_retry_wrapper("tq_update_commit_status") + .transaction(&conn, |c| { + let err = err.clone(); + let acked_commits = acked_commits.clone(); + async move { + // First, retrieve our configuration if there is one. + let latest = + Self::tq_get_latest_config_conn(opctx, &c, rack_id) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + + let Some(db_config) = latest else { + bail_txn!( + err, + "No trust quorum config for rack_id {} at epoch {}", + rack_id, + epoch + ); + }; + + if db_config.epoch != epoch { + let actual = db_config.epoch; + bail_txn!( + err, + "Cannot update trust quorum config. \ + Latest epoch does not match. Expected {}, Got {}", + epoch, + actual + ); + } + + // Nexus should not be retrieving committed acks if the + // configuration is not `Committing`. + if db_config.state + != DbTrustQuorumConfigurationState::Committing + { + let state = db_config.state; + bail_txn!( + err, + "Invalid update of trust quorum commit status. 
\ + Expected `Committing`, got {:?}", + state + ); + } + + Self::update_tq_members_state_commit_in_txn( + opctx, + conn, + rack_id.into(), + epoch, + acked_commits, + ) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + + // Then get any members associated with the configuration + let db_members = Self::tq_get_members_conn( + opctx, + &c, + rack_id, + db_config.epoch, + ) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + + // If all members have acked their commits then mark the + // configuration as committed. + if db_members.iter().all(|(m, _)| { + m.state == DbTrustQuorumMemberState::Committed + }) { + Self::update_tq_state_committed_in_txn( + opctx, + conn, + db_config.rack_id, + db_config.epoch, + ) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + } + + Ok(()) + } + }) + .await + .map_err(|e| match err.take() { + Some(err) => err.into_public_ignore_retries(), + None => public_error_from_diesel(e, ErrorHandler::Server), + }) + } + + /// Abort the configuration for a trust quorum if `epoch` is the latest per `rack_id` + /// and the configuration has not been committed. + /// + /// This operation returns `Ok(())` if the configuration has already been + /// aborted and it is still the latest configuration. + pub async fn tq_abort_config( + &self, + opctx: &OpContext, + rack_id: RackUuid, + epoch: Epoch, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let conn = &*self.pool_connection_authorized(opctx).await?; + + let epoch = epoch_to_i64(epoch)?; + + let err = OptionalError::new(); + + self.transaction_retry_wrapper("tq_abort_config") + .transaction(&conn, |c| { + let err = err.clone(); + async move { + // First, retrieve our configuration if there is one. 
+ let latest = + Self::tq_get_latest_config_conn(opctx, &c, rack_id) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + + let Some(db_config) = latest else { + bail_txn!( + err, + "No trust quorum config for rack_id {} at epoch {}", + rack_id, + epoch + ); + }; + + if db_config.epoch != epoch { + let actual = db_config.epoch; + bail_txn!( + err, + "Cannot abort trust quorum config. \ + Latest epoch does not match. Expected {}, Got {}", + epoch, + actual + ); + } + + if db_config.state + == DbTrustQuorumConfigurationState::Aborted + { + // Abort is idempotent + return Ok(()); + } + + // If we've already started committing , we can't abort + if db_config.state + == DbTrustQuorumConfigurationState::Committing + || db_config.state + == DbTrustQuorumConfigurationState::Committed + { + let state = db_config.state; + bail_txn!( + err, + "Invalid update of trust quorum abort status. \ + Expected `Preparing`, got `{:?}`", + state + ); + } + + Self::update_tq_abort_state_in_txn( + opctx, + conn, + db_config.rack_id, + db_config.epoch, + ) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + + Ok(()) + } + }) + .await + .map_err(|e| match err.take() { + Some(err) => err.into_public_ignore_retries(), + None => public_error_from_diesel(e, ErrorHandler::Server), + }) + } + + // Unconditional insert that should only run inside a transaction + async fn insert_tq_config_in_txn( + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + config: TrustQuorumConfig, + ) -> Result<(), TransactionError> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + + let members = Self::lookup_hw_baseboard_ids_conn( + opctx, + conn, + config.members.keys().cloned(), + ) + .await?; + + let (salt, secrets) = + config.encrypted_rack_secrets.map_or((None, None), |s| { + (Some(hex::encode(s.salt.0)), Some(s.data.into())) + }); + + // Max of 32 members to search. 
We could use binary search if we sorted + // the output with an `order_by` in the DB query, or speed up search + // if converted to a map. Neither seems necessary for such a rare + // operation. + let coordinator_id = members.iter().find(|m| { + m.part_number == config.coordinator.part_number + && m.serial_number == config.coordinator.serial_number + }); + bail_unless!( + coordinator_id.is_some(), + "Coordinator: {} is not a member of the trust quorum", + config.coordinator + ); + let coordinator_id = coordinator_id.unwrap().id; + + let epoch = epoch_to_i64(config.epoch) + .map_err(|e| TransactionError::from(e))?; + + // Insert the configuration + use nexus_db_schema::schema::trust_quorum_configuration::dsl; + diesel::insert_into(dsl::trust_quorum_configuration) + .values(DbTrustQuorumConfiguration { + rack_id: config.rack_id.into(), + epoch, + state: config.state.into(), + threshold: config.threshold.0.into(), + commit_crash_tolerance: config.commit_crash_tolerance.into(), + coordinator: coordinator_id, + encrypted_rack_secrets_salt: salt, + encrypted_rack_secrets: secrets, + }) + .execute_async(conn) + .await?; + + // Insert the members + let members: Vec<_> = members + .into_iter() + .map(|m| DbTrustQuorumMember { + rack_id: config.rack_id.into(), + epoch, + hw_baseboard_id: m.id, + state: nexus_db_model::DbTrustQuorumMemberState::Unacked, + share_digest: None, + }) + .collect(); + + use nexus_db_schema::schema::trust_quorum_member::dsl as members_dsl; + diesel::insert_into(members_dsl::trust_quorum_member) + .values(members) + .execute_async(conn) + .await?; + + Ok(()) + } + + async fn update_tq_members_state_commit_in_txn( + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + rack_id: DbTypedUuid, + epoch: i64, + acked_commits: BTreeSet, + ) -> Result<(), TransactionError> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + use nexus_db_schema::schema::trust_quorum_member::dsl; + + let hw_baseboard_ids: Vec<_> = 
Self::lookup_hw_baseboard_ids_conn( + opctx, + conn, + acked_commits.into_iter(), + ) + .await? + .into_iter() + .map(|hw| hw.id) + .collect(); + + diesel::update(dsl::trust_quorum_member) + .filter(dsl::rack_id.eq(rack_id)) + .filter(dsl::epoch.eq(epoch)) + .filter(dsl::hw_baseboard_id.eq_any(hw_baseboard_ids)) + .filter(dsl::share_digest.is_not_null()) + .filter(dsl::state.eq_any(vec![ + DbTrustQuorumMemberState::Unacked, + DbTrustQuorumMemberState::Prepared, + ])) + .set(dsl::state.eq(DbTrustQuorumMemberState::Committed)) + .execute_async(conn) + .await?; + Ok(()) + } + + async fn update_tq_member_share_digest_in_txn( + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + member: DbTrustQuorumMember, + ) -> Result<(), TransactionError> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + use nexus_db_schema::schema::trust_quorum_member::dsl; + + diesel::update(dsl::trust_quorum_member) + .filter(dsl::rack_id.eq(member.rack_id)) + .filter(dsl::epoch.eq(member.epoch)) + .filter(dsl::hw_baseboard_id.eq(member.hw_baseboard_id)) + .filter(dsl::share_digest.is_null()) + .filter(dsl::state.eq(DbTrustQuorumMemberState::Unacked)) + .set(dsl::share_digest.eq(member.share_digest)) + .execute_async(conn) + .await?; + + Ok(()) + } + + async fn update_tq_member_state_prepared_in_txn( + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + member: DbTrustQuorumMember, + ) -> Result<(), TransactionError> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + use nexus_db_schema::schema::trust_quorum_member::dsl; + + diesel::update(dsl::trust_quorum_member) + .filter(dsl::rack_id.eq(member.rack_id)) + .filter(dsl::epoch.eq(member.epoch)) + .filter(dsl::hw_baseboard_id.eq(member.hw_baseboard_id)) + .filter(dsl::share_digest.is_not_null()) + .filter(dsl::state.eq(DbTrustQuorumMemberState::Unacked)) + .set(dsl::state.eq(DbTrustQuorumMemberState::Prepared)) + .execute_async(conn) + .await?; + + Ok(()) + } + + async fn 
update_tq_encrypted_rack_secrets_in_txn( + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + rack_id: DbTypedUuid, + epoch: i64, + encrypted_rack_secrets: EncryptedRackSecrets, + ) -> Result<(), TransactionError> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let salt = Some(hex::encode(encrypted_rack_secrets.salt.0)); + let secrets: Option> = Some(encrypted_rack_secrets.data.into()); + + use nexus_db_schema::schema::trust_quorum_configuration::dsl; + + diesel::update(dsl::trust_quorum_configuration) + .filter(dsl::rack_id.eq(rack_id)) + .filter(dsl::epoch.eq(epoch)) + .filter(dsl::encrypted_rack_secrets_salt.is_null()) + .filter(dsl::encrypted_rack_secrets.is_null()) + .set(( + dsl::encrypted_rack_secrets_salt.eq(salt), + dsl::encrypted_rack_secrets.eq(secrets), + )) + .execute_async(conn) + .await?; + + Ok(()) + } + + /// Returns the number of rows update + async fn update_tq_state_committing_in_txn( + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + rack_id: DbTypedUuid, + epoch: i64, + ) -> Result> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + use nexus_db_schema::schema::trust_quorum_configuration::dsl; + + let num_rows_updated = diesel::update(dsl::trust_quorum_configuration) + .filter(dsl::rack_id.eq(rack_id)) + .filter(dsl::epoch.eq(epoch)) + .filter(dsl::state.eq_any(vec![ + DbTrustQuorumConfigurationState::Preparing, + DbTrustQuorumConfigurationState::PreparingLrtqUpgrade, + ])) + .set(dsl::state.eq(DbTrustQuorumConfigurationState::Committing)) + .execute_async(conn) + .await?; + + Ok(num_rows_updated) + } + + /// Returns the number of rows update + async fn update_tq_state_committed_in_txn( + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + rack_id: DbTypedUuid, + epoch: i64, + ) -> Result> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + use nexus_db_schema::schema::trust_quorum_configuration::dsl; + + let num_rows_updated = 
diesel::update(dsl::trust_quorum_configuration) + .filter(dsl::rack_id.eq(rack_id)) + .filter(dsl::epoch.eq(epoch)) + .filter(dsl::state.eq(DbTrustQuorumConfigurationState::Committing)) + .set(dsl::state.eq(DbTrustQuorumConfigurationState::Committed)) + .execute_async(conn) + .await?; + + Ok(num_rows_updated) + } + + async fn update_tq_abort_state_in_txn( + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + rack_id: DbTypedUuid, + epoch: i64, + ) -> Result<(), TransactionError> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + use nexus_db_schema::schema::trust_quorum_configuration::dsl; + + diesel::update(dsl::trust_quorum_configuration) + .filter(dsl::rack_id.eq(rack_id)) + .filter(dsl::epoch.eq(epoch)) + .filter(dsl::state.eq_any([ + DbTrustQuorumConfigurationState::Preparing, + DbTrustQuorumConfigurationState::PreparingLrtqUpgrade, + ])) + .set(dsl::state.eq(DbTrustQuorumConfigurationState::Aborted)) + .execute_async(conn) + .await?; + + Ok(()) + } + + async fn lookup_hw_baseboard_ids_conn( + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + members: impl Iterator, + ) -> Result, TransactionError> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use nexus_db_schema::schema::hw_baseboard_id::dsl; + + let (parts, serials): (Vec<_>, Vec<_>) = members + .into_iter() + .map(|m| (m.part_number, m.serial_number)) + .collect(); + + dsl::hw_baseboard_id + .filter(dsl::part_number.eq_any(parts)) + .filter(dsl::serial_number.eq_any(serials)) + .select(HwBaseboardId::as_select()) + .load_async(conn) + .await + .map_err(TransactionError::Database) + } + + async fn tq_get_latest_epoch_in_txn( + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + rack_id: RackUuid, + ) -> Result, TransactionError> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use nexus_db_schema::schema::trust_quorum_configuration::dsl; + let Some(latest_epoch) = dsl::trust_quorum_configuration + 
.filter(dsl::rack_id.eq(DbTypedUuid::::from(rack_id))) + .order_by(dsl::epoch.desc()) + .select(dsl::epoch) + .first_async::(conn) + .await + .optional()? + else { + return Ok(None); + }; + let latest_epoch = i64_to_epoch(latest_epoch)?; + Ok(Some(latest_epoch)) + } + + async fn tq_get_latest_config_conn( + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + rack_id: RackUuid, + ) -> Result, TransactionError> + { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use nexus_db_schema::schema::trust_quorum_configuration::dsl; + + let latest = dsl::trust_quorum_configuration + .filter(dsl::rack_id.eq(DbTypedUuid::::from(rack_id))) + .order_by(dsl::epoch.desc()) + .first_async::(conn) + .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(latest) + } + + async fn tq_get_members_conn( + opctx: &OpContext, + conn: &async_bb8_diesel::Connection, + rack_id: RackUuid, + epoch: i64, + ) -> Result< + Vec<(DbTrustQuorumMember, HwBaseboardId)>, + TransactionError, + > { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use nexus_db_schema::schema::hw_baseboard_id::dsl as hw_baseboard_id_dsl; + use nexus_db_schema::schema::trust_quorum_member::dsl; + + let members = dsl::trust_quorum_member + .filter(dsl::rack_id.eq(DbTypedUuid::::from(rack_id))) + .filter(dsl::epoch.eq(epoch)) + .inner_join( + hw_baseboard_id_dsl::hw_baseboard_id + .on(hw_baseboard_id_dsl::id.eq(dsl::hw_baseboard_id)), + ) + .select(( + DbTrustQuorumMember::as_select(), + HwBaseboardId::as_select(), + )) + .load_async(conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(members) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::pub_test_utils::TestDatabase; + use nexus_db_model::HwBaseboardId; + use nexus_types::trust_quorum::{ + TrustQuorumConfigState, TrustQuorumMemberState, + }; + use omicron_test_utils::dev::test_setup_log; + use omicron_uuid_kinds::RackUuid; + use 
uuid::Uuid; + + async fn insert_hw_baseboard_ids(db: &TestDatabase) -> Vec { + let (_, datastore) = (db.opctx(), db.datastore()); + let conn = datastore.pool_connection_for_tests().await.unwrap(); + use nexus_db_schema::schema::hw_baseboard_id::dsl; + let hw_baseboard_ids: Vec<_> = (0..10) + .map(|i| HwBaseboardId { + id: Uuid::new_v4(), + part_number: "test-part".to_string(), + serial_number: i.to_string(), + }) + .collect(); + + diesel::insert_into(dsl::hw_baseboard_id) + .values(hw_baseboard_ids.clone()) + .execute_async(&*conn) + .await + .unwrap(); + + hw_baseboard_ids + } + + #[tokio::test] + async fn test_insert_latest_tq_round_trip() { + let logctx = test_setup_log("test_insert_latest_tq_round_trip"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let hw_ids = insert_hw_baseboard_ids(&db).await; + + let rack_id = RackUuid::new_v4(); + + // Create an initial config + let mut config = TrustQuorumConfig { + rack_id, + epoch: Epoch(1), + state: TrustQuorumConfigState::Preparing, + threshold: Threshold((hw_ids.len() / 2 + 1) as u8), + commit_crash_tolerance: 2, + coordinator: hw_ids.first().unwrap().clone().into(), + encrypted_rack_secrets: None, + members: hw_ids + .clone() + .into_iter() + .map(|m| (m.into(), TrustQuorumMemberData::new())) + .collect(), + }; + + // Create a couple of invalid configs andd try to insert them. + // They should return distinct errors. 
+ let bad_config = + TrustQuorumConfig { epoch: Epoch(2), ..config.clone() }; + let e1 = datastore + .tq_insert_latest_config(opctx, bad_config) + .await + .unwrap_err(); + + let bad_config = TrustQuorumConfig { + epoch: Epoch(3), + state: TrustQuorumConfigState::PreparingLrtqUpgrade, + ..config.clone() + }; + let e2 = datastore + .tq_insert_latest_config(opctx, bad_config) + .await + .unwrap_err(); + + let bad_config = TrustQuorumConfig { + state: TrustQuorumConfigState::Committing, + ..config.clone() + }; + let e3 = datastore + .tq_insert_latest_config(opctx, bad_config) + .await + .unwrap_err(); + + assert_ne!(e1, e2); + assert_ne!(e1, e3); + assert_ne!(e2, e3); + + // Insert a valid config and watch it succeed + + datastore.tq_insert_latest_config(opctx, config.clone()).await.unwrap(); + + let read_config = datastore + .tq_get_latest_config(opctx, rack_id) + .await + .expect("no error") + .expect("returned config"); + + assert_eq!(config, read_config); + + // Inserting the same config again should fail + datastore + .tq_insert_latest_config(opctx, config.clone()) + .await + .expect_err("duplicate insert should fail"); + + // Bumping the epoch and inserting should succeed + config.epoch = Epoch(2); + datastore.tq_insert_latest_config(opctx, config.clone()).await.unwrap(); + + let read_config = datastore + .tq_get_latest_config(opctx, rack_id) + .await + .expect("no error") + .expect("returned config"); + + assert_eq!(config, read_config); + + // We should get an error if we try to insert with a coordinator that is + // not part of the membership. 
+ config.epoch = Epoch(3); + let saved_serial = config.coordinator.serial_number.clone(); + config.coordinator.serial_number = "dummy".to_string(); + datastore + .tq_insert_latest_config(opctx, config.clone()) + .await + .expect_err("insert should fail with invalid coordinator"); + + // Restoring the serial number should succeed + config.coordinator.serial_number = saved_serial; + datastore.tq_insert_latest_config(opctx, config.clone()).await.unwrap(); + + let read_config = datastore + .tq_get_latest_config(opctx, rack_id) + .await + .expect("no error") + .expect("returned config"); + + assert_eq!(config, read_config); + + // Incrementing the epoch by more than one should fail + config.epoch = Epoch(5); + datastore + .tq_insert_latest_config(opctx, config.clone()) + .await + .expect_err( + "insert should fail because previous epoch is incorrect", + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_tq_update_prepare_and_commit_normal_case() { + let logctx = + test_setup_log("test_tq_update_prepare_and_commit_normal_case"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let hw_ids = insert_hw_baseboard_ids(&db).await; + + let rack_id = RackUuid::new_v4(); + + // Create an initial config + let config = TrustQuorumConfig { + rack_id, + epoch: Epoch(1), + state: TrustQuorumConfigState::Preparing, + threshold: Threshold((hw_ids.len() / 2 + 1) as u8), + commit_crash_tolerance: 2, + coordinator: hw_ids.first().unwrap().clone().into(), + encrypted_rack_secrets: None, + members: hw_ids + .clone() + .into_iter() + .map(|m| (m.into(), TrustQuorumMemberData::new())) + .collect(), + }; + + datastore.tq_insert_latest_config(opctx, config.clone()).await.unwrap(); + + // A configuration returned from a coordinator is different + let coordinator_config = trust_quorum_protocol::Configuration { + rack_id: config.rack_id, + epoch: config.epoch, + coordinator: 
hw_ids.first().unwrap().clone().into(), + members: config + .members + .keys() + .cloned() + .map(|id| (id, Sha3_256Digest([0u8; 32]))) + .collect(), + threshold: config.threshold, + encrypted_rack_secrets: None, + }; + + // Ack only the coordinator + datastore + .tq_update_prepare_status( + opctx, + coordinator_config.clone(), + [coordinator_config.coordinator.clone()].into_iter().collect(), + ) + .await + .unwrap(); + + let read_config = datastore + .tq_get_latest_config(opctx, rack_id) + .await + .expect("no error") + .expect("returned config"); + + // Ensure that Nexus has only seen the coordinator ack and that it has + // not yet committed. There should also be no encrypted rack secrets, + // and all members should now have share digests. + assert_eq!(read_config.epoch, config.epoch); + assert_eq!(read_config.state, TrustQuorumConfigState::Preparing); + assert!(read_config.encrypted_rack_secrets.is_none()); + for (id, info) in &read_config.members { + assert!(info.digest.is_some()); + if *id == coordinator_config.coordinator { + assert_eq!(info.state, TrustQuorumMemberState::Prepared); + } else { + assert_eq!(info.state, TrustQuorumMemberState::Unacked); + } + } + + // Ack a threshold of peers. + datastore + .tq_update_prepare_status( + opctx, + coordinator_config.clone(), + coordinator_config + .members + .keys() + .take(config.threshold.0 as usize) + .cloned() + .collect(), + ) + .await + .unwrap(); + + let read_config = datastore + .tq_get_latest_config(opctx, rack_id) + .await + .expect("no error") + .expect("returned config"); + + // We've acked a threshold of nodes, but still should not have committed + // because we haven't yet acked the `commit_crash_tolerance` number of + // nodes in addition. 
+ assert_eq!(read_config.epoch, config.epoch); + assert_eq!(read_config.state, TrustQuorumConfigState::Preparing); + assert!(read_config.encrypted_rack_secrets.is_none()); + assert_eq!( + config.threshold.0 as usize, + read_config + .members + .iter() + .filter( + |(_, info)| info.state == TrustQuorumMemberState::Prepared + ) + .count() + ); + + // Ack an additional `commit_crash_tolerance` of nodes. This should + // trigger a commit. + let acked_prepares = config.threshold.0 as usize + + config.commit_crash_tolerance as usize; + + datastore + .tq_update_prepare_status( + opctx, + coordinator_config.clone(), + coordinator_config + .members + .keys() + .take(acked_prepares) + .cloned() + .collect(), + ) + .await + .unwrap(); + + let read_config = datastore + .tq_get_latest_config(opctx, rack_id) + .await + .expect("no error") + .expect("returned config"); + + // We've acked enough nodes and should have written our status to the DB + assert_eq!(read_config.epoch, config.epoch); + assert_eq!(read_config.state, TrustQuorumConfigState::Committing); + assert!(read_config.encrypted_rack_secrets.is_none()); + assert_eq!( + acked_prepares, + read_config + .members + .iter() + .filter( + |(_, info)| info.state == TrustQuorumMemberState::Prepared + ) + .count() + ); + + // Future prepare acks should fail because we have already committed. 
+ datastore + .tq_update_prepare_status( + opctx, + coordinator_config.clone(), + coordinator_config + .members + .keys() + .take(acked_prepares) + .cloned() + .collect(), + ) + .await + .unwrap_err(); + + // Commit at all nodes + datastore + .tq_update_commit_status( + opctx, + rack_id, + config.epoch, + coordinator_config.members.keys().cloned().collect(), + ) + .await + .unwrap(); + + let read_config = datastore + .tq_get_latest_config(opctx, rack_id) + .await + .expect("no error") + .expect("returned config"); + + assert_eq!(read_config.epoch, config.epoch); + // Now that all nodes have committed, we should see the config state + // change from `Committing` to Committed. + assert_eq!(read_config.state, TrustQuorumConfigState::Committed); + assert!(read_config.encrypted_rack_secrets.is_none()); + assert!( + read_config.members.iter().all( + |(_, info)| info.state == TrustQuorumMemberState::Committed + ) + ); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_tq_abort() { + let logctx = test_setup_log("test_tq_abort"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let hw_ids = insert_hw_baseboard_ids(&db).await; + + let rack_id = RackUuid::new_v4(); + + // Create an initial config + let config = TrustQuorumConfig { + rack_id, + epoch: Epoch(1), + state: TrustQuorumConfigState::Preparing, + threshold: Threshold((hw_ids.len() / 2 + 1) as u8), + commit_crash_tolerance: 2, + coordinator: hw_ids.first().unwrap().clone().into(), + encrypted_rack_secrets: None, + members: hw_ids + .clone() + .into_iter() + .map(|m| (m.into(), TrustQuorumMemberData::new())) + .collect(), + }; + + datastore.tq_insert_latest_config(opctx, config.clone()).await.unwrap(); + + // Aborting should succeed, since we haven't committed + datastore + .tq_abort_config(opctx, config.rack_id, config.epoch) + .await + .unwrap(); + + // Aborting is idempotent + datastore + 
.tq_abort_config(opctx, config.rack_id, config.epoch) + .await + .unwrap(); + + // Committing will fail to update any rows + // (This is not directly callable from a public API). + { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + let num_rows_updated = + DataStore::update_tq_state_committing_in_txn( + opctx, + &conn, + config.rack_id.into(), + config.epoch.0 as i64, + ) + .await + .unwrap(); + assert_eq!(num_rows_updated, 0); + } + + // A configuration returned from a coordinator is different + let coordinator_config = trust_quorum_protocol::Configuration { + rack_id: config.rack_id, + epoch: config.epoch, + coordinator: hw_ids.first().unwrap().clone().into(), + members: config + .members + .keys() + .cloned() + .map(|id| (id, Sha3_256Digest([0u8; 32]))) + .collect(), + threshold: config.threshold, + encrypted_rack_secrets: None, + }; + + // This is how we actually try to trigger commit operations. This should fail outright. + let acked_prepares = config.threshold.0 as usize + + config.commit_crash_tolerance as usize; + datastore + .tq_update_prepare_status( + opctx, + coordinator_config.clone(), + coordinator_config + .members + .keys() + .take(acked_prepares) + .cloned() + .collect(), + ) + .await + .unwrap_err(); + + // Retrieve the configuration and ensure it is actually aborted + let read_config = datastore + .tq_get_latest_config(opctx, rack_id) + .await + .expect("no error") + .expect("returned config"); + assert_eq!(read_config.state, TrustQuorumConfigState::Aborted); + + // Create a second config + let config2 = TrustQuorumConfig { epoch: Epoch(2), ..config.clone() }; + datastore + .tq_insert_latest_config(opctx, config2.clone()) + .await + .unwrap(); + + // Trying to abort the old config will fail because it's stale + datastore + .tq_abort_config(opctx, config.rack_id, config.epoch) + .await + .unwrap_err(); + + // Commit it + let coordinator_config2 = trust_quorum_protocol::Configuration { + epoch: config2.epoch, + 
..coordinator_config + }; + let acked_prepares = config2.threshold.0 as usize + + config2.commit_crash_tolerance as usize; + datastore + .tq_update_prepare_status( + opctx, + coordinator_config2.clone(), + coordinator_config2 + .members + .keys() + .take(acked_prepares) + .cloned() + .collect(), + ) + .await + .unwrap(); + + // Abort of latest config should fail because it has already committed + datastore + .tq_abort_config(opctx, config2.rack_id, config2.epoch) + .await + .unwrap_err(); + + db.terminate().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_get_all_rack_id_and_latest_epoch() { + let logctx = test_setup_log("test_get_all_rack_id_and_latest_epoch"); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let rack_id1 = RackUuid::new_v4(); + let rack_id2 = RackUuid::new_v4(); + let rack_id3 = RackUuid::new_v4(); + + // Create an initial config for 3 diff racks + for rack_id in [rack_id1, rack_id2, rack_id3] { + let hw_ids = insert_hw_baseboard_ids(&db).await; + let config = TrustQuorumConfig { + rack_id, + epoch: Epoch(1), + state: TrustQuorumConfigState::Preparing, + threshold: Threshold((hw_ids.len() / 2 + 1) as u8), + commit_crash_tolerance: 2, + coordinator: hw_ids.first().unwrap().clone().into(), + encrypted_rack_secrets: None, + members: hw_ids + .clone() + .into_iter() + .map(|m| (m.into(), TrustQuorumMemberData::new())) + .collect(), + }; + + datastore + .tq_insert_latest_config(opctx, config.clone()) + .await + .unwrap(); + } + + // Create a second rack config for rack 2 + let hw_ids = insert_hw_baseboard_ids(&db).await; + let config = TrustQuorumConfig { + rack_id: rack_id2, + epoch: Epoch(2), + state: TrustQuorumConfigState::Preparing, + threshold: Threshold((hw_ids.len() / 2 + 1) as u8), + commit_crash_tolerance: 2, + coordinator: hw_ids.first().unwrap().clone().into(), + encrypted_rack_secrets: None, + members: hw_ids + .clone() + .into_iter() + 
.map(|m| (m.into(), TrustQuorumMemberData::new())) + .collect(), + }; + + datastore.tq_insert_latest_config(opctx, config.clone()).await.unwrap(); + + // Retreive the latest epochs per rack_id + let values = + datastore.tq_get_all_rack_id_and_latest_epoch(opctx).await.unwrap(); + + // We should have retrieved one epoch per rack_id + assert_eq!(values.len(), 3); + + // The epoch should be the latest that exists + for (rack_id, epoch) in values { + if rack_id == rack_id2 { + assert_eq!(epoch, Epoch(2)); + } else { + assert_eq!(epoch, Epoch(1)); + } + } + + db.terminate().await; + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index 2f71eb7ac50..ff59a4810f8 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -99,6 +99,8 @@ define_enums! { SwitchLinkSpeedEnum => "switch_link_speed", SwitchPortGeometryEnum => "switch_port_geometry", TargetReleaseSourceEnum => "target_release_source", + TrustQuorumConfigurationStateEnum => "trust_quorum_configuration_state", + TrustQuorumMemberStateEnum => "trust_quorum_member_state", UpstairsRepairNotificationTypeEnum => "upstairs_repair_notification_type", UpstairsRepairTypeEnum => "upstairs_repair_type", UserDataExportResourceTypeEnum => "user_data_export_resource_type", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 3ef6efb4b0a..55062e9682b 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -2989,3 +2989,29 @@ table! { allow_tables_to_appear_in_same_query!(fm_ereport_in_case, ereport); allow_tables_to_appear_in_same_query!(fm_sitrep, fm_case); + +table! { + trust_quorum_configuration (rack_id, epoch) { + rack_id -> Uuid, + epoch -> Int8, + state -> crate::enums::TrustQuorumConfigurationStateEnum, + threshold -> Int2, + commit_crash_tolerance -> Int2, + coordinator -> Uuid, + encrypted_rack_secrets_salt -> Nullable, + encrypted_rack_secrets -> Nullable, + } +} + +table! 
{ + trust_quorum_member (rack_id, epoch, hw_baseboard_id) { + rack_id -> Uuid, + epoch -> Int8, + hw_baseboard_id -> Uuid, + state -> crate::enums::TrustQuorumMemberStateEnum, + share_digest -> Nullable, + } +} + +allow_tables_to_appear_in_same_query!(trust_quorum_member, hw_baseboard_id); +joinable!(trust_quorum_member -> hw_baseboard_id(hw_baseboard_id)); diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index aeea2588652..e40529eaee1 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -17,7 +17,6 @@ use cockroach_admin_types::NodeId; use gateway_client::types::SpComponentCaboose; use gateway_client::types::SpState; use iddqd::IdOrdMap; -use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Caboose; use nexus_types::inventory::CabooseFound; use nexus_types::inventory::CabooseWhich; @@ -41,6 +40,7 @@ use omicron_common::disk::M2Slot; use omicron_uuid_kinds::CollectionKind; use sled_agent_types::inventory::Baseboard; use sled_agent_types::inventory::Inventory; +use sled_agent_types::sled::BaseboardId; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::hash::Hash; @@ -797,7 +797,6 @@ mod test { use gateway_client::types::SpComponentCaboose; use gateway_client::types::SpState; use gateway_types::rot::RotSlot; - use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Caboose; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::RotPage; @@ -805,6 +804,7 @@ mod test { use nexus_types::inventory::SpType; use omicron_common::api::external::ByteCount; use sled_agent_types::inventory::SledRole; + use sled_agent_types::sled::BaseboardId; // Verify the contents of an empty collection. 
#[test] diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index 27b8696233b..746a882aeea 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -15,7 +15,6 @@ use gateway_client::types::SpComponentCaboose; use gateway_client::types::SpState; use gateway_types::rot::RotSlot; use iddqd::id_ord_map; -use nexus_types::inventory::BaseboardId; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::InternalDnsGenerationStatus; use nexus_types::inventory::RotPage; @@ -53,6 +52,7 @@ use sled_agent_types::inventory::OrphanedDataset; use sled_agent_types::inventory::SledCpuFamily; use sled_agent_types::inventory::SledRole; use sled_agent_types::inventory::ZoneImageResolverInventory; +use sled_agent_types::sled::BaseboardId; use sled_agent_types::zone_images::MeasurementManifestStatus; use sled_agent_types::zone_images::MupdateOverrideNonBootInfo; use sled_agent_types::zone_images::MupdateOverrideNonBootMismatch; diff --git a/nexus/mgs-updates/src/driver.rs b/nexus/mgs-updates/src/driver.rs index 8e51402b43e..0efd59a3594 100644 --- a/nexus/mgs-updates/src/driver.rs +++ b/nexus/mgs-updates/src/driver.rs @@ -24,9 +24,9 @@ use nexus_types::internal_api::views::MgsUpdateDriverStatus; use nexus_types::internal_api::views::UpdateAttemptStatus; use nexus_types::internal_api::views::UpdateCompletedHow; use nexus_types::internal_api::views::WaitingStatus; -use nexus_types::inventory::BaseboardId; use omicron_uuid_kinds::SpUpdateUuid; use qorb::resolver::AllBackends; +use sled_agent_types::sled::BaseboardId; use slog::{error, info, o, warn}; use slog_error_chain::InlineErrorChain; use std::collections::VecDeque; diff --git a/nexus/mgs-updates/src/driver_update.rs b/nexus/mgs-updates/src/driver_update.rs index 349406f7b8b..da49ba9e7a4 100644 --- a/nexus/mgs-updates/src/driver_update.rs +++ b/nexus/mgs-updates/src/driver_update.rs @@ -782,8 +782,8 @@ mod test { use nexus_types::deployment::ExpectedVersion; use 
nexus_types::internal_api::views::UpdateAttemptStatus; use nexus_types::internal_api::views::UpdateCompletedHow; - use nexus_types::inventory::BaseboardId; use nexus_types::inventory::SpType; + use sled_agent_types::sled::BaseboardId; use slog_error_chain::InlineErrorChain; use std::time::Duration; use tufaceous_artifact::ArtifactHash; diff --git a/nexus/mgs-updates/src/driver_update/test_host_phase_1.rs b/nexus/mgs-updates/src/driver_update/test_host_phase_1.rs index 1db0422bc85..f1eb93fc653 100644 --- a/nexus/mgs-updates/src/driver_update/test_host_phase_1.rs +++ b/nexus/mgs-updates/src/driver_update/test_host_phase_1.rs @@ -15,8 +15,8 @@ use gateway_messages::SpPort; use gateway_test_utils::setup::GatewayTestContext; use nexus_types::internal_api::views::UpdateAttemptStatus; use nexus_types::internal_api::views::UpdateCompletedHow; -use nexus_types::inventory::BaseboardId; use omicron_common::disk::M2Slot; +use sled_agent_types::sled::BaseboardId; use slog_error_chain::InlineErrorChain; use sp_sim::SimulatedSp; use std::time::Duration; diff --git a/nexus/mgs-updates/src/test_util/sp_test_state.rs b/nexus/mgs-updates/src/test_util/sp_test_state.rs index 3b6d87919c5..7138f7f45bf 100644 --- a/nexus/mgs-updates/src/test_util/sp_test_state.rs +++ b/nexus/mgs-updates/src/test_util/sp_test_state.rs @@ -14,8 +14,8 @@ use gateway_types::component::SpType; use gateway_types::rot::RotSlot; use nexus_types::deployment::ExpectedActiveRotSlot; use nexus_types::deployment::ExpectedVersion; -use nexus_types::inventory::BaseboardId; use omicron_common::disk::M2Slot; +use sled_agent_types::sled::BaseboardId; use slog_error_chain::InlineErrorChain; use std::time::Duration; use tufaceous_artifact::ArtifactHash; diff --git a/nexus/mgs-updates/src/test_util/updates.rs b/nexus/mgs-updates/src/test_util/updates.rs index f13ee384daf..92b352351ee 100644 --- a/nexus/mgs-updates/src/test_util/updates.rs +++ b/nexus/mgs-updates/src/test_util/updates.rs @@ -35,9 +35,9 @@ use 
nexus_types::internal_api::views::InProgressUpdateStatus; use nexus_types::internal_api::views::MgsUpdateDriverStatus; use nexus_types::internal_api::views::UpdateAttemptStatus; use nexus_types::internal_api::views::UpdateCompletedHow; -use nexus_types::inventory::BaseboardId; use omicron_common::disk::M2Slot; use omicron_uuid_kinds::SpUpdateUuid; +use sled_agent_types::sled::BaseboardId; use slog::debug; use slog_error_chain::InlineErrorChain; use std::sync::Arc; diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 217eefd11f2..5c5708afd5c 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -51,7 +51,6 @@ use nexus_types::deployment::UpstreamNtpConfig; use nexus_types::deployment::ZpoolName; use nexus_types::deployment::blueprint_zone_type; use nexus_types::external_api::views::SledState; -use nexus_types::inventory::BaseboardId; use omicron_common::address::CLICKHOUSE_HTTP_PORT; use omicron_common::address::DNS_HTTP_PORT; use omicron_common::address::DNS_PORT; @@ -75,6 +74,7 @@ use omicron_uuid_kinds::ZpoolUuid; use sled_agent_types::inventory::MupdateOverrideBootInventory; use sled_agent_types::inventory::OmicronZoneDataset; use sled_agent_types::inventory::ZoneKind; +use sled_agent_types::sled::BaseboardId; use slog::Logger; use slog::debug; use slog::error; diff --git a/nexus/reconfigurator/planning/src/mgs_updates/host_phase_1.rs b/nexus/reconfigurator/planning/src/mgs_updates/host_phase_1.rs index 43ae62bd272..6304092a490 100644 --- a/nexus/reconfigurator/planning/src/mgs_updates/host_phase_1.rs +++ b/nexus/reconfigurator/planning/src/mgs_updates/host_phase_1.rs @@ -15,13 +15,13 @@ use nexus_types::deployment::PendingMgsUpdate; use nexus_types::deployment::PendingMgsUpdateDetails; use nexus_types::deployment::PendingMgsUpdateHostPhase1Details; use 
nexus_types::deployment::planning_report::FailedHostOsUpdateReason; -use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Collection; use nexus_types::inventory::SpType; use omicron_common::api::external::TufArtifactMeta; use omicron_common::api::external::TufRepoDescription; use omicron_common::disk::M2Slot; use omicron_uuid_kinds::SledUuid; +use sled_agent_types::sled::BaseboardId; use sled_hardware_types::OxideSled; use slog::Logger; use slog::debug; diff --git a/nexus/reconfigurator/planning/src/mgs_updates/mod.rs b/nexus/reconfigurator/planning/src/mgs_updates/mod.rs index 027cf0a5293..6df72426f70 100644 --- a/nexus/reconfigurator/planning/src/mgs_updates/mod.rs +++ b/nexus/reconfigurator/planning/src/mgs_updates/mod.rs @@ -24,12 +24,12 @@ use nexus_types::deployment::PendingMgsUpdateSpDetails; use nexus_types::deployment::PendingMgsUpdates; use nexus_types::deployment::TargetReleaseDescription; use nexus_types::deployment::planning_report::BlockedMgsUpdate; -use nexus_types::inventory::BaseboardId; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::Collection; use omicron_common::api::external::TufRepoDescription; use omicron_common::disk::M2Slot; use omicron_uuid_kinds::SledUuid; +use sled_agent_types::sled::BaseboardId; use slog::{error, info, warn}; use slog_error_chain::InlineErrorChain; use std::collections::BTreeSet; @@ -795,10 +795,10 @@ mod test { use nexus_types::deployment::planning_report::FailedRotBootloaderUpdateReason; use nexus_types::deployment::planning_report::FailedRotUpdateReason; use nexus_types::deployment::planning_report::FailedSpUpdateReason; - use nexus_types::inventory::BaseboardId; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::SpType; use omicron_test_utils::dev::LogContext; + use sled_agent_types::sled::BaseboardId; use sled_hardware_types::OxideSled; use std::collections::BTreeSet; use std::sync::Arc; diff --git a/nexus/reconfigurator/planning/src/mgs_updates/rot.rs 
b/nexus/reconfigurator/planning/src/mgs_updates/rot.rs index 4896c23291f..4441e93d3d2 100644 --- a/nexus/reconfigurator/planning/src/mgs_updates/rot.rs +++ b/nexus/reconfigurator/planning/src/mgs_updates/rot.rs @@ -15,10 +15,10 @@ use nexus_types::deployment::PendingMgsUpdate; use nexus_types::deployment::PendingMgsUpdateDetails; use nexus_types::deployment::PendingMgsUpdateRotDetails; use nexus_types::deployment::planning_report::FailedRotUpdateReason; -use nexus_types::inventory::BaseboardId; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::Collection; use omicron_common::api::external::TufRepoDescription; +use sled_agent_types::sled::BaseboardId; use slog::{debug, warn}; use std::sync::Arc; use tufaceous_artifact::ArtifactKind; diff --git a/nexus/reconfigurator/planning/src/mgs_updates/rot_bootloader.rs b/nexus/reconfigurator/planning/src/mgs_updates/rot_bootloader.rs index cd8a4764345..946884f371a 100644 --- a/nexus/reconfigurator/planning/src/mgs_updates/rot_bootloader.rs +++ b/nexus/reconfigurator/planning/src/mgs_updates/rot_bootloader.rs @@ -13,10 +13,10 @@ use nexus_types::deployment::PendingMgsUpdate; use nexus_types::deployment::PendingMgsUpdateDetails; use nexus_types::deployment::PendingMgsUpdateRotBootloaderDetails; use nexus_types::deployment::planning_report::FailedRotBootloaderUpdateReason; -use nexus_types::inventory::BaseboardId; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::Collection; use omicron_common::api::external::TufRepoDescription; +use sled_agent_types::sled::BaseboardId; use slog::{debug, warn}; use std::sync::Arc; use tufaceous_artifact::ArtifactVersion; diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 16de13ea5af..21397b82841 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -36,7 +36,6 @@ use nexus_types::external_api::views::PhysicalDiskState; use 
nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::external_api::views::SledState; -use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Caboose; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::PowerState; @@ -74,6 +73,7 @@ use sled_agent_types::inventory::SledCpuFamily; use sled_agent_types::inventory::SledRole; use sled_agent_types::inventory::ZoneImageResolverInventory; use sled_agent_types::inventory::ZoneKind; +use sled_agent_types::sled::BaseboardId; use sled_hardware_types::GIMLET_SLED_MODEL; use std::collections::BTreeMap; use std::collections::BTreeSet; diff --git a/nexus/reconfigurator/preparation/Cargo.toml b/nexus/reconfigurator/preparation/Cargo.toml index d6cec285dea..994b4b50572 100644 --- a/nexus/reconfigurator/preparation/Cargo.toml +++ b/nexus/reconfigurator/preparation/Cargo.toml @@ -19,6 +19,7 @@ omicron-common.workspace = true omicron-uuid-kinds.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. 
pq-sys = "*" +sled-agent-types.workspace = true slog.workspace = true slog-error-chain.workspace = true diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index 650d3d4f0f6..29691d4eca6 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -37,7 +37,6 @@ use nexus_types::deployment::TufRepoPolicy; use nexus_types::deployment::UnstableReconfiguratorState; use nexus_types::identity::Asset; use nexus_types::identity::Resource; -use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Collection; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; @@ -54,6 +53,7 @@ use omicron_common::policy::NEXUS_REDUNDANCY; use omicron_common::policy::OXIMETER_REDUNDANCY; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; +use sled_agent_types::sled::BaseboardId; use slog::Logger; use slog::error; use slog_error_chain::InlineErrorChain; diff --git a/nexus/src/app/update.rs b/nexus/src/app/update.rs index e0f1e37c9d0..f6e08037945 100644 --- a/nexus/src/app/update.rs +++ b/nexus/src/app/update.rs @@ -21,12 +21,12 @@ use nexus_types::external_api::shared::TufSignedRootRole; use nexus_types::external_api::views; use nexus_types::identity::Asset; use nexus_types::internal_api::views as internal_views; -use nexus_types::inventory::BaseboardId; use omicron_common::api::external::InternalContext; use omicron_common::api::external::Nullable; use omicron_common::api::external::{DataPageParams, Error}; use omicron_uuid_kinds::{GenericUuid, TufTrustRootUuid}; use semver::Version; +use sled_agent_types::sled::BaseboardId; use std::collections::BTreeMap; use std::iter; use tokio::sync::watch; diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index 973f7d0993f..5d89b89009a 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -41,6 +41,7 @@ schemars = { workspace = true, features = ["chrono", "uuid1", 
"url"] } serde.workspace = true serde_json.workspace = true serde_with.workspace = true +sled-agent-types.workspace = true sled-hardware-types.workspace = true slog.workspace = true slog-error-chain.workspace = true @@ -51,7 +52,9 @@ test-strategy.workspace = true textwrap.workspace = true thiserror.workspace = true tokio.workspace = true +trust-quorum-protocol.workspace = true tufaceous-artifact.workspace = true + newtype-uuid.workspace = true update-engine.workspace = true unicode-width.workspace = true diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 080b1f3c883..907a9a6d877 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -81,7 +81,6 @@ pub mod planning_report; mod reconfigurator_config; mod zone_type; -use crate::inventory::BaseboardId; use anyhow::anyhow; use anyhow::bail; pub use blueprint_diff::BlueprintDiffSummary; @@ -153,6 +152,7 @@ pub use reconfigurator_config::ReconfiguratorConfigDisplay; pub use reconfigurator_config::ReconfiguratorConfigParam; pub use reconfigurator_config::ReconfiguratorConfigView; pub use reconfigurator_config::ReconfiguratorConfigViewDisplay; +use sled_agent_types::sled::BaseboardId; pub use zone_type::BlueprintZoneType; pub use zone_type::DurableDataset; pub use zone_type::blueprint_zone_type; @@ -2524,8 +2524,8 @@ mod test { use super::PendingMgsUpdateDetails; use super::PendingMgsUpdateSpDetails; use super::PendingMgsUpdates; - use crate::inventory::BaseboardId; use gateway_types::component::SpType; + use sled_agent_types::sled::BaseboardId; use sled_hardware_types::GIMLET_SLED_MODEL; #[test] diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index 3b38e748b6b..065301fbe79 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -17,7 +17,6 @@ use crate::external_api::views::PhysicalDiskState; use crate::external_api::views::SledPolicy; use 
crate::external_api::views::SledProvisionPolicy; use crate::external_api::views::SledState; -use crate::inventory::BaseboardId; use chrono::DateTime; use chrono::TimeDelta; use chrono::Utc; @@ -42,6 +41,7 @@ use omicron_uuid_kinds::ZpoolUuid; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; +use sled_agent_types::sled::BaseboardId; use sled_agent_types_versions::latest::inventory::ZoneKind; use std::collections::BTreeMap; use std::collections::BTreeSet; diff --git a/nexus/types/src/deployment/planning_report.rs b/nexus/types/src/deployment/planning_report.rs index 2d260f857ed..7f3d551125e 100644 --- a/nexus/types/src/deployment/planning_report.rs +++ b/nexus/types/src/deployment/planning_report.rs @@ -10,7 +10,6 @@ use super::BlueprintZoneImageSource; use super::CockroachDbPreserveDowngrade; use super::PendingMgsUpdates; use super::PlannerConfig; -use crate::inventory::BaseboardId; use crate::inventory::CabooseWhich; use daft::Diffable; @@ -30,7 +29,8 @@ use omicron_uuid_kinds::ZpoolUuid; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; -use sled_agent_types_versions::latest::inventory::ZoneKind; +use sled_agent_types::inventory::ZoneKind; +use sled_agent_types::sled::BaseboardId; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::BTreeSet; diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 6ec29550d2f..33fc3545c51 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -27,6 +27,7 @@ use serde::{ Deserialize, Deserializer, Serialize, Serializer, de::{self, Visitor}, }; +use sled_agent_types::sled::BaseboardId; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::num::NonZeroU32; @@ -81,6 +82,12 @@ pub struct UninitializedSledId { pub part: String, } +impl From<UninitializedSledId> for BaseboardId { + fn from(value: UninitializedSledId) -> Self { + BaseboardId { part_number: value.part, 
serial_number: value.serial } + } +} + path_param!(AffinityGroupPath, affinity_group, "affinity group"); path_param!(AntiAffinityGroupPath, anti_affinity_group, "anti affinity group"); path_param!(MulticastGroupPath, multicast_group, "multicast group"); diff --git a/nexus/types/src/external_api/shared.rs b/nexus/types/src/external_api/shared.rs index 6af5b2dc6e8..4ad69f1e2fc 100644 --- a/nexus/types/src/external_api/shared.rs +++ b/nexus/types/src/external_api/shared.rs @@ -23,6 +23,7 @@ use serde::Deserialize; use serde::Deserializer; use serde::Serialize; use serde::de::Error as _; +use sled_agent_types::sled::BaseboardId; use slog_error_chain::InlineErrorChain; use strum::EnumIter; use uuid::Uuid; @@ -356,6 +357,12 @@ pub struct Baseboard { pub revision: u32, } +impl From<crate::external_api::shared::Baseboard> for BaseboardId { + fn from(value: crate::external_api::shared::Baseboard) -> Self { + BaseboardId { part_number: value.part, serial_number: value.serial } + } +} + /// A sled that has not been added to an initialized rack yet #[derive( Clone, diff --git a/nexus/types/src/internal_api/views.rs b/nexus/types/src/internal_api/views.rs index e9ed52ad728..b7a15f7a5ff 100644 --- a/nexus/types/src/internal_api/views.rs +++ b/nexus/types/src/internal_api/views.rs @@ -4,7 +4,6 @@ use crate::deployment::PendingMgsUpdate; use crate::deployment::TargetReleaseDescription; -use crate::inventory::BaseboardId; use crate::inventory::CabooseWhich; use crate::inventory::Collection; use crate::quiesce::SagaQuiesceStatus; @@ -31,11 +30,12 @@ use schemars::JsonSchema; use semver::Version; use serde::Deserialize; use serde::Serialize; -use sled_agent_types_versions::latest::inventory::BootPartitionContents; -use sled_agent_types_versions::latest::inventory::BootPartitionDetails; -use sled_agent_types_versions::latest::inventory::ConfigReconcilerInventoryResult; -use sled_agent_types_versions::latest::inventory::OmicronZoneImageSource; -use sled_agent_types_versions::latest::inventory::OmicronZoneType; +use 
sled_agent_types::inventory::BootPartitionContents; +use sled_agent_types::inventory::BootPartitionDetails; +use sled_agent_types::inventory::ConfigReconcilerInventoryResult; +use sled_agent_types::inventory::OmicronZoneImageSource; +use sled_agent_types::inventory::OmicronZoneType; +use sled_agent_types::sled::BaseboardId; use std::collections::BTreeMap; use std::collections::VecDeque; use std::fmt::Display; @@ -1261,9 +1261,9 @@ mod test { use crate::deployment::PendingMgsUpdateDetails; use crate::deployment::PendingMgsUpdateSpDetails; use crate::internal_api::views::UpdateAttemptStatus; - use crate::inventory::BaseboardId; use chrono::Utc; use gateway_types::component::SpType; + use sled_agent_types::sled::BaseboardId; use std::collections::VecDeque; use std::sync::Arc; use std::time::Instant; diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 1a3b5ae5c0e..4bdd5ce35ef 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -10,7 +10,6 @@ //! convenient to separate these concerns.) use crate::external_api::params::PhysicalDiskKind; -use crate::external_api::params::UninitializedSledId; use chrono::DateTime; use chrono::Utc; use clickhouse_admin_types::keeper::ClickhouseKeeperClusterMembership; @@ -36,6 +35,7 @@ use omicron_uuid_kinds::ZpoolUuid; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use serde_with::serde_as; +use sled_agent_types::sled::BaseboardId; use sled_agent_types_versions::latest::inventory::ConfigReconcilerInventory; use sled_agent_types_versions::latest::inventory::ConfigReconcilerInventoryResult; use sled_agent_types_versions::latest::inventory::ConfigReconcilerInventoryStatus; @@ -293,69 +293,6 @@ impl Collection { } } -/// A unique baseboard id found during a collection -/// -/// Baseboard ids are the keys used to link up information from disparate -/// sources (like a service processor and a sled agent). -/// -/// These are normalized in the database. 
Each distinct baseboard id is -/// assigned a uuid and shared across the many possible collections that -/// reference it. -/// -/// Usually, the part number and serial number are combined with a revision -/// number. We do not include that here. If we ever did find a baseboard with -/// the same part number and serial number but a new revision number, we'd want -/// to treat that as the same baseboard as one with a different revision number. -#[derive( - Clone, - Debug, - Diffable, - Ord, - Eq, - Hash, - PartialOrd, - PartialEq, - Deserialize, - Serialize, - JsonSchema, -)] -#[cfg_attr(test, derive(test_strategy::Arbitrary))] -pub struct BaseboardId { - /// Oxide Part Number - pub part_number: String, - /// Serial number (unique for a given part number) - pub serial_number: String, -} - -impl std::fmt::Display for BaseboardId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}:{}", self.part_number, self.serial_number) - } -} - -impl From for BaseboardId { - fn from(value: crate::external_api::shared::Baseboard) -> Self { - BaseboardId { part_number: value.part, serial_number: value.serial } - } -} - -impl From for BaseboardId { - fn from(value: UninitializedSledId) -> Self { - BaseboardId { part_number: value.part, serial_number: value.serial } - } -} - -impl slog::KV for BaseboardId { - fn serialize( - &self, - _record: &slog::Record, - serializer: &mut dyn slog::Serializer, - ) -> slog::Result { - serializer.emit_str("part_number".into(), &self.part_number)?; - serializer.emit_str("serial_number".into(), &self.serial_number) - } -} - /// Caboose contents found during a collection /// /// These are normalized in the database. 
Each distinct `Caboose` is assigned a diff --git a/nexus/types/src/lib.rs b/nexus/types/src/lib.rs index fd1f2243bfa..c9b203134a9 100644 --- a/nexus/types/src/lib.rs +++ b/nexus/types/src/lib.rs @@ -38,3 +38,4 @@ pub mod internal_api; pub mod inventory; pub mod quiesce; pub mod silo; +pub mod trust_quorum; diff --git a/nexus/types/src/trust_quorum.rs b/nexus/types/src/trust_quorum.rs new file mode 100644 index 00000000000..90bd2519672 --- /dev/null +++ b/nexus/types/src/trust_quorum.rs @@ -0,0 +1,59 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types describing the state of trust quorum in Nexus + +use std::collections::BTreeMap; + +use omicron_uuid_kinds::RackUuid; +use sled_agent_types::sled::BaseboardId; +use trust_quorum_protocol::{ + EncryptedRackSecrets, Epoch, Sha3_256Digest, Threshold, +}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum TrustQuorumConfigState { + Preparing, + PreparingLrtqUpgrade, + Committing, + Committed, + Aborted, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum TrustQuorumMemberState { + Unacked, + Prepared, + Committed, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TrustQuorumMemberData { + pub state: TrustQuorumMemberState, + + // Only filled in once the coordinator state is successfully polled by nexus + // after it has created the configuration. 
+ pub digest: Option<Sha3_256Digest>, +} + +impl TrustQuorumMemberData { + pub fn new() -> Self { + TrustQuorumMemberData { + state: TrustQuorumMemberState::Unacked, + digest: None, + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TrustQuorumConfig { + pub rack_id: RackUuid, + pub epoch: Epoch, + pub state: TrustQuorumConfigState, + pub threshold: Threshold, + pub commit_crash_tolerance: u8, + pub coordinator: BaseboardId, + pub encrypted_rack_secrets: Option<EncryptedRackSecrets>, + pub members: BTreeMap<BaseboardId, TrustQuorumMemberData>, +} diff --git a/openapi/nexus-lockstep.json b/openapi/nexus-lockstep.json index 2f9724cdb93..6ec654d753d 100644 --- a/openapi/nexus-lockstep.json +++ b/openapi/nexus-lockstep.json @@ -1557,7 +1557,7 @@ ] }, "BaseboardId": { - "description": "A unique baseboard id found during a collection\n\nBaseboard ids are the keys used to link up information from disparate sources (like a service processor and a sled agent).\n\nThese are normalized in the database. Each distinct baseboard id is assigned a uuid and shared across the many possible collections that reference it.\n\nUsually, the part number and serial number are combined with a revision number. We do not include that here. 
If we ever did find a baseboard with the same part number and serial number but a new revision number, we'd want to treat that as the same baseboard as one with a different revision number.", + "description": "A representation of a Baseboard ID as used in the inventory subsystem This type is essentially the same as a `Baseboard` except it doesn't have a revision or HW type (Gimlet, PC, Unknown).", "type": "object", "properties": { "part_number": { diff --git a/schema/crdb/add-trust-quorum/up01.sql b/schema/crdb/add-trust-quorum/up01.sql new file mode 100644 index 00000000000..bc58d91556e --- /dev/null +++ b/schema/crdb/add-trust-quorum/up01.sql @@ -0,0 +1,18 @@ +-- The state of a given trust quorum configuration +CREATE TYPE IF NOT EXISTS omicron.public.trust_quorum_configuration_state AS ENUM ( + -- Nexus is waiting for prepare acknowledgments by polling the coordinator + -- In this case, a normal trust quorum reconfiguration is being prepared + 'preparing', + -- Nexus is waiting for prepare acknowledgments by polling the coordinator + -- In this case, an LRTQ upgrade is being prepared. + 'preparing-lrtq-upgrade', + -- The configuration has committed to the database, and nexus may still be + -- trying to inform nodes about the commit. + 'committing', + -- All nodes in the trust quorum have committed the configuration and nexus + -- has no more work to do. + 'committed', + -- The configuration has aborted and will not commit. The epoch can be + -- skipped. 
+ 'aborted' +); diff --git a/schema/crdb/add-trust-quorum/up02.sql b/schema/crdb/add-trust-quorum/up02.sql new file mode 100644 index 00000000000..a0d179b7edf --- /dev/null +++ b/schema/crdb/add-trust-quorum/up02.sql @@ -0,0 +1,10 @@ +-- Whether a node has prepared or committed yet +CREATE TYPE IF NOT EXISTS omicron.public.trust_quorum_member_state AS ENUM ( + -- The node has not acknowledged either a `Prepare` or `Commit` message + 'unacked', + -- The node has acknowledged a `Prepare` message + 'prepared', + -- The node has acknowledged a `Commit` or `PrepareAndCommit` message + -- `committed` implies `prepared` + 'committed' +); diff --git a/schema/crdb/add-trust-quorum/up03.sql b/schema/crdb/add-trust-quorum/up03.sql new file mode 100644 index 00000000000..a75e9ee2213 --- /dev/null +++ b/schema/crdb/add-trust-quorum/up03.sql @@ -0,0 +1,53 @@ +-- Information for tracking trust quorum memberships over time +CREATE TABLE IF NOT EXISTS omicron.public.trust_quorum_configuration ( + -- Foreign key into the rack table + rack_id UUID NOT NULL, + + -- Monotonically increasing version per rack_id + epoch INT8 NOT NULL, + + -- The current state of this configuration + state omicron.public.trust_quorum_configuration_state NOT NULL, + + -- The number of shares needed to compute the rack secret + -- + -- In some documentation we call this the `K` parameter. + threshold INT2 NOT NULL CHECK (threshold > 0), + + -- The number of additional nodes beyond threshold to commit + -- + -- This represents the number of prepared nodes that can be offline after + -- a commit at Nexus and still allow the secret to be reconstructed during + -- rack unlock. If this number is equivalent to the total membership (`N`) + -- minus `threshold` nodes, then all nodes in the membership set for this + -- epoch must ack a prepare for a commit to occur. 
By varying this value we + -- allow commit to occur even if some nodes haven't prepared, thus providing + -- fault tolerance during the prepare phase and also during unlock. + -- + -- In some documentation we call this the `Z` parameter. + commit_crash_tolerance INT2 NOT NULL CHECK (commit_crash_tolerance >= 0), + + -- Which member is coordinating the prepare phase of the protocol this epoch + -- Foreign key into the `hw_baseboard_id` table + coordinator UUID NOT NULL, + + -- Encrypted rack secrets for prior committed epochs + -- + -- These are only filled in during a reconfiguration and retrieved + -- during the prepare phase of the protocol by Nexus from the coordinator. + -- + -- Salt is a hex-encoded string + encrypted_rack_secrets_salt STRING(64), + encrypted_rack_secrets BYTES, + + CONSTRAINT encrypted_rack_secrets_both_or_neither_null CHECK ( + (encrypted_rack_secrets_salt IS NULL + AND encrypted_rack_secrets IS NULL) + OR + (encrypted_rack_secrets_salt IS NOT NULL + AND encrypted_rack_secrets IS NOT NULL) + ), + + -- Each rack has its own trust quorum + PRIMARY KEY (rack_id, epoch) +); diff --git a/schema/crdb/add-trust-quorum/up04.sql b/schema/crdb/add-trust-quorum/up04.sql new file mode 100644 index 00000000000..99aa5c9fcb1 --- /dev/null +++ b/schema/crdb/add-trust-quorum/up04.sql @@ -0,0 +1,24 @@ +-- Total group membership in trust quorum for a given epoch +CREATE TABLE IF NOT EXISTS omicron.public.trust_quorum_member ( + -- Foreign key into the rack table + -- Foreign key into the `trust_quorum_configuration` table along with `epoch` + rack_id UUID NOT NULL, + + -- Foreign key into the `trust_quorum_configuration` table along with `rack_id` + epoch INT8 NOT NULL, + + -- Foreign key into the `hw_baseboard_id` table + hw_baseboard_id UUID NOT NULL, + + -- Whether a node has acknowledged a prepare or commit yet + state omicron.public.trust_quorum_member_state NOT NULL, + + -- The sha3-256 hash of the key share for this node. 
This is only filled in + -- after Nexus has retrieved the configuration from the coordinator during + -- the prepare phase of the protocol. + -- + -- Hex formatted string + share_digest STRING(64), + + PRIMARY KEY (rack_id, epoch, hw_baseboard_id) +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 548bba8b50b..f95063455d7 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -7469,6 +7469,117 @@ ON WHERE time_deleted IS NULL; +-- The state of a given trust quorum configuration +CREATE TYPE IF NOT EXISTS omicron.public.trust_quorum_configuration_state AS ENUM ( + -- Nexus is waiting for prepare acknowledgments by polling the coordinator + -- In this case, a normal trust quorum reconfiguration is being prepared + 'preparing', + -- Nexus is waiting for prepare acknowledgments by polling the coordinator + -- In this case, an LRTQ upgrade is being prepared. + 'preparing-lrtq-upgrade', + -- The configuration has committed to the database, and nexus may still be + -- trying to inform nodes about the commit. + 'committing', + -- All nodes in the trust quorum have committed the configuration and nexus + -- has no more work to do. + 'committed', + -- The configuration has aborted and will not commit. The epoch can be + -- skipped. + 'aborted' +); + +-- Information for tracking trust quorum memberships over time +CREATE TABLE IF NOT EXISTS omicron.public.trust_quorum_configuration ( + -- Foreign key into the rack table + rack_id UUID NOT NULL, + + -- Monotonically increasing version per rack_id + epoch INT8 NOT NULL, + + -- The current state of this configuration + state omicron.public.trust_quorum_configuration_state NOT NULL, + + -- The number of shares needed to compute the rack secret + -- + -- In some documentation we call this the `K` parameter. 
+ threshold INT2 NOT NULL CHECK (threshold > 0), + + -- The number of additional nodes beyond threshold to commit + -- + -- This represents the number of prepared nodes that can be offline after + -- a commit at Nexus and still allow the secret to be reconstructed during + -- rack unlock. If this number is equivalent to the total membership (`N`) + -- minus `threshold` nodes, then all nodes in the membership set for this + -- epoch must ack a prepare for a commit to occur. By varying this value we + -- allow commit to occur even if some nodes haven't prepared, thus providing + -- fault tolerance during the prepare phase and also during unlock. + -- + -- In some documentation we call this the `Z` parameter. + commit_crash_tolerance INT2 NOT NULL CHECK (commit_crash_tolerance >= 0), + + -- Which member is coordinating the prepare phase of the protocol this epoch + -- Foreign key into the `hw_baseboard_id` table + coordinator UUID NOT NULL, + + -- Encrypted rack secrets for prior committed epochs + -- + -- These are only filled in during a reconfiguration and retrieved + -- during the prepare phase of the protocol by Nexus from the coordinator. 
+ -- + -- Salt is a hex-encoded string + -- (the check constraint below enforces that both are null or not null) + encrypted_rack_secrets_salt STRING(64), + encrypted_rack_secrets BYTES, + + CONSTRAINT encrypted_rack_secrets_both_or_neither_null CHECK ( + (encrypted_rack_secrets_salt IS NULL + AND encrypted_rack_secrets IS NULL) + OR + (encrypted_rack_secrets_salt IS NOT NULL + AND encrypted_rack_secrets IS NOT NULL) + ), + + -- Each rack has its own trust quorum + PRIMARY KEY (rack_id, epoch) +); + +-- Whether a node has prepared or committed yet +CREATE TYPE IF NOT EXISTS omicron.public.trust_quorum_member_state AS ENUM ( + -- The node has not acknowledged either a `Prepare` or `Commit` message + 'unacked', + -- The node has acknowledged a `Prepare` message + 'prepared', + -- The node has acknowledged a `Commit` or `PrepareAndCommit` message + -- `committed` implies `prepared` + 'committed' +); + +-- Total group membership in trust quorum for a given epoch +CREATE TABLE IF NOT EXISTS omicron.public.trust_quorum_member ( + -- Foreign key into the rack table + -- Foreign key into the `trust_quorum_configuration` table along with `epoch` + rack_id UUID NOT NULL, + + -- Foreign key into the `trust_quorum_configuration` table along with `rack_id` + epoch INT8 NOT NULL, + + -- Foreign key into the `hw_baseboard_id` table + hw_baseboard_id UUID NOT NULL, + + -- Whether a node has acknowledged a prepare or commit yet + state omicron.public.trust_quorum_member_state NOT NULL, + + -- The sha3-256 hash of the key share for this node. This is only filled in + -- after Nexus has retrieved the configuration from the coordinator during + -- the prepare phase of the protocol. + -- + -- Hex formatted string + share_digest STRING(64), + + PRIMARY KEY (rack_id, epoch, hw_baseboard_id) +); + + -- Keep this at the end of file so that the database does not contain a version -- until it is fully populated. 
INSERT INTO omicron.public.db_metadata ( @@ -7478,7 +7589,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '215.0.0', NULL) + (TRUE, NOW(), NOW(), '216.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/sled-agent/types/Cargo.toml b/sled-agent/types/Cargo.toml index 18e8cef9022..1418ade449a 100644 --- a/sled-agent/types/Cargo.toml +++ b/sled-agent/types/Cargo.toml @@ -20,6 +20,7 @@ omicron-uuid-kinds.workspace = true omicron-workspace-hack.workspace = true oxnet.workspace = true propolis-client.workspace = true +proptest = { workspace = true, optional = true } schemars.workspace = true serde.workspace = true serde_human_bytes.workspace = true @@ -30,6 +31,7 @@ slog.workspace = true slog-error-chain.workspace = true strum.workspace = true swrite.workspace = true +test-strategy = { workspace = true, optional = true } thiserror.workspace = true toml.workspace = true tufaceous-artifact.workspace = true @@ -38,4 +40,9 @@ uuid.workspace = true [dev-dependencies] camino-tempfile.workspace = true omicron-test-utils.workspace = true +proptest.workspace = true rcgen.workspace = true +test-strategy.workspace = true + +[features] +testing = ["proptest", "test-strategy"] diff --git a/sled-agent/types/versions/src/impls/sled.rs b/sled-agent/types/versions/src/impls/sled.rs index e86d55b2fb8..13e10cafd11 100644 --- a/sled-agent/types/versions/src/impls/sled.rs +++ b/sled-agent/types/versions/src/impls/sled.rs @@ -9,7 +9,7 @@ use std::net::{Ipv6Addr, SocketAddrV6}; use omicron_common::address; use sha3::{Digest, Sha3_256}; -use crate::latest::sled::StartSledAgentRequest; +use crate::latest::sled::{BaseboardId, StartSledAgentRequest}; impl StartSledAgentRequest { /// Returns the sled's address. 
@@ -34,3 +34,14 @@ impl StartSledAgentRequest { .unwrap() } } + +impl slog::KV for BaseboardId { + fn serialize( + &self, + _record: &slog::Record, + serializer: &mut dyn slog::Serializer, + ) -> slog::Result { + serializer.emit_str("part_number".into(), &self.part_number)?; + serializer.emit_str("serial_number".into(), &self.serial_number) + } +} diff --git a/sled-agent/types/versions/src/initial/sled.rs b/sled-agent/types/versions/src/initial/sled.rs index 8d2fda2e707..37f35fb4d6f 100644 --- a/sled-agent/types/versions/src/initial/sled.rs +++ b/sled-agent/types/versions/src/initial/sled.rs @@ -30,6 +30,7 @@ use uuid::Uuid; Diffable, )] #[daft(leaf)] +#[cfg_attr(any(test, feature = "testing"), derive(test_strategy::Arbitrary))] pub struct BaseboardId { /// Oxide Part Number pub part_number: String, diff --git a/trust-quorum/protocol/src/crypto.rs b/trust-quorum/protocol/src/crypto.rs index 84ba89c4691..e56eb143cd1 100644 --- a/trust-quorum/protocol/src/crypto.rs +++ b/trust-quorum/protocol/src/crypto.rs @@ -306,8 +306,8 @@ impl Default for Salt { pub struct EncryptedRackSecrets { /// A random value used to derive the key to encrypt the rack secrets for /// prior committed epochs. - salt: Salt, - data: Box<[u8]>, + pub salt: Salt, + pub data: Box<[u8]>, } #[derive( diff --git a/trust-quorum/protocol/src/lib.rs b/trust-quorum/protocol/src/lib.rs index 44f0d75379c..a380f56415d 100644 --- a/trust-quorum/protocol/src/lib.rs +++ b/trust-quorum/protocol/src/lib.rs @@ -9,12 +9,11 @@ //! All persistent state and all networking is managed outside of this //! implementation. 
-use crypto::Sha3_256Digest; use daft::Diffable; use derive_more::Display; use gfss::shamir::Share; use serde::{Deserialize, Serialize}; -pub use sled_agent_types::sled::BaseboardId; +use sled_agent_types::sled::BaseboardId; use slog::{Logger, error, warn}; mod alarm; @@ -42,7 +41,10 @@ pub use validators::{ }; pub use alarm::Alarm; -pub use crypto::{RackSecret, ReconstructedRackSecret}; +pub use crypto::{ + EncryptedRackSecrets, RackSecret, ReconstructedRackSecret, Salt, + Sha3_256Digest, +}; pub use messages::*; pub use node::{CommitError, Node, NodeDiff, PrepareAndCommitError}; // public only for docs. @@ -73,6 +75,10 @@ impl Epoch { pub fn next(&self) -> Epoch { Epoch(self.0.checked_add(1).expect("fewer than 2^64 epochs")) } + + pub fn previous(&self) -> Option<Epoch> { + self.0.checked_sub(1).map(Epoch) + } + } /// The number of shares required to reconstruct the rack secret diff --git a/trust-quorum/protocol/tests/cluster.rs b/trust-quorum/protocol/tests/cluster.rs index e1b14994dd4..d52fda94a86 100644 --- a/trust-quorum/protocol/tests/cluster.rs +++ b/trust-quorum/protocol/tests/cluster.rs @@ -12,12 +12,12 @@ use proptest::collection::{btree_set, size_range}; use proptest::prelude::*; use proptest::sample::Selector; use secrecy::ExposeSecret; +use sled_agent_types::sled::BaseboardId; use slog::{Logger, info, o}; use std::collections::BTreeSet; use test_strategy::{Arbitrary, proptest}; use trust_quorum_protocol::{ - BaseboardId, CoordinatorOperation, Epoch, NodeCallerCtx, NodeCommonCtx, - Threshold, + CoordinatorOperation, Epoch, NodeCallerCtx, NodeCommonCtx, Threshold, }; use trust_quorum_test_utils::TqState; use trust_quorum_test_utils::{ diff --git a/trust-quorum/src/connection_manager.rs b/trust-quorum/src/connection_manager.rs index aa06cc9a351..1230bfe456b 100644 --- a/trust-quorum/src/connection_manager.rs +++ b/trust-quorum/src/connection_manager.rs @@ -6,7 +6,8 @@ use crate::established_conn::EstablishedConn; use crate::proxy; -use 
trust_quorum_protocol::{BaseboardId, Envelope, PeerMsg}; +use sled_agent_types::sled::BaseboardId; +use trust_quorum_protocol::{Envelope, PeerMsg}; // TODO: Move to this crate // https://github.com/oxidecomputer/omicron/issues/9311 diff --git a/trust-quorum/src/established_conn.rs b/trust-quorum/src/established_conn.rs index 8e38245ace4..785b956b81a 100644 --- a/trust-quorum/src/established_conn.rs +++ b/trust-quorum/src/established_conn.rs @@ -7,6 +7,7 @@ use crate::{ConnToMainMsg, ConnToMainMsgInner, MainToConnMsg, WireMsg}; use bytes::Buf; use serde::Serialize; +use sled_agent_types::sled::BaseboardId; use slog::{Logger, debug, error, o, warn}; use slog_error_chain::SlogInlineError; use std::collections::VecDeque; @@ -17,7 +18,6 @@ use tokio::net::TcpStream; use tokio::sync::mpsc; use tokio::task; use tokio::time::{Instant, MissedTickBehavior, interval}; -use trust_quorum_protocol::BaseboardId; /// Max buffer size of a connection const CONN_BUF_SIZE: usize = 1024 * 1024; diff --git a/trust-quorum/src/proxy.rs b/trust-quorum/src/proxy.rs index c44bcd59b91..9a51c7c149d 100644 --- a/trust-quorum/src/proxy.rs +++ b/trust-quorum/src/proxy.rs @@ -21,11 +21,12 @@ use derive_more::From; use iddqd::{IdHashItem, IdHashMap, id_upcast}; use omicron_uuid_kinds::RackUuid; use serde::{Deserialize, Serialize}; +use sled_agent_types::sled::BaseboardId; use slog_error_chain::{InlineErrorChain, SlogInlineError}; use tokio::sync::{mpsc, oneshot}; use tokio::task; use trust_quorum_protocol::{ - BaseboardId, CommitError, Configuration, Epoch, PrepareAndCommitError, + CommitError, Configuration, Epoch, PrepareAndCommitError, }; use uuid::Uuid; diff --git a/trust-quorum/src/task.rs b/trust-quorum/src/task.rs index e14776755b1..21da97e74ed 100644 --- a/trust-quorum/src/task.rs +++ b/trust-quorum/src/task.rs @@ -14,6 +14,7 @@ use crate::proxy; use camino::Utf8PathBuf; use omicron_uuid_kinds::RackUuid; use serde::{Deserialize, Serialize}; +use sled_agent_types::sled::BaseboardId; use 
slog::{Logger, debug, error, info, o, warn}; use slog_error_chain::SlogInlineError; use sprockets_tls::keys::SprocketsConfig; @@ -24,7 +25,7 @@ use tokio::sync::mpsc::error::SendError; use tokio::sync::oneshot::error::RecvError; use tokio::sync::{mpsc, oneshot}; use trust_quorum_protocol::{ - Alarm, BaseboardId, CommitError, Configuration, Epoch, ExpungedMetadata, + Alarm, CommitError, Configuration, Epoch, ExpungedMetadata, LoadRackSecretError, LrtqUpgradeError, LrtqUpgradeMsg, Node, NodeCallerCtx, NodeCommonCtx, NodeCtx, PersistentState, PrepareAndCommitError, ReconfigurationError, ReconfigureMsg, ReconstructedRackSecret, diff --git a/trust-quorum/test-utils/Cargo.toml b/trust-quorum/test-utils/Cargo.toml index ca028342402..b288139096f 100644 --- a/trust-quorum/test-utils/Cargo.toml +++ b/trust-quorum/test-utils/Cargo.toml @@ -18,6 +18,7 @@ secrecy.workspace = true serde.workspace = true serde_json.workspace = true sled-hardware-types.workspace = true +sled-agent-types.workspace = true slog.workspace = true trust-quorum-protocol = { workspace = true, features = ["danger_partial_eq_ct_wrapper", "testing"] } diff --git a/trust-quorum/test-utils/src/event.rs b/trust-quorum/test-utils/src/event.rs index 8bead3c53e2..6d07788749d 100644 --- a/trust-quorum/test-utils/src/event.rs +++ b/trust-quorum/test-utils/src/event.rs @@ -6,8 +6,9 @@ use crate::nexus::{NexusConfig, NexusReply}; use serde::{Deserialize, Serialize}; +use sled_agent_types::sled::BaseboardId; use std::collections::BTreeSet; -use trust_quorum_protocol::{BaseboardId, Envelope, Epoch}; +use trust_quorum_protocol::{Envelope, Epoch}; /// An event that can be fed into our system under test (SUT) /// diff --git a/trust-quorum/test-utils/src/lib.rs b/trust-quorum/test-utils/src/lib.rs index 9bfffdde256..509cdd3c193 100644 --- a/trust-quorum/test-utils/src/lib.rs +++ b/trust-quorum/test-utils/src/lib.rs @@ -13,7 +13,7 @@ pub use event::Event; pub use event_log::EventLog; pub use state::TqState; -use 
trust_quorum_protocol::BaseboardId; +use sled_agent_types::sled::BaseboardId; /// All possible members used in a test pub fn member_universe(size: usize) -> Vec<BaseboardId> { diff --git a/trust-quorum/test-utils/src/nexus.rs b/trust-quorum/test-utils/src/nexus.rs index c2665f37870..33869bf02fb 100644 --- a/trust-quorum/test-utils/src/nexus.rs +++ b/trust-quorum/test-utils/src/nexus.rs @@ -9,10 +9,9 @@ use iddqd::id_ord_map::RefMut; use iddqd::{IdOrdItem, IdOrdMap, id_upcast}; use omicron_uuid_kinds::RackUuid; use serde::{Deserialize, Serialize}; +use sled_agent_types::sled::BaseboardId; use std::collections::BTreeSet; -use trust_quorum_protocol::{ - BaseboardId, Epoch, LrtqUpgradeMsg, ReconfigureMsg, Threshold, -}; +use trust_quorum_protocol::{Epoch, LrtqUpgradeMsg, ReconfigureMsg, Threshold}; // The operational state of nexus for a given configuration #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Diffable)] diff --git a/trust-quorum/test-utils/src/state.rs b/trust-quorum/test-utils/src/state.rs index 3c1b31e5a32..62b00451971 100644 --- a/trust-quorum/test-utils/src/state.rs +++ b/trust-quorum/test-utils/src/state.rs @@ -13,15 +13,16 @@ use daft::{BTreeMapDiff, BTreeSetDiff, Diffable, Leaf}; use iddqd::IdOrdMap; use omicron_uuid_kinds::GenericUuid; use secrecy::ExposeSecretMut; +use sled_agent_types::sled::BaseboardId; use sled_hardware_types::Baseboard; use slog::{Logger, info}; use std::collections::{BTreeMap, BTreeSet}; use std::fmt::Display; use trust_quorum_protocol::{ - BaseboardId, Configuration, CoordinatingMsg, CoordinatorOperation, - CoordinatorStateDiff, Envelope, Epoch, LoadRackSecretError, Node, - NodeCallerCtx, NodeCommonCtx, NodeCtx, NodeCtxDiff, NodeDiff, PeerMsgKind, - PersistentState, ValidatedLrtqUpgradeMsgDiff, ValidatedReconfigureMsgDiff, + Configuration, CoordinatingMsg, CoordinatorOperation, CoordinatorStateDiff, + Envelope, Epoch, LoadRackSecretError, Node, NodeCallerCtx, NodeCommonCtx, + NodeCtx, NodeCtxDiff, NodeDiff, PeerMsgKind, 
PersistentState, + ValidatedLrtqUpgradeMsgDiff, ValidatedReconfigureMsgDiff, }; // The state of our entire system including the system under test and diff --git a/trust-quorum/tqdb/Cargo.toml b/trust-quorum/tqdb/Cargo.toml index 401f2a2cb3b..bb0efdda0e9 100644 --- a/trust-quorum/tqdb/Cargo.toml +++ b/trust-quorum/tqdb/Cargo.toml @@ -18,6 +18,7 @@ omicron-repl-utils.workspace = true reedline.workspace = true reconfigurator-cli.workspace = true serde_json.workspace = true +sled-agent-types.workspace = true slog.workspace = true tabled.workspace = true trust-quorum-protocol = { workspace = true, features = ["danger_partial_eq_ct_wrapper"] } diff --git a/trust-quorum/tqdb/src/bin/tqdb/main.rs b/trust-quorum/tqdb/src/bin/tqdb/main.rs index 12e163f801b..e17037c2761 100644 --- a/trust-quorum/tqdb/src/bin/tqdb/main.rs +++ b/trust-quorum/tqdb/src/bin/tqdb/main.rs @@ -18,13 +18,13 @@ use reedline::{ FileBackedHistory, KeyCode, KeyModifiers, MenuBuilder, Reedline, ReedlineEvent, default_emacs_keybindings, }; +use sled_agent_types::sled::BaseboardId; use slog::{Logger, o}; use std::collections::{BTreeMap, BTreeSet}; use std::fmt::Write; use std::fs; use std::io::IsTerminal; use tabled::Tabled; -use trust_quorum_protocol::BaseboardId; use trust_quorum_test_utils::{Event, TqState}; fn main() -> Result<(), anyhow::Error> {