From 65b174411d6094ee798c0a516c6f7b5491f59ba3 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 1 Oct 2025 12:18:38 -0700 Subject: [PATCH 1/2] add timestamps to db_metadata_nexus records for debugging handoff --- nexus/db-model/src/db_metadata.rs | 20 ++++++++++++++++ nexus/db-model/src/schema_versions.rs | 3 ++- .../src/db/datastore/db_metadata.rs | 23 +++++++++++++++++-- nexus/db-schema/src/schema.rs | 3 +++ nexus/src/app/quiesce.rs | 16 +++++++++---- schema/crdb/db-metadata-timestamps/up.sql | 4 ++++ schema/crdb/dbinit.sql | 4 ++++ 7 files changed, 66 insertions(+), 7 deletions(-) create mode 100644 schema/crdb/db-metadata-timestamps/up.sql diff --git a/nexus/db-model/src/db_metadata.rs b/nexus/db-model/src/db_metadata.rs index 91e7fa92a94..5519dcde6a1 100644 --- a/nexus/db-model/src/db_metadata.rs +++ b/nexus/db-model/src/db_metadata.rs @@ -71,14 +71,26 @@ pub struct DbMetadataNexus { nexus_id: DbTypedUuid<OmicronZoneKind>, last_drained_blueprint_id: Option<DbTypedUuid<BlueprintKind>>, state: DbMetadataNexusState, + time_row_created: Option<DateTime<Utc>>, + time_quiesced: Option<DateTime<Utc>>, + time_active: Option<DateTime<Utc>>, } impl DbMetadataNexus { pub fn new(nexus_id: OmicronZoneUuid, state: DbMetadataNexusState) -> Self { + let now = Utc::now(); + let (time_active, time_quiesced) = match state { + DbMetadataNexusState::Active => (Some(now), None), + DbMetadataNexusState::Quiesced => (None, Some(now)), + DbMetadataNexusState::NotYet => (None, None), + }; Self { nexus_id: nexus_id.into(), last_drained_blueprint_id: None, state, + time_row_created: Some(now), + time_quiesced, + time_active, } } @@ -93,4 +105,12 @@ impl DbMetadataNexus { pub fn last_drained_blueprint_id(&self) -> Option<BlueprintUuid> { self.last_drained_blueprint_id.map(|id| id.into()) } + + pub fn time_active(&self) -> Option<DateTime<Utc>> { + self.time_active + } + + pub fn time_quiesced(&self) -> Option<DateTime<Utc>> { + self.time_quiesced + } } diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index a20ab1d4600..0fac02532a3 100644 ---
a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(194, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(195, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock<Vec<KnownVersion>> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(195, "db-metadata-timestamps"), KnownVersion::new(194, "tuf-pruned"), KnownVersion::new(193, "nexus-lockstep-port"), KnownVersion::new(192, "blueprint-source"), diff --git a/nexus/db-queries/src/db/datastore/db_metadata.rs b/nexus/db-queries/src/db/datastore/db_metadata.rs index 32c26200965..0250641b0e5 100644 --- a/nexus/db-queries/src/db/datastore/db_metadata.rs +++ b/nexus/db-queries/src/db/datastore/db_metadata.rs @@ -768,7 +768,11 @@ impl DataStore { let nexus_id = nexus_db_model::to_db_typed_uuid(nexus_id); let count = diesel::update(dsl::db_metadata_nexus) .filter(dsl::nexus_id.eq(nexus_id)) - .set(dsl::state.eq(DbMetadataNexusState::Quiesced)) + .filter(dsl::state.ne(DbMetadataNexusState::Quiesced)) + .set(( + dsl::state.eq(DbMetadataNexusState::Quiesced), + dsl::time_quiesced.eq(Utc::now()), + )) .execute_async(&*conn) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; @@ -1017,7 +1021,10 @@ impl DataStore { // Update all "not_yet" records to "active" diesel::update(dsl::db_metadata_nexus) .filter(dsl::state.eq(DbMetadataNexusState::NotYet)) - .set(dsl::state.eq(DbMetadataNexusState::Active)) + .set(( + dsl::state.eq(DbMetadataNexusState::Active), + dsl::time_active.eq(Utc::now()), + )) .execute_async(&conn) .await?; @@
-1750,7 +1757,9 @@ mod test { assert_eq!(nexus3_before.state(), DbMetadataNexusState::Quiesced); // Attempt handoff with nexus2 - should succeed + let before = Utc::now(); let result = datastore.attempt_handoff(nexus2_id).await; + let after = Utc::now(); if let Err(ref e) = result { panic!("Handoff should succeed but got error: {}", e); } @@ -1775,7 +1784,17 @@ mod test { .expect("nexus3 should exist"); assert_eq!(nexus1_after.state(), DbMetadataNexusState::Active); + let nexus1_after_active_time = nexus1_after + .time_active() + .expect("active record should have time_active"); + assert!(nexus1_after_active_time >= before); + assert!(nexus1_after_active_time <= after); assert_eq!(nexus2_after.state(), DbMetadataNexusState::Active); + let nexus2_after_active_time = nexus2_after + .time_active() + .expect("active record should have time_active"); + assert!(nexus2_after_active_time >= before); + assert!(nexus2_after_active_time <= after); // Should remain unchanged assert_eq!(nexus3_after.state(), DbMetadataNexusState::Quiesced); diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index ad191dfe067..47fa1a7852e 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -2398,6 +2398,9 @@ table! { nexus_id -> Uuid, last_drained_blueprint_id -> Nullable<Uuid>, state -> crate::enums::DbMetadataNexusStateEnum, + time_row_created -> Nullable<Timestamptz>, + time_quiesced -> Nullable<Timestamptz>, + time_active -> Nullable<Timestamptz>, } } diff --git a/nexus/src/app/quiesce.rs b/nexus/src/app/quiesce.rs index adffc9ab7a6..68d35e35373 100644 --- a/nexus/src/app/quiesce.rs +++ b/nexus/src/app/quiesce.rs @@ -988,10 +988,12 @@ mod test { .expect("reading access records"); assert_eq!(records.len(), 3); assert!( - records.iter().all(|r| r.state() == DbMetadataNexusState::Active) + records.iter().all(|r| r.state() == DbMetadataNexusState::Active + && r.time_quiesced().is_none()) ); // Now finish that saga. All three handles should quiesce.
+ let time_before = Utc::now(); drop(saga_ref); wait_for_condition( || async { @@ -1009,6 +1011,7 @@ mod test { ) .await .expect("did not quiesce within timeout"); + let time_after = Utc::now(); // Each "Nexus" record should say that it's quiesced. // @@ -1023,9 +1026,14 @@ mod test { .await .expect("reading access records"); assert_eq!(records.len(), 3); - assert!( - records.iter().all(|r| r.state() == DbMetadataNexusState::Quiesced) - ); + assert!(records.iter().all(|r| { + let time_quiesced = r + .time_quiesced() + .expect("quiesced record should have time quiesced"); + r.state() == DbMetadataNexusState::Quiesced + && time_quiesced >= time_before + && time_quiesced <= time_after + })); testdb.terminate().await; logctx.cleanup_successful(); diff --git a/schema/crdb/db-metadata-timestamps/up.sql b/schema/crdb/db-metadata-timestamps/up.sql new file mode 100644 index 00000000000..0ed727eb04e --- /dev/null +++ b/schema/crdb/db-metadata-timestamps/up.sql @@ -0,0 +1,4 @@ +ALTER TABLE omicron.public.db_metadata_nexus + ADD COLUMN IF NOT EXISTS time_row_created TIMESTAMPTZ, + ADD COLUMN IF NOT EXISTS time_quiesced TIMESTAMPTZ, + ADD COLUMN IF NOT EXISTS time_active TIMESTAMPTZ diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index e38868033d6..39f845b8556 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -6675,6 +6675,10 @@ CREATE TABLE IF NOT EXISTS omicron.public.db_metadata_nexus ( nexus_id UUID NOT NULL PRIMARY KEY, last_drained_blueprint_id UUID, state omicron.public.db_metadata_nexus_state NOT NULL + -- the following fields are for debugging only + time_row_created TIMESTAMPTZ, -- nullable + time_quiesced TIMESTAMPTZ, -- nullable + time_active TIMESTAMPTZ, -- nullable ); CREATE UNIQUE INDEX IF NOT EXISTS lookup_db_metadata_nexus_by_state on omicron.public.db_metadata_nexus ( From 9f2c2557cce7604da1dc3ef31fe45f8d32ef12e7 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 1 Oct 2025 13:45:34 -0700 Subject: [PATCH 2/2] sql --- 
schema/crdb/dbinit.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index f484c9084d3..48bdcafcd45 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -6678,11 +6678,11 @@ CREATE TYPE IF NOT EXISTS omicron.public.db_metadata_nexus_state AS ENUM ( CREATE TABLE IF NOT EXISTS omicron.public.db_metadata_nexus ( nexus_id UUID NOT NULL PRIMARY KEY, last_drained_blueprint_id UUID, - state omicron.public.db_metadata_nexus_state NOT NULL + state omicron.public.db_metadata_nexus_state NOT NULL, -- the following fields are for debugging only time_row_created TIMESTAMPTZ, -- nullable time_quiesced TIMESTAMPTZ, -- nullable - time_active TIMESTAMPTZ, -- nullable + time_active TIMESTAMPTZ -- nullable ); CREATE UNIQUE INDEX IF NOT EXISTS lookup_db_metadata_nexus_by_state on omicron.public.db_metadata_nexus (