Skip to content

Commit 6fb91c6

Browse files
authored
[reconfigurator] Add planner support for starting new Crucible pantries (#6836)
This is a much smaller change than the diff stat implies; most of the changes are expectorate outputs because the example system we set up for tests now includes Crucible pantry zones, which shifted a bunch of other zone UUIDs. Fully supporting Crucible pantry replacement depends on #3763, which I'm continuing to work on. But the reconfigurator side of "start new pantries" is about as trivial as things go and does not depend on #3763, hence this PR.
1 parent 6926ca9 commit 6fb91c6

20 files changed

+847
-601
lines changed

common/src/policy.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ pub const OXIMETER_REDUNDANCY: usize = 1;
2525
/// Reconfigurator (to know whether to add new crdb zones)
2626
pub const COCKROACHDB_REDUNDANCY: usize = 5;
2727

28+
/// The amount of redundancy for Crucible Pantry services.
29+
///
30+
/// This is used by both RSS (to distribute the initial set of services) and the
31+
/// Reconfigurator (to know whether to add new pantry zones).
32+
pub const CRUCIBLE_PANTRY_REDUNDANCY: usize = 3;
33+
2834
/// The amount of redundancy for internal DNS servers.
2935
///
3036
/// Must be less than or equal to RESERVED_INTERNAL_DNS_REDUNDANCY.

nexus/reconfigurator/execution/src/dns.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@ mod test {
363363
use omicron_common::api::external::IdentityMetadataCreateParams;
364364
use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY;
365365
use omicron_common::policy::COCKROACHDB_REDUNDANCY;
366+
use omicron_common::policy::CRUCIBLE_PANTRY_REDUNDANCY;
366367
use omicron_common::policy::INTERNAL_DNS_REDUNDANCY;
367368
use omicron_common::policy::NEXUS_REDUNDANCY;
368369
use omicron_common::policy::OXIMETER_REDUNDANCY;
@@ -1375,6 +1376,7 @@ mod test {
13751376
target_cockroachdb_zone_count: COCKROACHDB_REDUNDANCY,
13761377
target_cockroachdb_cluster_version:
13771378
CockroachDbClusterVersion::POLICY,
1379+
target_crucible_pantry_zone_count: CRUCIBLE_PANTRY_REDUNDANCY,
13781380
log,
13791381
}
13801382
.build()

nexus/reconfigurator/planning/src/blueprint_builder/builder.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1193,6 +1193,51 @@ impl<'a> BlueprintBuilder<'a> {
11931193
Ok(EnsureMultiple::Changed { added: num_oximeter_to_add, removed: 0 })
11941194
}
11951195

1196+
/// Ensure that `sled_id` has `desired_zone_count` Crucible pantry zones,
/// adding new in-service zones to the blueprint to make up any shortfall.
///
/// Returns `EnsureMultiple::NotNeeded` if the sled already has exactly the
/// desired number of pantry zones; otherwise returns
/// `EnsureMultiple::Changed` reporting how many zones were added (`removed`
/// is always 0).
///
/// # Errors
///
/// Returns `Error::Planner` if the sled currently has more pantry zones than
/// `desired_zone_count`, since removing pantry zones is not yet supported.
/// Errors from allocating an IP, selecting a zpool, or adding the zone to the
/// sled are propagated to the caller.
pub fn sled_ensure_zone_multiple_crucible_pantry(
1197+
&mut self,
1198+
sled_id: SledUuid,
1199+
desired_zone_count: usize,
1200+
) -> Result<EnsureMultiple, Error> {
1201+
// How many zones do we need to add?
1202+
let pantry_count = self
1203+
.sled_num_running_zones_of_kind(sled_id, ZoneKind::CruciblePantry);
// `checked_sub` yields `None` when the sled already has more pantries than
// desired, which would require a removal.
1204+
let num_pantry_to_add =
1205+
match desired_zone_count.checked_sub(pantry_count) {
1206+
Some(0) => return Ok(EnsureMultiple::NotNeeded),
1207+
Some(n) => n,
1208+
None => {
1209+
return Err(Error::Planner(anyhow!(
1210+
"removing a Crucible pantry zone not yet supported \
1211+
(sled {sled_id} has {pantry_count}; \
1212+
planner wants {desired_zone_count})"
1213+
)));
1214+
}
1215+
};
1216+
// Each new pantry gets its own zone ID, an underlay IP allocated on this
// sled, and a zpool selected for its filesystem.
1217+
for _ in 0..num_pantry_to_add {
1218+
let pantry_id = self.rng.zone_rng.next();
1219+
let ip = self.sled_alloc_ip(sled_id)?;
1220+
let port = omicron_common::address::CRUCIBLE_PANTRY_PORT;
1221+
let address = SocketAddrV6::new(ip, port, 0, 0);
1222+
let zone_type = BlueprintZoneType::CruciblePantry(
1223+
blueprint_zone_type::CruciblePantry { address },
1224+
);
1225+
let filesystem_pool =
1226+
self.sled_select_zpool(sled_id, zone_type.kind())?;
1227+
1228+
let zone = BlueprintZoneConfig {
1229+
disposition: BlueprintZoneDisposition::InService,
1230+
id: pantry_id,
1231+
underlay_address: ip,
1232+
filesystem_pool: Some(filesystem_pool),
1233+
zone_type,
1234+
};
1235+
self.sled_add_zone(sled_id, zone)?;
1236+
}
1237+
1238+
Ok(EnsureMultiple::Changed { added: num_pantry_to_add, removed: 0 })
1239+
}
1240+
11961241
pub fn cockroachdb_preserve_downgrade(
11971242
&mut self,
11981243
version: CockroachDbPreserveDowngrade,

nexus/reconfigurator/planning/src/example.rs

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use nexus_types::deployment::OmicronZoneNic;
1616
use nexus_types::deployment::PlanningInput;
1717
use nexus_types::deployment::SledFilter;
1818
use nexus_types::inventory::Collection;
19+
use omicron_common::policy::CRUCIBLE_PANTRY_REDUNDANCY;
1920
use omicron_common::policy::INTERNAL_DNS_REDUNDANCY;
2021
use omicron_uuid_kinds::GenericUuid;
2122
use omicron_uuid_kinds::SledKind;
@@ -62,6 +63,7 @@ pub struct ExampleSystemBuilder {
6263
nexus_count: Option<ZoneCount>,
6364
internal_dns_count: ZoneCount,
6465
external_dns_count: ZoneCount,
66+
crucible_pantry_count: ZoneCount,
6567
create_zones: bool,
6668
create_disks_in_blueprint: bool,
6769
}
@@ -85,6 +87,7 @@ impl ExampleSystemBuilder {
8587
nexus_count: None,
8688
internal_dns_count: ZoneCount(INTERNAL_DNS_REDUNDANCY),
8789
external_dns_count: ZoneCount(Self::DEFAULT_EXTERNAL_DNS_COUNT),
90+
crucible_pantry_count: ZoneCount(CRUCIBLE_PANTRY_REDUNDANCY),
8891
create_zones: true,
8992
create_disks_in_blueprint: true,
9093
}
@@ -163,6 +166,17 @@ impl ExampleSystemBuilder {
163166
Ok(self)
164167
}
165168

169+
/// Set the number of Crucible pantry instances in the example system.
///
/// The default is `CRUCIBLE_PANTRY_REDUNDANCY`.
170+
///
171+
/// If [`Self::create_zones`] is set to `false`, this is ignored.
172+
pub fn crucible_pantry_count(
173+
mut self,
174+
crucible_pantry_count: usize,
175+
) -> Self {
176+
self.crucible_pantry_count = ZoneCount(crucible_pantry_count);
177+
self
178+
}
179+
166180
/// Create zones in the example system.
167181
///
168182
/// The default is `true`.
@@ -200,6 +214,7 @@ impl ExampleSystemBuilder {
200214
"nexus_count" => nexus_count.0,
201215
"internal_dns_count" => self.internal_dns_count.0,
202216
"external_dns_count" => self.external_dns_count.0,
217+
"crucible_pantry_count" => self.crucible_pantry_count.0,
203218
"create_zones" => self.create_zones,
204219
"create_disks_in_blueprint" => self.create_disks_in_blueprint,
205220
);
@@ -209,7 +224,8 @@ impl ExampleSystemBuilder {
209224
// there's no external DNS count.)
210225
system
211226
.target_nexus_zone_count(nexus_count.0)
212-
.target_internal_dns_zone_count(self.internal_dns_count.0);
227+
.target_internal_dns_zone_count(self.internal_dns_count.0)
228+
.target_crucible_pantry_zone_count(self.crucible_pantry_count.0);
213229
let mut sled_rng =
214230
TypedUuidRng::from_seed(&self.test_name, "ExampleSystem");
215231
let sled_ids: Vec<_> =
@@ -301,6 +317,12 @@ impl ExampleSystemBuilder {
301317
self.external_dns_count.on(i, self.nsleds),
302318
)
303319
.unwrap();
320+
let _ = builder
321+
.sled_ensure_zone_multiple_crucible_pantry(
322+
sled_id,
323+
self.crucible_pantry_count.on(i, self.nsleds),
324+
)
325+
.unwrap();
304326
}
305327
if self.create_disks_in_blueprint {
306328
let _ =
@@ -426,6 +448,7 @@ mod tests {
426448
ExampleSystemBuilder::new(&logctx.log, TEST_NAME)
427449
.nsleds(5)
428450
.nexus_count(6)
451+
.crucible_pantry_count(5)
429452
.internal_dns_count(2)
430453
.unwrap()
431454
.external_dns_count(10)
@@ -444,9 +467,10 @@ mod tests {
444467
// Check that the system's target counts are set correctly.
445468
assert_eq!(example.system.get_target_nexus_zone_count(), 6);
446469
assert_eq!(example.system.get_target_internal_dns_zone_count(), 2);
470+
assert_eq!(example.system.get_target_crucible_pantry_zone_count(), 5);
447471

448-
// Check that the right number of internal and external DNS zones are
449-
// present in both the blueprint and in the collection.
472+
// Check that the right number of zones are present in both the
473+
// blueprint and in the collection.
450474
let nexus_zones = blueprint_zones_of_kind(&blueprint, ZoneKind::Nexus);
451475
assert_eq!(
452476
nexus_zones.len(),
@@ -507,6 +531,27 @@ mod tests {
507531
external_dns_zones,
508532
);
509533

534+
let crucible_pantry_zones =
535+
blueprint_zones_of_kind(&blueprint, ZoneKind::CruciblePantry);
536+
assert_eq!(
537+
crucible_pantry_zones.len(),
538+
5,
539+
"expected 5 Crucible pantry zones in blueprint, got {}: {:#?}",
540+
crucible_pantry_zones.len(),
541+
crucible_pantry_zones,
542+
);
543+
let crucible_pantry_zones = collection_zones_of_kind(
544+
&example.collection,
545+
ZoneKind::CruciblePantry,
546+
);
547+
assert_eq!(
548+
crucible_pantry_zones.len(),
549+
5,
550+
"expected 5 Crucible pantry zones in collection, got {}: {:#?}",
551+
crucible_pantry_zones.len(),
552+
crucible_pantry_zones,
553+
);
554+
510555
logctx.cleanup_successful();
511556
}
512557

nexus/reconfigurator/planning/src/planner.rs

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,7 @@ impl<'a> Planner<'a> {
361361
DiscretionaryOmicronZone::ClickhouseKeeper,
362362
DiscretionaryOmicronZone::ClickhouseServer,
363363
DiscretionaryOmicronZone::CockroachDb,
364+
DiscretionaryOmicronZone::CruciblePantry,
364365
DiscretionaryOmicronZone::InternalDns,
365366
DiscretionaryOmicronZone::ExternalDns,
366367
DiscretionaryOmicronZone::Nexus,
@@ -451,6 +452,9 @@ impl<'a> Planner<'a> {
451452
DiscretionaryOmicronZone::CockroachDb => {
452453
self.input.target_cockroachdb_zone_count()
453454
}
455+
DiscretionaryOmicronZone::CruciblePantry => {
456+
self.input.target_crucible_pantry_zone_count()
457+
}
454458
DiscretionaryOmicronZone::InternalDns => {
455459
self.input.target_internal_dns_zone_count()
456460
}
@@ -562,6 +566,12 @@ impl<'a> Planner<'a> {
562566
new_total_zone_count,
563567
)?
564568
}
569+
DiscretionaryOmicronZone::CruciblePantry => {
570+
self.blueprint.sled_ensure_zone_multiple_crucible_pantry(
571+
sled_id,
572+
new_total_zone_count,
573+
)?
574+
}
565575
DiscretionaryOmicronZone::InternalDns => {
566576
self.blueprint.sled_ensure_zone_multiple_internal_dns(
567577
sled_id,
@@ -823,6 +833,7 @@ mod test {
823833
use nexus_types::external_api::views::SledState;
824834
use omicron_common::api::external::Generation;
825835
use omicron_common::disk::DiskIdentity;
836+
use omicron_common::policy::CRUCIBLE_PANTRY_REDUNDANCY;
826837
use omicron_test_utils::dev::test_setup_log;
827838
use omicron_uuid_kinds::PhysicalDiskUuid;
828839
use omicron_uuid_kinds::SledUuid;
@@ -1965,8 +1976,9 @@ mod test {
19651976
// * each of those 2 sleds should get exactly 3 new Nexuses
19661977
builder.policy_mut().target_nexus_zone_count = 9;
19671978

1968-
// Disable addition of internal DNS zones.
1979+
// Disable addition of zone types we're not checking for below.
19691980
builder.policy_mut().target_internal_dns_zone_count = 0;
1981+
builder.policy_mut().target_crucible_pantry_zone_count = 0;
19701982

19711983
let input = builder.build();
19721984
let mut blueprint2 = Planner::new_based_on(
@@ -2451,6 +2463,76 @@ mod test {
24512463
logctx.cleanup_successful();
24522464
}
24532465

2466+
/// Check that the planner replaces a Crucible pantry zone when the sled
/// running it is expunged, and that planning again afterwards makes no
/// further changes.
#[test]
2467+
fn test_crucible_pantry() {
2468+
static TEST_NAME: &str = "test_crucible_pantry";
2469+
let logctx = test_setup_log(TEST_NAME);
2470+
2471+
// Use our example system as a starting point.
2472+
let (collection, input, blueprint1) = example(&logctx.log, TEST_NAME);
2473+
2474+
// We should start with CRUCIBLE_PANTRY_REDUNDANCY pantries spread out
2475+
// to at most 1 per sled. Find one of the sleds running one.
2476+
let pantry_sleds = blueprint1
2477+
.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning)
2478+
.filter_map(|(sled_id, zone)| {
2479+
zone.zone_type.is_crucible_pantry().then_some(sled_id)
2480+
})
2481+
.collect::<Vec<_>>();
2482+
assert_eq!(
2483+
pantry_sleds.len(),
2484+
CRUCIBLE_PANTRY_REDUNDANCY,
2485+
"expected {CRUCIBLE_PANTRY_REDUNDANCY} pantries, but found {}",
2486+
pantry_sleds.len(),
2487+
);
2488+
2489+
// Expunge one of the pantry-hosting sleds and re-plan. The planner
2490+
// should immediately replace the zone with one on another
2491+
// (non-expunged) sled.
2492+
let expunged_sled_id = pantry_sleds[0];
2493+
2494+
let mut input_builder = input.into_builder();
2495+
input_builder
2496+
.sleds_mut()
2497+
.get_mut(&expunged_sled_id)
2498+
.expect("can't find sled")
2499+
.policy = SledPolicy::Expunged;
2500+
let input = input_builder.build();
2501+
let blueprint2 = Planner::new_based_on(
2502+
logctx.log.clone(),
2503+
&blueprint1,
2504+
&input,
2505+
"test_blueprint2",
2506+
&collection,
2507+
)
2508+
.expect("failed to create planner")
2509+
.with_rng_seed((TEST_NAME, "bp2"))
2510+
.plan()
2511+
.expect("failed to re-plan");
2512+
2513+
let diff = blueprint2.diff_since_blueprint(&blueprint1);
2514+
println!("1 -> 2 (expunged sled):\n{}", diff.display());
// Count only should-be-running pantries on sleds other than the expunged
// one; a full complement there means the lost zone was replaced.
2515+
assert_eq!(
2516+
blueprint2
2517+
.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning)
2518+
.filter(|(sled_id, zone)| *sled_id != expunged_sled_id
2519+
&& zone.zone_type.is_crucible_pantry())
2520+
.count(),
2521+
CRUCIBLE_PANTRY_REDUNDANCY,
2522+
"can't find replacement pantry zone"
2523+
);
2524+
2525+
// Test a no-op planning iteration.
2526+
assert_planning_makes_no_changes(
2527+
&logctx.log,
2528+
&blueprint2,
2529+
&input,
2530+
TEST_NAME,
2531+
);
2532+
2533+
logctx.cleanup_successful();
2534+
}
2535+
24542536
/// Check that the planner can replace a single-node ClickHouse zone.
24552537
/// This is completely distinct from (and much simpler than) the replicated
24562538
/// (multi-node) case.

nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@ pub(crate) enum DiscretionaryOmicronZone {
1919
ClickhouseKeeper,
2020
ClickhouseServer,
2121
CockroachDb,
22+
CruciblePantry,
2223
InternalDns,
2324
ExternalDns,
2425
Nexus,
2526
Oximeter,
26-
// TODO expand this enum as we start to place more services
2727
}
2828

2929
impl DiscretionaryOmicronZone {
@@ -40,16 +40,15 @@ impl DiscretionaryOmicronZone {
4040
Some(Self::ClickhouseServer)
4141
}
4242
BlueprintZoneType::CockroachDb(_) => Some(Self::CockroachDb),
43+
BlueprintZoneType::CruciblePantry(_) => Some(Self::CruciblePantry),
4344
BlueprintZoneType::InternalDns(_) => Some(Self::InternalDns),
4445
BlueprintZoneType::ExternalDns(_) => Some(Self::ExternalDns),
4546
BlueprintZoneType::Nexus(_) => Some(Self::Nexus),
4647
BlueprintZoneType::Oximeter(_) => Some(Self::Oximeter),
47-
// Zones that we should place but don't yet.
48-
| BlueprintZoneType::CruciblePantry(_)
4948
// Zones that get special handling for placement (all sleds get
5049
// them, although internal NTP has some interactions with boundary
5150
// NTP that are handled separately).
52-
| BlueprintZoneType::Crucible(_)
51+
BlueprintZoneType::Crucible(_)
5352
| BlueprintZoneType::InternalNtp(_) => None,
5453
}
5554
}
@@ -67,6 +66,7 @@ impl From<DiscretionaryOmicronZone> for ZoneKind {
6766
Self::ClickhouseServer
6867
}
6968
DiscretionaryOmicronZone::CockroachDb => Self::CockroachDb,
69+
DiscretionaryOmicronZone::CruciblePantry => Self::CruciblePantry,
7070
DiscretionaryOmicronZone::InternalDns => Self::InternalDns,
7171
DiscretionaryOmicronZone::ExternalDns => Self::ExternalDns,
7272
DiscretionaryOmicronZone::Nexus => Self::Nexus,

0 commit comments

Comments
 (0)