From 051ecaf3e2dce13e62bb2dd32327697c06b0a301 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 20 Aug 2025 14:47:03 -0700 Subject: [PATCH 01/10] update quiesce states to reflect RFD 588 --- nexus/reconfigurator/execution/src/lib.rs | 72 ++-- nexus/src/app/background/init.rs | 5 +- .../background/tasks/blueprint_execution.rs | 66 +++- .../app/background/tasks/blueprint_planner.rs | 13 +- nexus/src/app/mod.rs | 23 +- nexus/src/app/quiesce.rs | 177 ++++++--- nexus/types/src/deployment.rs | 22 ++ nexus/types/src/internal_api/views.rs | 78 ++-- nexus/types/src/quiesce.rs | 364 +++++++++++------- 9 files changed, 522 insertions(+), 298 deletions(-) diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index 43c0948555..b9f83912bc 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -611,50 +611,36 @@ fn register_reassign_sagas_step<'a>( .into(); }; - // Re-assign sagas, but only if we're allowed to. If Nexus is - // quiescing, we don't want to assign any new sagas to - // ourselves. - let result = saga_quiesce.reassign_if_possible(async || { - // For any expunged Nexus zones, re-assign in-progress sagas - // to some other Nexus. If this fails for some reason, it - // doesn't affect anything else. - let sec_id = nexus_db_model::SecId::from(nexus_id); - let reassigned = sagas::reassign_sagas_from_expunged( - opctx, datastore, blueprint, sec_id, - ) - .await - .context("failed to re-assign sagas"); - match reassigned { - Ok(needs_saga_recovery) => ( - StepSuccess::new(needs_saga_recovery).build(), - needs_saga_recovery, - ), - Err(error) => { - // It's possible that we failed after having - // re-assigned sagas in the database. - let maybe_reassigned = true; - ( - StepWarning::new(false, error.to_string()) - .build(), - maybe_reassigned, - ) + // Re-assign sagas. + Ok(saga_quiesce + .reassign_sagas(async || { + // For any expunged Nexus zones, re-assign in-progress + // sagas to some other Nexus. If this fails for some + // reason, it doesn't affect anything else. + let sec_id = nexus_db_model::SecId::from(nexus_id); + let reassigned = sagas::reassign_sagas_from_expunged( + opctx, datastore, blueprint, sec_id, + ) + .await + .context("failed to re-assign sagas"); + match reassigned { + Ok(needs_saga_recovery) => ( + StepSuccess::new(needs_saga_recovery).build(), + needs_saga_recovery, + ), + Err(error) => { + // It's possible that we failed after having + // re-assigned sagas in the database. + let maybe_reassigned = true; + ( + StepWarning::new(false, error.to_string()) + .build(), + maybe_reassigned, + ) + } } - } - }); - - match result.await { - // Re-assignment is allowed, and we did try. It may or may - // not have succeeded. Either way, that's reflected in - // `step_result`. - Ok(step_result) => Ok(step_result), - // Re-assignment is disallowed. Report this step skipped - // with an explanation of why. 
- Err(error) => StepSkipped::new( - false, - InlineErrorChain::new(&error).to_string(), - ) - .into(), - } + }) + .await) }, ) .register() diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 78f1ef5661..495930b0b7 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -131,6 +131,7 @@ use super::tasks::vpc_routes; use super::tasks::webhook_deliverator; use crate::Nexus; use crate::app::oximeter::PRODUCER_LEASE_DURATION; +use crate::app::quiesce::NexusQuiesceHandle; use crate::app::saga::StartSaga; use nexus_background_task_interface::Activator; use nexus_background_task_interface::BackgroundTasks; @@ -437,7 +438,7 @@ impl BackgroundTasksInitializer { nexus_id, task_saga_recovery.clone(), args.mgs_updates_tx, - args.saga_recovery.quiesce.clone(), + args.nexus_quiesce, ); let rx_blueprint_exec = blueprint_executor.watcher(); driver.register(TaskDefinition { @@ -1029,6 +1030,8 @@ pub struct BackgroundTasksData { pub webhook_delivery_client: reqwest::Client, /// Channel for configuring pending MGS updates pub mgs_updates_tx: watch::Sender, + /// handle for controlling Nexus quiesce + pub nexus_quiesce: NexusQuiesceHandle, } /// Starts the three DNS-propagation-related background tasks for either diff --git a/nexus/src/app/background/tasks/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs index 2731520271..b930fc5107 100644 --- a/nexus/src/app/background/tasks/blueprint_execution.rs +++ b/nexus/src/app/background/tasks/blueprint_execution.rs @@ -4,7 +4,10 @@ //! Background task for realizing a plan blueprint -use crate::app::background::{Activator, BackgroundTask}; +use crate::app::{ + background::{Activator, BackgroundTask}, + quiesce::NexusQuiesceHandle, +}; use futures::FutureExt; use futures::future::BoxFuture; use internal_dns_resolver::Resolver; @@ -13,14 +16,12 @@ use nexus_db_queries::db::DataStore; use nexus_reconfigurator_execution::{ RealizeBlueprintOutput, RequiredRealizeArgs, }; -use nexus_types::{ - deployment::{ - Blueprint, BlueprintTarget, PendingMgsUpdates, execution::EventBuffer, - }, - quiesce::SagaQuiesceHandle, +use nexus_types::deployment::{ + Blueprint, BlueprintTarget, PendingMgsUpdates, execution::EventBuffer, }; use omicron_uuid_kinds::OmicronZoneUuid; use serde_json::json; +use slog_error_chain::InlineErrorChain; use std::sync::Arc; use tokio::sync::watch; use update_engine::NestedError; @@ -35,7 +36,7 @@ pub struct BlueprintExecutor { tx: watch::Sender, saga_recovery: Activator, mgs_update_tx: watch::Sender, - saga_quiesce: SagaQuiesceHandle, + nexus_quiesce: NexusQuiesceHandle, } impl BlueprintExecutor { @@ -48,7 +49,7 @@ impl BlueprintExecutor { nexus_id: OmicronZoneUuid, saga_recovery: Activator, mgs_update_tx: watch::Sender, - saga_quiesce: SagaQuiesceHandle, + nexus_quiesce: NexusQuiesceHandle, ) -> BlueprintExecutor { let (tx, _) = watch::channel(0); BlueprintExecutor { @@ -59,7 +60,7 @@ impl BlueprintExecutor { tx, saga_recovery, mgs_update_tx, - saga_quiesce, + nexus_quiesce, } } @@ -87,6 +88,47 @@ impl BlueprintExecutor { }; let (bp_target, blueprint) = &*update; + + // Regardless of anything else: propagate whatever this blueprint + // says about our quiescing state. + // + // During startup under normal operation, the blueprint will reflect + // that we're not quiescing. Propagating this will enable sagas to + // be created elsewhere in Nexus. + // + // At some point during an upgrade, we'll encounter a blueprint that + // reflects that we are quiescing. 
Propagating this will disable sagas + // from being created. + // + // In all other cases, this will have no effect. + // + // We do this now, before doing anything else, for two reasons: (1) + // during startup, we want to do this ASAP to minimize unnecessary saga + // creation failures (i.e., don't wait until we try to execute the + // blueprint before enabling sagas, since we already know if we're + // quiescing or not); and (2) because we want to do it even if blueprint + // execution is disabled. + match blueprint.nexus_quiescing(self.nexus_id) { + Ok(quiescing) => { + debug!( + &opctx.log, + "blueprint execution: quiesce check"; + "quiescing" => quiescing + ); + self.nexus_quiesce.set_quiescing(quiescing); + } + Err(error) => { + // This should be impossible. But it doesn't really affect + // anything else so there's no reason to stop execution. + error!( + &opctx.log, + "blueprint execution: failed to determine if this Nexus \ + is quiescing"; + InlineErrorChain::new(&*error) + ); + } + }; + if !bp_target.enabled { warn!(&opctx.log, "Blueprint execution: skipped"; @@ -119,7 +161,7 @@ impl BlueprintExecutor { blueprint, sender, mgs_updates: self.mgs_update_tx.clone(), - saga_quiesce: self.saga_quiesce.clone(), + saga_quiesce: self.nexus_quiesce.sagas(), } .as_nexus(self.nexus_id), ) @@ -181,6 +223,7 @@ impl BackgroundTask for BlueprintExecutor { mod test { use super::BlueprintExecutor; use crate::app::background::{Activator, BackgroundTask}; + use crate::app::quiesce::NexusQuiesceHandle; use httptest::Expectation; use httptest::matchers::{not, request}; use httptest::responders::status_code; @@ -207,7 +250,6 @@ mod test { PlanningReport, blueprint_zone_type, }; use nexus_types::external_api::views::SledState; - use nexus_types::quiesce::SagaQuiesceHandle; use omicron_common::api::external; use omicron_common::api::external::Generation; use omicron_common::zpool_name::ZpoolName; @@ -390,7 +432,7 @@ mod test { OmicronZoneUuid::new_v4(), Activator::new(), dummy_tx, - SagaQuiesceHandle::new(opctx.log.clone()), + NexusQuiesceHandle::new(&opctx.log, datastore.clone()), ); // Now we're ready. 
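The blueprint-execution change above propagates the blueprint's quiesce decision into Nexus via `set_quiescing`; the decision itself comes from comparing generations (see the `is_nexus_quiescing` helper added to `nexus/types/src/deployment.rs` later in this patch). A minimal sketch of that comparison, using a hypothetical free function and plain integers as stand-ins for the real `Generation` type:

    // Sketch only; the real check is `Blueprint::is_nexus_quiescing`.
    // A Nexus zone should quiesce once the blueprint's top-level
    // `nexus_generation` has advanced past the generation recorded on that
    // zone's config, i.e., a newer generation of Nexus zones is taking over.
    fn nexus_zone_should_quiesce(
        zone_nexus_generation: u64,
        blueprint_nexus_generation: u64,
    ) -> bool {
        zone_nexus_generation < blueprint_nexus_generation
    }
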
diff --git a/nexus/src/app/background/tasks/blueprint_planner.rs b/nexus/src/app/background/tasks/blueprint_planner.rs index 9ae27a227f..752632e473 100644 --- a/nexus/src/app/background/tasks/blueprint_planner.rs +++ b/nexus/src/app/background/tasks/blueprint_planner.rs @@ -278,18 +278,15 @@ impl BackgroundTask for BlueprintPlanner { #[cfg(test)] mod test { use super::*; - use crate::app::background::Activator; use crate::app::background::tasks::blueprint_execution::BlueprintExecutor; use crate::app::background::tasks::blueprint_load::TargetBlueprintLoader; use crate::app::background::tasks::inventory_collection::InventoryCollector; + use crate::app::{background::Activator, quiesce::NexusQuiesceHandle}; use nexus_inventory::now_db_precision; use nexus_test_utils_macros::nexus_test; - use nexus_types::{ - deployment::{ - PendingMgsUpdates, PlannerChickenSwitches, - ReconfiguratorChickenSwitches, - }, - quiesce::SagaQuiesceHandle, + use nexus_types::deployment::{ + PendingMgsUpdates, PlannerChickenSwitches, + ReconfiguratorChickenSwitches, }; use omicron_uuid_kinds::OmicronZoneUuid; @@ -429,7 +426,7 @@ mod test { OmicronZoneUuid::new_v4(), Activator::new(), dummy_tx, - SagaQuiesceHandle::new(opctx.log.clone()), + NexusQuiesceHandle::new(&opctx.log, datastore.clone()), ); let value = executor.activate(&opctx).await; let value = value.as_object().expect("response is not a JSON object"); diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index cb3ca045cf..c8abeac6e0 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -27,7 +27,6 @@ use nexus_db_queries::db; use nexus_mgs_updates::ArtifactCache; use nexus_mgs_updates::MgsUpdateDriver; use nexus_types::deployment::PendingMgsUpdates; -use nexus_types::quiesce::SagaQuiesceHandle; use omicron_common::address::DENDRITE_PORT; use omicron_common::address::MGD_PORT; use omicron_common::address::MGS_PORT; @@ -111,11 +110,11 @@ pub(crate) mod sagas; // TODO: When referring to API types, we should try to include // the prefix unless it is unambiguous. +use crate::app::quiesce::NexusQuiesceHandle; pub(crate) use nexus_db_model::MAX_NICS_PER_INSTANCE; pub(crate) use nexus_db_queries::db::queries::disk::MAX_DISKS_PER_INSTANCE; use nexus_mgs_updates::DEFAULT_RETRY_TIMEOUT; use nexus_types::internal_api::views::MgsUpdateDriverStatus; -use nexus_types::internal_api::views::QuiesceState; use sagas::demo::CompletingDemoSagas; // XXX: Might want to recast as max *floating* IPs, we have at most one @@ -280,11 +279,8 @@ pub struct Nexus { #[allow(dead_code)] repo_depot_resolver: Box, - /// whether Nexus is quiescing, and how far it's gotten - quiesce: watch::Sender, - - /// details about saga quiescing - saga_quiesce: SagaQuiesceHandle, + /// state of overall Nexus quiesce activity + quiesce: NexusQuiesceHandle, } impl Nexus { @@ -336,6 +332,8 @@ impl Nexus { sec_store, )); + let quiesce = NexusQuiesceHandle::new(&log, db_datastore.clone()); + // It's a bit of a red flag to use an unbounded channel. // // This particular channel is used to send a Uuid from the saga executor @@ -360,14 +358,11 @@ impl Nexus { // task. If someone changed the config, they'd have to remember to // update this here. This doesn't seem worth it. 
let (saga_create_tx, saga_recovery_rx) = mpsc::unbounded_channel(); - let saga_quiesce = SagaQuiesceHandle::new( - log.new(o!("component" => "SagaQuiesceHandle")), - ); let sagas = Arc::new(SagaExecutor::new( Arc::clone(&sec_client), log.new(o!("component" => "SagaExecutor")), saga_create_tx, - saga_quiesce.clone(), + quiesce.sagas(), )); // Create a channel for replicating repository artifacts. 16 is a @@ -465,8 +460,6 @@ impl Nexus { let mgs_update_status_rx = mgs_update_driver.status_rx(); let _mgs_driver_task = tokio::spawn(mgs_update_driver.run()); - let (quiesce, _) = watch::channel(QuiesceState::running()); - let nexus = Nexus { id: config.deployment.id, rack_id, @@ -520,7 +513,6 @@ impl Nexus { mgs_resolver, repo_depot_resolver, quiesce, - saga_quiesce, }; // TODO-cleanup all the extra Arcs here seems wrong @@ -570,6 +562,7 @@ impl Nexus { webhook_delivery_client: task_nexus .webhook_delivery_client .clone(), + nexus_quiesce: task_nexus.quiesce.clone(), saga_recovery: SagaRecoveryHelpers { recovery_opctx: saga_recovery_opctx, @@ -577,7 +570,7 @@ impl Nexus { sec_client: sec_client.clone(), registry: sagas::ACTION_REGISTRY.clone(), sagas_started_rx: saga_recovery_rx, - quiesce: task_nexus.saga_quiesce.clone(), + quiesce: task_nexus.quiesce.sagas(), }, tuf_artifact_replication_rx, mgs_updates_tx, diff --git a/nexus/src/app/quiesce.rs b/nexus/src/app/quiesce.rs index 6c8fe05dec..a4f6e18fdf 100644 --- a/nexus/src/app/quiesce.rs +++ b/nexus/src/app/quiesce.rs @@ -14,6 +14,7 @@ use nexus_types::internal_api::views::QuiesceStatus; use nexus_types::quiesce::SagaQuiesceHandle; use omicron_common::api::external::LookupResult; use omicron_common::api::external::UpdateResult; +use slog::Logger; use std::sync::Arc; use std::time::Instant; use tokio::sync::watch; @@ -21,26 +22,7 @@ use tokio::sync::watch; impl super::Nexus { pub async fn quiesce_start(&self, opctx: &OpContext) -> UpdateResult<()> { opctx.authorize(authz::Action::Modify, &authz::QUIESCE_STATE).await?; - let started = self.quiesce.send_if_modified(|q| { - if let QuiesceState::Running = q { - let time_requested = Utc::now(); - let time_waiting_for_sagas = Instant::now(); - *q = QuiesceState::WaitingForSagas { - time_requested, - time_waiting_for_sagas, - }; - true - } else { - false - } - }); - if started { - tokio::spawn(do_quiesce( - self.quiesce.clone(), - self.saga_quiesce.clone(), - self.datastore().clone(), - )); - } + self.quiesce.set_quiescing(true); Ok(()) } @@ -49,56 +31,163 @@ impl super::Nexus { opctx: &OpContext, ) -> LookupResult { opctx.authorize(authz::Action::Read, &authz::QUIESCE_STATE).await?; - let state = self.quiesce.borrow().clone(); - let sagas_pending = self.saga_quiesce.sagas_pending(); + let state = self.quiesce.state(); + let sagas_pending = self.quiesce.sagas().sagas_pending(); let db_claims = self.datastore().claims_held(); Ok(QuiesceStatus { state, sagas_pending, db_claims }) } } -async fn do_quiesce( - quiesce: watch::Sender, - saga_quiesce: SagaQuiesceHandle, +/// Describes the configuration and state around quiescing Nexus +#[derive(Clone)] +pub struct NexusQuiesceHandle { + log: Logger, datastore: Arc, -) { - assert_matches!(*quiesce.borrow(), QuiesceState::WaitingForSagas { .. 
}); - saga_quiesce.quiesce(); - saga_quiesce.wait_for_quiesced().await; - quiesce.send_modify(|q| { - let QuiesceState::WaitingForSagas { + sagas: SagaQuiesceHandle, + state: watch::Sender, +} + +impl NexusQuiesceHandle { + pub fn new(log: &Logger, datastore: Arc) -> NexusQuiesceHandle { + let my_log = log.new(o!("component" => "NexusQuiesceHandle")); + let saga_quiesce_log = log.new(o!("component" => "SagaQuiesceHandle")); + let sagas = SagaQuiesceHandle::new(saga_quiesce_log); + let (state, _) = watch::channel(QuiesceState::Undetermined); + NexusQuiesceHandle { log: my_log, datastore, sagas, state } + } + + pub fn sagas(&self) -> SagaQuiesceHandle { + self.sagas.clone() + } + + pub fn state(&self) -> QuiesceState { + self.state.borrow().clone() + } + + pub fn set_quiescing(&self, quiescing: bool) { + let new_state = if quiescing { + let time_requested = Utc::now(); + let time_draining_sagas = Instant::now(); + QuiesceState::DrainingSagas { time_requested, time_draining_sagas } + } else { + QuiesceState::Running + }; + + let changed = self.state.send_if_modified(|q| { + match q { + QuiesceState::Undetermined => { + info!(&self.log, "initial state"; "state" => ?new_state); + *q = new_state; + true + } + QuiesceState::Running if quiescing => { + info!(&self.log, "quiesce starting"); + *q = new_state; + true + } + _ => { + // All other cases are either impossible or no-ops. + false + } + } + }); + + if changed && quiescing { + // Immediately quiesce sagas. + self.sagas.set_quiescing(quiescing); + // Asynchronously complete the rest of the quiesce process. + if quiescing { + tokio::spawn(do_quiesce(self.clone())); + } + } + } +} + +async fn do_quiesce(quiesce: NexusQuiesceHandle) { + let saga_quiesce = quiesce.sagas.clone(); + let datastore = quiesce.datastore.clone(); + + // NOTE: This sequence will change as we implement RFD 588. + // We will need to use the datastore to report our saga drain status and + // also to see when other Nexus instances have finished draining their + // sagas. For now, this implementation begins quiescing its database as + // soon as its sagas are locally drained. + assert_matches!( + *quiesce.state.borrow(), + QuiesceState::DrainingSagas { .. } + ); + + // TODO per RFD 588, this is where we will enter a loop, pausing either on + // timeout or when our local quiesce state changes. At each pause: if we + // need to update our db_metadata_nexus record, do so. Then load the + // current blueprint and check the records for all nexus instances. + // + // For now, we skip the cross-Nexus coordination and simply wait for our own + // Nexus to finish what it's doing. 
+ saga_quiesce.wait_for_drained().await; + + quiesce.state.send_modify(|q| { + let QuiesceState::DrainingSagas { time_requested, - time_waiting_for_sagas, + time_draining_sagas, } = *q else { panic!("wrong state in do_quiesce(): {:?}", q); }; - *q = QuiesceState::WaitingForDb { + let time_draining_db = Instant::now(); + *q = QuiesceState::DrainingDb { time_requested, - time_waiting_for_sagas, - duration_waiting_for_sagas: time_waiting_for_sagas.elapsed(), - time_waiting_for_db: Instant::now(), + time_draining_sagas, + duration_draining_sagas: time_draining_db - time_draining_sagas, + time_draining_db, }; }); datastore.quiesce(); datastore.wait_for_quiesced().await; - quiesce.send_modify(|q| { - let QuiesceState::WaitingForDb { + quiesce.state.send_modify(|q| { + let QuiesceState::DrainingDb { time_requested, - time_waiting_for_sagas, - duration_waiting_for_sagas, - time_waiting_for_db, + time_draining_sagas, + duration_draining_sagas, + time_draining_db, + } = *q + else { + panic!("wrong state in do_quiesce(): {:?}", q); + }; + let time_recording_quiesce = Instant::now(); + *q = QuiesceState::RecordingQuiesce { + time_requested, + time_draining_sagas, + duration_draining_sagas, + duration_draining_db: time_recording_quiesce - time_draining_db, + time_recording_quiesce, + }; + }); + + // TODO per RFD 588, this is where we will enter a loop trying to update our + // database record for the last time. + + quiesce.state.send_modify(|q| { + let QuiesceState::RecordingQuiesce { + time_requested, + time_draining_sagas, + duration_draining_sagas, + duration_draining_db, + time_recording_quiesce, } = *q else { panic!("wrong state in do_quiesce(): {:?}", q); }; + let finished = Instant::now(); *q = QuiesceState::Quiesced { time_requested, - duration_waiting_for_sagas, - duration_waiting_for_db: finished - time_waiting_for_db, - duration_total: finished - time_waiting_for_sagas, time_quiesced: Utc::now(), + duration_draining_sagas, + duration_draining_db, + duration_recording_quiesce: finished - time_recording_quiesce, + duration_total: finished - time_draining_sagas, }; }); } diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 23094feb17..92fcbb2438 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -76,6 +76,8 @@ mod planning_report; mod zone_type; use crate::inventory::BaseboardId; +use anyhow::anyhow; +use anyhow::bail; pub use blueprint_diff::BlueprintDiffSummary; use blueprint_display::BpPendingMgsUpdates; pub use chicken_switches::PlannerChickenSwitches; @@ -383,6 +385,26 @@ impl Blueprint { pub fn display(&self) -> BlueprintDisplay<'_> { BlueprintDisplay { blueprint: self } } + + /// Returns whether the given Nexus instance should be quiescing or quiesced + /// in preparation for handoff to the next generation + pub fn nexus_quiescing( + &self, + nexus_id: OmicronZoneUuid, + ) -> Result { + let zone = self + .all_omicron_zones(|_z| true) + .find(|(_sled_id, zone_config)| zone_config.id == nexus_id) + .ok_or_else(|| { + anyhow!("zone {} does not exist in blueprint", nexus_id) + })? 
+ .1; + let BlueprintZoneType::Nexus(zone_config) = &zone.zone_type else { + bail!("zone {} is not a Nexus zone", nexus_id); + }; + + Ok(zone_config.nexus_generation < self.nexus_generation) + } } /// Wrapper to display a table of a `BlueprintSledConfig`'s host phase 2 diff --git a/nexus/types/src/internal_api/views.rs b/nexus/types/src/internal_api/views.rs index f7db6d8661..972a0b92df 100644 --- a/nexus/types/src/internal_api/views.rs +++ b/nexus/types/src/internal_api/views.rs @@ -739,19 +739,28 @@ pub struct QuiesceStatus { /// At any given time, Nexus is always in one of these states: /// /// ```text +/// Undetermined (have not loaded persistent state; don't know yet) +/// | +/// | load persistent state and find we're not quiescing +/// v /// Running (normal operation) /// | /// | quiesce starts /// v -/// WaitingForSagas (no new sagas are allowed, but some are still running) +/// DrainingSagas (no new sagas are allowed, but some are still running) /// | /// | no more sagas running /// v -/// WaitingForDb (no sagas running; no new db connections may be -/// acquired by Nexus at-large, but some are still held) +/// DrainingDb (no sagas running; no new db connections may be +/// | acquired by Nexus at-large, but some are still held) /// | /// | no more database connections held /// v +/// RecordingQuiesce (everything is quiesced aside from one connection being +/// | used to record our final quiesced state) +/// | +/// | finish recording quiesce state in database +/// v /// Quiesced (no sagas running, no database connections in use) /// ``` /// @@ -762,58 +771,51 @@ pub struct QuiesceStatus { #[serde(rename_all = "snake_case")] #[serde(tag = "state", content = "quiesce_details")] pub enum QuiesceState { + /// We have not yet determined based on persistent state if we're supposed + /// to be quiesced or not + Undetermined, /// Normal operation Running, - /// New sagas disallowed, but some are still running. - WaitingForSagas { + /// New sagas disallowed, but some are still running on some Nexus instances + DrainingSagas { + time_requested: DateTime, + #[serde(skip)] + time_draining_sagas: Instant, + }, + /// No sagas running on any Nexus instances + /// + /// No new database connections may be claimed, but some database + /// connections are still held. + DrainingDb { time_requested: DateTime, #[serde(skip)] - time_waiting_for_sagas: Instant, + time_draining_sagas: Instant, + duration_draining_sagas: Duration, + #[serde(skip)] + time_draining_db: Instant, }, - /// No sagas running, no new database connections may be claimed, but some - /// database connections are still held. 
- WaitingForDb { + /// No database connections in use except to record the final "quiesced" + /// state + RecordingQuiesce { time_requested: DateTime, #[serde(skip)] - time_waiting_for_sagas: Instant, - duration_waiting_for_sagas: Duration, + time_draining_sagas: Instant, + duration_draining_sagas: Duration, + duration_draining_db: Duration, #[serde(skip)] - time_waiting_for_db: Instant, + time_recording_quiesce: Instant, }, /// Nexus has no sagas running and is not using the database Quiesced { time_requested: DateTime, time_quiesced: DateTime, - duration_waiting_for_sagas: Duration, - duration_waiting_for_db: Duration, + duration_draining_sagas: Duration, + duration_draining_db: Duration, + duration_recording_quiesce: Duration, duration_total: Duration, }, } -impl QuiesceState { - pub fn running() -> QuiesceState { - QuiesceState::Running - } - - pub fn quiescing(&self) -> bool { - match self { - QuiesceState::Running => false, - QuiesceState::WaitingForSagas { .. } - | QuiesceState::WaitingForDb { .. } - | QuiesceState::Quiesced { .. } => true, - } - } - - pub fn fully_quiesced(&self) -> bool { - match self { - QuiesceState::Running - | QuiesceState::WaitingForSagas { .. } - | QuiesceState::WaitingForDb { .. } => false, - QuiesceState::Quiesced { .. } => true, - } - } -} - /// Describes a pending saga (for debugging why quiesce is stuck) #[derive(Debug, Clone, Serialize, JsonSchema)] pub struct PendingSagaInfo { diff --git a/nexus/types/src/quiesce.rs b/nexus/types/src/quiesce.rs index 76df318d80..378011be2c 100644 --- a/nexus/types/src/quiesce.rs +++ b/nexus/types/src/quiesce.rs @@ -12,6 +12,7 @@ use iddqd::IdOrdMap; use omicron_common::api::external::Error; use omicron_common::api::external::Generation; use slog::Logger; +use slog::error; use slog::info; use slog::o; use slog_error_chain::InlineErrorChain; @@ -27,15 +28,20 @@ use tokio::sync::watch; enum SagasAllowed { /// New sagas may be started (normal condition) Allowed, - /// New sagas may not be started (happens during quiesce) - Disallowed, + /// New sagas may not be started because we're quiescing or quiesced + DisallowedQuiesce, + /// New sagas may not be started because we just started up and haven't + /// determined if we're quiescing yet + DisallowedUnknown, } #[derive(Debug, Error)] -#[error( - "saga creation and reassignment are disallowed (Nexus quiescing/quiesced)" -)] -pub struct NoSagasAllowedError; +pub enum NoSagasAllowedError { + #[error("saga creation is disallowed (quiescing/quiesced)")] + Quiescing, + #[error("saga creation is disallowed (unknown yet if we're quiescing)")] + Unknown, +} impl From for Error { fn from(value: NoSagasAllowedError) -> Self { Error::unavail(&value.to_string()) @@ -80,7 +86,7 @@ pub struct SagaQuiesceHandle { // mutate the data, using it to protect data and not code. // // (2) `watch::Receiver` provides a really handy `wait_for()` method` that - // we use in `wait_for_quiesced()`. Besides being convenient, this + // we use in `wait_for_drained()`. Besides being convenient, this // would be surprisingly hard for us to implement ourselves with a // `Mutex`. Traditionally, you'd use a combination Mutex/Condvar for // this. 
But we'd want to use a `std` Mutex (since tokio Mutex's @@ -140,7 +146,7 @@ struct SagaQuiesceInner { impl SagaQuiesceHandle { pub fn new(log: Logger) -> SagaQuiesceHandle { let (inner, _) = watch::channel(SagaQuiesceInner { - new_sagas_allowed: SagasAllowed::Allowed, + new_sagas_allowed: SagasAllowed::DisallowedUnknown, sagas_pending: IdOrdMap::new(), first_recovery_complete: false, reassignment_generation: Generation::new(), @@ -151,26 +157,65 @@ impl SagaQuiesceHandle { SagaQuiesceHandle { log, inner } } - /// Disallow new sagas from being started or re-assigned to this Nexus + /// Set the intended quiescing state /// - /// This is currently a one-way trip. Sagas cannot be un-quiesced. - pub fn quiesce(&self) { - // Log this before changing the config to make sure this message - // appears before messages from code paths that saw this change. - info!(&self.log, "starting saga quiesce"); - self.inner - .send_modify(|q| q.new_sagas_allowed = SagasAllowed::Disallowed); + /// Quiescing is currently a one-way trip. Once we start quiescing, we + /// cannot then re-enable sagas. + pub fn set_quiescing(&self, quiescing: bool) { + self.inner.send_if_modified(|q| { + let new_state = if quiescing { + SagasAllowed::DisallowedQuiesce + } else { + SagasAllowed::Allowed + }; + + match q.new_sagas_allowed { + SagasAllowed::DisallowedUnknown => { + info!( + &self.log, + "initial quiesce state"; + "initial_state" => ?new_state + ); + q.new_sagas_allowed = new_state; + true + } + SagasAllowed::Allowed if quiescing => { + info!(&self.log, "saga quiesce starting"); + q.new_sagas_allowed = SagasAllowed::DisallowedQuiesce; + true + } + SagasAllowed::DisallowedQuiesce if !quiescing => { + // This should be impossible. Report a problem. + error!( + &self.log, + "asked to stop quiescing after previously quiescing" + ); + false + } + _ => { + // There's no transition happening in these cases: + // - SagasAllowed::Allowed and we're not quiescing + // - SagasAllowed::DisallowedQuiesce and we're now quiescing + false + } + } + }); } - /// Returns whether sagas are fully quiesced - pub fn is_fully_quiesced(&self) -> bool { - self.inner.borrow().is_fully_quiesced() + /// Returns whether sagas are fully drained + /// + /// Note that this state can change later if new sagas get assigned to this + /// Nexus. + pub fn is_fully_drained(&self) -> bool { + self.inner.borrow().is_fully_drained() } - /// Wait for sagas to be quiesced - pub async fn wait_for_quiesced(&self) { - let _ = - self.inner.subscribe().wait_for(|q| q.is_fully_quiesced()).await; + /// Wait for sagas to become drained + /// + /// Note that new sagas can still be assigned to this Nexus, resulting in it + /// no longer being fully drained. + pub async fn wait_for_drained(&self) { + let _ = self.inner.subscribe().wait_for(|q| q.is_fully_drained()).await; } /// Returns information about running sagas (involves a clone) @@ -180,13 +225,10 @@ impl SagaQuiesceHandle { /// Record an operation that might assign sagas to this Nexus /// - /// If reassignment is currently allowed, `f` will be invoked to potentially - /// re-assign sagas. `f` returns `(T, bool)`, where `T` is whatever value - /// you want and is returned back from this function. The boolean indicates - /// whether any sagas may have been assigned to the current Nexus. - /// - /// If reassignment is currently disallowed (because Nexus is quiescing), - /// `f` is not invoked and an error describing this condition is returned. + /// `f` will be invoked to potentially re-assign sagas. 
`f` returns `(T, + /// bool)`, where `T` is whatever value you want and is returned back from + /// this function. The boolean indicates whether any sagas may have been + /// assigned to the current Nexus. /// /// Only one of these may be outstanding at a time. It should not be called /// concurrently. This is easy today because this is only invoked by a few @@ -204,27 +246,22 @@ impl SagaQuiesceHandle { // mis-use (e.g., by forgetting to call `reassignment_done()`). But we keep // the other two functions around because it's easier to write tests against // those. - pub async fn reassign_if_possible( - &self, - f: F, - ) -> Result + pub async fn reassign_sagas(&self, f: F) -> T where F: AsyncFnOnce() -> (T, bool), { - let in_progress = self.reassignment_start()?; + let in_progress = self.reassignment_start(); let (result, maybe_reassigned) = f().await; in_progress.reassignment_done(maybe_reassigned); - Ok(result) + result } /// Record that we've begun a re-assignment operation. /// /// Only one of these may be outstanding at a time. The caller must call /// `reassignment_done()` before starting another one of these. - fn reassignment_start( - &self, - ) -> Result { - let okay = self.inner.send_if_modified(|q| { + fn reassignment_start(&self) -> SagaReassignmentInProgress { + self.inner.send_modify(|q| { assert!( !q.reassignment_pending, "two calls to reassignment_start() without intervening call \ @@ -232,21 +269,11 @@ impl SagaQuiesceHandle { reassign_if_possible()?)" ); - if q.new_sagas_allowed != SagasAllowed::Allowed { - return false; - } - q.reassignment_pending = true; - true }); - if okay { - info!(&self.log, "allowing saga re-assignment pass"); - Ok(SagaReassignmentInProgress { q: self.clone() }) - } else { - info!(&self.log, "disallowing saga re-assignment pass"); - Err(NoSagasAllowedError) - } + info!(&self.log, "starting saga re-assignment pass"); + SagaReassignmentInProgress { q: self.clone() } } /// Record that we've finished an operation that might assign new sagas to @@ -262,10 +289,10 @@ impl SagaQuiesceHandle { q.reassignment_pending = false; // If we may have assigned new sagas to ourselves, bump the - // generation number. We won't quiesce until a recovery pass has - // finished that *started* with this generation number. So this - // ensures that we won't quiesce until any sagas that may have been - // assigned to us have been recovered. + // generation number. We won't report being drained until a + // recovery pass has finished that *started* with this generation + // number. So this ensures that we won't report being drained until + // any sagas that may have been assigned to us have been recovered. if maybe_reassigned { q.reassignment_generation = q.reassignment_generation.next(); } @@ -344,7 +371,7 @@ impl SagaQuiesceHandle { /// Report that a saga has started running /// - /// This fails if sagas are quiesced. + /// This fails if sagas are quiescing or quiesced. /// /// Callers must also call `saga_completion_future()` to make sure it's /// recorded when this saga finishes. 
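To make the `reassign_sagas` contract described above concrete, here is a condensed usage sketch. The blueprint-execution step earlier in this patch is the real caller; `quiesce` and `perform_reassignment()` here are hypothetical names for illustration:

    // Sketch, assuming `quiesce` is a `SagaQuiesceHandle` and
    // `perform_reassignment()` is a hypothetical async helper returning the
    // number of sagas re-assigned to this Nexus.
    let reassigned_count = quiesce
        .reassign_sagas(async || {
            let count = perform_reassignment().await;
            // The boolean tells the handle whether sagas may have been
            // assigned to this Nexus; if so, the handle will not report
            // itself drained until a later recovery pass completes.
            (count, count > 0)
        })
        .await;
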
@@ -353,9 +380,18 @@ impl SagaQuiesceHandle { saga_id: steno::SagaId, saga_name: &steno::SagaName, ) -> Result { + let mut error: Option = None; let okay = self.inner.send_if_modified(|q| { - if q.new_sagas_allowed != SagasAllowed::Allowed { - return false; + match q.new_sagas_allowed { + SagasAllowed::Allowed => (), + SagasAllowed::DisallowedQuiesce => { + error = Some(NoSagasAllowedError::Quiescing); + return false; + } + SagasAllowed::DisallowedUnknown => { + error = Some(NoSagasAllowedError::Unknown); + return false; + } } q.sagas_pending @@ -379,12 +415,15 @@ impl SagaQuiesceHandle { init_finished: false, }) } else { + let error = + error.expect("error is always set when disallowing sagas"); info!( &self.log, "disallowing saga creation"; - "saga_id" => saga_id.to_string() + "saga_id" => saga_id.to_string(), + InlineErrorChain::new(&error), ); - Err(NoSagasAllowedError) + Err(error) } } @@ -403,8 +442,8 @@ impl SagaQuiesceHandle { /// sagas that might possibly have finished already.) /// /// Unlike `saga_created()`, this cannot fail as a result of sagas being - /// quiesced. That's because a saga that *needs* to be recovered is a - /// blocker for quiesce, whether it's running or not. So we need to + /// quiescing/quiesced. That's because a saga that *needs* to be recovered + /// is a blocker for quiesce, whether it's running or not. So we need to /// actually run and finish it. We do still want to prevent ourselves from /// taking on sagas needing recovery -- that's why we fail /// `reassign_if_possible()` when saga creation is disallowed. @@ -438,10 +477,13 @@ impl SagaQuiesceHandle { } impl SagaQuiesceInner { - /// Returns whether sagas are fully and permanently quiesced - pub fn is_fully_quiesced(&self) -> bool { + /// Returns whether sagas are fully drained + /// + /// This condition is not permanent. New sagas can be re-assigned to this + /// Nexus. + pub fn is_fully_drained(&self) -> bool { // No new sagas may be created - self.new_sagas_allowed == SagasAllowed::Disallowed + self.new_sagas_allowed == SagasAllowed::DisallowedQuiesce // and there are none currently running && self.sagas_pending.is_empty() // and there are none from a previous lifetime that still need to be @@ -640,32 +682,30 @@ mod test { // Set up a new handle. Complete the first saga recovery immediately so // that that doesn't block quiescing. let qq = SagaQuiesceHandle::new(log.clone()); + qq.set_quiescing(false); let recovery = qq.recovery_start(); recovery.recovery_done(true); - // It's still not fully quiesced because we haven't asked it to quiesce + // It's still not fully drained because we haven't asked it to quiesce // yet. assert!(qq.sagas_pending().is_empty()); - assert!(!qq.is_fully_quiesced()); + assert!(!qq.is_fully_drained()); - // Now start quiescing. It should immediately report itself as - // quiesced. There's nothing asynchronous in this path. (It would be - // okay if there were.) - qq.quiesce(); - assert!(qq.is_fully_quiesced()); + // Now start quiescing. It should immediately report itself as drained. + // There's nothing asynchronous in this path. (It would be okay if + // there were.) + qq.set_quiescing(true); + assert!(qq.is_fully_drained()); - // It's not allowed to create sagas or begin re-assignment after - // quiescing has started, let alone finished. + // It's not allowed to create sagas after quiescing has started, let + // alone finished. 
let _ = qq .saga_create(*SAGA_ID, &SAGA_NAME) .expect_err("cannot create saga after quiescing started"); - let _ = qq - .reassignment_start() - .expect_err("cannot start re-assignment after quiescing started"); - // Waiting for quiesce should complete immediately. - qq.wait_for_quiesced().await; - assert!(qq.is_fully_quiesced()); + // Waiting for drain should complete immediately. + qq.wait_for_drained().await; + assert!(qq.is_fully_drained()); logctx.cleanup_successful(); } @@ -680,6 +720,7 @@ mod test { // Set up a new handle. Complete the first saga recovery immediately so // that that doesn't block quiescing. let qq = SagaQuiesceHandle::new(log.clone()); + qq.set_quiescing(false); let recovery = qq.recovery_start(); recovery.recovery_done(true); @@ -690,15 +731,15 @@ mod test { assert!(!qq.sagas_pending().is_empty()); // Start quiescing. - qq.quiesce(); - assert!(!qq.is_fully_quiesced()); + qq.set_quiescing(true); + assert!(!qq.is_fully_drained()); // Dropping the returned handle is as good as completing the saga. drop(started); assert!(qq.sagas_pending().is_empty()); - assert!(qq.is_fully_quiesced()); - qq.wait_for_quiesced().await; - assert!(qq.is_fully_quiesced()); + assert!(qq.is_fully_drained()); + qq.wait_for_drained().await; + assert!(qq.is_fully_drained()); logctx.cleanup_successful(); } @@ -715,6 +756,7 @@ mod test { // Set up a new handle. Complete the first saga recovery immediately so // that that doesn't block quiescing. let qq = SagaQuiesceHandle::new(log.clone()); + qq.set_quiescing(false); let recovery = qq.recovery_start(); recovery.recovery_done(true); @@ -730,8 +772,8 @@ mod test { assert!(!qq.sagas_pending().is_empty()); // Quiesce should block on the saga finishing. - qq.quiesce(); - assert!(!qq.is_fully_quiesced()); + qq.set_quiescing(true); + assert!(!qq.is_fully_drained()); // "Finish" the saga. tx.send(saga_result()).unwrap(); @@ -740,15 +782,15 @@ mod test { // able to notice that the saga finished yet. It's not that important // to assert this but it emphasizes that it really is waiting for // something to happen. - assert!(!qq.is_fully_quiesced()); + assert!(!qq.is_fully_drained()); // The consumer's completion future ought to be unblocked now. let _ = consumer_completion.await; // Wait for quiescing to finish. This should be immediate. - qq.wait_for_quiesced().await; + qq.wait_for_drained().await; assert!(qq.sagas_pending().is_empty()); - assert!(qq.is_fully_quiesced()); + assert!(qq.is_fully_drained()); logctx.cleanup_successful(); } @@ -761,24 +803,25 @@ mod test { // Set up a new handle. let qq = SagaQuiesceHandle::new(log.clone()); + qq.set_quiescing(false); - // Quiesce should block on recovery having completed successfully once. - qq.quiesce(); - assert!(!qq.is_fully_quiesced()); + // Drain should block on recovery having completed successfully once. + qq.set_quiescing(true); + assert!(!qq.is_fully_drained()); // Act like the first recovery failed. Quiescing should still be // blocked. let recovery = qq.recovery_start(); recovery.recovery_done(false); - assert!(!qq.is_fully_quiesced()); + assert!(!qq.is_fully_drained()); // Finish a normal saga recovery. Quiescing should proceed. // This happens synchronously (though it doesn't have to). 
let recovery = qq.recovery_start(); recovery.recovery_done(true); - assert!(qq.is_fully_quiesced()); - qq.wait_for_quiesced().await; - assert!(qq.is_fully_quiesced()); + assert!(qq.is_fully_drained()); + qq.wait_for_drained().await; + assert!(qq.is_fully_drained()); logctx.cleanup_successful(); } @@ -792,25 +835,25 @@ mod test { // Set up a new handle. Complete the first saga recovery immediately so // that that doesn't block quiescing. let qq = SagaQuiesceHandle::new(log.clone()); + qq.set_quiescing(false); let recovery = qq.recovery_start(); recovery.recovery_done(true); // Begin saga re-assignment. - let reassignment = - qq.reassignment_start().expect("can re-assign when not quiescing"); + let reassignment = qq.reassignment_start(); // Begin quiescing. - qq.quiesce(); + qq.set_quiescing(true); // Quiescing is blocked. - assert!(!qq.is_fully_quiesced()); + assert!(!qq.is_fully_drained()); // When re-assignment finishes *without* having re-assigned anything, // then we're immediately all set. reassignment.reassignment_done(false); - assert!(qq.is_fully_quiesced()); - qq.wait_for_quiesced().await; - assert!(qq.is_fully_quiesced()); + assert!(qq.is_fully_drained()); + qq.wait_for_drained().await; + assert!(qq.is_fully_drained()); logctx.cleanup_successful(); } @@ -826,36 +869,36 @@ mod test { // Set up a new handle. Complete the first saga recovery immediately so // that that doesn't block quiescing. let qq = SagaQuiesceHandle::new(log.clone()); + qq.set_quiescing(false); let recovery = qq.recovery_start(); recovery.recovery_done(true); // Begin saga re-assignment. - let reassignment = - qq.reassignment_start().expect("can re-assign when not quiescing"); + let reassignment = qq.reassignment_start(); // Begin quiescing. - qq.quiesce(); + qq.set_quiescing(true); // Quiescing is blocked. - assert!(!qq.is_fully_quiesced()); + assert!(!qq.is_fully_drained()); // When re-assignment finishes and re-assigned sagas, we're still // blocked. reassignment.reassignment_done(true); - assert!(!qq.is_fully_quiesced()); + assert!(!qq.is_fully_drained()); // If the next recovery pass fails, we're still blocked. let recovery = qq.recovery_start(); recovery.recovery_done(false); - assert!(!qq.is_fully_quiesced()); + assert!(!qq.is_fully_drained()); // Once a recovery pass succeeds, we're good. let recovery = qq.recovery_start(); recovery.recovery_done(true); - assert!(qq.is_fully_quiesced()); + assert!(qq.is_fully_drained()); - qq.wait_for_quiesced().await; - assert!(qq.is_fully_quiesced()); + qq.wait_for_drained().await; + assert!(qq.is_fully_drained()); logctx.cleanup_successful(); } @@ -874,18 +917,18 @@ mod test { // Set up a new handle. Complete the first saga recovery immediately so // that that doesn't block quiescing. let qq = SagaQuiesceHandle::new(log.clone()); + qq.set_quiescing(false); let recovery = qq.recovery_start(); recovery.recovery_done(true); // Begin saga re-assignment. - let reassignment = - qq.reassignment_start().expect("can re-assign when not quiescing"); + let reassignment = qq.reassignment_start(); // Begin quiescing. - qq.quiesce(); + qq.set_quiescing(true); // Quiescing is blocked. - assert!(!qq.is_fully_quiesced()); + assert!(!qq.is_fully_drained()); // Start a recovery pass. let recovery = qq.recovery_start(); @@ -893,25 +936,25 @@ mod test { // When re-assignment finishes and re-assigned sagas, we're still // blocked. 
reassignment.reassignment_done(true); - assert!(!qq.is_fully_quiesced()); + assert!(!qq.is_fully_drained()); // Even if this recovery pass succeeds, we're still blocked, because it // started before re-assignment finished and so isn't guaranteed to have // seen all the re-assigned sagas. recovery.recovery_done(true); - assert!(!qq.is_fully_quiesced()); + assert!(!qq.is_fully_drained()); // If the next pass fails, we're still blocked. let recovery = qq.recovery_start(); recovery.recovery_done(false); - assert!(!qq.is_fully_quiesced()); + assert!(!qq.is_fully_drained()); // Finally, we have a successful pass that unblocks us. let recovery = qq.recovery_start(); recovery.recovery_done(true); - assert!(qq.is_fully_quiesced()); - qq.wait_for_quiesced().await; - assert!(qq.is_fully_quiesced()); + assert!(qq.is_fully_drained()); + qq.wait_for_drained().await; + assert!(qq.is_fully_drained()); logctx.cleanup_successful(); } @@ -930,23 +973,23 @@ mod test { // Set up a new handle. Complete the first saga recovery immediately so // that that doesn't block quiescing. let qq = SagaQuiesceHandle::new(log.clone()); + qq.set_quiescing(false); let recovery = qq.recovery_start(); recovery.recovery_done(true); // Begin saga re-assignment. - let reassignment = - qq.reassignment_start().expect("can re-assign when not quiescing"); + let reassignment = qq.reassignment_start(); // Begin quiescing. - qq.quiesce(); + qq.set_quiescing(true); // Quiescing is blocked. - assert!(!qq.is_fully_quiesced()); + assert!(!qq.is_fully_drained()); // When re-assignment finishes and re-assigned sagas, we're still // blocked because we haven't run recovery. reassignment.reassignment_done(true); - assert!(!qq.is_fully_quiesced()); + assert!(!qq.is_fully_drained()); // Start a recovery pass. Pretend like we found something. let recovery = qq.recovery_start(); @@ -958,7 +1001,7 @@ mod test { recovery.recovery_done(true); // We're still not quiesced because that saga is still running. - assert!(!qq.is_fully_quiesced()); + assert!(!qq.is_fully_drained()); // Finish the recovered saga. That should unblock quiesce. tx.send(saga_result()).unwrap(); @@ -966,8 +1009,8 @@ mod test { // The consumer's completion future ought to be unblocked now. let _ = consumer_completion.await; - qq.wait_for_quiesced().await; - assert!(qq.is_fully_quiesced()); + qq.wait_for_drained().await; + assert!(qq.is_fully_drained()); logctx.cleanup_successful(); } @@ -983,6 +1026,7 @@ mod test { // Set up a new handle. let qq = SagaQuiesceHandle::new(log.clone()); + qq.set_quiescing(false); // Start a recovery pass. Pretend like we found something. let recovery = qq.recovery_start(); let pending = recovery.record_saga_recovery(*SAGA_ID, &SAGA_NAME); @@ -993,20 +1037,66 @@ mod test { recovery.recovery_done(true); // Begin quiescing. - qq.quiesce(); + qq.set_quiescing(true); // Quiescing is blocked. - assert!(!qq.is_fully_quiesced()); + assert!(!qq.is_fully_drained()); - // Finish the recovered saga. That should unblock quiesce. + // Finish the recovered saga. That should unblock drain. tx.send(saga_result()).unwrap(); - qq.wait_for_quiesced().await; - assert!(qq.is_fully_quiesced()); + qq.wait_for_drained().await; + assert!(qq.is_fully_drained()); // The consumer's completion future ought to be unblocked now. 
let _ = consumer_completion.await; logctx.cleanup_successful(); } + + /// Tests that sagas are disabled at the start + #[tokio::test] + async fn test_quiesce_sagas_disabled_on_startup() { + let logctx = test_setup_log("test_quiesce_block_on_recovered_sagas"); + let log = &logctx.log; + + let qq = SagaQuiesceHandle::new(log.clone()); + assert!(!qq.is_fully_drained()); + let _ = qq + .saga_create(*SAGA_ID, &SAGA_NAME) + .expect_err("cannot create saga in initial state"); + qq.recovery_start().recovery_done(true); + qq.set_quiescing(true); + assert!(qq.is_fully_drained()); + let _ = qq + .saga_create(*SAGA_ID, &SAGA_NAME) + .expect_err("cannot create saga after quiescing"); + + // It's allowed to start a new re-assignment pass. That prevents us + // from being drained. + let reassignment = qq.reassignment_start(); + assert!(!qq.is_fully_drained()); + reassignment.reassignment_done(false); + // We're fully drained as soon as this one is done, since we know we + // didn't assign any sagas. + assert!(qq.is_fully_drained()); + + // Try again. This time, we'll act like we did reassign sagas. + let reassignment = qq.reassignment_start(); + assert!(!qq.is_fully_drained()); + reassignment.reassignment_done(true); + assert!(!qq.is_fully_drained()); + // Do a failed recovery pass. We still won't be fully drained. + let recovery = qq.recovery_start(); + assert!(!qq.is_fully_drained()); + recovery.recovery_done(false); + assert!(!qq.is_fully_drained()); + // Do a successful recovery pass. We'll be drained again. + let recovery = qq.recovery_start(); + assert!(!qq.is_fully_drained()); + recovery.recovery_done(true); + assert!(qq.is_fully_drained()); + + logctx.cleanup_successful(); + } } From a8862d59bc38862f809285bbed97224a6af628f0 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 20 Aug 2025 16:07:49 -0700 Subject: [PATCH 02/10] self-review + regenerate API spec --- dev-tools/omdb/src/bin/omdb/nexus/quiesce.rs | 44 +++++++++-- .../background/tasks/blueprint_execution.rs | 2 +- nexus/src/app/quiesce.rs | 21 ++--- nexus/types/src/deployment.rs | 2 +- openapi/nexus-internal.json | 79 ++++++++++++++++--- 5 files changed, 117 insertions(+), 31 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus/quiesce.rs b/dev-tools/omdb/src/bin/omdb/nexus/quiesce.rs index b27c2c22fb..76c0a229c3 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus/quiesce.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus/quiesce.rs @@ -61,10 +61,13 @@ async fn quiesce_show( .context("fetching quiesce state")? .into_inner(); match quiesce.state { + QuiesceState::Undetermined => { + println!("has not yet determined if it is quiescing"); + } QuiesceState::Running => { println!("running normally (not quiesced, not quiescing)"); } - QuiesceState::WaitingForSagas { time_requested } => { + QuiesceState::DrainingSagas { time_requested } => { println!( "quiescing since {} ({} ago)", humantime::format_rfc3339_millis(time_requested.into()), @@ -72,9 +75,9 @@ async fn quiesce_show( ); println!("details: waiting for running sagas to finish"); } - QuiesceState::WaitingForDb { + QuiesceState::DrainingDb { time_requested, - duration_waiting_for_sagas, + duration_draining_sagas, .. } => { println!( @@ -87,13 +90,34 @@ async fn quiesce_show( ); println!( " previously: waiting for sagas took {}", - format_duration_ms(duration_waiting_for_sagas.into()), + format_duration_ms(duration_draining_sagas.into()), + ); + } + QuiesceState::RecordingQuiesce { + time_requested, + duration_draining_sagas, + duration_draining_db, + .. 
+ } => { + println!( + "quiescing since {} ({} ago)", + humantime::format_rfc3339_millis(time_requested.into()), + format_time_delta(now - time_requested), + ); + println!( + " waiting for sagas took {}", + format_duration_ms(duration_draining_sagas.into()), + ); + println!( + " waiting for db quiesce took {}", + format_duration_ms(duration_draining_db.into()), ); } QuiesceState::Quiesced { time_quiesced, - duration_waiting_for_sagas, - duration_waiting_for_db, + duration_draining_sagas, + duration_draining_db, + duration_recording_quiesce, duration_total, .. } => { @@ -104,11 +128,15 @@ async fn quiesce_show( ); println!( " waiting for sagas took {}", - format_duration_ms(duration_waiting_for_sagas.into()), + format_duration_ms(duration_draining_sagas.into()), ); println!( " waiting for db quiesce took {}", - format_duration_ms(duration_waiting_for_db.into()), + format_duration_ms(duration_draining_db.into()), + ); + println!( + " recording quiesce took {}", + format_duration_ms(duration_recording_quiesce.into()), ); println!( " total quiesce time: {}", diff --git a/nexus/src/app/background/tasks/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs index b930fc5107..443ec5eec1 100644 --- a/nexus/src/app/background/tasks/blueprint_execution.rs +++ b/nexus/src/app/background/tasks/blueprint_execution.rs @@ -108,7 +108,7 @@ impl BlueprintExecutor { // blueprint before enabling sagas, since we already know if we're // quiescing or not); and (2) because we want to do it even if blueprint // execution is disabled. - match blueprint.nexus_quiescing(self.nexus_id) { + match blueprint.is_nexus_quiescing(self.nexus_id) { Ok(quiescing) => { debug!( &opctx.log, diff --git a/nexus/src/app/quiesce.rs b/nexus/src/app/quiesce.rs index a4f6e18fdf..17b28e7fc9 100644 --- a/nexus/src/app/quiesce.rs +++ b/nexus/src/app/quiesce.rs @@ -250,24 +250,27 @@ mod test { let QuiesceState::Quiesced { time_requested, time_quiesced, - duration_waiting_for_sagas, - duration_waiting_for_db, + duration_draining_sagas, + duration_draining_db, + duration_recording_quiesce, duration_total, } = status.state else { panic!("not quiesced"); }; let duration_total = Duration::from(duration_total); - let duration_waiting_for_sagas = - Duration::from(duration_waiting_for_sagas); - let duration_waiting_for_db = Duration::from(duration_waiting_for_db); + let duration_draining_sagas = Duration::from(duration_draining_sagas); + let duration_draining_db = Duration::from(duration_draining_db); + let duration_recording_quiesce = + Duration::from(duration_recording_quiesce); assert!(time_requested >= before); assert!(time_requested <= after); assert!(time_quiesced >= before); assert!(time_quiesced <= after); assert!(time_quiesced >= time_requested); - assert!(duration_total >= duration_waiting_for_sagas); - assert!(duration_total >= duration_waiting_for_db); + assert!(duration_total >= duration_draining_sagas); + assert!(duration_total >= duration_draining_db); + assert!(duration_total >= duration_recording_quiesce); assert!(duration_total <= (after - before).to_std().unwrap()); assert!(status.sagas_pending.is_empty()); assert!(status.db_claims.is_empty()); @@ -341,7 +344,7 @@ mod test { debug!(log, "found quiesce status"; "status" => ?quiesce_status); assert_matches!( quiesce_status.state, - QuiesceState::WaitingForSagas { .. } + QuiesceState::DrainingSagas { .. 
} ); assert!(quiesce_status.sagas_pending.contains_key(&demo_saga.saga_id)); // We should see at least one held database claim from the one we took @@ -404,7 +407,7 @@ mod test { .map_err(|e| CondCheckError::Failed(e))? .into_inner(); debug!(log, "found quiesce state"; "state" => ?rv); - if !matches!(rv.state, QuiesceState::WaitingForDb { .. }) { + if !matches!(rv.state, QuiesceState::DrainingDb { .. }) { return Err(CondCheckError::::NotYet); } assert!(rv.sagas_pending.is_empty()); diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 92fcbb2438..b6fd344adf 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -388,7 +388,7 @@ impl Blueprint { /// Returns whether the given Nexus instance should be quiescing or quiesced /// in preparation for handoff to the next generation - pub fn nexus_quiescing( + pub fn is_nexus_quiescing( &self, nexus_id: OmicronZoneUuid, ) -> Result { diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 0a6ae8c075..ea14b4c2d5 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -7477,8 +7477,23 @@ ] }, "QuiesceState": { - "description": "See [`QuiesceStatus`] for more on Nexus quiescing.\n\nAt any given time, Nexus is always in one of these states:\n\n```text Running (normal operation) | | quiesce starts v WaitingForSagas (no new sagas are allowed, but some are still running) | | no more sagas running v WaitingForDb (no sagas running; no new db connections may be acquired by Nexus at-large, but some are still held) | | no more database connections held v Quiesced (no sagas running, no database connections in use) ```\n\nQuiescing is (currently) a one-way trip: once a Nexus process starts quiescing, it will never go back to normal operation. It will never go back to an earlier stage, either.", + "description": "See [`QuiesceStatus`] for more on Nexus quiescing.\n\nAt any given time, Nexus is always in one of these states:\n\n```text Undetermined (have not loaded persistent state; don't know yet) | | load persistent state and find we're not quiescing v Running (normal operation) | | quiesce starts v DrainingSagas (no new sagas are allowed, but some are still running) | | no more sagas running v DrainingDb (no sagas running; no new db connections may be | acquired by Nexus at-large, but some are still held) | | no more database connections held v RecordingQuiesce (everything is quiesced aside from one connection being | used to record our final quiesced state) | | finish recording quiesce state in database v Quiesced (no sagas running, no database connections in use) ```\n\nQuiescing is (currently) a one-way trip: once a Nexus process starts quiescing, it will never go back to normal operation. 
It will never go back to an earlier stage, either.", "oneOf": [ + { + "description": "We have not yet determined based on persistent state if we're supposed to be quiesced or not", + "type": "object", + "properties": { + "state": { + "type": "string", + "enum": [ + "undetermined" + ] + } + }, + "required": [ + "state" + ] + }, { "description": "Normal operation", "type": "object", @@ -7495,7 +7510,7 @@ ] }, { - "description": "New sagas disallowed, but some are still running.", + "description": "New sagas disallowed, but some are still running on some Nexus instances", "type": "object", "properties": { "quiesce_details": { @@ -7513,7 +7528,7 @@ "state": { "type": "string", "enum": [ - "waiting_for_sagas" + "draining_sagas" ] } }, @@ -7523,13 +7538,13 @@ ] }, { - "description": "No sagas running, no new database connections may be claimed, but some database connections are still held.", + "description": "No sagas running on any Nexus instances\n\nNo new database connections may be claimed, but some database connections are still held.", "type": "object", "properties": { "quiesce_details": { "type": "object", "properties": { - "duration_waiting_for_sagas": { + "duration_draining_sagas": { "$ref": "#/components/schemas/Duration" }, "time_requested": { @@ -7538,14 +7553,50 @@ } }, "required": [ - "duration_waiting_for_sagas", + "duration_draining_sagas", "time_requested" ] }, "state": { "type": "string", "enum": [ - "waiting_for_db" + "draining_db" + ] + } + }, + "required": [ + "quiesce_details", + "state" + ] + }, + { + "description": "No database connections in use except to record the final \"quiesced\" state", + "type": "object", + "properties": { + "quiesce_details": { + "type": "object", + "properties": { + "duration_draining_db": { + "$ref": "#/components/schemas/Duration" + }, + "duration_draining_sagas": { + "$ref": "#/components/schemas/Duration" + }, + "time_requested": { + "type": "string", + "format": "date-time" + } + }, + "required": [ + "duration_draining_db", + "duration_draining_sagas", + "time_requested" + ] + }, + "state": { + "type": "string", + "enum": [ + "recording_quiesce" ] } }, @@ -7561,13 +7612,16 @@ "quiesce_details": { "type": "object", "properties": { - "duration_total": { + "duration_draining_db": { "$ref": "#/components/schemas/Duration" }, - "duration_waiting_for_db": { + "duration_draining_sagas": { "$ref": "#/components/schemas/Duration" }, - "duration_waiting_for_sagas": { + "duration_recording_quiesce": { + "$ref": "#/components/schemas/Duration" + }, + "duration_total": { "$ref": "#/components/schemas/Duration" }, "time_quiesced": { @@ -7580,9 +7634,10 @@ } }, "required": [ + "duration_draining_db", + "duration_draining_sagas", + "duration_recording_quiesce", "duration_total", - "duration_waiting_for_db", - "duration_waiting_for_sagas", "time_quiesced", "time_requested" ] From 356d60b7681c6382da883bfaeee566b2a5e07fda Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 20 Aug 2025 16:29:50 -0700 Subject: [PATCH 03/10] tests need to wait for sagas to be enabled --- nexus/src/app/mod.rs | 8 ++++++++ nexus/src/app/quiesce.rs | 10 +++++----- nexus/src/lib.rs | 19 +++++++++++++++++++ nexus/types/src/quiesce.rs | 12 ++++++++++++ 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index c8abeac6e0..c9c9e15161 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -621,6 +621,14 @@ impl Nexus { } } + // Waits for Nexus to determine whether sagas are supposed to be quiesced + // + // This is used 
by the test suite because most tests assume that sagas are + // operational as soon as they start. + pub(crate) async fn wait_for_saga_determination(&self) { + self.quiesce.sagas().wait_for_determination().await; + } + pub(crate) async fn external_tls_config( &self, tls_enabled: bool, diff --git a/nexus/src/app/quiesce.rs b/nexus/src/app/quiesce.rs index 17b28e7fc9..cd5e3242ce 100644 --- a/nexus/src/app/quiesce.rs +++ b/nexus/src/app/quiesce.rs @@ -92,13 +92,13 @@ impl NexusQuiesceHandle { } }); + // Immediately (synchronously) update the saga quiesce status. It's + // okay to do this even if there wasn't a change. + self.sagas.set_quiescing(quiescing); + if changed && quiescing { - // Immediately quiesce sagas. - self.sagas.set_quiescing(quiescing); // Asynchronously complete the rest of the quiesce process. - if quiescing { - tokio::spawn(do_quiesce(self.clone())); - } + tokio::spawn(do_quiesce(self.clone())); } } } diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index fc32a4824f..49010a87bf 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -138,6 +138,15 @@ impl Server { // the external server we're about to start. apictx.context.nexus.await_ip_allowlist_plumbing().await; + // Wait until Nexus has determined if sagas are supposed to be quiesced. + // This is not strictly necessary. The goal here is to prevent 503 + // errors to clients that reach this Nexus while it's starting up and + // before it's figured out that it doesn't need to quiesce. The risk of + // doing this is that Nexus gets stuck here, but that should only happen + // if it's unable to load the current blueprint, in which case + // something's pretty wrong and it's likely pretty stuck anyway. + apictx.context.nexus.wait_for_saga_determination().await; + // Launch the external server. let tls_config = apictx .context @@ -332,6 +341,16 @@ impl nexus_test_interface::NexusServer for Server { .await .expect("Could not initialize rack"); + // Now that we have a blueprint, determination of whether sagas are + // quiesced can complete. Wait for that so that tests can assume they + // can immediately kick off sagas. + internal_server + .apictx + .context + .nexus + .wait_for_saga_determination() + .await; + // Start the Nexus external API. Server::start(internal_server).await.unwrap() } diff --git a/nexus/types/src/quiesce.rs b/nexus/types/src/quiesce.rs index 378011be2c..072a4ca3b1 100644 --- a/nexus/types/src/quiesce.rs +++ b/nexus/types/src/quiesce.rs @@ -218,6 +218,18 @@ impl SagaQuiesceHandle { let _ = self.inner.subscribe().wait_for(|q| q.is_fully_drained()).await; } + /// Wait for the initial determination to be made about whether sagas are + /// allowed or not. 
+ pub async fn wait_for_determination(&self) { + let _ = self + .inner + .subscribe() + .wait_for(|q| { + q.new_sagas_allowed != SagasAllowed::DisallowedUnknown + }) + .await; + } + /// Returns information about running sagas (involves a clone) pub fn sagas_pending(&self) -> IdOrdMap { self.inner.borrow().sagas_pending.clone() From 5f43b6042c6e8df23abba9ed3b0c51a2b0b53da1 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 21 Aug 2025 11:47:43 -0700 Subject: [PATCH 04/10] need to activate blueprint loader after inserting initial blueprint --- nexus/src/app/rack.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 6f71055e8d..55e8ecc360 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -740,7 +740,8 @@ impl super::Nexus { // We've potentially updated the list of DNS servers and the DNS // configuration for both internal and external DNS, plus the Silo - // certificates. Activate the relevant background tasks. + // certificates and target blueprint. Activate the relevant background + // tasks. for task in &[ &self.background_tasks.task_internal_dns_config, &self.background_tasks.task_internal_dns_servers, @@ -748,6 +749,7 @@ impl super::Nexus { &self.background_tasks.task_external_dns_servers, &self.background_tasks.task_external_endpoints, &self.background_tasks.task_inventory_collection, + &self.background_tasks.task_blueprint_loader, ] { self.background_tasks.activate(task); } From 127d5a805de428b0f0a0884d16e9868f130b4eb0 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 21 Aug 2025 14:38:51 -0700 Subject: [PATCH 05/10] add the "second" Nexus to the test suite blueprint; fix omdb tests --- common/src/address.rs | 26 +++- dev-tools/omdb/src/bin/omdb/db.rs | 12 +- dev-tools/omdb/tests/successes.out | 182 +++++++++++++++--------- dev-tools/omdb/tests/test_all_output.rs | 42 +++--- nexus/test-utils/src/lib.rs | 99 +++++++++---- 5 files changed, 238 insertions(+), 123 deletions(-) diff --git a/common/src/address.rs b/common/src/address.rs index 92863c44a4..0efc485ae8 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -373,7 +373,9 @@ pub fn get_64_subnet( /// /// The first address in the range is guaranteed to be no greater than the last /// address. -#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize)] +#[derive( + Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize, Ord, PartialOrd, +)] #[serde(untagged)] pub enum IpRange { V4(Ipv4Range), @@ -507,7 +509,16 @@ impl From for IpRange { /// /// The first address must be less than or equal to the last address. #[derive( - Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize, JsonSchema, + Clone, + Copy, + Debug, + PartialEq, + Eq, + Deserialize, + Serialize, + JsonSchema, + PartialOrd, + Ord, )] #[serde(try_from = "AnyIpv4Range")] pub struct Ipv4Range { @@ -571,7 +582,16 @@ impl TryFrom for Ipv4Range { /// /// The first address must be less than or equal to the last address. #[derive( - Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize, JsonSchema, + PartialOrd, + Ord, + Clone, + Copy, + Debug, + PartialEq, + Eq, + Deserialize, + Serialize, + JsonSchema, )] #[serde(try_from = "AnyIpv6Range")] pub struct Ipv6Range { diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 8290739a73..1ca878908b 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -5016,7 +5016,7 @@ async fn cmd_db_dns_diff( // Load the added and removed items. 
use nexus_db_schema::schema::dns_name::dsl; - let added = dsl::dns_name + let mut added = dsl::dns_name .filter(dsl::dns_zone_id.eq(zone.id)) .filter(dsl::version_added.eq(version.version)) .limit(i64::from(u32::from(limit))) @@ -5026,7 +5026,7 @@ async fn cmd_db_dns_diff( .context("loading added names")?; check_limit(&added, limit, || "loading added names"); - let removed = dsl::dns_name + let mut removed = dsl::dns_name .filter(dsl::dns_zone_id.eq(zone.id)) .filter(dsl::version_removed.eq(version.version)) .limit(i64::from(u32::from(limit))) @@ -5042,6 +5042,11 @@ async fn cmd_db_dns_diff( ); println!(""); + // This is kind of stupid-expensive, but there aren't a lot of records + // here and it's helpful for this output to be stable. + added.sort_by_cached_key(|k| format!("{} {:?}", k.name, k.records())); + removed.sort_by_cached_key(|k| format!("{} {:?}", k.name, k.records())); + for a in added { print_name("+", &a.name, a.records().context("parsing records")); } @@ -5097,7 +5102,8 @@ async fn cmd_db_dns_names( } }); - for (name, records) in names { + for (name, mut records) in names { + records.sort(); print_name("", &name, Ok(records)); } } diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 82577f26f5..bb8f36d4ef 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -32,7 +32,9 @@ changes: names added: 3, names removed: 0 + @ NS ns1.oxide-dev.test + ns1 AAAA ::1 -+ test-suite-silo.sys A 127.0.0.1 ++ test-suite-silo.sys (records: 2) ++ A 127.0.0.1 ++ AAAA 100::1 --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable @@ -46,7 +48,9 @@ External zone: oxide-dev.test NAME RECORDS @ NS ns1.oxide-dev.test ns1 AAAA ::1 - test-suite-silo.sys A 127.0.0.1 + test-suite-silo.sys (records: 2) + A 127.0.0.1 + AAAA 100::1 --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable @@ -489,15 +493,21 @@ task: "nat_garbage_collector" task: "blueprint_loader" configured period: every m s - last completed activation: , triggered by a periodic timer firing + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms - last completion reported error: failed to read target blueprint: Internal Error: no target blueprint set + target blueprint: ............. + execution: disabled + created at: + status: first target blueprint task: "blueprint_executor" configured period: every m - last completed activation: , triggered by a periodic timer firing + last completed activation: , triggered by a dependent task completing started at (s ago) and ran for ms - last completion reported error: no blueprint + target blueprint: ............. + execution: disabled + status: (no event report found) + error: (none) task: "abandoned_vmm_reaper" configured period: every m @@ -531,7 +541,18 @@ task: "blueprint_rendezvous" configured period: every m last completed activation: , triggered by a dependent task completing started at (s ago) and ran for ms - last completion reported error: no blueprint + target blueprint: ............. + inventory collection: ..................... 
+ debug_dataset rendezvous counts: + num_inserted: 0 + num_already_exist: 0 + num_not_in_inventory: 0 + num_tombstoned: 0 + num_already_tombstoned: 0 + crucible_dataset rendezvous counts: + num_inserted: 0 + num_already_exist: 0 + num_not_in_inventory: 0 task: "chicken_switches_watcher" configured period: every s @@ -541,9 +562,9 @@ warning: unknown background task: "chicken_switches_watcher" (don't know how to task: "crdb_node_id_collector" configured period: every m - last completed activation: , triggered by a periodic timer firing + last completed activation: , triggered by a dependent task completing started at (s ago) and ran for ms - last completion reported error: no blueprint +warning: unknown background task: "crdb_node_id_collector" (don't know how to interpret details: Object {"errors": Array [Object {"err": String("failed to fetch node ID for zone ..................... at http://[::1]:REDACTED_PORT: Communication Error: error sending request for url (http://[::1]:REDACTED_PORT/node/id): error sending request for url (http://[::1]:REDACTED_PORT/node/id): client error (Connect): tcp connect error: Connection refused (os error 146)"), "zone_id": String(".....................")}], "nsuccess": Number(0)}) task: "decommissioned_disk_cleaner" configured period: every m @@ -844,16 +865,22 @@ stdout: task: "blueprint_loader" configured period: every m s currently executing: no - last completed activation: , triggered by a periodic timer firing + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms - last completion reported error: failed to read target blueprint: Internal Error: no target blueprint set + target blueprint: ............. + execution: disabled + created at: + status: first target blueprint task: "blueprint_executor" configured period: every m currently executing: no - last completed activation: , triggered by a periodic timer firing + last completed activation: , triggered by a dependent task completing started at (s ago) and ran for ms - last completion reported error: no blueprint + target blueprint: ............. + execution: disabled + status: (no event report found) + error: (none) --------------------------------------------- stderr: @@ -1001,16 +1028,22 @@ task: "nat_garbage_collector" task: "blueprint_loader" configured period: every m s currently executing: no - last completed activation: , triggered by a periodic timer firing + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms - last completion reported error: failed to read target blueprint: Internal Error: no target blueprint set + target blueprint: ............. + execution: disabled + created at: + status: first target blueprint task: "blueprint_executor" configured period: every m currently executing: no - last completed activation: , triggered by a periodic timer firing + last completed activation: , triggered by a dependent task completing started at (s ago) and ran for ms - last completion reported error: no blueprint + target blueprint: ............. + execution: disabled + status: (no event report found) + error: (none) task: "abandoned_vmm_reaper" configured period: every m @@ -1049,7 +1082,18 @@ task: "blueprint_rendezvous" currently executing: no last completed activation: , triggered by a dependent task completing started at (s ago) and ran for ms - last completion reported error: no blueprint + target blueprint: ............. + inventory collection: ..................... 
+ debug_dataset rendezvous counts: + num_inserted: 0 + num_already_exist: 0 + num_not_in_inventory: 0 + num_tombstoned: 0 + num_already_tombstoned: 0 + crucible_dataset rendezvous counts: + num_inserted: 0 + num_already_exist: 0 + num_not_in_inventory: 0 task: "chicken_switches_watcher" configured period: every s @@ -1061,9 +1105,9 @@ warning: unknown background task: "chicken_switches_watcher" (don't know how to task: "crdb_node_id_collector" configured period: every m currently executing: no - last completed activation: , triggered by a periodic timer firing + last completed activation: , triggered by a dependent task completing started at (s ago) and ran for ms - last completion reported error: no blueprint +warning: unknown background task: "crdb_node_id_collector" (don't know how to interpret details: Object {"errors": Array [Object {"err": String("failed to fetch node ID for zone ..................... at http://[::1]:REDACTED_PORT: Communication Error: error sending request for url (http://[::1]:REDACTED_PORT/node/id): error sending request for url (http://[::1]:REDACTED_PORT/node/id): client error (Connect): tcp connect error: Connection refused (os error 146)"), "zone_id": String(".....................")}], "nsuccess": Number(0)}) task: "decommissioned_disk_cleaner" configured period: every m @@ -1355,53 +1399,6 @@ task: "webhook_deliverator" stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ ============================================= -EXECUTING COMMAND: omdb ["nexus", "chicken-switches", "show", "current"] -termination: Exited(0) ---------------------------------------------- -stdout: -No chicken switches enabled ---------------------------------------------- -stderr: -note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ -============================================= -EXECUTING COMMAND: omdb ["-w", "nexus", "chicken-switches", "set", "--planner-enabled", "true"] -termination: Exited(0) ---------------------------------------------- -stdout: -chicken switches updated to version 1: - planner enabled: true - planner switches: - add zones with mupdate override: true ---------------------------------------------- -stderr: -note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ -============================================= -EXECUTING COMMAND: omdb ["-w", "nexus", "chicken-switches", "set", "--add-zones-with-mupdate-override", "false"] -termination: Exited(0) ---------------------------------------------- -stdout: -chicken switches updated to version 2: - planner enabled: true (unchanged) - planner switches: - * add zones with mupdate override: true -> false ---------------------------------------------- -stderr: -note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ -============================================= -EXECUTING COMMAND: omdb ["nexus", "chicken-switches", "show", "current"] -termination: Exited(0) ---------------------------------------------- -stdout: -Reconfigurator chicken switches: - version: 2 - modified time: - planner enabled: true - planner switches: - add zones with mupdate override: false ---------------------------------------------- -stderr: -note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ -============================================= EXECUTING COMMAND: omdb ["nexus", "sagas", "list"] termination: Exited(0) --------------------------------------------- @@ -1538,6 +1535,7 @@ parent: oxp_...................../crypt/zone/oxz_external_dns_..................... ..................... 
in service none none off oxp_...................../crypt/zone/oxz_internal_dns_..................... ..................... in service none none off oxp_...................../crypt/zone/oxz_nexus_..................... ..................... in service none none off + oxp_...................../crypt/zone/oxz_nexus_..................... ..................... in service none none off oxp_...................../crypt/zone/oxz_ntp_..................... ..................... in service none none off @@ -1552,6 +1550,7 @@ parent: external_dns ..................... install dataset in service ::1 internal_dns ..................... install dataset in service ::1 nexus ..................... install dataset in service ::ffff:127.0.0.1 + nexus ..................... install dataset in service ::1 COCKROACHDB SETTINGS: @@ -1662,6 +1661,7 @@ parent: oxp_...................../crypt/zone/oxz_external_dns_..................... ..................... in service none none off oxp_...................../crypt/zone/oxz_internal_dns_..................... ..................... in service none none off oxp_...................../crypt/zone/oxz_nexus_..................... ..................... in service none none off + oxp_...................../crypt/zone/oxz_nexus_..................... ..................... in service none none off oxp_...................../crypt/zone/oxz_ntp_..................... ..................... in service none none off @@ -1676,6 +1676,7 @@ parent: external_dns ..................... install dataset in service ::1 internal_dns ..................... install dataset in service ::1 nexus ..................... install dataset in service ::ffff:127.0.0.1 + nexus ..................... install dataset in service ::1 COCKROACHDB SETTINGS: @@ -1739,6 +1740,53 @@ stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ Error: `blueprint2_id` was not specified and blueprint1 has no parent ============================================= +EXECUTING COMMAND: omdb ["nexus", "chicken-switches", "show", "current"] +termination: Exited(0) +--------------------------------------------- +stdout: +No chicken switches enabled +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["-w", "nexus", "chicken-switches", "set", "--planner-enabled", "true"] +termination: Exited(0) +--------------------------------------------- +stdout: +chicken switches updated to version 1: + planner enabled: true + planner switches: + add zones with mupdate override: true +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["-w", "nexus", "chicken-switches", "set", "--add-zones-with-mupdate-override", "false"] +termination: Exited(0) +--------------------------------------------- +stdout: +chicken switches updated to version 2: + planner enabled: true (unchanged) + planner switches: + * add zones with mupdate override: true -> false +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "chicken-switches", "show", "current"] +termination: Exited(0) +--------------------------------------------- +stdout: +Reconfigurator chicken switches: + version: 2 + modified time: + planner enabled: true + planner switches: + add zones with mupdate override: 
false +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= EXECUTING COMMAND: omdb ["reconfigurator", "export", ""] termination: Exited(0) --------------------------------------------- diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index a46f218472..2dc7bdbb2a 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -208,27 +208,6 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { &["nexus", "background-tasks", "show", "dns_internal"], &["nexus", "background-tasks", "show", "dns_external"], &["nexus", "background-tasks", "show", "all"], - // chicken switches: show and set - &["nexus", "chicken-switches", "show", "current"], - &[ - "-w", - "nexus", - "chicken-switches", - "set", - "--planner-enabled", - "true", - ], - &[ - "-w", - "nexus", - "chicken-switches", - "set", - "--add-zones-with-mupdate-override", - "false", - ], - // After the set commands above, we should see chicken switches - // populated. - &["nexus", "chicken-switches", "show", "current"], &["nexus", "sagas", "list"], &["--destructive", "nexus", "sagas", "demo-create"], &["nexus", "sagas", "list"], @@ -251,6 +230,27 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { ], // This one should fail because it has no parent. &["nexus", "blueprints", "diff", &initial_blueprint_id], + // chicken switches: show and set + &["nexus", "chicken-switches", "show", "current"], + &[ + "-w", + "nexus", + "chicken-switches", + "set", + "--planner-enabled", + "true", + ], + &[ + "-w", + "nexus", + "chicken-switches", + "set", + "--add-zones-with-mupdate-override", + "false", + ], + // After the set commands above, we should see chicken switches + // populated. + &["nexus", "chicken-switches", "show", "current"], &["reconfigurator", "export", tmppath.as_str()], // We can't easily test the sled agent output because that's only // provided by a real sled agent, which is not available in the diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index e61b281536..01b4e83ee7 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -303,8 +303,6 @@ pub fn load_test_config() -> NexusConfig { // - the CockroachDB TCP listen port be 0, and // - if the log will go to a file then the path must be the sentinel value // "UNUSED". - // - each Nexus created for testing gets its own id so they don't see each - // others sagas and try to recover them // // (See LogContext::new() for details.) Given these restrictions, it may // seem barely worth reading a config file at all. However, developers can @@ -312,10 +310,8 @@ pub fn load_test_config() -> NexusConfig { // configuration options, we expect many of those can be usefully configured // (and reconfigured) for the test suite. 
let config_file_path = Utf8Path::new("tests/config.test.toml"); - let mut config = NexusConfig::from_file(config_file_path) - .expect("failed to load config.test.toml"); - config.deployment.id = OmicronZoneUuid::new_v4(); - config + NexusConfig::from_file(config_file_path) + .expect("failed to load config.test.toml") } pub async fn test_setup( @@ -835,47 +831,97 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { 0, ); - let mac = self - .rack_init_builder - .mac_addrs - .next() - .expect("ran out of MAC addresses"); - let external_address = - self.config.deployment.dropshot_external.dropshot.bind_address.ip(); - let nexus_id = self.config.deployment.id; self.rack_init_builder.add_service_to_dns( - nexus_id, + self.config.deployment.id, address, ServiceName::Nexus, ); + self.record_nexus_zone(self.config.clone(), address, 0); + self.nexus_internal = Some(nexus_internal); + self.nexus_internal_addr = Some(nexus_internal_addr); + + // Besides the Nexus that we just started, add an entry in the blueprint + // for the Nexus that developers can start using + // nexus/examples/config-second.toml. + // + // The details in its BlueprintZoneType mostly don't matter because + // those are mostly used for DNS (which we don't usually need here) and + // to tell sled agent how to start the zone (which isn't what's going on + // here). But it does need to be present for it to be able to determine + // on startup if it needs to quiesce. + let second_nexus_config_path = + Utf8Path::new(env!("CARGO_MANIFEST_DIR")) + .join("../examples/config-second.toml"); + let mut second_nexus_config = + NexusConfig::from_file(&second_nexus_config_path).unwrap(); + // Okay, this is particularly awful. The system does not allow multiple + // zones to use the same external IP -- makes sense. But it actually is + // fine here because the IP is localhost and we're using host + // networking, and we've already ensured that the ports will be unique. + // Avoid tripping up the validation by using some other IP. This won't + // be used for anything. Pick something that's not in use anywhere + // else. This range is guaranteed by RFC 6666 to discard traffic. 
+ second_nexus_config + .deployment + .dropshot_external + .dropshot + .bind_address + .set_ip("100::1".parse().unwrap()); + let SocketAddr::V6(second_internal_address) = + second_nexus_config.deployment.dropshot_internal.bind_address + else { + panic!( + "expected IPv6 address for dropshot_internal in \ + nexus/examples/config-second.toml" + ); + }; + self.record_nexus_zone(second_nexus_config, second_internal_address, 1); + Ok(()) + } + fn record_nexus_zone( + &mut self, + config: NexusConfig, + internal_address: SocketAddrV6, + which: usize, + ) { + let id = config.deployment.id; + let mac = self + .rack_init_builder + .mac_addrs + .next() + .expect("ran out of MAC addresses"); self.blueprint_zones.push(BlueprintZoneConfig { disposition: BlueprintZoneDisposition::InService, - id: nexus_id, + id, filesystem_pool: ZpoolName::new_external(ZpoolUuid::new_v4()), zone_type: BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { - external_dns_servers: self - .config + external_dns_servers: config .deployment .external_dns_servers .clone(), external_ip: OmicronZoneExternalFloatingIp { id: ExternalIpUuid::new_v4(), - ip: external_address, + ip: config + .deployment + .dropshot_external + .dropshot + .bind_address + .ip(), }, - external_tls: self.config.deployment.dropshot_external.tls, - internal_address: address, + external_tls: config.deployment.dropshot_external.tls, + internal_address, nic: NetworkInterface { id: Uuid::new_v4(), ip: NEXUS_OPTE_IPV4_SUBNET - .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1) + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES + 1 + which) .unwrap() .into(), kind: NetworkInterfaceKind::Service { - id: nexus_id.into_untyped_uuid(), + id: id.into_untyped_uuid(), }, mac, - name: format!("nexus-{}", nexus_id).parse().unwrap(), + name: format!("nexus-{}", id).parse().unwrap(), primary: true, slot: 0, subnet: (*NEXUS_OPTE_IPV4_SUBNET).into(), @@ -886,11 +932,6 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { }), image_source: BlueprintZoneImageSource::InstallDataset, }); - - self.nexus_internal = Some(nexus_internal); - self.nexus_internal_addr = Some(nexus_internal_addr); - - Ok(()) } pub async fn populate_internal_dns(&mut self) { From fe85a10acfd930e668640e657348f9ccbba2df27 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Fri, 22 Aug 2025 16:27:02 -0700 Subject: [PATCH 06/10] review feedback --- nexus/reconfigurator/execution/src/lib.rs | 5 +- nexus/src/app/quiesce.rs | 21 +++++-- nexus/types/src/quiesce.rs | 67 +++++++++++------------ 3 files changed, 51 insertions(+), 42 deletions(-) diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index b9f83912bc..a4f802b3c9 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -615,8 +615,9 @@ fn register_reassign_sagas_step<'a>( Ok(saga_quiesce .reassign_sagas(async || { // For any expunged Nexus zones, re-assign in-progress - // sagas to some other Nexus. If this fails for some - // reason, it doesn't affect anything else. + // sagas to `nexus_id` (which, in practice, is + // ourselves). If this fails for some reason, it + // doesn't affect anything else. 
let sec_id = nexus_db_model::SecId::from(nexus_id); let reassigned = sagas::reassign_sagas_from_expunged( opctx, datastore, blueprint, sec_id, diff --git a/nexus/src/app/quiesce.rs b/nexus/src/app/quiesce.rs index cd5e3242ce..3b36ec9799 100644 --- a/nexus/src/app/quiesce.rs +++ b/nexus/src/app/quiesce.rs @@ -80,13 +80,22 @@ impl NexusQuiesceHandle { *q = new_state; true } - QuiesceState::Running if quiescing => { - info!(&self.log, "quiesce starting"); - *q = new_state; - true + QuiesceState::Running => { + if quiescing { + info!(&self.log, "quiesce starting"); + *q = new_state; + true + } else { + // We're not quiescing and not being asked to quiesce. + // Nothing to do. + false + } } - _ => { - // All other cases are either impossible or no-ops. + QuiesceState::DrainingSagas { .. } + | QuiesceState::DrainingDb { .. } + | QuiesceState::RecordingQuiesce { .. } + | QuiesceState::Quiesced { .. } => { + // Once we start quiescing, we never go back. false } } diff --git a/nexus/types/src/quiesce.rs b/nexus/types/src/quiesce.rs index 072a4ca3b1..7c2b3ad42d 100644 --- a/nexus/types/src/quiesce.rs +++ b/nexus/types/src/quiesce.rs @@ -163,14 +163,13 @@ impl SagaQuiesceHandle { /// cannot then re-enable sagas. pub fn set_quiescing(&self, quiescing: bool) { self.inner.send_if_modified(|q| { - let new_state = if quiescing { - SagasAllowed::DisallowedQuiesce - } else { - SagasAllowed::Allowed - }; - match q.new_sagas_allowed { SagasAllowed::DisallowedUnknown => { + let new_state = if quiescing { + SagasAllowed::DisallowedQuiesce + } else { + SagasAllowed::Allowed + }; info!( &self.log, "initial quiesce state"; @@ -179,23 +178,25 @@ impl SagaQuiesceHandle { q.new_sagas_allowed = new_state; true } - SagasAllowed::Allowed if quiescing => { - info!(&self.log, "saga quiesce starting"); - q.new_sagas_allowed = SagasAllowed::DisallowedQuiesce; - true + SagasAllowed::Allowed => { + if quiescing { + info!(&self.log, "saga quiesce starting"); + q.new_sagas_allowed = SagasAllowed::DisallowedQuiesce; + true + } else { + false + } } - SagasAllowed::DisallowedQuiesce if !quiescing => { - // This should be impossible. Report a problem. - error!( - &self.log, - "asked to stop quiescing after previously quiescing" - ); - false - } - _ => { - // There's no transition happening in these cases: - // - SagasAllowed::Allowed and we're not quiescing - // - SagasAllowed::DisallowedQuiesce and we're now quiescing + SagasAllowed::DisallowedQuiesce => { + if !quiescing { + // This should be impossible. Report a problem. + error!( + &self.log, + "asked to stop quiescing after previously quiescing" + ); + } + + // Either way, we're not changing anything. 
false } } @@ -393,7 +394,7 @@ impl SagaQuiesceHandle { saga_name: &steno::SagaName, ) -> Result { let mut error: Option = None; - let okay = self.inner.send_if_modified(|q| { + self.inner.send_if_modified(|q| { match q.new_sagas_allowed { SagasAllowed::Allowed => (), SagasAllowed::DisallowedQuiesce => { @@ -417,7 +418,15 @@ impl SagaQuiesceHandle { true }); - if okay { + if let Some(error) = error { + info!( + &self.log, + "disallowing saga creation"; + "saga_id" => saga_id.to_string(), + InlineErrorChain::new(&error), + ); + Err(error) + } else { let log = self.log.new(o!("saga_id" => saga_id.to_string())); info!(&log, "tracking newly created saga"); Ok(NewlyPendingSagaRef { @@ -426,16 +435,6 @@ impl SagaQuiesceHandle { saga_id, init_finished: false, }) - } else { - let error = - error.expect("error is always set when disallowing sagas"); - info!( - &self.log, - "disallowing saga creation"; - "saga_id" => saga_id.to_string(), - InlineErrorChain::new(&error), - ); - Err(error) } } From b8d3ee30a1196de33497fa7eab055404765fdb88 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Fri, 22 Aug 2025 17:03:18 -0700 Subject: [PATCH 07/10] fix tests on GNU/Linux --- dev-tools/omdb/tests/successes.out | 4 ++-- dev-tools/omdb/tests/test_all_output.rs | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index bb8f36d4ef..3c47bcc6c5 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -564,7 +564,7 @@ task: "crdb_node_id_collector" configured period: every m last completed activation: , triggered by a dependent task completing started at (s ago) and ran for ms -warning: unknown background task: "crdb_node_id_collector" (don't know how to interpret details: Object {"errors": Array [Object {"err": String("failed to fetch node ID for zone ..................... at http://[::1]:REDACTED_PORT: Communication Error: error sending request for url (http://[::1]:REDACTED_PORT/node/id): error sending request for url (http://[::1]:REDACTED_PORT/node/id): client error (Connect): tcp connect error: Connection refused (os error 146)"), "zone_id": String(".....................")}], "nsuccess": Number(0)}) +warning: unknown background task: "crdb_node_id_collector" (don't know how to interpret details: Object {"errors": Array [Object {"err": String("failed to fetch node ID for zone ..................... at http://[::1]:REDACTED_PORT: Communication Error: error sending request for url (http://[::1]:REDACTED_PORT/node/id): error sending request for url (http://[::1]:REDACTED_PORT/node/id): client error (Connect): tcp connect error: Connection refused (os error )"), "zone_id": String(".....................")}], "nsuccess": Number(0)}) task: "decommissioned_disk_cleaner" configured period: every m @@ -1107,7 +1107,7 @@ task: "crdb_node_id_collector" currently executing: no last completed activation: , triggered by a dependent task completing started at (s ago) and ran for ms -warning: unknown background task: "crdb_node_id_collector" (don't know how to interpret details: Object {"errors": Array [Object {"err": String("failed to fetch node ID for zone ..................... 
at http://[::1]:REDACTED_PORT: Communication Error: error sending request for url (http://[::1]:REDACTED_PORT/node/id): error sending request for url (http://[::1]:REDACTED_PORT/node/id): client error (Connect): tcp connect error: Connection refused (os error 146)"), "zone_id": String(".....................")}], "nsuccess": Number(0)}) +warning: unknown background task: "crdb_node_id_collector" (don't know how to interpret details: Object {"errors": Array [Object {"err": String("failed to fetch node ID for zone ..................... at http://[::1]:REDACTED_PORT: Communication Error: error sending request for url (http://[::1]:REDACTED_PORT/node/id): error sending request for url (http://[::1]:REDACTED_PORT/node/id): client error (Connect): tcp connect error: Connection refused (os error )"), "zone_id": String(".....................")}], "nsuccess": Number(0)}) task: "decommissioned_disk_cleaner" configured period: every m diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 2dc7bdbb2a..11db63156a 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -264,7 +264,9 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { .extra_variable_length( "cockroachdb_fingerprint", &initial_blueprint.cockroachdb_fingerprint, - ); + ) + // Error numbers vary between operating systems. + .field("os error", r"\d+"); let crdb_version = initial_blueprint.cockroachdb_setting_preserve_downgrade.to_string(); From a6f2f63b07005b46a0e159bfef1f85cd01bfe899 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Fri, 22 Aug 2025 17:43:43 -0700 Subject: [PATCH 08/10] fix end to end dns test --- dev-tools/omicron-dev/src/main.rs | 4 ++-- nexus/test-utils/src/lib.rs | 23 ++++++++++++++++++++++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/dev-tools/omicron-dev/src/main.rs b/dev-tools/omicron-dev/src/main.rs index 456154243d..6c8f22f947 100644 --- a/dev-tools/omicron-dev/src/main.rs +++ b/dev-tools/omicron-dev/src/main.rs @@ -95,8 +95,8 @@ impl RunAllArgs { println!("omicron-dev: services are running."); // Print out basic information about what was started. - // NOTE: The stdout strings here are not intended to be stable, but they are - // used by the test suite. + // NOTE: The stdout strings here are not intended to be stable, but they + // are used by the test suite. let addr = cptestctx.external_client.bind_address; println!("omicron-dev: nexus external API: {:?}", addr); println!( diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 01b4e83ee7..38ec064de8 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -839,7 +839,12 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { self.record_nexus_zone(self.config.clone(), address, 0); self.nexus_internal = Some(nexus_internal); self.nexus_internal_addr = Some(nexus_internal_addr); + Ok(()) + } + pub async fn configure_second_nexus(&mut self) { + let log = &self.logctx.log; + debug!(log, "Configuring second Nexus (not to run)"); // Besides the Nexus that we just started, add an entry in the blueprint // for the Nexus that developers can start using // nexus/examples/config-second.toml. 
@@ -876,7 +881,6 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { ); }; self.record_nexus_zone(second_nexus_config, second_internal_address, 1); - Ok(()) } fn record_nexus_zone( @@ -1656,6 +1660,7 @@ pub async fn omicron_dev_setup_with_config( sim::SimMode::Auto, None, extra_sled_agents, + true, ) .await) } @@ -1675,6 +1680,7 @@ pub async fn test_setup_with_config( sim_mode, initial_cert, extra_sled_agents, + false, ) .await } @@ -1685,6 +1691,7 @@ async fn setup_with_config_impl( sim_mode: sim::SimMode, initial_cert: Option, extra_sled_agents: u16, + second_nexus: bool, ) -> ControlPlaneTestContext { const STEP_TIMEOUT: Duration = Duration::from_secs(60); @@ -1818,6 +1825,20 @@ async fn setup_with_config_impl( ) .await; + if second_nexus { + builder + .init_with_steps( + vec![( + "configure_second_nexus", + Box::new(|builder| { + builder.configure_second_nexus().boxed() + }), + )], + STEP_TIMEOUT, + ) + .await; + } + // The first and second sled agents have special UUIDs, and any extra ones // after that are random. From b09b83f3470923bf00dcea052f18120b5aad2050 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Sat, 23 Aug 2025 14:38:37 -0700 Subject: [PATCH 09/10] fix omdb test --- dev-tools/omdb/tests/successes.out | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 3c47bcc6c5..a5cdb0c791 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -32,9 +32,7 @@ changes: names added: 3, names removed: 0 + @ NS ns1.oxide-dev.test + ns1 AAAA ::1 -+ test-suite-silo.sys (records: 2) -+ A 127.0.0.1 -+ AAAA 100::1 ++ test-suite-silo.sys A 127.0.0.1 --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable @@ -48,9 +46,7 @@ External zone: oxide-dev.test NAME RECORDS @ NS ns1.oxide-dev.test ns1 AAAA ::1 - test-suite-silo.sys (records: 2) - A 127.0.0.1 - AAAA 100::1 + test-suite-silo.sys A 127.0.0.1 --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable @@ -1535,7 +1531,6 @@ parent: oxp_...................../crypt/zone/oxz_external_dns_..................... ..................... in service none none off oxp_...................../crypt/zone/oxz_internal_dns_..................... ..................... in service none none off oxp_...................../crypt/zone/oxz_nexus_..................... ..................... in service none none off - oxp_...................../crypt/zone/oxz_nexus_..................... ..................... in service none none off oxp_...................../crypt/zone/oxz_ntp_..................... ..................... in service none none off @@ -1550,7 +1545,6 @@ parent: external_dns ..................... install dataset in service ::1 internal_dns ..................... install dataset in service ::1 nexus ..................... install dataset in service ::ffff:127.0.0.1 - nexus ..................... install dataset in service ::1 COCKROACHDB SETTINGS: @@ -1661,7 +1655,6 @@ parent: oxp_...................../crypt/zone/oxz_external_dns_..................... ..................... in service none none off oxp_...................../crypt/zone/oxz_internal_dns_..................... ..................... in service none none off oxp_...................../crypt/zone/oxz_nexus_..................... ..................... 
in service none none off - oxp_...................../crypt/zone/oxz_nexus_..................... ..................... in service none none off oxp_...................../crypt/zone/oxz_ntp_..................... ..................... in service none none off @@ -1676,7 +1669,6 @@ parent: external_dns ..................... install dataset in service ::1 internal_dns ..................... install dataset in service ::1 nexus ..................... install dataset in service ::ffff:127.0.0.1 - nexus ..................... install dataset in service ::1 COCKROACHDB SETTINGS: From 912ea8f2040ce38e9df0483e29f6049de6c3a6ad Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Mon, 25 Aug 2025 10:53:24 -0700 Subject: [PATCH 10/10] add test that Nexus quiesces when reading a blueprint saying so --- nexus/tests/integration_tests/mod.rs | 1 + nexus/tests/integration_tests/quiesce.rs | 139 +++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 nexus/tests/integration_tests/quiesce.rs diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index 497585cceb..c0ea06dcb7 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -35,6 +35,7 @@ mod pantry; mod password_login; mod probe; mod projects; +mod quiesce; mod quotas; mod rack; mod role_assignments; diff --git a/nexus/tests/integration_tests/quiesce.rs b/nexus/tests/integration_tests/quiesce.rs new file mode 100644 index 0000000000..d5ef6bb7e1 --- /dev/null +++ b/nexus/tests/integration_tests/quiesce.rs @@ -0,0 +1,139 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use anyhow::{Context, anyhow}; +use nexus_auth::context::OpContext; +use nexus_client::types::QuiesceState; +use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; +use nexus_reconfigurator_planning::planner::PlannerRng; +use nexus_reconfigurator_preparation::PlanningInputFromDb; +use nexus_test_interface::NexusServer; +use nexus_test_utils_macros::nexus_test; +use nexus_types::deployment::BlueprintTargetSet; +use nexus_types::deployment::PlannerChickenSwitches; +use omicron_common::api::external::Error; +use omicron_test_utils::dev::poll::CondCheckError; +use omicron_test_utils::dev::poll::wait_for_condition; +use omicron_uuid_kinds::GenericUuid; +use std::time::Duration; + +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + +/// Tests that Nexus quiesces when the blueprint says that it should +#[nexus_test] +async fn test_quiesce(cptestctx: &ControlPlaneTestContext) { + let log = &cptestctx.logctx.log; + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests(log.clone(), datastore.clone()); + let nexus_internal_url = format!( + "http://{}", + cptestctx.server.get_http_server_internal_address().await + ); + let nexus_client = + nexus_client::Client::new(&nexus_internal_url, log.clone()); + + // Collect what we need to modify the blueprint. 
+ let collection = wait_for_condition( + || async { + let collection = datastore + .inventory_get_latest_collection(&opctx) + .await + .map_err(CondCheckError::Failed)?; + match collection { + Some(s) => Ok(s), + None => Err(CondCheckError::::NotYet), + } + }, + &Duration::from_secs(1), + &Duration::from_secs(60), + ) + .await + .expect("initial inventory collection"); + + let chicken_switches = datastore + .reconfigurator_chicken_switches_get_latest(&opctx) + .await + .expect("obtained latest chicken switches") + .map_or_else(PlannerChickenSwitches::default, |cs| { + cs.switches.planner_switches + }); + let planning_input = PlanningInputFromDb::assemble( + &opctx, + &datastore, + chicken_switches, + None, + ) + .await + .expect("planning input"); + let target_blueprint = nexus + .blueprint_target_view(&opctx) + .await + .expect("fetch current target config"); + let blueprint1 = nexus + .blueprint_view(&opctx, *target_blueprint.target_id.as_untyped_uuid()) + .await + .expect("fetch current target blueprint"); + + // Now, update the target blueprint to reflect that Nexus should quiesce. + // We don't need it to be enabled to still reflect quiescing. + let mut builder = BlueprintBuilder::new_based_on( + log, + &blueprint1, + &planning_input, + &collection, + "test-suite", + PlannerRng::from_entropy(), + ) + .expect("creating BlueprintBuilder"); + builder + .set_nexus_generation( + blueprint1.nexus_generation, + blueprint1.nexus_generation.next(), + ) + .expect("failed to set blueprint's Nexus generation"); + let blueprint2 = builder.build(); + nexus + .blueprint_import(&opctx, blueprint2.clone()) + .await + .expect("importing new blueprint"); + nexus + .blueprint_target_set( + &opctx, + BlueprintTargetSet { enabled: false, target_id: blueprint2.id }, + ) + .await + .expect("setting new target"); + + // Wait for Nexus to quiesce. + let _ = wait_for_condition( + || async { + let quiesce = nexus_client + .quiesce_get() + .await + .context("fetching quiesce state") + .map_err(CondCheckError::Failed)? + .into_inner(); + eprintln!("quiesce state: {:#?}\n", quiesce); + match quiesce.state { + QuiesceState::Undetermined => { + Err(CondCheckError::Failed(anyhow!( + "quiesce state should have been determined before \ + test started" + ))) + } + QuiesceState::Running => Err(CondCheckError::NotYet), + QuiesceState::DrainingSagas { .. } + | QuiesceState::DrainingDb { .. } + | QuiesceState::RecordingQuiesce { .. } + | QuiesceState::Quiesced { .. } => Ok(()), + } + }, + &Duration::from_millis(50), + &Duration::from_secs(30), + ) + .await + .expect("Nexus should have quiesced"); +}
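
As a companion to the state names this series introduces, below is a minimal, stand-alone sketch of the forward-only quiesce progression that the updated QuiesceState schema describes (undetermined → running → draining_sagas → draining_db → recording_quiesce → quiesced). It is only an illustration under assumed names: the QuiesceStage enum and advance function are hypothetical and do not appear in these patches, and the real transitions in nexus/src/app/quiesce.rs and nexus/types/src/quiesce.rs carry request timestamps and per-stage durations that are omitted here.

// Simplified model of the forward-only quiesce progression. Deriving Ord in
// declaration order lets "never go back to an earlier stage" be expressed as
// a plain comparison.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum QuiesceStage {
    Undetermined,
    Running,
    DrainingSagas,
    DrainingDb,
    RecordingQuiesce,
    Quiesced,
}

/// Move to `next` only if it is not an earlier stage than `current`.
fn advance(current: QuiesceStage, next: QuiesceStage) -> QuiesceStage {
    if next > current { next } else { current }
}

fn main() {
    let mut stage = QuiesceStage::Undetermined;
    for requested in [
        QuiesceStage::Running,
        QuiesceStage::DrainingSagas,
        QuiesceStage::Running, // a late "un-quiesce" request is ignored
        QuiesceStage::DrainingDb,
        QuiesceStage::RecordingQuiesce,
        QuiesceStage::Quiesced,
    ] {
        stage = advance(stage, requested);
        println!("{stage:?}");
    }
    assert_eq!(stage, QuiesceStage::Quiesced);
}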