diff --git a/Cargo.lock b/Cargo.lock index f61caae2532..4cf350b9dc2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -131,7 +131,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -142,7 +142,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -1704,7 +1704,7 @@ version = "3.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] @@ -2230,7 +2230,7 @@ source = "git+https://github.com/oxidecomputer/crucible?rev=7103cd3a3d7b0112d294 dependencies = [ "crucible-workspace-hack", "libc", - "num-derive 0.4.2", + "num-derive", "num-traits", "thiserror 2.0.18", ] @@ -3629,7 +3629,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -5760,7 +5760,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi 0.5.2", "libc", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -5837,7 +5837,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -6176,12 +6176,12 @@ dependencies = [ [[package]] name = "libscf-sys" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12f02d0eda38e8cc453c5ec5d49945545d8d9eb0e59cb2ce4152ba6518f373e7" +checksum = "d0d7bd6cfd9b5d32738cebd83a1b68060d96b1ca10d88bf9a5cb10dfac0f1cdf" dependencies = [ "libc", - "num-derive 0.3.3", + "num-derive", "num-traits", ] @@ -7969,7 +7969,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -8028,17 +8028,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" -[[package]] -name = "num-derive" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "876a53fff98e03a936a674b29568b0e605f06b29372c2489ff4de23f1949743d" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "num-derive" version = "0.4.2" @@ -11439,9 +11428,9 @@ dependencies = [ [[package]] name = "proptest" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37566cb3fdacef14c0737f9546df7cfeadbfbc9fef10991038bf5015d0c80532" +checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744" dependencies = [ "bit-set 0.8.0", "bit-vec 0.8.0", @@ -12494,7 +12483,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -12597,7 +12586,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -13702,6 +13691,32 @@ dependencies = [ "tufaceous-artifact", ] +[[package]] +name = "sled-agent-scrimlet-reconcilers" +version = "0.1.0" +dependencies = [ + "assert_matches", + "chrono", + "dpd-client 0.1.0 (git+https://github.com/oxidecomputer/dendrite?rev=d147f0925d14ed6f00eb37cb81f3a1bcbcb3c7f3)", + "dropshot 0.17.0", + "gateway-client", + "gateway-messages", + "gateway-test-utils", + "gateway-types", + "httpmock", + "mg-admin-client", + "omicron-common", + "omicron-test-utils", + "omicron-workspace-hack", + "reqwest 0.13.2", + "serde_json", + "sled-agent-types", + "slog", + "slog-error-chain", + "thiserror 2.0.18", + "tokio", +] + [[package]] name = "sled-agent-types" version = "0.1.0" @@ -14129,7 +14144,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -14743,7 +14758,7 @@ dependencies = [ "getrandom 0.4.1", "once_cell", "rustix 1.1.3", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -14763,7 +14778,7 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8c27177b12a6399ffc08b98f76f7c9a1f4fe9fc967c784c5a071fa8d93cf7e1" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -17112,7 +17127,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 3e5adc43e60..d9d389d8a12 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -147,6 +147,7 @@ members = [ "sled-agent/bootstrap-agent-lockstep-api", "sled-agent/bootstrap-agent-lockstep-types", "sled-agent/config-reconciler", + "sled-agent/scrimlet-reconcilers", "sled-agent/health-monitor", "sled-agent/measurements", "sled-agent/rack-setup", @@ -333,6 +334,7 @@ default-members = [ "sled-agent/bootstrap-agent-lockstep-api", "sled-agent/bootstrap-agent-lockstep-types", "sled-agent/config-reconciler", + "sled-agent/scrimlet-reconcilers", "sled-agent/health-monitor", "sled-agent/measurements", "sled-agent/rack-setup", @@ -727,7 +729,7 @@ propolis_api_types = { git = "https://github.com/oxidecomputer/propolis", rev = propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "bc489ddf0f38f75e0c194b86cf6f0de377f68845" } propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "bc489ddf0f38f75e0c194b86cf6f0de377f68845" } # NOTE: see above! -proptest = "1.7.0" +proptest = "1.11.0" qorb = "0.4.1" quote = "1.0" # Some dependencies still require rand 0.8.x. @@ -792,6 +794,7 @@ sled-agent-config-reconciler = { path = "sled-agent/config-reconciler" } sled-agent-health-monitor = { path = "sled-agent/health-monitor" } sled-agent-measurements = { path = "sled-agent/measurements" } sled-agent-rack-setup = { path = "sled-agent/rack-setup" } +sled-agent-scrimlet-reconcilers = { path = "sled-agent/scrimlet-reconcilers" } sled-agent-types = { path = "sled-agent/types" } sled-agent-types-versions = { path = "sled-agent/types/versions" } sled-agent-resolvable-files = { path = "sled-agent/resolvable-files" } diff --git a/sled-agent/scrimlet-reconcilers/Cargo.toml b/sled-agent/scrimlet-reconcilers/Cargo.toml new file mode 100644 index 00000000000..533574a1772 --- /dev/null +++ b/sled-agent/scrimlet-reconcilers/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "sled-agent-scrimlet-reconcilers" +version = "0.1.0" +edition.workspace = true +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +chrono.workspace = true +dpd-client.workspace = true +gateway-client.workspace = true +gateway-types.workspace = true +mg-admin-client.workspace = true +omicron-common.workspace = true +reqwest.workspace = true +sled-agent-types.workspace = true +slog.workspace = true +slog-error-chain.workspace = true +thiserror.workspace = true +tokio.workspace = true + +omicron-workspace-hack.workspace = true + +[dev-dependencies] +assert_matches.workspace = true +dropshot.workspace = true +gateway-messages.workspace = true +gateway-test-utils.workspace = true +httpmock.workspace = true +omicron-test-utils.workspace = true +serde_json.workspace = true + +[features] +testing = [] diff --git a/sled-agent/scrimlet-reconcilers/src/dpd_reconciler.rs b/sled-agent/scrimlet-reconcilers/src/dpd_reconciler.rs new file mode 100644 index 00000000000..ce642d72222 --- /dev/null +++ b/sled-agent/scrimlet-reconcilers/src/dpd_reconciler.rs @@ -0,0 +1,48 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Reconciler responsible for configuration of `dpd` within a scrimlet's switch +//! zone. + +use crate::handle::ScrimletReconcilersMode; +use crate::reconciler_task::Reconciler; +use crate::switch_zone_slot::ThisSledSwitchSlot; +use dpd_client::Client; +use sled_agent_types::system_networking::SystemNetworkingConfig; +use slog::Logger; +use std::time::Duration; + +#[derive(Debug, Clone)] +pub struct DpdReconcilerStatus { + pub todo_status: (), +} + +#[derive(Debug)] +pub(crate) struct DpdReconciler { + _client: Client, + _switch_slot: ThisSledSwitchSlot, +} + +impl Reconciler for DpdReconciler { + type Status = DpdReconcilerStatus; + + const LOGGER_COMPONENT_NAME: &'static str = "DpdReconciler"; + const RE_RECONCILE_INTERVAL: Duration = Duration::from_secs(30); + + fn new( + mode: ScrimletReconcilersMode, + switch_slot: ThisSledSwitchSlot, + parent_log: &Logger, + ) -> Self { + Self { _client: mode.dpd_client(parent_log), _switch_slot: switch_slot } + } + + async fn do_reconciliation( + &mut self, + _system_networking_config: &SystemNetworkingConfig, + _log: &Logger, + ) -> Self::Status { + DpdReconcilerStatus { todo_status: () } + } +} diff --git a/sled-agent/scrimlet-reconcilers/src/handle.rs b/sled-agent/scrimlet-reconcilers/src/handle.rs new file mode 100644 index 00000000000..8509109b29f --- /dev/null +++ b/sled-agent/scrimlet-reconcilers/src/handle.rs @@ -0,0 +1,401 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! [`ScrimletReconcilers`] is the entry point into this crate for `sled-agent`; +//! it provides a suitable handle for `sled-agent`'s "long running tasks", and +//! contains a handle to each of the inner service-specific reconcilers. + +use crate::DetermineSwitchSlotStatus; +use crate::dpd_reconciler::DpdReconciler; +use crate::mgd_reconciler::MgdReconciler; +use crate::reconciler_task::ReconcilerTaskHandle; +use crate::status::ScrimletReconcilersStatus; +use crate::status::ScrimletStatus; +use crate::switch_zone_slot::ThisSledSwitchSlot; +use crate::uplinkd_reconciler::UplinkdReconciler; +use omicron_common::address::DENDRITE_PORT; +use omicron_common::address::MGD_PORT; +use omicron_common::address::MGS_PORT; +use sled_agent_types::sled::ThisSledSwitchZoneUnderlayIpAddr; +use sled_agent_types::system_networking::SystemNetworkingConfig; +use slog::Logger; +use slog::info; +use std::net::SocketAddr; +use std::net::SocketAddrV6; +use std::sync::Arc; +use std::sync::OnceLock; +use std::time::Duration; +use tokio::sync::watch; + +/// Mode in which the scrimlet reconcilers should run. +/// +/// This exists to support tests where we don't have a real switch zone like +/// we expect to have on real hardware. The production sled-agent will always +/// pass `SwitchZone(ip)`; in this mode, we'll run reconcilers that talk to +/// services at the provided IP with their well-known ports and that communicate +/// with SMF within the switch zone. In the `Test { .. }` mode, we'll point the +/// reconcilers at the specified addresses for some services, and won't run the +/// SMF-based reconcilers at all. +#[derive(Debug, Clone, Copy)] +pub enum ScrimletReconcilersMode { + SwitchZone(ThisSledSwitchZoneUnderlayIpAddr), + #[cfg(any(test, feature = "testing"))] + Test { + mgs_addr: SocketAddr, + dpd_addr: SocketAddr, + mgd_addr: SocketAddr, + }, +} + +impl ScrimletReconcilersMode { + // Build a `reqwest` client with different timeout settings depending on + // whether we're expecting to contact a real service or a test one. + fn reqwest_client(&self) -> reqwest::Client { + match self { + ScrimletReconcilersMode::SwitchZone(_) => { + // Build a custom reqwest client, primarily to set a lower + // `pool_idle_timeout`. dropshot's default connection timeout is + // 30 seconds. We want to ensure we don't hit + // in any + // reconcilers that try to re-reconcile on a 30 second interval, + // so we choose a much lower `pool_idle_timeout`: 10 seconds is + // long enough to reuse a connection for all the requests made + // during one reconciliation pass, but is short enough we should + // discard it before the server wants to time us out. + // + // The 15 second connect and read timeout are consistent with + // progenitor's normal defaults. + reqwest::ClientBuilder::new() + .connect_timeout(Duration::from_secs(15)) + .read_timeout(Duration::from_secs(15)) + .pool_idle_timeout(Duration::from_secs(10)) + .build() + .expect("reqwest parameters are valid") + } + #[cfg(any(test, feature = "testing"))] + ScrimletReconcilersMode::Test { .. } => { + // Some of our tests use tokio's paused time. We want to + // construct a reqwest client that does not specify any timeouts + // at all, allowing it to wait forever; this plays nicely with + // paused time. (Paused time + timeouts cause the timeouts to + // elapse instantly, which doesn't mesh well with establishing + // TCP connections.) + reqwest::Client::new() + } + } + } + + pub(crate) fn mgs_client( + self, + parent_log: &Logger, + ) -> gateway_client::Client { + let addr: SocketAddr = match self { + ScrimletReconcilersMode::SwitchZone(ip) => { + SocketAddrV6::new(ip.into(), MGS_PORT, 0, 0).into() + } + #[cfg(any(test, feature = "testing"))] + ScrimletReconcilersMode::Test { mgs_addr, .. } => mgs_addr, + }; + let baseurl = format!("http://{addr}"); + gateway_client::Client::new_with_client( + &baseurl, + self.reqwest_client(), + parent_log + .new(slog::o!("component" => "ThisSledSwitchSlotMgsClient")), + ) + } + + pub(crate) fn dpd_client(self, parent_log: &Logger) -> dpd_client::Client { + use omicron_common::OMICRON_DPD_TAG; + + let addr: SocketAddr = match self { + ScrimletReconcilersMode::SwitchZone(ip) => { + SocketAddrV6::new(ip.into(), DENDRITE_PORT, 0, 0).into() + } + #[cfg(any(test, feature = "testing"))] + ScrimletReconcilersMode::Test { dpd_addr, .. } => dpd_addr, + }; + let baseurl = format!("http://{addr}"); + dpd_client::Client::new_with_client( + &baseurl, + self.reqwest_client(), + dpd_client::ClientState { + tag: OMICRON_DPD_TAG.to_owned(), + log: parent_log + .new(slog::o!("component" => "DpdReconcilerClient")), + }, + ) + } + + pub(crate) fn mgd_client( + self, + parent_log: &Logger, + ) -> mg_admin_client::Client { + let addr: SocketAddr = match self { + ScrimletReconcilersMode::SwitchZone(ip) => { + SocketAddrV6::new(ip.into(), MGD_PORT, 0, 0).into() + } + #[cfg(any(test, feature = "testing"))] + ScrimletReconcilersMode::Test { mgd_addr, .. } => mgd_addr, + }; + let baseurl = format!("http://{addr}"); + mg_admin_client::Client::new_with_client( + &baseurl, + self.reqwest_client(), + parent_log.new(slog::o!("component" => "MgdReconcilerClient")), + ) + } +} + +/// Information required to enable all the scrimlet reconciler tasks provided +/// by this crate. +#[derive(Debug, Clone)] +pub struct SledAgentNetworkingInfo { + pub system_networking_config_rx: watch::Receiver, + pub mode: ScrimletReconcilersMode, +} + +/// Handle to tasks that reconcile network configuration with services within a +/// scrimlet's local switch zone. +/// +/// [`ScrimletReconcilers`] has a two- or three-phase initialization process +/// (depending on whether the sled is a non-scrimlet or a scrimlet) to support +/// being included in `sled-agent`'s set of "long running tasks". +/// [`ScrimletReconcilers::new()`] can be constructed at any time (in +/// particular: very soon after `sled-agent` starts). +/// +/// After a [`ScrimletReconcilers`] has been created, `sled-agent` is +/// responsible for calling +/// [`ScrimletReconcilers::set_sled_agent_networking_info_once()`] exactly one +/// time: this provides the reconcilers with a watch channel to receive the +/// current [`SystemNetworkingConfig`] and the IP address of this sled's switch +/// zone (should it have one). +/// +/// After `sled-agent` has provided those prerequisites, on all sleds, +/// [`ScrimletReconcilers`] spawns a tokio task that waits until both of these +/// have occurred: +/// +/// 1. [`ScrimletReconcilers::set_scrimlet_status()`] is called with +/// [`ScrimletStatus::Scrimlet`]. +/// 2. We successfully contact MGS within our switch zone to determine which +/// switch slot we are. +/// +/// On non-scrimlet sleds, step 1 never happens, so the reconciler tasks are +/// never spawned. +/// +/// Once both of these are satisfied on scrimlets, all reconciliation tasks are +/// spawned and begin running. They will reactivate periodically and in response +/// to any changes to the [`SystemNetworkingConfig`] from sled-agent, and will +/// go inert if [`ScrimletReconcilers::set_scrimlet_status()`] is called with +/// [`ScrimletStatus::NotScrimlet`] (remaining inert until we are told we are a +/// scrimlet again). +pub struct ScrimletReconcilers { + // Sending half of the channel used to communicate to all the reconcilers + // whether we're still a scrimlet. + scrimlet_status_tx: watch::Sender, + + // These once locks hold the second and third phases of initialization + // described in the doc comment above. + determining_switch_slot: + OnceLock>, + running_reconcilers: Arc>, + + parent_log: Logger, +} + +impl ScrimletReconcilers { + pub fn new(parent_log: &Logger) -> Self { + // We discard the receiver here, and create new subscribers if and when + // we spawn tasks that need to consume it. + let (scrimlet_status_tx, _scrimlet_status_rx) = + watch::channel(ScrimletStatus::NotScrimlet); + + Self { + scrimlet_status_tx, + determining_switch_slot: OnceLock::new(), + running_reconcilers: Arc::new(OnceLock::new()), + parent_log: parent_log.clone(), + } + } + + pub fn status(&self) -> ScrimletReconcilersStatus { + // Do we have running reconcilers? If so, report their status. + if let Some(running) = self.running_reconcilers.get() { + let RunningReconcilers { + dpd_reconciler, + mgd_reconciler, + uplinkd_reconciler, + } = running; + ScrimletReconcilersStatus::Running { + dpd_reconciler: dpd_reconciler.status(), + mgd_reconciler: mgd_reconciler.status(), + uplinkd_reconciler: uplinkd_reconciler.status(), + } + } + // Otherwise, have we started determining our switch slot? + else if let Some(status_rx) = self.determining_switch_slot.get() { + ScrimletReconcilersStatus::DeterminingSwitchSlot( + status_rx.borrow().clone(), + ) + } + // Otherwise, we're still waiting for the networking info. + else { + ScrimletReconcilersStatus::WaitingForSledAgentNetworkingInfo + } + } + + /// Set whether this sled is a scrimlet or not. + /// + /// This doesn't change _much_ at runtime, but it can: we may start out "not + /// a scrimlet" and then discover an attached switch later after boot, at + /// which point we become a scrimlet, or we may become "not a scrimlet" if + /// the switch is detached at runtime. + pub fn set_scrimlet_status(&self, status: ScrimletStatus) { + self.scrimlet_status_tx.send_if_modified(|prev| { + if *prev == status { + false + } else { + *prev = status; + true + } + }); + } + + /// Provide the networking information necessary to start the scrimlet + /// reconciler tasks. + /// + /// # Panics + /// + /// This method panics if called more than once; this is considered a + /// programmer error. + /// + /// One of the bits inside `info` is a watch channel: receiving a second + /// channel is a sign of control flow gone very wrong, as all the tasks will + /// already be operating based on the first channel received. + pub fn set_sled_agent_networking_info_once( + &self, + info: SledAgentNetworkingInfo, + ) { + let (determining_switch_slot_tx, determining_switch_slot_rx) = + watch::channel(DetermineSwitchSlotStatus::NotScrimlet); + + // Ensure we're only called once. + if self.determining_switch_slot.set(determining_switch_slot_rx).is_err() + { + panic!( + "set_sled_agent_networking_info_once() called more than \ + once - scrimlet reconcilers are already set up and \ + running based on the initial information provided!" + ); + } + + // We now know this is the one and only time we've been called; spawn a + // task that waits until we're a scrimlet, then waits until we can + // determine our switch slot, then spawns all the running reconcilers + // (populating `self.running_reconcilers`). + // + // We don't hang on to the join handle from this task; it exits either + // when it populates `self.running_reconcilers`, or when we're dropped + // (because it will exit when `self.scrimlet_status_tx` is closed). + tokio::spawn(determine_switch_slot( + Arc::clone(&self.running_reconcilers), + determining_switch_slot_tx, + self.scrimlet_status_tx.subscribe(), + info.mode.mgs_client(&self.parent_log), + info, + self.parent_log.clone(), + )); + } +} + +async fn determine_switch_slot( + running_reconcilers: Arc>, + determining_switch_slot_tx: watch::Sender, + mut scrimlet_status_rx: watch::Receiver, + mgs_client: gateway_client::Client, + networking_info: SledAgentNetworkingInfo, + parent_log: Logger, +) { + let log = parent_log + .new(slog::o!("component" => "ThisSledSwitchSlotDetermination")); + + // Block until either we successfully contact MGS at our switch zone + // underlay IP or the sending half of `scrimlet_status_rx` is closed (which + // means our parent `ScrimletReconcilers` was dropped - this only happens in + // tests). + let this_sled_switch_slot = + match ThisSledSwitchSlot::determine_retrying_forever( + determining_switch_slot_tx, + &mut scrimlet_status_rx, + &mgs_client, + &log, + ) + .await + { + Ok(slot) => slot, + Err(_recv_error) => { + return; + } + }; + info!( + log, "determined this sled's switch slot"; + "slot" => ?this_sled_switch_slot, + ); + + // We know `running_reconcilers` must be unset, because we're the only one + // that sets it, and we're only spawned from + // `set_sled_agent_networking_info_once()` (which itself guarantees it's + // only called once). + running_reconcilers + .set(RunningReconcilers::spawn_all( + scrimlet_status_rx, + networking_info, + this_sled_switch_slot, + &parent_log, + )) + .expect("running reconcilers is only set once"); +} + +#[derive(Debug)] +struct RunningReconcilers { + dpd_reconciler: ReconcilerTaskHandle, + mgd_reconciler: ReconcilerTaskHandle, + uplinkd_reconciler: ReconcilerTaskHandle, +} + +impl RunningReconcilers { + fn spawn_all( + scrimlet_status_rx: watch::Receiver, + networking_info: SledAgentNetworkingInfo, + this_sled_switch_slot: ThisSledSwitchSlot, + parent_log: &Logger, + ) -> Self { + let dpd_reconciler = ReconcilerTaskHandle::::spawn( + scrimlet_status_rx.clone(), + networking_info.system_networking_config_rx.clone(), + networking_info.mode, + this_sled_switch_slot, + parent_log, + ); + let mgd_reconciler = ReconcilerTaskHandle::::spawn( + scrimlet_status_rx.clone(), + networking_info.system_networking_config_rx.clone(), + networking_info.mode, + this_sled_switch_slot, + parent_log, + ); + let uplinkd_reconciler = + ReconcilerTaskHandle::::spawn( + scrimlet_status_rx, + networking_info.system_networking_config_rx, + networking_info.mode, + this_sled_switch_slot, + parent_log, + ); + Self { dpd_reconciler, mgd_reconciler, uplinkd_reconciler } + } +} + +#[cfg(test)] +mod tests; diff --git a/sled-agent/scrimlet-reconcilers/src/handle/tests.rs b/sled-agent/scrimlet-reconcilers/src/handle/tests.rs new file mode 100644 index 00000000000..54439c1bb10 --- /dev/null +++ b/sled-agent/scrimlet-reconcilers/src/handle/tests.rs @@ -0,0 +1,426 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::net::Ipv6Addr; +use std::time::Duration; + +use super::*; +use assert_matches::assert_matches; +use dropshot::ConfigLogging; +use dropshot::ConfigLoggingLevel; +use dropshot::test_util::LogContext; +use gateway_messages::SpPort; +use gateway_test_utils::setup::GatewayTestContext; +use httpmock::Mock; +use httpmock::MockServer; +use omicron_test_utils::dev; +use sled_agent_types::early_networking::RackNetworkConfig; + +// For "happy path" tests, we spin up a real MGS instances (pointed at a +// simulated SP). +// +// For "sad path" tests, we use a mock MGS so we can inject failures. +trait MgsFlavor { + fn address(&self) -> SocketAddr; + async fn teardown(self); +} + +impl MgsFlavor for GatewayTestContext { + fn address(&self) -> SocketAddr { + SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::LOCALHOST, self.port, 0, 0)) + } + + async fn teardown(self) { + GatewayTestContext::teardown(self).await + } +} + +impl MgsFlavor for MockServer { + fn address(&self) -> SocketAddr { + *MockServer::address(self) + } + + async fn teardown(self) {} +} + +struct Harness { + handle: ScrimletReconcilers, + networking_config_tx: watch::Sender, + mgs: T, + logctx: LogContext, +} + +impl Harness { + fn new_common(logctx: LogContext, mgs: T) -> Self { + let (networking_config_tx, _) = + watch::channel(SystemNetworkingConfig { + rack_network_config: RackNetworkConfig { + rack_subnet: "fd00:1122:3344:0100::/56".parse().unwrap(), + infra_ip_first: "192.0.2.10".parse().unwrap(), + infra_ip_last: "192.0.2.100".parse().unwrap(), + ports: Vec::new(), + bgp: Vec::new(), + bfd: Vec::new(), + }, + service_zone_nat_entries: None, + }); + + let handle = ScrimletReconcilers::new(&logctx.log); + + Self { handle, networking_config_tx, mgs, logctx } + } + + async fn teardown(self) { + self.logctx.cleanup_successful(); + self.mgs.teardown().await; + } + + fn sled_agent_networking_info(&self) -> SledAgentNetworkingInfo { + let dummy_addr = "0.0.0.0:0".parse().unwrap(); + SledAgentNetworkingInfo { + system_networking_config_rx: self.networking_config_tx.subscribe(), + mode: ScrimletReconcilersMode::Test { + mgs_addr: self.mgs.address(), + dpd_addr: dummy_addr, + mgd_addr: dummy_addr, + }, + } + } + + async fn wait_for_task_status(&self, matches: F) + where + F: Fn(&ScrimletReconcilersStatus) -> bool, + { + let mut status = self.handle.status(); + let start = tokio::time::Instant::now(); + while start.elapsed() < Duration::from_secs(30) { + if matches(&status) { + return; + } + tokio::time::sleep(Duration::from_millis(10)).await; + status = self.handle.status(); + } + panic!("timeout waiting for task status (got {status:?})"); + } +} + +impl Harness { + async fn new_real_mgs(logctx: LogContext) -> Self { + let ctx = gateway_test_utils::setup::test_setup( + logctx.test_name(), + SpPort::One, + ) + .await; + Self::new_common(logctx, ctx) + } +} + +impl Harness { + fn new_mock_mgs(logctx: LogContext) -> Self { + Self::new_common(logctx, MockServer::start()) + } + + fn mock_mgs_set_switch_slot(&self, slot: u16) -> Mock<'_> { + let body = + serde_json::json!({ "type": "switch", "slot": slot }).to_string(); + self.mgs.mock(|when, then| { + when.method(httpmock::Method::GET).path("/local/switch-id"); + then.status(200) + .header("content-type", "application/json") + .body(body); + }) + } + + fn mock_mgs_set_503(&self) -> Mock<'_> { + self.mgs.mock(|when, then| { + when.method(httpmock::Method::GET).path("/local/switch-id"); + then.status(503).header("content-type", "application/json").body( + serde_json::json!({ + "request_id": "test", + "message": "service unavailable", + }) + .to_string(), + ); + }) + } + + async fn wait_for_mock_to_be_called(&self, mock: &Mock<'_>) { + // Tricky bit: we use tokio's paused time in tests below both for + // consistency and test speed, but we need to wait for _real_ time for + // httpmock to receive requests. Use `std::time::Instant` here so we + // wait for 10 real seconds for a request to be received. + let start = std::time::Instant::now(); + while start.elapsed() < Duration::from_secs(10) { + if mock.calls_async().await == 0 { + // advance paused time... + tokio::time::sleep(Duration::from_secs(1)).await; + // and also _really_ sleep briefly to avoid spinning 100% CPU + std::thread::sleep(Duration::from_millis(10)); + continue; + } + + // We got at least 1 call; assert and return. + return mock.assert_async().await; + } + + panic!("timed out wait for mock to be called"); + } +} + +#[tokio::test] +#[should_panic( + expected = "set_sled_agent_networking_info_once() called more than once" +)] +async fn calling_set_sled_agent_networking_info_once_multiple_times_panics() { + // Set up a stderr logger - we're going to panic and won't have the + // opportunity to clean up a file-based one. + let log_config = + ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Trace }; + let logctx = LogContext::new( + "calling_set_sled_agent_networking_info_once_multiple_times_panics", + &log_config, + ); + let harness = Harness::new_mock_mgs(logctx); + + harness.handle.set_sled_agent_networking_info_once( + harness.sled_agent_networking_info(), + ); + harness.handle.set_sled_agent_networking_info_once( + harness.sled_agent_networking_info(), + ); +} + +// Happy-path test for non-scrimlets. +#[tokio::test] +async fn non_scrimlet_two_phase_initialization() { + let logctx = dev::test_setup_log("non_scrimlet_two_phase_initialization"); + let harness = Harness::new_mock_mgs(logctx); + + // initial status + assert_matches!( + harness.handle.status(), + ScrimletReconcilersStatus::WaitingForSledAgentNetworkingInfo + ); + + harness.handle.set_sled_agent_networking_info_once( + harness.sled_agent_networking_info(), + ); + + // terminal status for non-scrimlets + assert_matches!( + harness.handle.status(), + ScrimletReconcilersStatus::DeterminingSwitchSlot( + DetermineSwitchSlotStatus::NotScrimlet + ) + ); + + harness.teardown().await; +} + +// Happy-path test for scrimlets where we find out we're a scrimlet after +// getting our networking info. +#[tokio::test] +async fn scrimlet_three_phase_initialization_info_then_scrimlet() { + let logctx = dev::test_setup_log( + "scrimlet_three_phase_initialization_info_then_scrimlet", + ); + let harness = Harness::new_real_mgs(logctx).await; + + // initial status + assert_matches!( + harness.handle.status(), + ScrimletReconcilersStatus::WaitingForSledAgentNetworkingInfo + ); + + harness.handle.set_sled_agent_networking_info_once( + harness.sled_agent_networking_info(), + ); + + // status before we become a scrimlet + assert_matches!( + harness.handle.status(), + ScrimletReconcilersStatus::DeterminingSwitchSlot( + DetermineSwitchSlotStatus::NotScrimlet + ) + ); + + harness.handle.set_scrimlet_status(ScrimletStatus::Scrimlet); + + harness + .wait_for_task_status(|status| { + matches!(status, ScrimletReconcilersStatus::Running { .. }) + }) + .await; + + harness.teardown().await; +} + +// Happy-path test for scrimlets where we find out we're a scrimlet before +// getting our networking info. +#[tokio::test] +async fn scrimlet_three_phase_initialization_scrimlet_then_info() { + let logctx = dev::test_setup_log( + "scrimlet_three_phase_initialization_scrimlet_then_info", + ); + let harness = Harness::new_real_mgs(logctx).await; + + // Become a scrimlet right away. + harness.handle.set_scrimlet_status(ScrimletStatus::Scrimlet); + + // initial status + assert_matches!( + harness.handle.status(), + ScrimletReconcilersStatus::WaitingForSledAgentNetworkingInfo + ); + + harness.handle.set_sled_agent_networking_info_once( + harness.sled_agent_networking_info(), + ); + + // We're already a scrimlet, so we contact MGS then transition to Running. + harness + .wait_for_task_status(|status| { + matches!(status, ScrimletReconcilersStatus::Running { .. }) + }) + .await; + + harness.teardown().await; +} + +// Scrimlet test case where MGS fails the first time we ask then succeeds later. +#[tokio::test(start_paused = true)] +async fn scrimlet_mgs_fails_first_attempt() { + let logctx = dev::test_setup_log("scrimlet_mgs_fails_first_attempt"); + let harness = Harness::new_mock_mgs(logctx); + + // Configure our mock MGS to return an HTTP 503. + let mut error_mock = harness.mock_mgs_set_503(); + + // initial status + assert_matches!( + harness.handle.status(), + ScrimletReconcilersStatus::WaitingForSledAgentNetworkingInfo + ); + + harness.handle.set_sled_agent_networking_info_once( + harness.sled_agent_networking_info(), + ); + + // status before we become a scrimlet + assert_matches!( + harness.handle.status(), + ScrimletReconcilersStatus::DeterminingSwitchSlot( + DetermineSwitchSlotStatus::NotScrimlet + ) + ); + + harness.handle.set_scrimlet_status(ScrimletStatus::Scrimlet); + + // Wait for our mock to receive a request and return a 503; we should then + // see the `WaitingToRetry` state with an error. + harness.wait_for_mock_to_be_called(&error_mock).await; + // We should first see the "waiting to retry" status... + harness + .wait_for_task_status(|status| { + matches!( + status, + ScrimletReconcilersStatus::DeterminingSwitchSlot( + DetermineSwitchSlotStatus::WaitingToRetry { + prev_attempt_err, + } + ) if prev_attempt_err.contains("status: 503") + ) + }) + .await; + + // Changing MGS to return success should transition us to `Running`. + error_mock.delete(); + let success_mock = harness.mock_mgs_set_switch_slot(0); + + harness.wait_for_mock_to_be_called(&success_mock).await; + harness + .wait_for_task_status(|status| { + matches!(status, ScrimletReconcilersStatus::Running { .. }) + }) + .await; + + harness.teardown().await; +} + +// Scrimlet test case where MGS fails, then we become "not a scrimlet", then we +// become a scrimlet again and MGS succeeds. +#[tokio::test(start_paused = true)] +async fn scrimlet_mgs_fails_then_we_become_not_a_scrimlet() { + let logctx = + dev::test_setup_log("scrimlet_mgs_fails_then_we_become_not_a_scrimlet"); + let harness = Harness::new_mock_mgs(logctx); + + // Configure our mock MGS to return an HTTP 503. + let mut error_mock = harness.mock_mgs_set_503(); + + // initial status + assert_matches!( + harness.handle.status(), + ScrimletReconcilersStatus::WaitingForSledAgentNetworkingInfo + ); + + harness.handle.set_sled_agent_networking_info_once( + harness.sled_agent_networking_info(), + ); + + // status before we become a scrimlet + assert_matches!( + harness.handle.status(), + ScrimletReconcilersStatus::DeterminingSwitchSlot( + DetermineSwitchSlotStatus::NotScrimlet + ) + ); + + harness.handle.set_scrimlet_status(ScrimletStatus::Scrimlet); + + // Wait until we've failed to determine our switch slot. + harness.wait_for_mock_to_be_called(&error_mock).await; + harness + .wait_for_task_status(|status| { + matches!( + status, + ScrimletReconcilersStatus::DeterminingSwitchSlot( + DetermineSwitchSlotStatus::WaitingToRetry { + prev_attempt_err, + } + ) if prev_attempt_err.contains("status: 503") + ) + }) + .await; + + // Now sled-agent tells us we're not a scrimlet; we should transition back + // to `NotScrimlet`. + harness.handle.set_scrimlet_status(ScrimletStatus::NotScrimlet); + harness + .wait_for_task_status(|status| { + matches!( + status, + ScrimletReconcilersStatus::DeterminingSwitchSlot( + DetermineSwitchSlotStatus::NotScrimlet + ) + ) + }) + .await; + + // Reconfigure MGS to succeed. + error_mock.delete(); + let success_mock = harness.mock_mgs_set_switch_slot(0); + + // Become a scrimlet again - we should transition through `ContactingMgs` + // (with no previous error) then to `Running`. + harness.handle.set_scrimlet_status(ScrimletStatus::Scrimlet); + harness.wait_for_mock_to_be_called(&success_mock).await; + harness + .wait_for_task_status(|status| { + matches!(status, ScrimletReconcilersStatus::Running { .. }) + }) + .await; + + harness.teardown().await; +} diff --git a/sled-agent/scrimlet-reconcilers/src/lib.rs b/sled-agent/scrimlet-reconcilers/src/lib.rs new file mode 100644 index 00000000000..0a5de4a3fe8 --- /dev/null +++ b/sled-agent/scrimlet-reconcilers/src/lib.rs @@ -0,0 +1,67 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! This crate implements long-running reconciler tasks responsible for +//! configuration of services within a scrimlet's switch zone. +//! +//! These tasks _only_ talk to services on the same sled as the sled-agent +//! executing these tasks; we attempt to ensure this at runtime via types like +//! [`ThisSledSwitchZoneUnderlayIpAddr`]. A scrimlet running these tasks should +//! never attempt to talk to another scrimlet's switch zone, and a non-scrimlet +//! running these tasks should never attempt to talk to anything. (Non-scrimlet +//! sleds still create a [`ScrimletReconcilers`] handle, as sled-agent can't +//! easily tell the difference between "not a scrimlet because we're in a +//! different cubby" and "not a scrimlet because we should have a switch but it +//! isn't connected / isn't powered on / etc.", but the reconciliation tasks are +//! only spawned under conditions that only scrimlets can satisfy; see +//! [`ScrimletReconcilers`] for more detail.) +//! +//! These tasks are responsible for applying system-level networking, including: +//! +//! * Configuration of uplink ports within `dpd` +//! * Configuration of NAT entries for system-level services (boundary NTP, +//! Nexus, external DNS - notably _not_ instances) within `dpd` +//! * Configuration of BGP within `mgd` +//! * Configuration of BFD within `mgd` +//! * Configuration of static routes within `mgd` +//! * Configuration of SMF properties for `uplinkd` and `lldpd` +//! +//! The specific configuration that should be applied comes from Nexus (or RSS, +//! at rack setup time) and is sent to `sled-agent` via the bootstore. +//! +//! In the past, responsibility for this configuration was split: sled-agent was +//! responsible for applying an initial config on sled boot (required for cold +//! boot of the rack), and Nexus was responsible for continuously keeping the +//! config in sync afterwards. This had a variety of problems; see +//! . The split is now +//! that Nexus is responsible for maintaining what the configuration should be, +//! and each scrimlet is responsible for applying that configuration to its own +//! switch zone's services; the latter is implemented via this crate. +//! +//! [`ThisSledSwitchZoneUnderlayIpAddr`]: +//! sled_agent_types::sled::ThisSledSwitchZoneUnderlayIpAddr + +mod dpd_reconciler; +mod handle; +mod mgd_reconciler; +mod reconciler_task; +mod status; +mod switch_zone_slot; +mod uplinkd_reconciler; + +pub use dpd_reconciler::DpdReconcilerStatus; +pub use handle::ScrimletReconcilers; +pub use handle::ScrimletReconcilersMode; +pub use handle::SledAgentNetworkingInfo; +pub use mgd_reconciler::MgdReconcilerStatus; +pub use status::DetermineSwitchSlotStatus; +pub use status::ReconcilerActivationReason; +pub use status::ReconcilerCurrentStatus; +pub use status::ReconcilerInertReason; +pub use status::ReconcilerRunningStatus; +pub use status::ReconcilerStatus; +pub use status::ReconciliationCompletedStatus; +pub use status::ScrimletReconcilersStatus; +pub use status::ScrimletStatus; +pub use uplinkd_reconciler::UplinkdReconcilerStatus; diff --git a/sled-agent/scrimlet-reconcilers/src/mgd_reconciler.rs b/sled-agent/scrimlet-reconcilers/src/mgd_reconciler.rs new file mode 100644 index 00000000000..3d4d5664713 --- /dev/null +++ b/sled-agent/scrimlet-reconcilers/src/mgd_reconciler.rs @@ -0,0 +1,48 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Reconciler responsible for configuration of `mgd` within a scrimlet's switch +//! zone. + +use crate::ScrimletReconcilersMode; +use crate::reconciler_task::Reconciler; +use crate::switch_zone_slot::ThisSledSwitchSlot; +use mg_admin_client::Client; +use sled_agent_types::system_networking::SystemNetworkingConfig; +use slog::Logger; +use std::time::Duration; + +#[derive(Debug, Clone)] +pub struct MgdReconcilerStatus { + pub todo_status: (), +} + +#[derive(Debug)] +pub(crate) struct MgdReconciler { + _client: Client, + _switch_slot: ThisSledSwitchSlot, +} + +impl Reconciler for MgdReconciler { + type Status = MgdReconcilerStatus; + + const LOGGER_COMPONENT_NAME: &'static str = "MgdReconciler"; + const RE_RECONCILE_INTERVAL: Duration = Duration::from_secs(30); + + fn new( + mode: ScrimletReconcilersMode, + switch_slot: ThisSledSwitchSlot, + parent_log: &Logger, + ) -> Self { + Self { _client: mode.mgd_client(parent_log), _switch_slot: switch_slot } + } + + async fn do_reconciliation( + &mut self, + _system_networking_config: &SystemNetworkingConfig, + _log: &Logger, + ) -> Self::Status { + MgdReconcilerStatus { todo_status: () } + } +} diff --git a/sled-agent/scrimlet-reconcilers/src/reconciler_task.rs b/sled-agent/scrimlet-reconcilers/src/reconciler_task.rs new file mode 100644 index 00000000000..dbbb2429d4f --- /dev/null +++ b/sled-agent/scrimlet-reconcilers/src/reconciler_task.rs @@ -0,0 +1,303 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! The general framework for one of the service-specific reconciler tasks +//! implemented in this crate. +//! +//! Each reconciler follows the same structure: +//! +//! 1. If we ever stop being a scrimlet (i.e., the attached switch goes away), +//! go inert until we become a scrimlet again (i.e., the switch reappears). +//! This can happen during sidecar updates if it powers off briefly to reset +//! internal FPGAs, or in a variety of other less common and more rainy-day +//! situations. +//! 2. Periodically or when the `SystemNetworkingConfig` changes, perform +//! service-specific reconciliation. This is provided by implementors of the +//! [`Reconciler`] trait elsewhere in this crate. +//! 3. Report status of this task in an output watch channel, suitable for +//! reporting in the sled-agent inventory. +//! +//! [`ReconcilerTask::run()`] handles 1 and 3, and service-specific +//! implementations of [`Reconciler`] provide 2. + +use crate::handle::ScrimletReconcilersMode; +use crate::status::ReconcilerActivationReason; +use crate::status::ReconcilerCurrentStatus; +use crate::status::ReconcilerInertReason; +use crate::status::ReconcilerRunningStatus; +use crate::status::ReconcilerStatus; +use crate::status::ReconciliationCompletedStatus; +use crate::status::ScrimletStatus; +use crate::switch_zone_slot::ThisSledSwitchSlot; +use chrono::Utc; +use sled_agent_types::system_networking::SystemNetworkingConfig; +use slog::Logger; +use slog::error; +use slog::info; +use std::convert::Infallible; +use std::time::Duration; +use tokio::sync::watch; +use tokio::sync::watch::error::RecvError; +use tokio::task::JoinHandle; + +/// Trait that should be implemented by the service-specific reconciler tasks +/// elsewhere in this crate. +pub(crate) trait Reconciler: Send + 'static { + type Status: Clone + Send + Sync + 'static; + + const LOGGER_COMPONENT_NAME: &'static str; + const RE_RECONCILE_INTERVAL: Duration; + + /// Construct a new instance of this `Reconciler`. + /// + /// Typically builds a client for the relevant service based on `mode` and + /// record `switch_slot` for use inside future calls to + /// `do_reconciliation()`. + fn new( + mode: ScrimletReconcilersMode, + switch_slot: ThisSledSwitchSlot, + parent_log: &Logger, + ) -> Self; + + /// Perform any required reconciliation based on the current contents of + /// `system_networking_config`. + /// + /// This method is infallible; any errors must be described by + /// `Self::Status`. + fn do_reconciliation( + &mut self, + system_networking_config: &SystemNetworkingConfig, + log: &Logger, + ) -> impl Future + Send; +} + +#[derive(Debug)] +pub(crate) struct ReconcilerTaskHandle { + status_rx: watch::Receiver>, + + // We never wait on this task: the only way it exits is by panicking (which + // results in a process abort, since we build sled-agent with panic=abort) + // or if the watch channels it's reading are dropped, which itself can only + // happen by a panic elsewhere. + _task: JoinHandle<()>, +} + +impl ReconcilerTaskHandle { + pub(crate) fn spawn( + scrimlet_status_rx: watch::Receiver, + system_networking_config_rx: watch::Receiver, + mode: ScrimletReconcilersMode, + this_sled_switch_slot: ThisSledSwitchSlot, + parent_log: &Logger, + ) -> Self { + Self::spawn_impl( + scrimlet_status_rx, + system_networking_config_rx, + mode, + this_sled_switch_slot, + parent_log, + T::new, + ) + } + + // Separate, private function that allows unit tests to customize how `T` is + // constructed. Production passes `T::new` as `inner_constructor`; i.e., + // just call the constructor we know exists from the `Reconciler` trait. + fn spawn_impl( + scrimlet_status_rx: watch::Receiver, + system_networking_config_rx: watch::Receiver, + mode: ScrimletReconcilersMode, + this_sled_switch_slot: ThisSledSwitchSlot, + parent_log: &Logger, + inner_constructor: F, + ) -> Self + where + F: FnOnce(ScrimletReconcilersMode, ThisSledSwitchSlot, &Logger) -> T + + Send + + Sync + + 'static, + { + let (status_tx, status_rx) = watch::channel(ReconcilerStatus { + current_status: ReconcilerCurrentStatus::Idle, + last_completion: None, + }); + + let log = + parent_log.new(slog::o!("component" => T::LOGGER_COMPONENT_NAME)); + + let mut inner_task = ReconcilerTask { + scrimlet_status_rx, + system_networking_config_rx, + status_tx, + inner: inner_constructor(mode, this_sled_switch_slot, parent_log), + log, + }; + + let task = tokio::spawn(async move { + match inner_task.run().await { + // `inner_task.run()` runs forever... + Ok(never_returns) => match never_returns {}, + + // ... unless one of its input watch channels has closed. + Err(_recv_error) => { + inner_task.status_tx.send_modify(|status| { + status.current_status = ReconcilerCurrentStatus::Inert( + ReconcilerInertReason::TaskExitedUnexpectedly, + ); + }); + error!( + inner_task.log, + "exited due to watch channel closure \ + (unexpected except during shutdown in tests)" + ); + } + } + }); + + Self { status_rx, _task: task } + } + + pub(crate) fn status(&self) -> ReconcilerStatus { + self.status_rx.borrow().clone() + } +} + +struct ReconcilerTask { + scrimlet_status_rx: watch::Receiver, + system_networking_config_rx: watch::Receiver, + status_tx: watch::Sender>, + inner: T, + log: Logger, +} + +impl ReconcilerTask { + async fn run(&mut self) -> Result { + let mut activation_reason = ReconcilerActivationReason::Startup; + let mut activation_count: u64 = 0; + + loop { + // We know we _were_ a scrimlet at some point, because we determined + // our switch slot by contacting MGS within our own switch zone. But + // it's possible we could become "not a scrimlet" in the future + // (e.g., if the switch disappears out from under us). In such a + // case, block until it comes back. + self.wait_if_this_sled_is_no_longer_a_scrimlet().await?; + + // We _are_ a scrimlet; perform reconciliation. + info!( + self.log, "starting reconciliation attempt"; + "activation_reason" => ?activation_reason, + "activation_count" => activation_count, + ); + + // Snapshot the current networking config so we hold the watch + // channel as little as possible. + let system_networking_config = + self.system_networking_config_rx.borrow_and_update().clone(); + let running_status = + ReconcilerRunningStatus::new(activation_reason); + self.status_tx.send_modify(|status| { + status.current_status = + ReconcilerCurrentStatus::Running(running_status); + }); + + // Actually perform reconciliation. + let status_result = self + .inner + .do_reconciliation(&system_networking_config, &self.log) + .await; + + // Update our output watch channel with the result. + info!( + self.log, "reconciliation attempt complete"; + "activation_reason" => ?activation_reason, + "activation_count" => activation_count, + ); + self.status_tx.send_modify(|status| { + status.current_status = ReconcilerCurrentStatus::Idle; + status.last_completion = + Some(Box::new(ReconciliationCompletedStatus { + activation_reason, + completed_at_time: Utc::now(), + ran_for: running_status.elapsed_since_start(), + activation_count, + status: status_result, + })); + }); + activation_count = activation_count.wrapping_add(1); + + // Wait until we should perform reconciliation again: our + // re-reconciliation periodic timer fires or one of our input watch + // channels changes. + // + // All arms are cancel-safe and we do not `.await` within the body + // of any arm, avoiding any opportunity for futurelock. + activation_reason = tokio::select! { + () = tokio::time::sleep(T::RE_RECONCILE_INTERVAL) => { + ReconcilerActivationReason::PeriodicTimer + } + + result = self.system_networking_config_rx.changed() => { + () = result?; + ReconcilerActivationReason::SystemNetworkingConfigChanged + } + + result = self.scrimlet_status_rx.changed() => { + () = result?; + ReconcilerActivationReason::ScrimletStatusChanged + } + }; + } + } + + async fn wait_if_this_sled_is_no_longer_a_scrimlet( + &mut self, + ) -> Result<(), RecvError> { + let mut logged_not_scrimlet = false; + + loop { + let status = *self.scrimlet_status_rx.borrow_and_update(); + match status { + ScrimletStatus::Scrimlet => { + return Ok(()); + } + ScrimletStatus::NotScrimlet => { + if !logged_not_scrimlet { + info!( + self.log, + "not a scrimlet - reconciler going inert" + ); + logged_not_scrimlet = true; + } + self.status_tx.send_modify(|status| { + status.current_status = ReconcilerCurrentStatus::Inert( + ReconcilerInertReason::NoLongerAScrimlet, + ); + }); + + // Select over both input channels so we can detect channel + // closure and exit cleanly if either channel goes away. If + // the rack network config changes here, we'll spuriously + // loop around and reread the scrimlet status, but that's no + // big deal. + // + // Both arms are cancel-safe and we do not `.await` within + // the body of any arm, avoiding any opportunity for + // futurelock. + tokio::select! { + result = self.scrimlet_status_rx.changed() => { + () = result?; + } + result = self.system_networking_config_rx.changed() => { + () = result?; + } + } + } + } + } + } +} + +#[cfg(test)] +mod tests; diff --git a/sled-agent/scrimlet-reconcilers/src/reconciler_task/tests.rs b/sled-agent/scrimlet-reconcilers/src/reconciler_task/tests.rs new file mode 100644 index 00000000000..ddf94a0f4df --- /dev/null +++ b/sled-agent/scrimlet-reconcilers/src/reconciler_task/tests.rs @@ -0,0 +1,728 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::*; +use assert_matches::assert_matches; +use sled_agent_types::early_networking::RackNetworkConfig; +use std::mem; +use std::sync::Arc; +use std::sync::Mutex; +use tokio::sync::mpsc; +use tokio::time::Instant; + +struct MockReconciler { + do_reconciliation_calls: Arc>>, + do_reconciliation_results: mpsc::UnboundedReceiver, +} + +impl Reconciler for MockReconciler { + type Status = String; + + const LOGGER_COMPONENT_NAME: &'static str = "MockReconciler"; + const RE_RECONCILE_INTERVAL: Duration = Duration::from_secs(30); + + fn new( + _mode: ScrimletReconcilersMode, + _switch_slot: ThisSledSwitchSlot, + _parent_log: &Logger, + ) -> Self { + unimplemented!("not called by tests") + } + + async fn do_reconciliation( + &mut self, + system_networking_config: &SystemNetworkingConfig, + _log: &Logger, + ) -> Self::Status { + self.do_reconciliation_calls + .lock() + .unwrap() + .push(system_networking_config.clone()); + self.do_reconciliation_results + .recv() + .await + .expect("test never closes sending side of channel") + } +} + +fn test_system_networking_config_1() -> SystemNetworkingConfig { + SystemNetworkingConfig { + rack_network_config: RackNetworkConfig { + rack_subnet: "fd00:1122:3344:0100::/56".parse().unwrap(), + infra_ip_first: "192.0.2.10".parse().unwrap(), + infra_ip_last: "192.0.2.100".parse().unwrap(), + ports: Vec::new(), + bgp: Vec::new(), + bfd: Vec::new(), + }, + service_zone_nat_entries: None, + } +} + +fn test_system_networking_config_2() -> SystemNetworkingConfig { + SystemNetworkingConfig { + rack_network_config: RackNetworkConfig { + rack_subnet: "fd00:aabb:ccdd:0200::/56".parse().unwrap(), + infra_ip_first: "192.0.2.20".parse().unwrap(), + infra_ip_last: "192.0.2.200".parse().unwrap(), + ports: Vec::new(), + bgp: Vec::new(), + bfd: Vec::new(), + }, + service_zone_nat_entries: None, + } +} + +struct Harness { + task: ReconcilerTaskHandle, + scrimlet_status_tx: watch::Sender, + system_networking_config_tx: watch::Sender, + do_reconciliation_results_tx: mpsc::UnboundedSender, + do_reconciliation_calls: Arc>>, +} + +impl Harness { + const WAIT_FOR_STATUS_CHECK_INTERVAL: Duration = Duration::from_millis(100); + + fn new(log: &Logger) -> Self { + let (scrimlet_status_tx, scrimlet_status_rx) = + watch::channel(ScrimletStatus::Scrimlet); + let (system_networking_config_tx, system_networking_config_rx) = + watch::channel(test_system_networking_config_1()); + + let (do_reconciliation_results_tx, do_reconciliation_results) = + mpsc::unbounded_channel(); + let do_reconciliation_calls = Arc::new(Mutex::new(Vec::new())); + + let task = { + let do_reconciliation_calls = Arc::clone(&do_reconciliation_calls); + let dummy_addr = "0.0.0.0:0".parse().unwrap(); + ReconcilerTaskHandle::spawn_impl( + scrimlet_status_rx, + system_networking_config_rx, + ScrimletReconcilersMode::Test { + mgs_addr: dummy_addr, + dpd_addr: dummy_addr, + mgd_addr: dummy_addr, + }, + ThisSledSwitchSlot::TEST_FAKE, + log, + |_ip, _slot, _log| MockReconciler { + do_reconciliation_calls, + do_reconciliation_results, + }, + ) + }; + + Self { + task, + scrimlet_status_tx, + system_networking_config_tx, + do_reconciliation_results_tx, + do_reconciliation_calls, + } + } + + fn set_scrimlet_status(&self, status: ScrimletStatus) { + self.scrimlet_status_tx.send_modify(|s| { + *s = status; + }); + } + + async fn wait_for_do_reconciliation_call_count(&self, count: usize) { + let mut last_seen = self.do_reconciliation_calls.lock().unwrap().len(); + let start = Instant::now(); + while start.elapsed() < Duration::from_secs(5) { + if last_seen == count { + return; + } + tokio::time::sleep(Self::WAIT_FOR_STATUS_CHECK_INTERVAL).await; + last_seen = self.do_reconciliation_calls.lock().unwrap().len(); + } + panic!( + "timeout waiting for do_reconciliation call count {count} \ + (got {last_seen})" + ); + } + + async fn wait_for_task_status( + &self, + description: &str, + matches: F, + ) -> ReconcilerStatus + where + F: Fn(&ReconcilerCurrentStatus) -> bool, + { + let mut status = self.task.status(); + let start = Instant::now(); + while start.elapsed() < Duration::from_secs(5) { + if matches(&status.current_status) { + return status; + } + tokio::time::sleep(Self::WAIT_FOR_STATUS_CHECK_INTERVAL).await; + status = self.task.status(); + } + panic!( + "timeout waiting for task status {description} (got {status:?})" + ); + } + + async fn wait_for_task_status_no_longer_a_scrimlet( + &self, + ) -> ReconcilerStatus { + self.wait_for_task_status("Inert(NoLongerAScrimlet)", |status| { + matches!( + status, + ReconcilerCurrentStatus::Inert( + ReconcilerInertReason::NoLongerAScrimlet + ) + ) + }) + .await + } + + async fn wait_for_task_status_idle(&self) -> ReconcilerStatus { + self.wait_for_task_status("Idle", |status| { + matches!(status, ReconcilerCurrentStatus::Idle) + }) + .await + } + + async fn shutdown_cleanly(self) { + let Self { + task, scrimlet_status_tx, do_reconciliation_results_tx, .. + } = self; + + // Dropping this watch channel should cause the task to exit. + mem::drop(scrimlet_status_tx); + + task._task.await.expect("task didn't panic"); + let final_status = task.status_rx.borrow(); + assert_matches!( + final_status.current_status, + ReconcilerCurrentStatus::Inert( + ReconcilerInertReason::TaskExitedUnexpectedly + ) + ); + + // Explicitly drop this _after_ the task exits so we're guaranteed + // not to hit the `.expect()` in + // `MockReconciler::do_reconciliation()` above. + mem::drop(do_reconciliation_results_tx); + } +} + +// Test the first activation. +#[tokio::test(start_paused = true)] +async fn first_activation() { + let logctx = omicron_test_utils::dev::test_setup_log("first_activation"); + let harness = Harness::new(&logctx.log); + + // Confirm we start in Idle. + assert_matches!( + harness.task.status().current_status, + ReconcilerCurrentStatus::Idle + ); + + // Task should call do_reconciliation() for the first time. + harness.wait_for_do_reconciliation_call_count(1).await; + harness.do_reconciliation_results_tx.send("first".to_string()).unwrap(); + let status = harness.wait_for_task_status_idle().await; + + let completion = + status.last_completion.expect("last_completion should be preserved"); + assert_eq!(completion.activation_count, 0); + assert_eq!(completion.status, "first"); + assert_matches!( + completion.activation_reason, + ReconcilerActivationReason::Startup + ); + + harness.shutdown_cleanly().await; + logctx.cleanup_successful(); +} + +// Test: after completing a reconciliation and reaching the select!, +// setting NotScrimlet causes the task to loop back to +// wait_if_this_sled_is_no_longer_a_scrimlet and go inert — without +// performing another reconciliation. +#[tokio::test(start_paused = true)] +async fn scrimlet_becomes_not_scrimlet_during_select() { + let logctx = omicron_test_utils::dev::test_setup_log( + "scrimlet_becomes_not_scrimlet_during_select", + ); + let harness = Harness::new(&logctx.log); + + // Complete the first reconciliation so the task reaches the select!. + harness.wait_for_do_reconciliation_call_count(1).await; + harness.do_reconciliation_results_tx.send("first".to_string()).unwrap(); + harness.wait_for_task_status_idle().await; + + // Become NotScrimlet → the select! fires with ScrimletStatusChanged, + // the loop goes back to wait_if_this_sled_is_no_longer_a_scrimlet, and the + // task becomes Inert(NoLongerAScrimlet). + harness.set_scrimlet_status(ScrimletStatus::NotScrimlet); + let status = harness.wait_for_task_status_no_longer_a_scrimlet().await; + + // No additional do_reconciliation call should have happened: the + // ScrimletStatusChanged activation saw NotScrimlet and went inert + // instead of reconciling. + assert_eq!(harness.do_reconciliation_calls.lock().unwrap().len(), 1); + + // last_completion from the prior run should still be present. + let completion = + status.last_completion.expect("last_completion should be preserved"); + assert_eq!(completion.activation_count, 0); + assert_eq!(completion.status, "first"); + + harness.shutdown_cleanly().await; + logctx.cleanup_successful(); +} + +// Test: after the first reconciliation completes, changing the +// SystemNetworkingConfig triggers a second reconciliation with +// activation_reason = SystemNetworkingConfigChanged and the new config. +#[tokio::test(start_paused = true)] +async fn system_networking_config_change_triggers_re_reconciliation() { + let logctx = omicron_test_utils::dev::test_setup_log( + "system_networking_config_change_triggers_re_reconciliation", + ); + let harness = Harness::new(&logctx.log); + + // Wait for the first do_reconciliation call (Startup). + harness.wait_for_do_reconciliation_call_count(1).await; + + // Complete the first reconciliation. + harness.do_reconciliation_results_tx.send("first".to_string()).unwrap(); + harness.wait_for_task_status_idle().await; + + // Send a new SystemNetworkingConfig. + let first_config = test_system_networking_config_1(); + let second_config = test_system_networking_config_2(); + assert_ne!(first_config, second_config); + harness.system_networking_config_tx.send(second_config.clone()).unwrap(); + + // Wait for the second do_reconciliation call. + harness.wait_for_do_reconciliation_call_count(2).await; + + // The second call should have received the new config. + let received_configs = + harness.do_reconciliation_calls.lock().unwrap().clone(); + assert_eq!(received_configs.len(), 2); + assert_eq!(received_configs[0], first_config); + assert_eq!(received_configs[1], second_config); + + // Status should be Running with SystemNetworkingConfigChanged. + let status = harness.task.status(); + match &status.current_status { + ReconcilerCurrentStatus::Running(running) => { + assert_matches!( + running.activation_reason(), + ReconcilerActivationReason::SystemNetworkingConfigChanged + ); + } + other => panic!("expected Running status, got {other:?}"), + } + + // Complete the second reconciliation. + harness.do_reconciliation_results_tx.send("second".to_string()).unwrap(); + let status = harness.wait_for_task_status_idle().await; + + let completion = + status.last_completion.expect("should have last_completion"); + assert_matches!( + completion.activation_reason, + ReconcilerActivationReason::SystemNetworkingConfigChanged + ); + assert_eq!(completion.activation_count, 1); + assert_eq!(completion.status, "second"); + + harness.shutdown_cleanly().await; + logctx.cleanup_successful(); +} + +// Test: after the first reconciliation completes, the periodic timer +// fires after RE_RECONCILE_INTERVAL and triggers a second +// reconciliation with activation_reason = PeriodicTimer. +#[tokio::test(start_paused = true)] +async fn periodic_timer_triggers_re_reconciliation() { + let logctx = omicron_test_utils::dev::test_setup_log( + "periodic_timer_triggers_re_reconciliation", + ); + let harness = Harness::new(&logctx.log); + + // Complete the first reconciliation (Startup). + harness.wait_for_do_reconciliation_call_count(1).await; + harness.do_reconciliation_results_tx.send("first".to_string()).unwrap(); + harness.wait_for_task_status_idle().await; + + // Advance time just short of the interval — no second call yet. + tokio::time::advance( + MockReconciler::RE_RECONCILE_INTERVAL - Duration::from_millis(1), + ) + .await; + assert_eq!(harness.do_reconciliation_calls.lock().unwrap().len(), 1); + + // Advance past the interval — periodic timer fires. + tokio::time::advance(Duration::from_millis(1)).await; + harness.wait_for_do_reconciliation_call_count(2).await; + + // Status should be Running with PeriodicTimer. + let status = harness.task.status(); + match &status.current_status { + ReconcilerCurrentStatus::Running(running) => { + assert_matches!( + running.activation_reason(), + ReconcilerActivationReason::PeriodicTimer + ); + } + other => panic!("expected Running status, got {other:?}"), + } + + // Complete and verify. + harness.do_reconciliation_results_tx.send("periodic".to_string()).unwrap(); + let status = harness.wait_for_task_status_idle().await; + let completion = + status.last_completion.expect("should have last_completion"); + assert_matches!( + completion.activation_reason, + ReconcilerActivationReason::PeriodicTimer + ); + assert_eq!(completion.activation_count, 1); + assert_eq!(completion.status, "periodic"); + + harness.shutdown_cleanly().await; + logctx.cleanup_successful(); +} + +// Test: if the SystemNetworkingConfig changes while do_reconciliation is +// in-flight, the task should notice when it reaches the select! and +// immediately perform another reconciliation with +// activation_reason = SystemNetworkingConfigChanged using the latest config. +#[tokio::test(start_paused = true)] +async fn config_change_during_inflight_reconciliation() { + let logctx = omicron_test_utils::dev::test_setup_log( + "config_change_during_inflight_reconciliation", + ); + let harness = Harness::new(&logctx.log); + + // Wait for the first do_reconciliation call (Startup) to be entered. + harness.wait_for_do_reconciliation_call_count(1).await; + + // While the first reconciliation is still in-flight, change the + // config. The task won't see this until it finishes and hits the + // select!. + harness + .system_networking_config_tx + .send(test_system_networking_config_2()) + .unwrap(); + + // Complete the first reconciliation. + harness.do_reconciliation_results_tx.send("first".to_string()).unwrap(); + + // The task should immediately start a second reconciliation because + // system_networking_config_rx.changed() fires in the select!. Because we + // have time paused, the elapsed time should be exactly one check interval + // of our test harness. + let before = tokio::time::Instant::now(); + harness.wait_for_do_reconciliation_call_count(2).await; + assert_eq!(before.elapsed(), Harness::WAIT_FOR_STATUS_CHECK_INTERVAL); + + // The second call should have received the new config (via + // borrow_and_update()). + let received_configs = + harness.do_reconciliation_calls.lock().unwrap().clone(); + assert_eq!(received_configs[0], test_system_networking_config_1()); + assert_eq!(received_configs[1], test_system_networking_config_2()); + + // Status should be Running with SystemNetworkingConfigChanged. + let status = harness.task.status(); + match &status.current_status { + ReconcilerCurrentStatus::Running(running) => { + assert_matches!( + running.activation_reason(), + ReconcilerActivationReason::SystemNetworkingConfigChanged + ); + } + other => panic!("expected Running status, got {other:?}"), + } + + // Complete the second reconciliation and verify. + harness.do_reconciliation_results_tx.send("second".to_string()).unwrap(); + let status = harness.wait_for_task_status_idle().await; + let completion = + status.last_completion.expect("should have last_completion"); + assert_matches!( + completion.activation_reason, + ReconcilerActivationReason::SystemNetworkingConfigChanged + ); + assert_eq!(completion.activation_count, 1); + assert_eq!(completion.status, "second"); + + harness.shutdown_cleanly().await; + logctx.cleanup_successful(); +} + +// Test: full scrimlet status round-trip. Start as scrimlet, complete +// reconciliation #0 (Startup). Set NotScrimlet → task goes inert. Set +// Scrimlet again → reconciliation #1 fires with activation_reason = +// ScrimletStatusChanged and activation_count = 1. +#[tokio::test(start_paused = true)] +async fn scrimlet_status_round_trip() { + let logctx = + omicron_test_utils::dev::test_setup_log("scrimlet_status_round_trip"); + let harness = Harness::new(&logctx.log); + + // First reconciliation (Startup). + harness.wait_for_do_reconciliation_call_count(1).await; + harness.do_reconciliation_results_tx.send("first".to_string()).unwrap(); + let status = harness.wait_for_task_status_idle().await; + let completion = + status.last_completion.expect("should have last_completion"); + assert_matches!( + completion.activation_reason, + ReconcilerActivationReason::Startup + ); + assert_eq!(completion.activation_count, 0); + + // Become NotScrimlet → task should go inert. + harness.set_scrimlet_status(ScrimletStatus::NotScrimlet); + harness.wait_for_task_status_no_longer_a_scrimlet().await; + + // No additional do_reconciliation call should have happened. + assert_eq!(harness.do_reconciliation_calls.lock().unwrap().len(), 1); + + // Become Scrimlet again → reconciliation #1 fires. + harness.set_scrimlet_status(ScrimletStatus::Scrimlet); + harness.wait_for_do_reconciliation_call_count(2).await; + + // Status should be Running with ScrimletStatusChanged. + let status = harness.task.status(); + match &status.current_status { + ReconcilerCurrentStatus::Running(running) => { + assert_matches!( + running.activation_reason(), + ReconcilerActivationReason::ScrimletStatusChanged + ); + } + other => panic!("expected Running status, got {other:?}"), + } + + // Complete the second reconciliation and check last_completion. + harness.do_reconciliation_results_tx.send("second".to_string()).unwrap(); + let status = harness.wait_for_task_status_idle().await; + let completion = + status.last_completion.expect("should have last_completion"); + assert_matches!( + completion.activation_reason, + ReconcilerActivationReason::ScrimletStatusChanged + ); + assert_eq!(completion.activation_count, 1); + assert_eq!(completion.status, "second"); + + harness.shutdown_cleanly().await; + logctx.cleanup_successful(); +} + +// Test: dropping the system_networking_config sender while the task is in +// the select! (after completing a reconciliation) causes the task to +// exit with TaskExitedUnexpectedly. +#[tokio::test(start_paused = true)] +async fn channel_closure_system_networking_config_during_select() { + let logctx = omicron_test_utils::dev::test_setup_log( + "channel_closure_system_networking_config_during_select", + ); + let harness = Harness::new(&logctx.log); + + // Complete one reconciliation so the task reaches the select!. + harness.wait_for_do_reconciliation_call_count(1).await; + harness.do_reconciliation_results_tx.send("done".to_string()).unwrap(); + harness.wait_for_task_status_idle().await; + + // Drop the system_networking_config sender. This closes the watch channel, + // which causes the `system_networking_config_rx.changed()` arm in the + // select! to return Err(RecvError), causing the task to exit. + let Harness { + task, + scrimlet_status_tx, + system_networking_config_tx, + do_reconciliation_results_tx, + do_reconciliation_calls, + } = harness; + + mem::drop(system_networking_config_tx); + + // Wait for the task to exit and verify the final status. + task._task.await.expect("task didn't panic"); + let final_status = task.status_rx.borrow(); + assert_matches!( + final_status.current_status, + ReconcilerCurrentStatus::Inert( + ReconcilerInertReason::TaskExitedUnexpectedly + ) + ); + + // do_reconciliation should have been called exactly once (the initial + // Startup reconciliation); no second call after the channel closed. + assert_eq!(do_reconciliation_calls.lock().unwrap().len(), 1); + + mem::drop(scrimlet_status_tx); + mem::drop(do_reconciliation_results_tx); + + logctx.cleanup_successful(); +} + +// Test: dropping the scrimlet_status sender while the task is in the +// select! (after completing a reconciliation) causes the task to exit +// with TaskExitedUnexpectedly. +#[tokio::test(start_paused = true)] +async fn channel_closure_scrimlet_status_during_select() { + let logctx = omicron_test_utils::dev::test_setup_log( + "channel_closure_scrimlet_status_during_select", + ); + let harness = Harness::new(&logctx.log); + + // Complete one reconciliation so the task reaches the select!. + harness.wait_for_do_reconciliation_call_count(1).await; + harness.do_reconciliation_results_tx.send("done".to_string()).unwrap(); + harness.wait_for_task_status_idle().await; + + // Destructure the harness so we can drop the scrimlet_status sender + // while still holding the other pieces we need. + let Harness { + task, + scrimlet_status_tx, + do_reconciliation_results_tx, + do_reconciliation_calls, + .. + } = harness; + + // Drop the scrimlet_status sender. This closes the watch channel, + // which causes the `scrimlet_status_rx.changed()` arm in the + // select! to return Err(RecvError), causing the task to exit. + mem::drop(scrimlet_status_tx); + + // Wait for the task to exit and verify the final status. + task._task.await.expect("task didn't panic"); + let final_status = task.status_rx.borrow(); + assert_matches!( + final_status.current_status, + ReconcilerCurrentStatus::Inert( + ReconcilerInertReason::TaskExitedUnexpectedly + ) + ); + + // do_reconciliation should have been called exactly once (the initial + // Startup reconciliation); no second call after the channel closed. + assert_eq!(do_reconciliation_calls.lock().unwrap().len(), 1); + + // Explicitly drop after the task exits so we can't hit the .expect() + // in MockReconciler::do_reconciliation(). + mem::drop(do_reconciliation_results_tx); + + logctx.cleanup_successful(); +} + +// Test: dropping the scrimlet_status sender while the task is blocked in +// wait_if_this_sled_is_no_longer_a_scrimlet() waiting for the switch slot +// causes the task to exit with TaskExitedUnexpectedly. +#[tokio::test(start_paused = true)] +async fn channel_closure_scrimlet_status_during_not_scrimlet() { + let logctx = omicron_test_utils::dev::test_setup_log( + "channel_closure_scrimlet_status_during_not_scrimlet", + ); + let harness = Harness::new(&logctx.log); + + // Set ourselves as "not a scrimlet". + harness.set_scrimlet_status(ScrimletStatus::NotScrimlet); + + // Wait for the task to reach Inert(NoLongerAScrimlet). + harness.wait_for_task_status_no_longer_a_scrimlet().await; + + // Destructure the harness so we can drop the scrimlet_status sender + // while still holding the other pieces we need. + let Harness { + task, + scrimlet_status_tx, + system_networking_config_tx, + do_reconciliation_results_tx, + do_reconciliation_calls, + } = harness; + + // Drop the scrimlet_status sender. This closes the watch channel, + // which causes scrimlet_status_rx.changed().await to return + // Err(RecvError) inside wait_if_this_sled_is_no_longer_a_scrimlet(), + // causing the task to exit. + mem::drop(scrimlet_status_tx); + + // Wait for the task to exit and verify the final status. + task._task.await.expect("task didn't panic"); + let final_status = task.status_rx.borrow(); + assert_matches!( + final_status.current_status, + ReconcilerCurrentStatus::Inert( + ReconcilerInertReason::TaskExitedUnexpectedly + ) + ); + + // do_reconciliation() should never have been called. + assert_eq!(do_reconciliation_calls.lock().unwrap().len(), 0); + + // Explicitly drop after the task exits. + mem::drop(do_reconciliation_results_tx); + mem::drop(system_networking_config_tx); + + logctx.cleanup_successful(); +} + +// Test: dropping the system_networking_config sender while the task is blocked +// in wait_if_this_sled_is_no_longer_a_scrimlet() causes the task to exit with +// TaskExitedUnexpectedly. +#[tokio::test(start_paused = true)] +async fn channel_closure_system_networking_config_during_not_scrimlet() { + let logctx = omicron_test_utils::dev::test_setup_log( + "channel_closure_system_networking_config_during_not_scrimlet", + ); + let harness = Harness::new(&logctx.log); + + // Set ourselves as "not a scrimlet". + harness.set_scrimlet_status(ScrimletStatus::NotScrimlet); + + // Wait for the task to reach Inert(NoLongerAScrimlet). + harness.wait_for_task_status_no_longer_a_scrimlet().await; + + // Destructure the harness so we can drop the system_networking_config + // sender while still holding the other pieces we need. + let Harness { + task, + scrimlet_status_tx, + system_networking_config_tx, + do_reconciliation_results_tx, + do_reconciliation_calls, + } = harness; + + // Drop the system_networking_config sender. The task is currently blocked + // waiting to become a scrimlet again, but also monitors this channel for + // closure; it should notice and exit. + mem::drop(system_networking_config_tx); + + // Wait for the task to exit and verify the final status. + task._task.await.expect("task didn't panic"); + let final_status = task.status_rx.borrow(); + assert_matches!( + final_status.current_status, + ReconcilerCurrentStatus::Inert( + ReconcilerInertReason::TaskExitedUnexpectedly + ) + ); + + // do_reconciliation() should never have been called. + assert_eq!(do_reconciliation_calls.lock().unwrap().len(), 0); + + // Explicitly drop after the task exits. + mem::drop(do_reconciliation_results_tx); + mem::drop(scrimlet_status_tx); + + logctx.cleanup_successful(); +} diff --git a/sled-agent/scrimlet-reconcilers/src/status.rs b/sled-agent/scrimlet-reconcilers/src/status.rs new file mode 100644 index 00000000000..d433745b4d6 --- /dev/null +++ b/sled-agent/scrimlet-reconcilers/src/status.rs @@ -0,0 +1,141 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types for the status and results of the reconcilers in this crate. + +use crate::DpdReconcilerStatus; +use crate::MgdReconcilerStatus; +use crate::UplinkdReconcilerStatus; +use chrono::DateTime; +use chrono::Utc; +use std::time::Duration; +use std::time::Instant; + +/// Whether or not this sled is a scrimlet. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ScrimletStatus { + Scrimlet, + NotScrimlet, +} + +/// Status of attempting to determine this sled's switch slot via MGS within +/// this sled's switch zone. +#[derive(Debug, Clone)] +pub enum DetermineSwitchSlotStatus { + /// We're not attempting to contact MGS because we're not a scrimlet. + NotScrimlet, + + /// We're currently attempting to contact MGS. + /// + /// If this is not the first attempt, `prev_attempt_err` contains the error + /// we encountered the last time. (If the last time succeeded, we'd be + /// done!) + ContactingMgs { prev_attempt_err: Option }, + + /// We're currently idle waiting for a timeout to retry due to a previous + /// failure. + WaitingToRetry { prev_attempt_err: String }, +} + +/// Why a reconciler task has gone inert. +#[derive(Debug, Clone, Copy)] +pub enum ReconcilerInertReason { + /// The reconciler task started when this sled was a scrimlet, but it has + /// since become "not a scrimlet" (e.g., because the attached switch has + /// gone away). + NoLongerAScrimlet, + + /// The reconciler task exited. This is not expected except in tests; the + /// task runs forever as long as sled-agent holds on to the channels used to + /// communicate with it. + TaskExitedUnexpectedly, +} + +/// Why a reconciler task was activated. +#[derive(Debug, Clone, Copy)] +pub enum ReconcilerActivationReason { + /// Each reconciler runs once on startup. + Startup, + /// The task was activated due to its periodic timer firing. + PeriodicTimer, + /// The task was activated in response to a change in the networking config. + SystemNetworkingConfigChanged, + /// The task was activated in response to the sled becoming a scrimlet again + /// (after previously transitioning to "not a scrimlet"). + ScrimletStatusChanged, +} + +#[derive(Debug, Clone)] +pub struct ReconciliationCompletedStatus { + pub activation_reason: ReconcilerActivationReason, + pub completed_at_time: DateTime, + pub ran_for: Duration, + pub activation_count: u64, + pub status: T, +} + +#[derive(Debug, Clone, Copy)] +pub struct ReconcilerRunningStatus { + activation_reason: ReconcilerActivationReason, + started_at_time: DateTime, + started_at_instant: Instant, +} + +impl ReconcilerRunningStatus { + pub(crate) fn new(activation_reason: ReconcilerActivationReason) -> Self { + Self { + activation_reason, + started_at_time: Utc::now(), + started_at_instant: Instant::now(), + } + } + + pub fn activation_reason(&self) -> ReconcilerActivationReason { + self.activation_reason + } + + pub fn started_at(&self) -> DateTime { + self.started_at_time + } + + pub fn elapsed_since_start(&self) -> Duration { + self.started_at_instant.elapsed() + } +} + +#[derive(Debug, Clone)] +pub enum ReconcilerCurrentStatus { + /// The reconciler is inert: it will not or cannot run for some reason. + Inert(ReconcilerInertReason), + /// The reconciler is currently running. + Running(ReconcilerRunningStatus), + /// The reconciler is not currently running. + Idle, +} + +#[derive(Debug, Clone)] +pub struct ReconcilerStatus { + /// Status of the task at this moment. + pub current_status: ReconcilerCurrentStatus, + /// Final status of the most recent activation of this task. + // Box the inner status to avoid clippy complaining about + // `ScrimletReconcilersStatus::Running { ... }` being overly large. + pub last_completion: Option>>, +} + +#[derive(Debug, Clone)] +pub enum ScrimletReconcilersStatus { + /// `sled-agent` has not yet provided underlay networking information. + WaitingForSledAgentNetworkingInfo, + + /// We're attempting to determine our switch slot. + DeterminingSwitchSlot(DetermineSwitchSlotStatus), + + /// We are a scrimlet and the individual reconcilers are running. + Running { + dpd_reconciler: ReconcilerStatus, + mgd_reconciler: ReconcilerStatus, + uplinkd_reconciler: ReconcilerStatus, + }, +} diff --git a/sled-agent/scrimlet-reconcilers/src/switch_zone_slot.rs b/sled-agent/scrimlet-reconcilers/src/switch_zone_slot.rs new file mode 100644 index 00000000000..4c9e8f6aa2f --- /dev/null +++ b/sled-agent/scrimlet-reconcilers/src/switch_zone_slot.rs @@ -0,0 +1,143 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::DetermineSwitchSlotStatus; +use crate::ScrimletStatus; +use gateway_client::Client; +use gateway_client::ClientInfo; +use gateway_types::component::SpType; +use sled_agent_types::early_networking::SwitchSlot; +use slog::Logger; +use slog::error; +use slog::warn; +use slog_error_chain::InlineErrorChain; +use std::time::Duration; +use tokio::sync::watch; +use tokio::sync::watch::error::RecvError; + +/// Newtype wrapper around [`SwitchSlot`]. This type is always the physical slot +/// of our own, local switch. +/// +/// This information can only be determined by asking MGS inside our own switch +/// zone. An instance of this type can only be created if we are indeed a +/// scrimlet. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct ThisSledSwitchSlot(SwitchSlot); + +impl PartialEq for ThisSledSwitchSlot { + fn eq(&self, other: &SwitchSlot) -> bool { + self.0 == *other + } +} + +impl PartialEq for SwitchSlot { + fn eq(&self, other: &ThisSledSwitchSlot) -> bool { + *self == other.0 + } +} + +impl ThisSledSwitchSlot { + const MGS_RETRY_TIMEOUT: Duration = Duration::from_secs(5); + + #[cfg(test)] + pub(crate) const TEST_FAKE: Self = Self(SwitchSlot::Switch0); + + pub(crate) async fn determine_retrying_forever( + determine_status_tx: watch::Sender, + scrimlet_status_rx: &mut watch::Receiver, + client: &Client, + log: &Logger, + ) -> Result { + loop { + // Wait until we become a scrimlet; there's no point in trying to + // contact our switch zone if it doesn't exist. + loop { + let scrimlet_status = *scrimlet_status_rx.borrow_and_update(); + match scrimlet_status { + ScrimletStatus::Scrimlet => break, + ScrimletStatus::NotScrimlet => { + determine_status_tx.send_modify(|status| { + *status = DetermineSwitchSlotStatus::NotScrimlet; + }); + scrimlet_status_rx.changed().await?; + continue; + } + } + } + + // Update to our status to `ContactingMgs`, and carry forward any + // error from a previous attempt. + determine_status_tx.send_if_modified(|status| match status { + DetermineSwitchSlotStatus::ContactingMgs { .. } => false, + DetermineSwitchSlotStatus::NotScrimlet => { + *status = DetermineSwitchSlotStatus::ContactingMgs { + prev_attempt_err: None, + }; + true + } + DetermineSwitchSlotStatus::WaitingToRetry { + prev_attempt_err, + } => { + *status = DetermineSwitchSlotStatus::ContactingMgs { + prev_attempt_err: Some(prev_attempt_err.clone()), + }; + true + } + }); + + // We are a scrimlet - see if we know our own slot yet. + let err = match client + .sp_local_switch_id() + .await + .map(|resp| resp.into_inner()) + { + Ok(identity) => match (identity.type_, identity.slot) { + (SpType::Switch, 0) => { + return Ok(ThisSledSwitchSlot(SwitchSlot::Switch0)); + } + (SpType::Switch, 1) => { + return Ok(ThisSledSwitchSlot(SwitchSlot::Switch1)); + } + (sp_type, sp_slot) => { + // We should never get any other response; if we do, + // something has gone very wrong with MGS. It's not + // likely retrying will fix this, but there isn't + // anything else we can do. + error!( + log, + "failed to determine this sled's switch slot: got \ + unexpected identity; will retry"; + "sp_type" => ?sp_type, + "sp_slot" => sp_slot, + ); + format!( + "received invalid SP type/slot combo from MGS {}: \ + {sp_type:?}/{sp_slot}", + client.baseurl() + ) + } + }, + Err(err) => { + let err = InlineErrorChain::new(&err); + warn!( + log, + "failed to determine this sled's switch slot; \ + will retry"; + &err, + ); + err.to_string() + } + }; + + determine_status_tx.send_modify(|status| { + *status = DetermineSwitchSlotStatus::WaitingToRetry { + prev_attempt_err: err, + }; + }); + + // Sleep briefly before retrying. + tokio::time::sleep(Self::MGS_RETRY_TIMEOUT).await; + } + } +} diff --git a/sled-agent/scrimlet-reconcilers/src/uplinkd_reconciler.rs b/sled-agent/scrimlet-reconcilers/src/uplinkd_reconciler.rs new file mode 100644 index 00000000000..3f0e9d487e5 --- /dev/null +++ b/sled-agent/scrimlet-reconcilers/src/uplinkd_reconciler.rs @@ -0,0 +1,50 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Reconciler for configuration of `uplinkd` within a scrimlet's switch zone. +//! +//! Unlike most reconcilers in this crate, `uplinkd`'s configuration is managed +//! via SMF, not a dropshot server. + +use crate::ScrimletReconcilersMode; +use crate::reconciler_task::Reconciler; +use crate::switch_zone_slot::ThisSledSwitchSlot; +use sled_agent_types::system_networking::SystemNetworkingConfig; +use slog::Logger; +use std::time::Duration; + +#[derive(Debug, Clone)] +pub struct UplinkdReconcilerStatus { + pub todo_status: (), +} + +#[derive(Debug)] +pub(crate) struct UplinkdReconciler { + _switch_slot: ThisSledSwitchSlot, +} + +impl Reconciler for UplinkdReconciler { + type Status = UplinkdReconcilerStatus; + + const LOGGER_COMPONENT_NAME: &'static str = "UplinkdReconciler"; + const RE_RECONCILE_INTERVAL: std::time::Duration = Duration::from_secs(30); + + fn new( + _mode: ScrimletReconcilersMode, + switch_slot: ThisSledSwitchSlot, + _parent_log: &Logger, + ) -> Self { + // TODO: Remain inert if `mode` is `ScrimletReconcilersMode::Test`, + // since that indicates there's no real zone to connect to. + Self { _switch_slot: switch_slot } + } + + async fn do_reconciliation( + &mut self, + _system_networking_config: &SystemNetworkingConfig, + _log: &Logger, + ) -> Self::Status { + UplinkdReconcilerStatus { todo_status: () } + } +} diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index fd3f1f0923a..01c0ddb7f3a 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -103,7 +103,7 @@ postgres-types = { version = "0.2.12", default-features = false, features = ["wi ppv-lite86 = { version = "0.2.21", default-features = false, features = ["simd", "std"] } predicates = { version = "3.1.4" } proc-macro2 = { version = "1.0.106" } -proptest = { version = "1.10.0" } +proptest = { version = "1.11.0" } quote = { version = "1.0.45" } rand-274715c4dabd11b0 = { package = "rand", version = "0.9.2" } rand-c38e5c1d305a1b54 = { package = "rand", version = "0.8.5" } @@ -251,7 +251,7 @@ postgres-types = { version = "0.2.12", default-features = false, features = ["wi ppv-lite86 = { version = "0.2.21", default-features = false, features = ["simd", "std"] } predicates = { version = "3.1.4" } proc-macro2 = { version = "1.0.106" } -proptest = { version = "1.10.0" } +proptest = { version = "1.11.0" } quote = { version = "1.0.45" } rand-274715c4dabd11b0 = { package = "rand", version = "0.9.2" } rand-c38e5c1d305a1b54 = { package = "rand", version = "0.8.5" }