diff --git a/lib/propolis/src/accessors.rs b/lib/propolis/src/accessors.rs index e6a8db675..fe7d4521b 100644 --- a/lib/propolis/src/accessors.rs +++ b/lib/propolis/src/accessors.rs @@ -2,14 +2,51 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Hierarchical access control for emulated resources +//! Hierarchical access control for emulated resources. //! -//! Device emulation logic requires access to resources which may be -//! subsequently moderated by intervening parts of the emulation. +//! The structures in this module are designed to support some of our current +//! needs, but also aspirationally for anticipated needs. +//! +//! First and foremost, device emulation logic requires access to resources +//! which may be subsequently moderated by intervening parts of the emulation. +//! Acquisition of the underlying resource is fallible for this reason. //! //! For example: A PCI device performs DMA to guest memory. If bus-mastering is //! disabled on the device, or any parent bridge in its bus hierarchy, then its //! contained emulation should fail any DMA accesses. +//! +//! This also motivates the tree-like structure of the accessor. Keeping the +//! PCI bus mastering example, an individual endpoint can be allowed to perform +//! bus mastering if Bus Master Enable is set. Additionally, a PCI-PCI bridge +//! has a Bus Master Enable bit with a similar semantic for all devices behind +//! that bridge. +//! +//! There is not yet any support for bus mastering bits, but it's expected this +//! should be straightforward on top of `Node` or `NodeEntry`. +//! +//! Secondly, and more relevant to how Accessor is used in Propolis today, an +//! accessor tree provides a mechanism to provide or remove a reference to the +//! protected resource from an entire device or machine. While the accessor tree +//! is at heart a fancy `Arc>`, an `Arc` is never exposed in the +//! accessor's API; only a wrapper that derefs as `T`. +//! +//! Accessor structures being the sole access mechanism to a guarded resource +//! ensures that the resource can be added or removed *almost*[1] arbitrarily. +//! [`MsiAccessor`] is an example of double-duty here; on one hand, a PCI bridge +//! can have MSI enabled or disabled, as well as the functions behind that +//! bridge. On the other hand, the MSI accessor is mostly just an `Arc`, +//! and it would be unfortunate to have stray `Arc` littered across +//! device emulation[2]. +//! +//! 1: A user of Propolis should only change the guarded resource for devices that +//! are in the initial (pre-run) state, paused, or halted. Removing a guarded +//! resource during arbitrary device operation could, at worst, look to a device +//! like it was the bus master while also losing its ownership of the bus! +//! There is no expectation of correct operation in such a bogus state. +//! +//! 2: `Arc` has since found its way into device emulation in different +//! ways, though the ownership model is simple enough there is little risk of +//! cyclic references keeping a `VmmHdl` alive overly-long. use std::collections::{BTreeMap, BTreeSet, VecDeque}; use std::marker::PhantomData; @@ -638,12 +675,25 @@ impl Accessor { /// Will return [None] if any ancestor node disables access, or if the node /// is not attached to a hierarchy containing a valid resource. /// - /// Unlike [`Accesor::access()`], this returns a guard on the node's Mutex. - /// This will block other calls to `access()` or `access_borrow()`, as well - /// as attempts to replace or modify the tree's guarded resource. Prefer - /// frequently refreshing borrows with calls to `access_borrow()` to holding - /// a `LockedView` for substantial durations. - pub fn access_borrow(&self) -> Option> { + /// Unlike [`Accesor::access()`], this returns a wrapped MutexGuard for this + /// accessor node; callers must carefully consider lock ordering when + /// holding this guard across other operations. As with any other mutex, + /// perfer holding this guard for as small a window as permitted. + /// + /// This function exists solely to support very hot code accessing the same + /// resource across many processors. When the underlying resource is an + /// `Arc`, `access()` implies an `Arc::clone`, which would contentously + /// modify the reference count to disastrous effect. `access_locked()` only + /// involves this (hopefully-uncontended!) node, at the cost of a more + /// error-prone API. If `lock incq/lock decq` aren't in your profile, this + /// probably isn't helpful! + /// + /// Some examples of the added consideration with this function: holding + /// this guard will block other calls to this node's `access()` or + /// `access_locked()`, and *may* block attempts to `access()` or + /// `access_locked()` a child of this node. Holding this guard will block + /// removal of the underlying resource, potentially blocking VM teardown. + pub fn access_locked(&self) -> Option> { let guard = self.0.guard_borrow()?; if guard.res_leaf.is_some() { Some(LockedView { guard }) diff --git a/lib/propolis/src/hw/nvme/queue.rs b/lib/propolis/src/hw/nvme/queue.rs index b977f3427..07ef2a005 100644 --- a/lib/propolis/src/hw/nvme/queue.rs +++ b/lib/propolis/src/hw/nvme/queue.rs @@ -115,9 +115,9 @@ struct QueueState { /// This queue's memory accessor node. /// - /// Be careful about lock ordering when using this accessor; access_borrow() + /// Be careful about lock ordering when using this accessor; access_locked() /// holds this node's lock. If a user of this queue state requires both - /// `access_borrow()` and `QueueInner`, the protocol is to lock queue + /// `access_locked()` and `QueueInner`, the protocol is to lock queue /// state first and this accessor second. acc_mem: MemAccessor, } @@ -659,7 +659,7 @@ impl SubQueue { // see docs on QueueState::acc_mem. let mut state = self.state.lock(); - let Some(mem) = self.state.acc_mem.access_borrow() else { return None }; + let Some(mem) = self.state.acc_mem.access_locked() else { return None }; let mem = mem.view(); // Attempt to reserve an entry on the Completion Queue @@ -897,7 +897,7 @@ impl CompQueue { // XXX: handle a guest addr that becomes unmapped later let addr = self.base.offset::(idx as usize); // TODO: access disallowed? - let Some(mem) = self.state.acc_mem.access_borrow() else { + let Some(mem) = self.state.acc_mem.access_locked() else { // TODO: mark the queue/controller in error state? return; }; diff --git a/lib/propolis/src/hw/nvme/requests.rs b/lib/propolis/src/hw/nvme/requests.rs index 8c8a183fb..4821b47f0 100644 --- a/lib/propolis/src/hw/nvme/requests.rs +++ b/lib/propolis/src/hw/nvme/requests.rs @@ -73,7 +73,7 @@ impl block::DeviceQueue for NvmeBlockQueue { /// the underlying block backend fn next_req(&self) -> Option<(Request, Self::Token, Option)> { let sq = &self.sq; - let mem = self.acc_mem.access_borrow()?; + let mem = self.acc_mem.access_locked()?; let mem = mem.view(); let params = self.sq.params();