diff --git a/src/TiledArray/dist_array.h b/src/TiledArray/dist_array.h index c2994963df..0bf6b8d889 100644 --- a/src/TiledArray/dist_array.h +++ b/src/TiledArray/dist_array.h @@ -35,8 +35,10 @@ #include "TiledArray/util/random.h" #include +#include #include #include +#include namespace TiledArray { @@ -2015,6 +2017,95 @@ std::size_t size_of(const DistArray& da) { return result; } +/// \return the number of bytes the locally-owned tiles of \p storage occupy +/// in memory space `S`. +/// +/// This is the *tile-data* footprint of a `DistArray`'s storage object only. +/// It deliberately does **not** include the `DistArray`-level metadata -- +/// `TiledRange`, `Shape`, and `Pmap` -- because those live in the owning +/// `ArrayImpl`/`TensorImpl`, not in the `DistributedStorage`. For +/// `SparsePolicy` the `Shape` (a per-tile Frobenius-norm table) can be +/// sizeable, so this undercounts the full per-array footprint that +/// `size_of(const DistArray&)` reports. Counts only tiles whose futures are +/// set; pending and remote-cached tiles are skipped. +/// \tparam S the memory space to report +template +std::size_t size_of(const detail::DistributedStorage& storage) { + std::size_t result = 0; + storage.for_each_local_tile( + [&result](const auto& tile) { result += size_of(tile); }); + return result; +} + +/// \return the per-rank tile-data bytes (in memory space `S`) of the +/// `DistributedStorage` of *all* live `DistArray` objects of the +/// requested type currently registered in \p world, discovered by walking +/// the World's `WorldObject` registry. +/// +/// Each array's tile storage is a single `detail::DistributedStorage` +/// `WorldObject`, so an array referenced by N shallow-copy handles is counted +/// exactly once — unlike summing `size_of` over a set of handles, which +/// double-counts shared storage. This makes the result suitable as ground +/// truth for validating handle-based tile-data accounting. 
+/// +/// Discovery is type-safe: each registered pointer is recovered as the common +/// polymorphic base `madness::WorldObjectBase` and `dynamic_cast` to the +/// `DistributedStorage` matching `DistArrayT`'s tile type; non-matching +/// objects (other tile types, MADNESS containers) are skipped. Assumes the +/// registered `WorldObject`s place `WorldObjectBase` at offset 0 (true for +/// the single-inheritance `class X : public WorldObject<X>` idiom TA uses). +/// +/// \warning This reports the `DistributedStorage` (tile-data) footprint only. +/// It excludes the `DistArray`-level `TiledRange`, `Shape`, and `Pmap`; the +/// `Shape` can be large under `SparsePolicy`. It is therefore **not** +/// comparable term-for-term with a sum of `size_of(const DistArray&)` over +/// handles (which includes the shape). Use it for tile-data accounting, not +/// total-DistArray-footprint accounting. +/// \note Counts only locally-owned tiles whose futures are set. Excludes +/// remote-tile caches. Call at a quiescent point (after a fence). +/// \tparam DistArrayT the `DistArray` specialization to look for +/// \tparam S the memory space to report (default `Host`) +template +std::size_t size_of_live_distarray_storage(World& world) { + using tile_type = typename DistArrayT::value_type; + using storage_type = detail::DistributedStorage; + std::size_t result = 0; + for (const auto& id : world.get_object_ids()) { + auto base_opt = world.template ptr_from_id(id); + if (!base_opt || !*base_opt) continue; + if (auto* storage = dynamic_cast(*base_opt)) { + result += size_of(*storage); + } + } + return result; +} + +/// \return a matrix of per-rank live-storage tile-data byte totals indexed +/// `[world_index][type_index]`: for each `World` in \p worlds (rows) and each +/// `DistArray` type in the pack `DistArrayTs` (columns), the value of +/// `size_of_live_distarray_storage<DistArrayT, S>(world)`. 
Lets a caller +/// inventory which array types hold how much tile data in which world at a +/// checkpoint, deduplicated across shallow-copy handles. +/// +/// \warning Tile-data only; see `size_of_live_distarray_storage` for the +/// excluded-metadata caveat (no `TiledRange`/`Shape`/`Pmap`). +/// \note `S` is the leading template argument (it has a default but precedes +/// the type pack), so callers must spell it out: +/// `size_of_live_distarrays_storage<MemorySpace::Host, A, B>(worlds)`. +/// \pre every pointer in \p worlds is non-null (checked with `TA_ASSERT`) +template +std::vector> +size_of_live_distarrays_storage(const std::vector& worlds) { + std::vector> result; + result.reserve(worlds.size()); + for (World* w : worlds) { + TA_ASSERT(w != nullptr); + result.push_back({size_of_live_distarray_storage(*w)...}); + } + return result; +} + #ifndef TILEDARRAY_HEADER_ONLY extern template class DistArray, DensePolicy>; diff --git a/src/TiledArray/distributed_storage.h b/src/TiledArray/distributed_storage.h index 60eb715c34..88e43e3196 100644 --- a/src/TiledArray/distributed_storage.h +++ b/src/TiledArray/distributed_storage.h @@ -20,6 +20,7 @@ #ifndef TILEDARRAY_DISTRIBUTED_STORAGE_H__INCLUDED #define TILEDARRAY_DISTRIBUTED_STORAGE_H__INCLUDED +#include #include namespace TiledArray { @@ -360,6 +361,25 @@ class DistributedStorage : public madness::WorldObject> { /// \throw nothing size_type size() const { return data_.size(); } + /// Apply \p op to each locally-owned tile whose future is already set. + + /// Pending (unset) and remote-cached elements are skipped. No + /// communication; intended to be called at a quiescent point (e.g. after a + /// fence). This is the per-rank local tile set, the same one + /// `size_of(DistArray)` iterates. Any summation it enables (e.g. of + /// `size_of(tile)`) is left to the caller, which sees the tile-type + /// overloads -- those need not be visible where this low-level header is + /// parsed. 
+ /// \tparam Op a callable invocable as `op(const value_type&)` + /// \param op the callable to apply to each local tile whose future passes `probe()` + template + void for_each_local_tile(Op&& op) const { + for (auto it = data_.begin(); it != data_.end(); ++it) { + const future& f = it->second; + if (f.probe()) op(f.get()); + } + } + /// Max size accessor /// The maximum size is the total number of elements that can be held by diff --git a/tests/dist_array.cpp b/tests/dist_array.cpp index 5d7c4a9ea6..9384a9760d 100644 --- a/tests/dist_array.cpp +++ b/tests/dist_array.cpp @@ -1020,6 +1020,71 @@ BOOST_AUTO_TEST_CASE(size_of) { BOOST_REQUIRE(sz0 == sz0_expected); } +BOOST_AUTO_TEST_CASE(live_storage_size_in_world) { + using T = Tensor; + using ToT = Tensor; + using Policy = SparsePolicy; + using ArrayT = DistArray; + using ArrayToT = DistArray; + + auto& world = get_default_world(); + world.gop.fence(); + + // arrays from earlier test cases may still be registered (destruction is + // deferred to the next fence), so measure a baseline and compare deltas + auto const base_T = TiledArray::size_of_live_distarray_storage(world); + auto const base_ToT = + TiledArray::size_of_live_distarray_storage(world); + + TiledRange const trange({{0, 2, 5, 7}, {0, 5, 7, 10, 12}}); + + // two distinct regular arrays + auto a1 = make_array(world, trange, [](T& tile, Range const& rng) { + tile = T(rng, 1.0); + return tile.norm(); + }); + auto a2 = make_array(world, trange, [](T& tile, Range const& rng) { + tile = T(rng, 2.0); + return tile.norm(); + }); + // shallow copy: shares a1's storage WorldObject, must NOT be double-counted + ArrayT a1_copy = a1; + BOOST_REQUIRE(a1_copy.trange() == a1.trange()); // keep a1_copy alive & used + + world.gop.fence(); + + // per-array local tile-data bytes = size_of(array) - size_of(shape); the + // storage walk reports tile data only, so subtract the shape from the + // handle-based full-array size_of + auto tiles_only = [](ArrayT const& a) { + return TiledArray::size_of(a) - 
+ TiledArray::size_of(a.shape()); + }; + + // the storage walk counts each distinct DistributedStorage once: a1 + a2, + // NOT a1 + a2 + a1_copy + auto const expected_T = tiles_only(a1) + tiles_only(a2); + auto const got_T = + TiledArray::size_of_live_distarray_storage(world) - base_T; + BOOST_CHECK_EQUAL(got_T, expected_T); + + // the ToT-typed walk must not pick up the regular (T) arrays + auto const got_ToT_delta = + TiledArray::size_of_live_distarray_storage(world) - base_ToT; + BOOST_CHECK_EQUAL(got_ToT_delta, 0u); + + // variadic matrix: one world (one row), two types (two columns) + auto const mat = TiledArray::size_of_live_distarrays_storage< + MemorySpace::Host, ArrayT, ArrayToT>(std::vector{&world}); + BOOST_REQUIRE_EQUAL(mat.size(), 1u); + BOOST_CHECK_EQUAL(mat[0][0], + TiledArray::size_of_live_distarray_storage(world)); + BOOST_CHECK_EQUAL( + mat[0][1], TiledArray::size_of_live_distarray_storage(world)); + + world.gop.fence(); +} + BOOST_FIXTURE_TEST_CASE(fill_zero_sparse, ArrayFixture) { // construct a sparse array with some non-zero tiles and fill it SpArrayN as(world, tr, TiledArray::SparseShape(shape_tensor, tr));