From fab78713143ebee6d7fa441e77987fb6b9b7bbd7 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 20 May 2026 12:47:30 -0400 Subject: [PATCH 1/7] =?UTF-8?q?refactor(optimize):=20generalize=20OptRes::?= =?UTF-8?q?flops=20=E2=86=92=20ops=20and=20extract=20DP=20helpers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- SeQuant/core/optimize/single_term.hpp | 169 ++++++++++++++++---------- 1 file changed, 104 insertions(+), 65 deletions(-) diff --git a/SeQuant/core/optimize/single_term.hpp b/SeQuant/core/optimize/single_term.hpp index 2d5b72ed15..b0b4610660 100644 --- a/SeQuant/core/optimize/single_term.hpp +++ b/SeQuant/core/optimize/single_term.hpp @@ -23,26 +23,31 @@ #include namespace sequant::opt { -/// -/// \param idxsz An invocable that returns size_t for Index argument. -/// \param idxs Index objects. -/// \return flops count -/// template concept has_index_extent = std::is_invocable_r_v; namespace detail { -auto constexpr flops_counter(has_index_extent auto&& ixex) { +/// \brief Cost function returning the flop count of a binary tensor +/// contraction. +/// +/// The returned callable computes the product of extents over the union of +/// indices on \c lhs, \c rhs, and \c result. A product of 1 (scalar +/// contraction) is reported as zero flops. +/// +/// \param ixex Invocable mapping an Index to its extent. +/// \return A callable (lhs, rhs, result) -> double yielding the +/// flop count of the contraction. +auto flops_counter(has_index_extent auto&& ixex) { return [ixex](meta::range_of auto const& lhs, meta::range_of auto const& rhs, - meta::range_of auto const& result) { + meta::range_of auto const& result) -> double { using ranges::views::concat; - auto tot_idxs = tot_indices>( - concat(lhs, rhs, result)); - double ops = 1.; - for (auto&& idx : concat(tot_idxs.outer, tot_idxs.inner)) ops *= ixex(idx); - // ops == 1 implies zero flops - return ops == 1. ? 0. : ops; + auto tot_idxs = tot_indices(concat(lhs, rhs, result)); + double total_flops = ranges::accumulate( + concat(tot_idxs.outer, tot_idxs.inner), 1., std::multiplies{}, + std::forward(ixex)); + // total_flops == 1. implies zero flops + return total_flops == 1. ? 0. : total_flops; }; } @@ -54,8 +59,8 @@ struct OptRes { /// Free indices remaining upon evaluation IndexSet indices; - /// The flops count of evaluation - double flops; + /// The operations count of evaluation + double ops; /// The evaluation sequence EvalSequence sequence; @@ -83,6 +88,84 @@ struct SubNetEqual { } }; +/// \brief Seeds the DP table with per-subset open indices and singleton/empty +/// ops counts. +/// +/// For each subset \c i of the input tensors, computes the indices that remain +/// open after evaluating that subset and stores them in \c results[i].indices. +/// Initializes \c results[i].ops to 0 for empty and singleton subsets, and +/// to \c max as a sentinel for subsets that will later be filled in by the DP. +/// Singleton subsets also get their one-element \c sequence pre-populated. +template +void init_results(TensorNetwork const& network, TIdxs const& tidxs, + container::vector& results) { + using IndexContainer = decltype(OptRes::indices); + auto tensor_indices = network.tensors() // + | ranges::views::indirect // + | ranges::views::transform(slots); + auto imed_indices = subset_target_indices(tensor_indices, tidxs); + SEQUANT_ASSERT(ranges::distance(imed_indices) == ranges::distance(results)); + for (size_t i = 0; i < results.size(); ++i) { + results[i].indices = + imed_indices[i] | ranges::views::move | ranges::to; + results[i].ops = std::popcount(i) > 1 + ? std::numeric_limits::max() + : 0; + if (std::popcount(i) == 1) + results[i].sequence.emplace_back(std::countr_zero(i)); + // else results[i].sequence is left uninitialized + } +} + +struct SubnetMetadata { + /// meta_ids[n] is the canonical-subnet id of subset n, or + /// numeric_limits::max() for subsets with popcount < 2. + container::vector meta_ids; + /// Cost of evaluating one representative of each canonical subnet id, + /// indexed by id. Populated lazily during the DP. + container::vector unique_meta_costs; +}; + +/// \brief Precomputes canonical-subnet identifiers for every subset of size +/// >= 2 so that structurally equivalent subnetworks share a CSE id. +/// +/// Builds a `TensorNetwork` for each subset, canonicalizes it, and assigns a +/// dense integer id to each distinct canonical form. The returned +/// `unique_meta_costs` is sized to the number of distinct ids and zero-filled; +/// it is populated during the DP as each canonical subnet's optimal cost +/// becomes known. +/// +/// Side effect: `results[n].indices` may be reordered by `canonicalize_slots`. +inline SubnetMetadata build_subnet_metadata( + TensorNetwork const& network, container::vector& results) { + SubnetMetadata out; + // Use max as sentinel for entries with popcount < 2 (singletons/empty), + // which are skipped below and never assigned a real meta ID. + out.meta_ids.resize(results.size(), std::numeric_limits::max()); + container::unordered_map + meta_to_id; + + for (size_t n = 0; n < results.size(); ++n) { + if (std::popcount(n) < 2) continue; + auto ts = bits::on_bits_index(n) | bits::sieve(network.tensors()); + container::vector ts_expr; + for (auto&& t : ts) + ts_expr.emplace_back(std::dynamic_pointer_cast(t)->clone()); + + auto tn = TensorNetwork{ts_expr}; + auto meta = tn.canonicalize_slots( + TensorCanonicalizer::cardinal_tensor_labels(), &results[n].indices); + + auto [it, inserted] = meta_to_id.try_emplace(std::move(meta), 0); + if (inserted) it->second = meta_to_id.size() - 1; + + out.meta_ids[n] = it->second; + } + out.unique_meta_costs.resize(meta_to_id.size(), 0.0); + return out; +} + /// \brief Finds the optimal evaluation sequence for a single-term tensor /// contraction. /// @@ -130,34 +213,12 @@ EvalSequence single_term_opt_impl(TensorNetwork const& network, meta::range_of auto const& tidxs, CostFn&& cost_fn, bool subnet_cse) { using ranges::views::concat; - using ranges::views::indirect; - using ranges::views::transform; - using IndexContainer = decltype(OptRes::indices); auto const nt = network.tensors().size(); if (nt == 1) return EvalSequence{0}; if (nt == 2) return EvalSequence{0, 1, -1}; container::vector results((size_t{1} << nt)); - - // initialize the intermediate results - { - auto tensor_indices = network.tensors() // - | indirect // - | transform(slots); - auto imed_indices = subset_target_indices(tensor_indices, tidxs); - SEQUANT_ASSERT(ranges::distance(imed_indices) == ranges::distance(results)); - for (size_t i = 0; i < results.size(); ++i) { - results[i].indices = - imed_indices[i] | ranges::views::move | ranges::to; - results[i].flops = - std::popcount(i) > 1 - ? std::numeric_limits::max() - : 0; - if (std::popcount(i) == 1) - results[i].sequence.emplace_back(std::countr_zero(i)); - // else results[i].sequence is left uninitialized - } - } + init_results(network, tidxs, results); // precompute all subnet_meta if subnet_cse is true // Note: the O(2^n) cost is bounded in practice — subset_target_indices above @@ -165,37 +226,15 @@ EvalSequence single_term_opt_impl(TensorNetwork const& network, container::vector meta_ids; container::vector unique_meta_costs; if (subnet_cse) { - // Use max as sentinel for entries with popcount < 2 (singletons/empty), - // which are skipped below and never assigned a real meta ID. - meta_ids.resize(results.size(), std::numeric_limits::max()); - container::unordered_map - meta_to_id; - - for (size_t n = 0; n < results.size(); ++n) { - if (std::popcount(n) < 2) continue; - auto ts = bits::on_bits_index(n) | bits::sieve(network.tensors()); - container::vector ts_expr; - for (auto&& t : ts) { - ts_expr.emplace_back(std::dynamic_pointer_cast(t)->clone()); - } - auto tn = TensorNetwork{ts_expr}; - auto meta = tn.canonicalize_slots( - TensorCanonicalizer::cardinal_tensor_labels(), &results[n].indices); - - auto [it, inserted] = meta_to_id.try_emplace(std::move(meta), 0); - if (inserted) { - it->second = meta_to_id.size() - 1; - } - meta_ids[n] = it->second; - } - unique_meta_costs.resize(meta_to_id.size(), 0.0); + auto md = build_subnet_metadata(network, results); + meta_ids = std::move(md.meta_ids); + unique_meta_costs = std::move(md.unique_meta_costs); } // find the optimal evaluation sequence for (size_t n = 0; n < results.size(); ++n) { if (std::popcount(n) < 2) continue; - for (auto& curr_cost = results[n].flops; + for (auto& curr_cost = results[n].ops; auto&& [lp, rp] : bits::bipartitions(n)) { // do nothing with the trivial bipartition // i.e. one subset is the empty set and the other full @@ -219,7 +258,7 @@ EvalSequence single_term_opt_impl(TensorNetwork const& network, new_cost = cost_fn(results[lp].indices, // results[rp].indices, // results[n].indices) // - + results[lp].flops + results[rp].flops; + + results[lp].ops + results[rp].ops; } if (new_cost <= curr_cost) { From 2dc64d35a70059c6a43d831693db5faa85f0e288 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 20 May 2026 12:52:47 -0400 Subject: [PATCH 2/7] feature(memsize_counter): new metric that computes binary tensor contraction total mem. --- SeQuant/core/optimize/single_term.hpp | 29 +++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/SeQuant/core/optimize/single_term.hpp b/SeQuant/core/optimize/single_term.hpp index b0b4610660..a1d480ea30 100644 --- a/SeQuant/core/optimize/single_term.hpp +++ b/SeQuant/core/optimize/single_term.hpp @@ -51,6 +51,35 @@ auto flops_counter(has_index_extent auto&& ixex) { }; } +/// \brief Cost function returning the total memory footprint of a binary +/// tensor contraction. +/// +/// The returned callable sums, over \c lhs, \c rhs, and \c result, the +/// product of extents of each operand's indices. Operands whose extent +/// product is 1 contribute zero. +/// +/// \param ixex Invocable mapping an Index to its extent. +/// \return A callable (lhs, rhs, result) -> double yielding the +/// summed memory size of the three operands. +auto memsize_counter(has_index_extent auto&& ixex) { + return [ixex](meta::range_of auto const& lhs, + meta::range_of auto const& rhs, + meta::range_of auto const& result) -> double { + using ranges::views::concat; + double total_mem{0}; + for (auto&& tot_idxs : + {tot_indices(lhs), tot_indices(rhs), tot_indices(result)}) { + double mem = ranges::accumulate(concat(tot_idxs.outer, tot_idxs.inner), + 1., std::multiplies{}, + std::forward(ixex)); + if (mem != 1.) total_mem += mem; + // else 1. is assumed to be the accumulation init value; + // skip adding it to the total. + } + return total_mem; + }; +} + /// /// Represents a result of optimization on a range of expressions /// for a binary evaluation From c8f8914a20b251e3f71ddc3c3ad92a5cb8ce3f4b Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 20 May 2026 18:42:34 -0400 Subject: [PATCH 3/7] refactor(optimize): make cost metric and index extent provider configurable - Add OptFor { Flops, Memsize } and ReorderSum { Reorder, NoReorder } in a shared optimize/flags.hpp; thread them through single_term_opt and the top-level optimize() API. - Add index_to_extent_t alias for the type-erased provider used by the public API; templated single_term_opt callers still pass callables directly via the has_index_extent concept. - Parallelize the outermost Sum's per-summand single-term optimization with sequant::for_each; recurse sequentially on nested Sums. - Have opt::reorder reuse pre-binarized eval nodes instead of rebuilding them inside clusters(). - Harden opt_mixed_product placeholder labels (non-identifier prefix, starts_with match, digit-walk suffix parse, asserted invariants). - Fix std::forward misuse in flops_counter / memsize_counter that prevented compilation under clang. --- CMakeLists.txt | 1 + SeQuant/core/optimize/flags.hpp | 15 ++ SeQuant/core/optimize/optimize.cpp | 217 +++++++++++++++++--------- SeQuant/core/optimize/optimize.hpp | 52 +++--- SeQuant/core/optimize/single_term.hpp | 69 +++++--- SeQuant/core/optimize/sum.cpp | 28 +++- SeQuant/core/optimize/sum.hpp | 13 ++ 7 files changed, 273 insertions(+), 122 deletions(-) create mode 100644 SeQuant/core/optimize/flags.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index d5da7c30ab..af0fd0d5a4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -446,6 +446,7 @@ set(SeQuant_eval_src # Optimize sources (depends on eval) set(SeQuant_optimize_src SeQuant/core/optimize/common_subexpression_elimination.hpp + SeQuant/core/optimize/flags.hpp SeQuant/core/optimize/fusion.cpp SeQuant/core/optimize/fusion.hpp SeQuant/core/optimize/optimize.cpp diff --git a/SeQuant/core/optimize/flags.hpp b/SeQuant/core/optimize/flags.hpp new file mode 100644 index 0000000000..9484c50894 --- /dev/null +++ b/SeQuant/core/optimize/flags.hpp @@ -0,0 +1,15 @@ +#ifndef SEQUANT_CORE_OPTIMIZE_FLAGS_HPP +#define SEQUANT_CORE_OPTIMIZE_FLAGS_HPP + +namespace sequant { + +/// Cost metric to optimize for in single-term and top-level optimize routines. +enum class OptFor { Flops, Memsize }; + +/// Whether to reorder summands so terms with shared intermediates appear +/// closer to each other. +enum class ReorderSum { Reorder, NoReorder }; + +} // namespace sequant + +#endif // SEQUANT_CORE_OPTIMIZE_FLAGS_HPP diff --git a/SeQuant/core/optimize/optimize.cpp b/SeQuant/core/optimize/optimize.cpp index bb73493b88..090ccd9121 100644 --- a/SeQuant/core/optimize/optimize.cpp +++ b/SeQuant/core/optimize/optimize.cpp @@ -4,9 +4,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -14,95 +16,166 @@ #include #include #include -#include -#include -#include +#include #include -#include +#include +#include #include namespace sequant { -namespace opt { - -/// -/// \param expr Expression to be optimized. -/// \param idxsz An invocable object that maps an Index object to size. -/// \param reorder_sum If true, the summands are reordered so that terms with -/// common sub-expressions appear closer to each other. -/// \return Optimized expression for lower evaluation cost. -template >> -ExprPtr optimize(ExprPtr const& expr, IdxToSize const& idx2size, - bool reorder_sum) { - using ranges::views::transform; +namespace { + +index_to_extent_t default_idx_to_size() { + return [](Index const& ix) { return ix.space().approximate_size(); }; +} + +/// Optimize a Product that contains only Tensor and scalar factors. +ExprPtr opt_pure_product(Product const& prod, index_to_extent_t const& idx2size, + OptFor opt_for) { + if (opt_for == OptFor::Flops) + return opt::single_term_opt(prod, idx2size); + SEQUANT_ASSERT(opt_for == OptFor::Memsize); + return opt::single_term_opt(prod, idx2size); +} + +/// Deliberately non-identifier label prefix used to stand in for non-Tensor, +/// non-scalar factors during single-term optimization. Chosen so that no +/// user-defined tensor label can collide with it. +inline constexpr std::wstring_view placeholder_label_prefix = L"@__opt_"; + +/// Optimize a Product that contains some non-Tensor, non-scalar factors by +/// substituting placeholder tensors with target indices, optimizing the +/// resulting tensor-only product, then swapping the originals back in. +ExprPtr opt_mixed_product(Product const& prod, + index_to_extent_t const& idx2size, OptFor opt_for) { + container::svector non_tensors(prod.size()); + container::svector new_factors; + new_factors.reserve(prod.size()); + + for (std::size_t i = 0; i < prod.size(); ++i) { + auto&& f = prod.factor(i); + if (f->is() || f->is_scalar()) { + new_factors.emplace_back(f); + } else { + non_tensors[i] = f; + auto target_idxs = get_unique_indices(f); + new_factors.emplace_back(ex( + std::wstring(placeholder_label_prefix) + std::to_wstring(i), + bra(target_idxs.bra), ket(target_idxs.ket), aux(target_idxs.aux))); + } + } + + auto result = opt_pure_product( + Product{prod.scalar(), new_factors, Product::Flatten::No}, idx2size, + opt_for); + + auto replacer = [&non_tensors](ExprPtr& out) { + if (!out->is()) return; + auto label = out->as().label(); + if (!label.starts_with(placeholder_label_prefix)) return; + + // The placeholder prefix is internal; anything carrying it must have been + // emitted by this function, with a pure-decimal suffix indexing + // non_tensors. Any deviation is a programming error. + auto suffix_view = label.substr(placeholder_label_prefix.size()); + SEQUANT_ASSERT(!suffix_view.empty()); + std::size_t suffix = 0; + for (wchar_t c : suffix_view) { + SEQUANT_ASSERT(c >= L'0' && c <= L'9'); + suffix = suffix * 10 + static_cast(c - L'0'); + } + SEQUANT_ASSERT(suffix < non_tensors.size() && non_tensors[suffix]); + out = non_tensors[suffix].clone(); + }; + + result->visit(replacer, /* atoms_only = */ true); + return result; +} + +/// Recursive workhorse. \p parallel_outer controls whether the (single) +/// outermost Sum's summands are processed in parallel; nested recursive +/// calls always run sequentially to avoid `sequant::for_each` oversubscription. +ExprPtr optimize_impl(ExprPtr const& expr, index_to_extent_t const& idx2size, + OptFor opt_for, bool reorder, bool parallel_outer) { if (expr->is()) { - if (ranges::all_of(*expr, [](auto&& x) { - return x->template is() || x->is_scalar(); - })) - return opt::single_term_opt(expr->as(), idx2size); - else { - auto const& prod = expr->as(); - - container::svector non_tensors(prod.size()); - container::svector new_factors; - - for (auto i = 0; i < prod.size(); ++i) { - auto&& f = prod.factor(i); - if (f->is() || f->is_scalar()) - new_factors.emplace_back(f); - else { - non_tensors[i] = f; - auto target_idxs = get_unique_indices(f); - new_factors.emplace_back( - ex(L"I_" + std::to_wstring(i), bra(target_idxs.bra), - ket(target_idxs.ket), aux(target_idxs.aux))); - } - } - - auto result = opt::single_term_opt( - Product(prod.scalar(), new_factors, Product::Flatten::No), idx2size); - - auto replacer = [&non_tensors](ExprPtr& out) { - if (!out->is()) return; - auto const& tnsr = out->as(); - auto&& label = tnsr.label(); - if (label.at(0) == L'I' && label.at(1) == L'_') { - size_t suffix = std::stoi(std::wstring(label.data() + 2)); - out = non_tensors[suffix].clone(); - } - }; - - result->visit(replacer, /* atoms_only = */ true); - return result; + auto const& prod = expr->as(); + bool pure = ranges::all_of(prod, [](auto&& x) { + return x->template is() || x->is_scalar(); + }); + return pure ? opt_pure_product(prod, idx2size, opt_for) + : opt_mixed_product(prod, idx2size, opt_for); + } + + if (expr->is()) { + auto const& in_sum = expr->as(); + Sum::summands_type new_smands(in_sum.size()); + + auto do_term = [&](std::size_t i) { + new_smands[i] = + optimize_impl(in_sum.summand(i), idx2size, opt_for, + /*reorder=*/false, /*parallel_outer=*/false); + }; + + if (parallel_outer && in_sum.size() > 1) { + auto indices = ranges::views::iota(std::size_t{0}, in_sum.size()); + sequant::for_each(indices, do_term); + } else { + for (std::size_t i = 0; i < in_sum.size(); ++i) do_term(i); } - } else if (expr->is()) { - auto smands = *expr | transform([&idx2size](auto&& s) { - return optimize(s, idx2size, /* reorder_sum= */ false); - }) | ranges::to_vector; - auto sum = Sum{smands.begin(), smands.end()}; - return reorder_sum ? ex(opt::reorder(sum)) : ex(std::move(sum)); - } else - return expr->clone(); + + Sum new_sum(std::move(new_smands), Sum::move_only_tag{}); + if (!reorder) return ex(std::move(new_sum)); + + // Binarize once per optimized summand and hand the nodes to reorder() + // so they aren't re-built inside clusters(). + container::vector> nodes; + nodes.reserve(new_sum.size()); + for (auto const& s : new_sum.summands()) nodes.push_back(binarize(s)); + return ex(opt::reorder(new_sum, nodes)); + } + + return expr->clone(); } -} // namespace opt +ExprPtr optimize_dispatch(ExprPtr const& expr, index_to_extent_t idx2size, + OptFor opt_for, ReorderSum reorder) { + if (!idx2size) idx2size = default_idx_to_size(); + return optimize_impl(expr, idx2size, opt_for, reorder == ReorderSum::Reorder, + /*parallel_outer=*/true); +} + +} // namespace + +ExprPtr optimize(ExprPtr const& expr, OptFor opt_for, ReorderSum reorder) { + return optimize_dispatch(expr, default_idx_to_size(), opt_for, reorder); +} -ExprPtr optimize(ExprPtr const& expr, bool reorder_sum) { - return opt::optimize( - expr, [](Index const& ix) { return ix.space().approximate_size(); }, - reorder_sum); +ExprPtr optimize(ExprPtr const& expr, index_to_extent_t idx2size, + OptFor opt_for, ReorderSum reorder) { + return optimize_dispatch(expr, std::move(idx2size), opt_for, reorder); } -ResultExpr& optimize(ResultExpr& expr, bool reorder_sum) { - expr.expression() = optimize(expr.expression(), reorder_sum); +ResultExpr& optimize(ResultExpr& expr, OptFor opt_for, ReorderSum reorder) { + expr.expression() = optimize(expr.expression(), opt_for, reorder); + return expr; +} +ResultExpr& optimize(ResultExpr& expr, index_to_extent_t idx2size, + OptFor opt_for, ReorderSum reorder) { + expr.expression() = + optimize(expr.expression(), std::move(idx2size), opt_for, reorder); return expr; } -ResultExpr& optimize(ResultExpr&& expr, bool reorder_sum) { - return optimize(expr, reorder_sum); +ResultExpr& optimize(ResultExpr&& expr, OptFor opt_for, ReorderSum reorder) { + return optimize(expr, opt_for, reorder); +} + +ResultExpr& optimize(ResultExpr&& expr, index_to_extent_t idx2size, + OptFor opt_for, ReorderSum reorder) { + return optimize(expr, std::move(idx2size), opt_for, reorder); } } // namespace sequant diff --git a/SeQuant/core/optimize/optimize.hpp b/SeQuant/core/optimize/optimize.hpp index 77d7571409..c565b3ece5 100644 --- a/SeQuant/core/optimize/optimize.hpp +++ b/SeQuant/core/optimize/optimize.hpp @@ -2,39 +2,43 @@ #define SEQUANT_OPTIMIZE_OPTIMIZE_HPP #include +#include +#include namespace sequant { /// -/// Optimize the expression using IndexSpace::approximate_size() for reference -/// index extent. +/// Optimize the expression using IndexSpace::approximate_size() as the index +/// extent provider. /// /// \param expr Expression to be optimized. -/// \param reorder_sum If true, the summands are reordered so that terms with -/// common sub-expressions appear closer to each other. -/// True by default. +/// \param opt_for Cost metric to optimize for (Flops by default). +/// \param reorder Whether to reorder summands; on by default. /// \return Optimized expression for lower evaluation cost. -ExprPtr optimize(ExprPtr const& expr, bool reorder_sum = true); +ExprPtr optimize(ExprPtr const& expr, OptFor opt_for = OptFor::Flops, + ReorderSum reorder = ReorderSum::Reorder); -/// Optimize the expression using IndexSpace::approximate_size() for reference -/// index extent. -/// -/// \param expr Expression to be optimized. -/// \param reorder_sum If true, the summands are reordered so that terms with -/// common sub-expressions appear closer to each other. -/// True by default. -/// \return Optimized expression for lower evaluation cost. -ResultExpr& optimize(ResultExpr& expr, bool reorder_sum = true); +/// Optimize using a caller-supplied index extent provider. Pass an empty +/// \c idx2size to fall back to IndexSpace::approximate_size(). +ExprPtr optimize(ExprPtr const& expr, index_to_extent_t idx2size, + OptFor opt_for = OptFor::Flops, + ReorderSum reorder = ReorderSum::Reorder); -/// Optimize the expression using IndexSpace::approximate_size() for reference -/// index extent. -/// -/// \param expr Expression to be optimized. -/// \param reorder_sum If true, the summands are reordered so that terms with -/// common sub-expressions appear closer to each other. -/// True by default. -/// \return Optimized expression for lower evaluation cost. -[[nodiscard]] ResultExpr& optimize(ResultExpr&& expr, bool reorder_sum = true); +ResultExpr& optimize(ResultExpr& expr, OptFor opt_for = OptFor::Flops, + ReorderSum reorder = ReorderSum::Reorder); + +ResultExpr& optimize(ResultExpr& expr, index_to_extent_t idx2size, + OptFor opt_for = OptFor::Flops, + ReorderSum reorder = ReorderSum::Reorder); + +[[nodiscard]] ResultExpr& optimize(ResultExpr&& expr, + OptFor opt_for = OptFor::Flops, + ReorderSum reorder = ReorderSum::Reorder); + +[[nodiscard]] ResultExpr& optimize(ResultExpr&& expr, + index_to_extent_t idx2size, + OptFor opt_for = OptFor::Flops, + ReorderSum reorder = ReorderSum::Reorder); } // namespace sequant diff --git a/SeQuant/core/optimize/single_term.hpp b/SeQuant/core/optimize/single_term.hpp index a1d480ea30..f5b057d5bf 100644 --- a/SeQuant/core/optimize/single_term.hpp +++ b/SeQuant/core/optimize/single_term.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -19,12 +20,26 @@ #include #include +#include +#include #include #include -namespace sequant::opt { +namespace sequant { + +/// A type-erased provider mapping an Index to its extent. Used by the public +/// optimize() API. Callers reaching for the templated opt::single_term_opt +/// overloads (constrained by \ref opt::has_index_extent) should pass the +/// callable directly instead of wrapping it here — that keeps the cost +/// function's call site inlineable, whereas a value of this alias goes +/// through std::function's type-erased dispatch on every Index lookup. +using index_to_extent_t = std::function; + +namespace opt { + template concept has_index_extent = std::is_invocable_r_v; + namespace detail { /// \brief Cost function returning the flop count of a binary tensor @@ -38,14 +53,14 @@ namespace detail { /// \return A callable (lhs, rhs, result) -> double yielding the /// flop count of the contraction. auto flops_counter(has_index_extent auto&& ixex) { - return [ixex](meta::range_of auto const& lhs, - meta::range_of auto const& rhs, - meta::range_of auto const& result) -> double { + return [ixex = std::forward(ixex)]( + meta::range_of auto const& lhs, + meta::range_of auto const& rhs, + meta::range_of auto const& result) -> double { using ranges::views::concat; auto tot_idxs = tot_indices(concat(lhs, rhs, result)); double total_flops = ranges::accumulate( - concat(tot_idxs.outer, tot_idxs.inner), 1., std::multiplies{}, - std::forward(ixex)); + concat(tot_idxs.outer, tot_idxs.inner), 1., std::multiplies{}, ixex); // total_flops == 1. implies zero flops return total_flops == 1. ? 0. : total_flops; }; @@ -62,16 +77,16 @@ auto flops_counter(has_index_extent auto&& ixex) { /// \return A callable (lhs, rhs, result) -> double yielding the /// summed memory size of the three operands. auto memsize_counter(has_index_extent auto&& ixex) { - return [ixex](meta::range_of auto const& lhs, - meta::range_of auto const& rhs, - meta::range_of auto const& result) -> double { + return [ixex = std::forward(ixex)]( + meta::range_of auto const& lhs, + meta::range_of auto const& rhs, + meta::range_of auto const& result) -> double { using ranges::views::concat; double total_mem{0}; for (auto&& tot_idxs : {tot_indices(lhs), tot_indices(rhs), tot_indices(result)}) { double mem = ranges::accumulate(concat(tot_idxs.outer, tot_idxs.inner), - 1., std::multiplies{}, - std::forward(ixex)); + 1., std::multiplies{}, ixex); if (mem != 1.) total_mem += mem; // else 1. is assumed to be the accumulation init value; // skip adding it to the total. @@ -327,34 +342,43 @@ EvalSequence single_term_opt_impl(TensorNetwork const& network, } /// +/// \tparam OptFor Cost metric to optimize for (Flops or Memsize). /// \tparam IdxToSz /// \param network A TensorNetwork object. /// \param idxsz An invocable on Index, that maps Index to its dimension. /// \param subnet_cse Whether to recognize equivalent subnetworks to try /// minimizing the ops counts. -/// \return Optimal evaluation sequence that -/// minimizes flops. If there are -/// equivalent optimal sequences then the result is the one that keeps -/// the order of tensors in the network as original as possible. +/// \return Optimal evaluation sequence under the chosen cost metric. If there +/// are equivalent optimal sequences then the result is the one that +/// keeps the order of tensors in the network as original as possible. /// -template +template EvalSequence single_term_opt(TensorNetwork const& network, IdxToSz&& idxsz, bool subnet_cse) { - auto cost_fn = flops_counter(std::forward(idxsz)); decltype(OptRes::indices) tidxs{}; - return single_term_opt_impl(network, tidxs, cost_fn, subnet_cse); + if constexpr (Metric == OptFor::Flops) { + auto cost_fn = flops_counter(std::forward(idxsz)); + return single_term_opt_impl(network, tidxs, cost_fn, subnet_cse); + } else { + static_assert(Metric == OptFor::Memsize, + "Only Flops and Memsize OptFor supported."); + auto cost_fn = memsize_counter(std::forward(idxsz)); + return single_term_opt_impl(network, tidxs, cost_fn, subnet_cse); + } } } // namespace detail /// +/// \tparam Metric Cost metric to optimize for (Flops by default; Memsize +/// minimizes total operand memory rather than flops). /// \param prod Product to be optimized. /// \param idxsz An invocable object that maps an Index object to size. /// \return Parenthesized product expression. /// /// @note @c prod is assumed to consist of only Tensor expressions /// -template +template ExprPtr single_term_opt(Product const& prod, IdxToSz&& idxsz, bool subnet_cse = false) { using ranges::views::filter; @@ -365,8 +389,8 @@ ExprPtr single_term_opt(Product const& prod, IdxToSz&& idxsz, prod.factors().end(), Product::Flatten::No}); auto const tensors = prod | filter(&ExprPtr::template is) | ranges::to_vector; - auto seq = detail::single_term_opt(TensorNetwork{tensors}, - std::forward(idxsz), subnet_cse); + auto seq = detail::single_term_opt( + TensorNetwork{tensors}, std::forward(idxsz), subnet_cse); auto result = container::svector{}; for (auto i : seq) if (i == -1) { @@ -390,6 +414,7 @@ ExprPtr single_term_opt(Product const& prod, IdxToSz&& idxsz, return *result.rbegin(); } -} // namespace sequant::opt +} // namespace opt +} // namespace sequant #endif // SEQUANT_CORE_OPTIMIZE_SINGLE_TERM_HPP diff --git a/SeQuant/core/optimize/sum.cpp b/SeQuant/core/optimize/sum.cpp index 048720b3da..d0edd09745 100644 --- a/SeQuant/core/optimize/sum.cpp +++ b/SeQuant/core/optimize/sum.cpp @@ -1,7 +1,10 @@ +#include #include #include #include +#include #include +#include #include #include @@ -24,7 +27,9 @@ bool has_only_single_atom(const ExprPtr& term) { return term->size() == 1 && has_only_single_atom(*term->begin()); } -container::vector> clusters(Sum const& expr) { +container::vector> clusters( + Sum const& expr, container::vector> const& nodes) { + SEQUANT_ASSERT(nodes.size() == expr.size()); using ranges::views::tail; using ranges::views::transform; using hash_t = size_t; @@ -44,7 +49,7 @@ container::vector> clusters(Sum const& expr) { }; for (auto const& term : expr) { - auto const node = binarize(term); + auto const& node = nodes[pos]; if (has_only_single_atom(term)) { visitor(node); } else { @@ -93,14 +98,29 @@ container::vector> clusters(Sum const& expr) { return result; } -Sum reorder(Sum const& sum) { +container::vector> clusters(Sum const& expr) { + container::vector> nodes; + nodes.reserve(expr.size()); + for (auto const& term : expr) nodes.push_back(binarize(term)); + return clusters(expr, nodes); +} + +Sum reorder(Sum const& sum, + container::vector> const& nodes) { Sum result; - for (auto const& clstr : clusters(sum)) + for (auto const& clstr : clusters(sum, nodes)) for (auto p : clstr) result.append(sum.at(p)); SEQUANT_ASSERT(result.size() == sum.size()); return result; } +Sum reorder(Sum const& sum) { + container::vector> nodes; + nodes.reserve(sum.size()); + for (auto const& term : sum) nodes.push_back(binarize(term)); + return reorder(sum, nodes); +} + } // namespace sequant::opt diff --git a/SeQuant/core/optimize/sum.hpp b/SeQuant/core/optimize/sum.hpp index 70b548f0b0..b45ecf67ad 100644 --- a/SeQuant/core/optimize/sum.hpp +++ b/SeQuant/core/optimize/sum.hpp @@ -1,7 +1,9 @@ #ifndef SEQUANT_CORE_OPTIMIZE_SUM_HPP #define SEQUANT_CORE_OPTIMIZE_SUM_HPP +#include #include +#include #include namespace sequant::opt { @@ -16,6 +18,12 @@ namespace sequant::opt { /// container::vector> clusters(Sum const& expr); +/// Same as \ref clusters(Sum const&) but uses precomputed eval nodes for the +/// summands, avoiding an internal \c binarize pass. \p nodes[i] must be the +/// binarized form of \c expr.at(i). +container::vector> clusters( + Sum const& expr, container::vector> const& nodes); + /// /// \brief Reorder summands so that terms having common intermediates appear /// closer. @@ -25,6 +33,11 @@ container::vector> clusters(Sum const& expr); /// Sum reorder(Sum const& sum); +/// Same as \ref reorder(Sum const&) but uses precomputed eval nodes for the +/// summands. \p nodes[i] must be the binarized form of \c sum.at(i). +Sum reorder(Sum const& sum, + container::vector> const& nodes); + } // namespace sequant::opt #endif // SEQUANT_CORE_OPTIMIZE_SUM_HPP From 0efc42b5bc25b90f7ebc2dcf268246942aaf284a Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Wed, 20 May 2026 19:25:05 -0400 Subject: [PATCH 4/7] refactor(optimize): consolidate public knobs into OptimizeOptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename optimize/flags.hpp → optimize/options.hpp; move OptFor, ReorderSum, and index_to_extent_t into it alongside a new OptimizeOptions struct (opt_for / reorder / idx_to_extent with sensible defaults). - Collapse the six optimize() overloads into three (ExprPtr&, ResultExpr&, ResultExpr&&), each taking OptimizeOptions{} by default. Existing default-argument call sites are source-compatible. - Stop including from optimize.hpp. The public optimize() API now needs only the lightweight options header, so single_term.hpp (and its range-v3 transitives) no longer leak into consumers. This fixes the CI unity-build failure in utilities/external-interface, where range/v3/view/indirect.hpp's use of meta:: became ambiguous against sequant::meta once a sibling .cpp in the same TU had issued `using namespace sequant;`. --- CMakeLists.txt | 2 +- SeQuant/core/optimize/flags.hpp | 15 ---------- SeQuant/core/optimize/optimize.cpp | 40 +++++++------------------ SeQuant/core/optimize/optimize.hpp | 39 +++++++------------------ SeQuant/core/optimize/options.hpp | 42 +++++++++++++++++++++++++++ SeQuant/core/optimize/single_term.hpp | 19 ++---------- 6 files changed, 66 insertions(+), 91 deletions(-) delete mode 100644 SeQuant/core/optimize/flags.hpp create mode 100644 SeQuant/core/optimize/options.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index af0fd0d5a4..384717c9d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -446,9 +446,9 @@ set(SeQuant_eval_src # Optimize sources (depends on eval) set(SeQuant_optimize_src SeQuant/core/optimize/common_subexpression_elimination.hpp - SeQuant/core/optimize/flags.hpp SeQuant/core/optimize/fusion.cpp SeQuant/core/optimize/fusion.hpp + SeQuant/core/optimize/options.hpp SeQuant/core/optimize/optimize.cpp SeQuant/core/optimize/optimize.hpp SeQuant/core/optimize/single_term.hpp diff --git a/SeQuant/core/optimize/flags.hpp b/SeQuant/core/optimize/flags.hpp deleted file mode 100644 index 9484c50894..0000000000 --- a/SeQuant/core/optimize/flags.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef SEQUANT_CORE_OPTIMIZE_FLAGS_HPP -#define SEQUANT_CORE_OPTIMIZE_FLAGS_HPP - -namespace sequant { - -/// Cost metric to optimize for in single-term and top-level optimize routines. -enum class OptFor { Flops, Memsize }; - -/// Whether to reorder summands so terms with shared intermediates appear -/// closer to each other. -enum class ReorderSum { Reorder, NoReorder }; - -} // namespace sequant - -#endif // SEQUANT_CORE_OPTIMIZE_FLAGS_HPP diff --git a/SeQuant/core/optimize/optimize.cpp b/SeQuant/core/optimize/optimize.cpp index 090ccd9121..2ee9175d32 100644 --- a/SeQuant/core/optimize/optimize.cpp +++ b/SeQuant/core/optimize/optimize.cpp @@ -139,43 +139,23 @@ ExprPtr optimize_impl(ExprPtr const& expr, index_to_extent_t const& idx2size, return expr->clone(); } -ExprPtr optimize_dispatch(ExprPtr const& expr, index_to_extent_t idx2size, - OptFor opt_for, ReorderSum reorder) { - if (!idx2size) idx2size = default_idx_to_size(); - return optimize_impl(expr, idx2size, opt_for, reorder == ReorderSum::Reorder, - /*parallel_outer=*/true); -} - } // namespace -ExprPtr optimize(ExprPtr const& expr, OptFor opt_for, ReorderSum reorder) { - return optimize_dispatch(expr, default_idx_to_size(), opt_for, reorder); -} - -ExprPtr optimize(ExprPtr const& expr, index_to_extent_t idx2size, - OptFor opt_for, ReorderSum reorder) { - return optimize_dispatch(expr, std::move(idx2size), opt_for, reorder); -} - -ResultExpr& optimize(ResultExpr& expr, OptFor opt_for, ReorderSum reorder) { - expr.expression() = optimize(expr.expression(), opt_for, reorder); - return expr; +ExprPtr optimize(ExprPtr const& expr, OptimizeOptions opts) { + auto idx2size = opts.idx_to_extent ? std::move(opts.idx_to_extent) + : default_idx_to_size(); + return optimize_impl(expr, idx2size, opts.opt_for, + opts.reorder == ReorderSum::Reorder, + /*parallel_outer=*/true); } -ResultExpr& optimize(ResultExpr& expr, index_to_extent_t idx2size, - OptFor opt_for, ReorderSum reorder) { - expr.expression() = - optimize(expr.expression(), std::move(idx2size), opt_for, reorder); +ResultExpr& optimize(ResultExpr& expr, OptimizeOptions opts) { + expr.expression() = optimize(expr.expression(), std::move(opts)); return expr; } -ResultExpr& optimize(ResultExpr&& expr, OptFor opt_for, ReorderSum reorder) { - return optimize(expr, opt_for, reorder); -} - -ResultExpr& optimize(ResultExpr&& expr, index_to_extent_t idx2size, - OptFor opt_for, ReorderSum reorder) { - return optimize(expr, std::move(idx2size), opt_for, reorder); +ResultExpr& optimize(ResultExpr&& expr, OptimizeOptions opts) { + return optimize(expr, std::move(opts)); } } // namespace sequant diff --git a/SeQuant/core/optimize/optimize.hpp b/SeQuant/core/optimize/optimize.hpp index c565b3ece5..3f0bb78255 100644 --- a/SeQuant/core/optimize/optimize.hpp +++ b/SeQuant/core/optimize/optimize.hpp @@ -2,43 +2,24 @@ #define SEQUANT_OPTIMIZE_OPTIMIZE_HPP #include -#include -#include +#include namespace sequant { -/// -/// Optimize the expression using IndexSpace::approximate_size() as the index -/// extent provider. +/// Optimize the expression for lower evaluation cost. /// /// \param expr Expression to be optimized. -/// \param opt_for Cost metric to optimize for (Flops by default). -/// \param reorder Whether to reorder summands; on by default. -/// \return Optimized expression for lower evaluation cost. -ExprPtr optimize(ExprPtr const& expr, OptFor opt_for = OptFor::Flops, - ReorderSum reorder = ReorderSum::Reorder); - -/// Optimize using a caller-supplied index extent provider. Pass an empty -/// \c idx2size to fall back to IndexSpace::approximate_size(). -ExprPtr optimize(ExprPtr const& expr, index_to_extent_t idx2size, - OptFor opt_for = OptFor::Flops, - ReorderSum reorder = ReorderSum::Reorder); - -ResultExpr& optimize(ResultExpr& expr, OptFor opt_for = OptFor::Flops, - ReorderSum reorder = ReorderSum::Reorder); +/// \param opts Options controlling the cost metric, summand reordering, and +/// the index → extent provider. An empty +/// \c OptimizeOptions::idx_to_extent falls back to +/// \c IndexSpace::approximate_size(). +/// \return Optimized expression. +ExprPtr optimize(ExprPtr const& expr, OptimizeOptions opts = {}); -ResultExpr& optimize(ResultExpr& expr, index_to_extent_t idx2size, - OptFor opt_for = OptFor::Flops, - ReorderSum reorder = ReorderSum::Reorder); - -[[nodiscard]] ResultExpr& optimize(ResultExpr&& expr, - OptFor opt_for = OptFor::Flops, - ReorderSum reorder = ReorderSum::Reorder); +ResultExpr& optimize(ResultExpr& expr, OptimizeOptions opts = {}); [[nodiscard]] ResultExpr& optimize(ResultExpr&& expr, - index_to_extent_t idx2size, - OptFor opt_for = OptFor::Flops, - ReorderSum reorder = ReorderSum::Reorder); + OptimizeOptions opts = {}); } // namespace sequant diff --git a/SeQuant/core/optimize/options.hpp b/SeQuant/core/optimize/options.hpp new file mode 100644 index 0000000000..8c3669eccc --- /dev/null +++ b/SeQuant/core/optimize/options.hpp @@ -0,0 +1,42 @@ +#ifndef SEQUANT_CORE_OPTIMIZE_OPTIONS_HPP +#define SEQUANT_CORE_OPTIMIZE_OPTIONS_HPP + +#include +#include + +namespace sequant { + +class Index; + +/// Cost metric to optimize for in single-term and top-level optimize routines. +enum class OptFor { Flops, Memsize }; + +/// Whether to reorder summands so terms with shared intermediates appear +/// closer to each other. +enum class ReorderSum { Reorder, NoReorder }; + +/// A type-erased provider mapping an Index to its extent. Used by the public +/// optimize() API. Callers reaching for the templated opt::single_term_opt +/// overloads (constrained by \ref opt::has_index_extent) should pass the +/// callable directly instead of wrapping it here — that keeps the cost +/// function's call site inlineable, whereas a value of this alias goes +/// through std::function's type-erased dispatch on every Index lookup. +using index_to_extent_t = std::function; + +/// Options that control behavior of \ref sequant::optimize. +struct OptimizeOptions { + /// Cost metric to minimize. + OptFor opt_for = OptFor::Flops; + + /// Whether to reorder summands so terms with shared intermediates appear + /// closer to each other. + ReorderSum reorder = ReorderSum::Reorder; + + /// Caller-supplied Index to extent provider. If empty, defaults to + /// \c IndexSpace::approximate_size(). + index_to_extent_t idx_to_extent = {}; +}; + +} // namespace sequant + +#endif // SEQUANT_CORE_OPTIMIZE_OPTIONS_HPP diff --git a/SeQuant/core/optimize/single_term.hpp b/SeQuant/core/optimize/single_term.hpp index f5b057d5bf..68e4aed2ed 100644 --- a/SeQuant/core/optimize/single_term.hpp +++ b/SeQuant/core/optimize/single_term.hpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -20,22 +20,10 @@ #include #include -#include -#include #include #include -namespace sequant { - -/// A type-erased provider mapping an Index to its extent. Used by the public -/// optimize() API. Callers reaching for the templated opt::single_term_opt -/// overloads (constrained by \ref opt::has_index_extent) should pass the -/// callable directly instead of wrapping it here — that keeps the cost -/// function's call site inlineable, whereas a value of this alias goes -/// through std::function's type-erased dispatch on every Index lookup. -using index_to_extent_t = std::function; - -namespace opt { +namespace sequant::opt { template concept has_index_extent = std::is_invocable_r_v; @@ -414,7 +402,6 @@ ExprPtr single_term_opt(Product const& prod, IdxToSz&& idxsz, return *result.rbegin(); } -} // namespace opt -} // namespace sequant +} // namespace sequant::opt #endif // SEQUANT_CORE_OPTIMIZE_SINGLE_TERM_HPP From 59f4b57bf7f50bd5cb80f4e75ea381dfd9c0eb34 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Fri, 22 May 2026 09:11:35 -0400 Subject: [PATCH 5/7] refactor(optimize): re-add bool backwards-compat overloads and document optimize() API --- SeQuant/core/optimize/optimize.cpp | 21 +++++++++++++++++++++ SeQuant/core/optimize/optimize.hpp | 29 +++++++++++++++++++++++++---- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/SeQuant/core/optimize/optimize.cpp b/SeQuant/core/optimize/optimize.cpp index 2ee9175d32..b6df46e5af 100644 --- a/SeQuant/core/optimize/optimize.cpp +++ b/SeQuant/core/optimize/optimize.cpp @@ -158,4 +158,25 @@ ResultExpr& optimize(ResultExpr&& expr, OptimizeOptions opts) { return optimize(expr, std::move(opts)); } +// backwards compatibility overloads + +namespace { +inline OptimizeOptions compatibility_opts(bool reorder_sum) { + return OptimizeOptions{.reorder = reorder_sum ? ReorderSum::Reorder + : ReorderSum::NoReorder}; +} +} // namespace + +ExprPtr optimize(ExprPtr const& expr, bool reorder_sum) { + return optimize(expr, compatibility_opts(reorder_sum)); +} + +ResultExpr& optimize(ResultExpr& expr, bool reorder_sum) { + return optimize(expr, compatibility_opts(reorder_sum)); +} + +ResultExpr& optimize(ResultExpr&& expr, bool reorder_sum) { + return optimize(std::move(expr), compatibility_opts(reorder_sum)); +} + } // namespace sequant diff --git a/SeQuant/core/optimize/optimize.hpp b/SeQuant/core/optimize/optimize.hpp index 3f0bb78255..2efeb0593d 100644 --- a/SeQuant/core/optimize/optimize.hpp +++ b/SeQuant/core/optimize/optimize.hpp @@ -9,18 +9,39 @@ namespace sequant { /// Optimize the expression for lower evaluation cost. /// /// \param expr Expression to be optimized. -/// \param opts Options controlling the cost metric, summand reordering, and -/// the index → extent provider. An empty -/// \c OptimizeOptions::idx_to_extent falls back to -/// \c IndexSpace::approximate_size(). +/// \param opts Optimization parameters; see \c OptimizeOptions. By default: +/// the cost metric is flop count, index extents are taken from +/// \c IndexSpace::approximate_size(), and the summands of a sum +/// are reordered to cluster terms that share intermediates. /// \return Optimized expression. ExprPtr optimize(ExprPtr const& expr, OptimizeOptions opts = {}); +/// \copydoc optimize(ExprPtr const&, OptimizeOptions) ResultExpr& optimize(ResultExpr& expr, OptimizeOptions opts = {}); +/// \copydoc optimize(ExprPtr const&, OptimizeOptions) [[nodiscard]] ResultExpr& optimize(ResultExpr&& expr, OptimizeOptions opts = {}); +// Overloads for backwards compatibility + +/// \deprecated Use the \c OptimizeOptions overload instead. +/// +/// Equivalent to calling the primary overload with default \c OptimizeOptions +/// and \c OptimizeOptions::reorder_sum set to \p reorder_sum. +/// +/// \param expr Expression to be optimized. +/// \param reorder_sum If true, reorder the summands of a sum to cluster terms +/// that share intermediates. +/// \return Optimized expression. +ExprPtr optimize(ExprPtr const& expr, bool reorder_sum); + +/// \copydoc optimize(ExprPtr const&, bool) +ResultExpr& optimize(ResultExpr& expr, bool reorder_sum); + +/// \copydoc optimize(ExprPtr const&, bool) +[[nodiscard]] ResultExpr& optimize(ResultExpr&& expr, bool reorder_sum); + } // namespace sequant #endif // SEQUANT_OPTIMIZE_OPTIMIZE_HPP From f667916ddadbea4dba648d2495197a1913b32082 Mon Sep 17 00:00:00 2001 From: Bimal Gaudel Date: Fri, 22 May 2026 19:45:22 -0400 Subject: [PATCH 6/7] Add the deprecation attributes the relevant functions. --- SeQuant/core/optimize/optimize.hpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/SeQuant/core/optimize/optimize.hpp b/SeQuant/core/optimize/optimize.hpp index 2efeb0593d..caa6952d44 100644 --- a/SeQuant/core/optimize/optimize.hpp +++ b/SeQuant/core/optimize/optimize.hpp @@ -34,13 +34,21 @@ ResultExpr& optimize(ResultExpr& expr, OptimizeOptions opts = {}); /// \param reorder_sum If true, reorder the summands of a sum to cluster terms /// that share intermediates. /// \return Optimized expression. -ExprPtr optimize(ExprPtr const& expr, bool reorder_sum); +[[deprecated( + "use the OptimizeOptions" + " overload of optimize() instead")]] ExprPtr +optimize(ExprPtr const& expr, bool reorder_sum); /// \copydoc optimize(ExprPtr const&, bool) -ResultExpr& optimize(ResultExpr& expr, bool reorder_sum); +[[deprecated( + "use the OptimizeOptions" + " overload of optimize() instead")]] ResultExpr& +optimize(ResultExpr& expr, bool reorder_sum); /// \copydoc optimize(ExprPtr const&, bool) -[[nodiscard]] ResultExpr& optimize(ResultExpr&& expr, bool reorder_sum); +[[nodiscard, deprecated("use the OptimizeOptions" + " overload of optimize() instead")]] ResultExpr& +optimize(ResultExpr&& expr, bool reorder_sum); } // namespace sequant From a1d0264129bcbc590d42431dac996a9dc40631cf Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Fri, 22 May 2026 17:35:47 -0700 Subject: [PATCH 7/7] test(optimize): cover OptimizeOptions, Memsize metric, and parallel summands Add minimal tests for the refactored optimize() API: - both OptFor::Flops and OptFor::Memsize binarize a product correctly - ReorderSum knob, and bare optimize() == explicit Reorder - parallel summand optimization (set_num_threads 1 vs 4) yields identical results, guarding the optimize_impl(..., parallel_outer=true) path Also document the thread-safety invariants of the parallel branch (work on per-task clones; binarize()'s lazy Index::label() cache write is sequenced after for_each joins) and explain the deliberate IndexSet-vs-vector container choice and the exact-equality scalar sentinel in the flop/memsize counters. Co-Authored-By: Claude Opus 4.7 (1M context) --- SeQuant/core/optimize/optimize.cpp | 19 +++++++- SeQuant/core/optimize/single_term.hpp | 16 +++++-- tests/unit/test_optimize.cpp | 67 +++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 4 deletions(-) diff --git a/SeQuant/core/optimize/optimize.cpp b/SeQuant/core/optimize/optimize.cpp index b6df46e5af..aaeab5e4ba 100644 --- a/SeQuant/core/optimize/optimize.cpp +++ b/SeQuant/core/optimize/optimize.cpp @@ -118,6 +118,22 @@ ExprPtr optimize_impl(ExprPtr const& expr, index_to_extent_t const& idx2size, /*reorder=*/false, /*parallel_outer=*/false); }; + // Thread-safety of the parallel branch rests on two invariants; do NOT + // break them without re-auditing: + // 1. Each task writes a distinct, pre-allocated new_smands[i] slot, and + // the work below (single_term_opt -> + // TensorNetwork::canonicalize_slots) operates on per-task *clones* of + // the input tensors. The lazily populated `mutable` caches on + // Expr/Index (hash_value_, label_, ...) are unsynchronized, so + // concurrent work must never read/write them on a shared (non-cloned) + // node. Index comparison touches only immutable members, so building + // index sets over shared indices is safe. + // 2. The binarize() pass below DOES read Index::label() (a lazy cache + // write) on the optimized summands, so it is run *sequentially, after* + // for_each() has joined -- never inside do_term(). + // The default Context and cardinal_tensor_labels must also be configured + // before entering here (their writes are unsynchronized unless + // SEQUANT_CONTEXT_MANIPULATION_THREADSAFE); optimize() only reads them. if (parallel_outer && in_sum.size() > 1) { auto indices = ranges::views::iota(std::size_t{0}, in_sum.size()); sequant::for_each(indices, do_term); @@ -129,7 +145,8 @@ ExprPtr optimize_impl(ExprPtr const& expr, index_to_extent_t const& idx2size, if (!reorder) return ex(std::move(new_sum)); // Binarize once per optimized summand and hand the nodes to reorder() - // so they aren't re-built inside clusters(). + // so they aren't re-built inside clusters(). NOTE: this runs sequentially + // by design -- see invariant (2) above. container::vector> nodes; nodes.reserve(new_sum.size()); for (auto const& s : new_sum.summands()) nodes.push_back(binarize(s)); diff --git a/SeQuant/core/optimize/single_term.hpp b/SeQuant/core/optimize/single_term.hpp index 68e4aed2ed..636c8378cd 100644 --- a/SeQuant/core/optimize/single_term.hpp +++ b/SeQuant/core/optimize/single_term.hpp @@ -46,10 +46,16 @@ auto flops_counter(has_index_extent auto&& ixex) { meta::range_of auto const& rhs, meta::range_of auto const& result) -> double { using ranges::views::concat; + // is required here: concatenating the three operands repeats + // every contracted/shared index, so it must be deduplicated before taking + // the extent product (cf. memsize_counter, which processes each operand + // separately and so can use the default vector container). auto tot_idxs = tot_indices(concat(lhs, rhs, result)); double total_flops = ranges::accumulate( concat(tot_idxs.outer, tot_idxs.inner), 1., std::multiplies{}, ixex); - // total_flops == 1. implies zero flops + // A product of exactly 1. means the index set was empty (the accumulation + // init value), i.e. a scalar contraction => zero flops. Extents are + // integer-valued, so this equality is exact. return total_flops == 1. ? 0. : total_flops; }; } @@ -71,13 +77,17 @@ auto memsize_counter(has_index_extent auto&& ixex) { meta::range_of auto const& result) -> double { using ranges::views::concat; double total_mem{0}; + // Each operand is sized independently, so the default (vector) container of + // tot_indices suffices -- a single operand's index list has no duplicates, + // unlike the concatenated set flops_counter must dedup. for (auto&& tot_idxs : {tot_indices(lhs), tot_indices(rhs), tot_indices(result)}) { double mem = ranges::accumulate(concat(tot_idxs.outer, tot_idxs.inner), 1., std::multiplies{}, ixex); + // mem == 1. means this operand had no indices (the accumulation init + // value), i.e. a scalar; it contributes no memory. Same exact-equality + // convention as flops_counter above. if (mem != 1.) total_mem += mem; - // else 1. is assumed to be the accumulation init value; - // skip adding it to the total. } return total_mem; }; diff --git a/tests/unit/test_optimize.cpp b/tests/unit/test_optimize.cpp index 9b4e1f232f..530f961deb 100644 --- a/tests/unit/test_optimize.cpp +++ b/tests/unit/test_optimize.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -27,6 +28,15 @@ sequant::ExprPtr extract(sequant::ExprPtr expr, return result; } +// number of Tensor leaves in a (binarized) expression tree +size_t count_tensor_leaves(sequant::ExprPtr const& expr) { + using namespace sequant; + size_t n = 0; + expr->visit([&n](auto const& x) { n += x->template is() ? 1 : 0; }, + /*atoms_only=*/true); + return n; +} + TEST_CASE("optimize", "[optimize]") { using namespace sequant; @@ -227,6 +237,63 @@ TEST_CASE("optimize", "[optimize]") { aux->approximate_size(aux_sz); } + + SECTION("OptimizeOptions: cost metric and reorder knobs") { + auto const prod = parse_expr_antisymm( + L"g_{i3,i4}^{a3,a4} t_{a1,a2}^{i3,i4} t_{a3,a4}^{i1,i2}"); + + // both metrics must binarize the 3-tensor product into a binary tree: + // a 2-factor top product whose leaves are the 3 original tensors + for (auto opt_for : {OptFor::Flops, OptFor::Memsize}) { + CAPTURE(static_cast(opt_for)); + auto res = optimize(prod, OptimizeOptions{.opt_for = opt_for}); + REQUIRE(res->is()); + REQUIRE(res->as().factors().size() == 2); + REQUIRE(count_tensor_leaves(res) == 3); + } + + // reorder knob: a two-summand sum is optimized either way, and the + // optimize() default (reorder) matches an explicit Reorder request + auto const sum = parse_expr_antisymm( + L"g_{i3,i4}^{a3,a4} t_{a1,a2}^{i3,i4} t_{a3,a4}^{i1,i2}" + L" + g_{i3,i4}^{a3,a4} t_{a3,a4}^{i1,i2} t_{a1}^{i3} t_{a2}^{i4}"); + REQUIRE(sum->is()); + + auto no_reorder = + optimize(sum, OptimizeOptions{.reorder = ReorderSum::NoReorder}); + auto reorder = + optimize(sum, OptimizeOptions{.reorder = ReorderSum::Reorder}); + REQUIRE(no_reorder->is()); + REQUIRE(reorder->is()); + REQUIRE(no_reorder->as().size() == sum->as().size()); + REQUIRE(reorder->as().size() == sum->as().size()); + // default options == explicit Reorder + REQUIRE(*optimize(sum) == *reorder); + } + + SECTION("Parallel optimization of summands matches sequential") { + // exercise optimize_impl(..., parallel_outer=true): a multi-summand sum + // optimized concurrently must yield the same result as single-threaded. + auto const sum = parse_expr_antisymm( + L"g_{i3,i4}^{a3,a4} t_{a1,a2}^{i3,i4} t_{a3,a4}^{i1,i2}" + L" + g_{i3,i4}^{a3,a4} t_{a3,a4}^{i1,i2} t_{a1}^{i3} t_{a2}^{i4}" + L" + g_{i3,i4}^{a3,a4} t_{a1}^{i3} t_{a2}^{i4} t_{a3,a4}^{i1,i2}"); + REQUIRE(sum->is()); + REQUIRE(sum->as().size() > 1); + + auto const nthreads_save = num_threads(); + struct ThreadGuard { + int n; + ~ThreadGuard() { set_num_threads(n); } + } guard{nthreads_save}; + + set_num_threads(1); + auto const seq = optimize(sum); + set_num_threads(4); + auto const par = optimize(sum); + + REQUIRE(*seq == *par); + } } SECTION("CSE") {