Skip to content
Merged
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,7 @@ set(SeQuant_optimize_src
SeQuant/core/optimize/common_subexpression_elimination.hpp
SeQuant/core/optimize/fusion.cpp
SeQuant/core/optimize/fusion.hpp
SeQuant/core/optimize/options.hpp
SeQuant/core/optimize/optimize.cpp
SeQuant/core/optimize/optimize.hpp
SeQuant/core/optimize/single_term.hpp
Expand Down
233 changes: 162 additions & 71 deletions SeQuant/core/optimize/optimize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,105 +4,196 @@
#include <SeQuant/core/eval/eval_expr.hpp>
#include <SeQuant/core/expr.hpp>
#include <SeQuant/core/hash.hpp>
#include <SeQuant/core/index.hpp>
#include <SeQuant/core/optimize/optimize.hpp>
#include <SeQuant/core/optimize/single_term.hpp>
#include <SeQuant/core/optimize/sum.hpp>
#include <SeQuant/core/runtime.hpp>
#include <SeQuant/core/utility/indices.hpp>
#include <SeQuant/core/utility/macros.hpp>

#include <range/v3/algorithm/all_of.hpp>
#include <range/v3/iterator/basic_iterator.hpp>
#include <range/v3/range/access.hpp>
#include <range/v3/range/conversion.hpp>
#include <range/v3/view/tail.hpp>
#include <range/v3/view/transform.hpp>
#include <range/v3/view/view.hpp>
#include <range/v3/view/iota.hpp>

#include <cstddef>
#include <type_traits>
#include <string>
#include <string_view>
#include <utility>

namespace sequant {

namespace opt {

///
/// \param expr Expression to be optimized.
/// \param idxsz An invocable object that maps an Index object to size.
/// \param reorder_sum If true, the summands are reordered so that terms with
/// common sub-expressions appear closer to each other.
/// \return Optimized expression for lower evaluation cost.
template <typename IdxToSize, typename = std::enable_if_t<std::is_invocable_r_v<
size_t, IdxToSize, const Index&>>>
ExprPtr optimize(ExprPtr const& expr, IdxToSize const& idx2size,
bool reorder_sum) {
using ranges::views::transform;
namespace {

index_to_extent_t default_idx_to_size() {
return [](Index const& ix) { return ix.space().approximate_size(); };
}

/// Optimize a Product that contains only Tensor and scalar factors.
ExprPtr opt_pure_product(Product const& prod, index_to_extent_t const& idx2size,
OptFor opt_for) {
if (opt_for == OptFor::Flops)
return opt::single_term_opt<OptFor::Flops>(prod, idx2size);
SEQUANT_ASSERT(opt_for == OptFor::Memsize);
return opt::single_term_opt<OptFor::Memsize>(prod, idx2size);
}

/// Deliberately non-identifier label prefix used to stand in for non-Tensor,
/// non-scalar factors during single-term optimization. Chosen so that no
/// user-defined tensor label can collide with it.
inline constexpr std::wstring_view placeholder_label_prefix = L"@__opt_";

/// Optimize a Product that contains some non-Tensor, non-scalar factors by
/// substituting placeholder tensors with target indices, optimizing the
/// resulting tensor-only product, then swapping the originals back in.
ExprPtr opt_mixed_product(Product const& prod,
index_to_extent_t const& idx2size, OptFor opt_for) {
container::svector<ExprPtr> non_tensors(prod.size());
container::svector<ExprPtr> new_factors;
new_factors.reserve(prod.size());

for (std::size_t i = 0; i < prod.size(); ++i) {
auto&& f = prod.factor(i);
if (f->is<Tensor>() || f->is_scalar()) {
new_factors.emplace_back(f);
} else {
non_tensors[i] = f;
auto target_idxs = get_unique_indices(f);
new_factors.emplace_back(ex<Tensor>(
std::wstring(placeholder_label_prefix) + std::to_wstring(i),
bra(target_idxs.bra), ket(target_idxs.ket), aux(target_idxs.aux)));
}
}

auto result = opt_pure_product(
Product{prod.scalar(), new_factors, Product::Flatten::No}, idx2size,
opt_for);

auto replacer = [&non_tensors](ExprPtr& out) {
if (!out->is<Tensor>()) return;
auto label = out->as<Tensor>().label();
if (!label.starts_with(placeholder_label_prefix)) return;

// The placeholder prefix is internal; anything carrying it must have been
// emitted by this function, with a pure-decimal suffix indexing
// non_tensors. Any deviation is a programming error.
auto suffix_view = label.substr(placeholder_label_prefix.size());
SEQUANT_ASSERT(!suffix_view.empty());
std::size_t suffix = 0;
for (wchar_t c : suffix_view) {
SEQUANT_ASSERT(c >= L'0' && c <= L'9');
suffix = suffix * 10 + static_cast<std::size_t>(c - L'0');
}
SEQUANT_ASSERT(suffix < non_tensors.size() && non_tensors[suffix]);
out = non_tensors[suffix].clone();
};

result->visit(replacer, /* atoms_only = */ true);
return result;
}

/// Recursive workhorse. \p parallel_outer controls whether the (single)
/// outermost Sum's summands are processed in parallel; nested recursive
/// calls always run sequentially to avoid `sequant::for_each` oversubscription.
ExprPtr optimize_impl(ExprPtr const& expr, index_to_extent_t const& idx2size,
OptFor opt_for, bool reorder, bool parallel_outer) {
if (expr->is<Product>()) {
if (ranges::all_of(*expr, [](auto&& x) {
return x->template is<Tensor>() || x->is_scalar();
}))
return opt::single_term_opt(expr->as<Product>(), idx2size);
else {
auto const& prod = expr->as<Product>();

container::svector<ExprPtr> non_tensors(prod.size());
container::svector<ExprPtr> new_factors;

for (auto i = 0; i < prod.size(); ++i) {
auto&& f = prod.factor(i);
if (f->is<Tensor>() || f->is_scalar())
new_factors.emplace_back(f);
else {
non_tensors[i] = f;
auto target_idxs = get_unique_indices(f);
new_factors.emplace_back(
ex<Tensor>(L"I_" + std::to_wstring(i), bra(target_idxs.bra),
ket(target_idxs.ket), aux(target_idxs.aux)));
}
}

auto result = opt::single_term_opt(
Product(prod.scalar(), new_factors, Product::Flatten::No), idx2size);

auto replacer = [&non_tensors](ExprPtr& out) {
if (!out->is<Tensor>()) return;
auto const& tnsr = out->as<Tensor>();
auto&& label = tnsr.label();
if (label.at(0) == L'I' && label.at(1) == L'_') {
size_t suffix = std::stoi(std::wstring(label.data() + 2));
out = non_tensors[suffix].clone();
}
};

result->visit(replacer, /* atoms_only = */ true);
return result;
auto const& prod = expr->as<Product>();
bool pure = ranges::all_of(prod, [](auto&& x) {
return x->template is<Tensor>() || x->is_scalar();
});
return pure ? opt_pure_product(prod, idx2size, opt_for)
: opt_mixed_product(prod, idx2size, opt_for);
}

if (expr->is<Sum>()) {
auto const& in_sum = expr->as<Sum>();
Sum::summands_type new_smands(in_sum.size());

auto do_term = [&](std::size_t i) {
new_smands[i] =
optimize_impl(in_sum.summand(i), idx2size, opt_for,
/*reorder=*/false, /*parallel_outer=*/false);
};

// Thread-safety of the parallel branch rests on two invariants; do NOT
// break them without re-auditing:
// 1. Each task writes a distinct, pre-allocated new_smands[i] slot, and
// the work below (single_term_opt ->
// TensorNetwork::canonicalize_slots) operates on per-task *clones* of
// the input tensors. The lazily populated `mutable` caches on
// Expr/Index (hash_value_, label_, ...) are unsynchronized, so
// concurrent work must never read/write them on a shared (non-cloned)
// node. Index comparison touches only immutable members, so building
// index sets over shared indices is safe.
// 2. The binarize() pass below DOES read Index::label() (a lazy cache
// write) on the optimized summands, so it is run *sequentially, after*
// for_each() has joined -- never inside do_term().
// The default Context and cardinal_tensor_labels must also be configured
// before entering here (their writes are unsynchronized unless
// SEQUANT_CONTEXT_MANIPULATION_THREADSAFE); optimize() only reads them.
if (parallel_outer && in_sum.size() > 1) {
auto indices = ranges::views::iota(std::size_t{0}, in_sum.size());
sequant::for_each(indices, do_term);
} else {
for (std::size_t i = 0; i < in_sum.size(); ++i) do_term(i);
}
} else if (expr->is<Sum>()) {
auto smands = *expr | transform([&idx2size](auto&& s) {
return optimize(s, idx2size, /* reorder_sum= */ false);
}) | ranges::to_vector;
auto sum = Sum{smands.begin(), smands.end()};
return reorder_sum ? ex<Sum>(opt::reorder(sum)) : ex<Sum>(std::move(sum));
} else
return expr->clone();

Sum new_sum(std::move(new_smands), Sum::move_only_tag{});
if (!reorder) return ex<Sum>(std::move(new_sum));

// Binarize once per optimized summand and hand the nodes to reorder()
// so they aren't re-built inside clusters(). NOTE: this runs sequentially
// by design -- see invariant (2) above.
container::vector<FullBinaryNode<EvalExpr>> nodes;
nodes.reserve(new_sum.size());
for (auto const& s : new_sum.summands()) nodes.push_back(binarize(s));
return ex<Sum>(opt::reorder(new_sum, nodes));
}

return expr->clone();
}

} // namespace opt
} // namespace

ExprPtr optimize(ExprPtr const& expr, OptimizeOptions opts) {
auto idx2size = opts.idx_to_extent ? std::move(opts.idx_to_extent)
: default_idx_to_size();
return optimize_impl(expr, idx2size, opts.opt_for,
opts.reorder == ReorderSum::Reorder,
/*parallel_outer=*/true);
}

ResultExpr& optimize(ResultExpr& expr, OptimizeOptions opts) {
expr.expression() = optimize(expr.expression(), std::move(opts));
return expr;
}

ResultExpr& optimize(ResultExpr&& expr, OptimizeOptions opts) {
return optimize(expr, std::move(opts));
}

// backwards compatibility overloads

namespace {
inline OptimizeOptions compatibility_opts(bool reorder_sum) {
return OptimizeOptions{.reorder = reorder_sum ? ReorderSum::Reorder
: ReorderSum::NoReorder};
}
} // namespace

ExprPtr optimize(ExprPtr const& expr, bool reorder_sum) {
return opt::optimize(
expr, [](Index const& ix) { return ix.space().approximate_size(); },
reorder_sum);
return optimize(expr, compatibility_opts(reorder_sum));
}

ResultExpr& optimize(ResultExpr& expr, bool reorder_sum) {
expr.expression() = optimize(expr.expression(), reorder_sum);

return expr;
return optimize(expr, compatibility_opts(reorder_sum));
}

ResultExpr& optimize(ResultExpr&& expr, bool reorder_sum) {
return optimize(expr, reorder_sum);
return optimize(std::move(expr), compatibility_opts(reorder_sum));
}

} // namespace sequant
66 changes: 40 additions & 26 deletions SeQuant/core/optimize/optimize.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,53 @@
#define SEQUANT_OPTIMIZE_OPTIMIZE_HPP

#include <SeQuant/core/expr_fwd.hpp>
#include <SeQuant/core/optimize/options.hpp>

namespace sequant {

///
/// Optimize the expression using IndexSpace::approximate_size() for reference
/// index extent.
/// Optimize the expression for lower evaluation cost.
///
/// \param expr Expression to be optimized.
/// \param reorder_sum If true, the summands are reordered so that terms with
/// common sub-expressions appear closer to each other.
/// True by default.
/// \return Optimized expression for lower evaluation cost.
ExprPtr optimize(ExprPtr const& expr, bool reorder_sum = true);

/// Optimize the expression using IndexSpace::approximate_size() for reference
/// index extent.
/// \param opts Optimization parameters; see \c OptimizeOptions. By default:
/// the cost metric is flop count, index extents are taken from
/// \c IndexSpace::approximate_size(), and the summands of a sum
/// are reordered to cluster terms that share intermediates.
/// \return Optimized expression.
ExprPtr optimize(ExprPtr const& expr, OptimizeOptions opts = {});

/// \copydoc optimize(ExprPtr const&, OptimizeOptions)
ResultExpr& optimize(ResultExpr& expr, OptimizeOptions opts = {});

/// \copydoc optimize(ExprPtr const&, OptimizeOptions)
[[nodiscard]] ResultExpr& optimize(ResultExpr&& expr,
OptimizeOptions opts = {});

// Overloads for backwards compatibility

/// \deprecated Use the \c OptimizeOptions overload instead.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can (and maybe should) make use of the [[deprecated]] attribute: https://en.cppreference.com/cpp/language/attributes/deprecated

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about that. I might add it.

///
/// \param expr Expression to be optimized.
/// \param reorder_sum If true, the summands are reordered so that terms with
/// common sub-expressions appear closer to each other.
/// True by default.
/// \return Optimized expression for lower evaluation cost.
ResultExpr& optimize(ResultExpr& expr, bool reorder_sum = true);

/// Optimize the expression using IndexSpace::approximate_size() for reference
/// index extent.
/// Equivalent to calling the primary overload with default \c OptimizeOptions
/// and \c OptimizeOptions::reorder_sum set to \p reorder_sum.
///
/// \param expr Expression to be optimized.
/// \param reorder_sum If true, the summands are reordered so that terms with
/// common sub-expressions appear closer to each other.
/// True by default.
/// \return Optimized expression for lower evaluation cost.
[[nodiscard]] ResultExpr& optimize(ResultExpr&& expr, bool reorder_sum = true);
/// \param expr Expression to be optimized.
/// \param reorder_sum If true, reorder the summands of a sum to cluster terms
/// that share intermediates.
/// \return Optimized expression.
[[deprecated(
"use the OptimizeOptions"
" overload of optimize() instead")]] ExprPtr
optimize(ExprPtr const& expr, bool reorder_sum);

/// \copydoc optimize(ExprPtr const&, bool)
[[deprecated(
"use the OptimizeOptions"
" overload of optimize() instead")]] ResultExpr&
optimize(ResultExpr& expr, bool reorder_sum);

/// \copydoc optimize(ExprPtr const&, bool)
[[nodiscard, deprecated("use the OptimizeOptions"
" overload of optimize() instead")]] ResultExpr&
optimize(ResultExpr&& expr, bool reorder_sum);

} // namespace sequant

Expand Down
42 changes: 42 additions & 0 deletions SeQuant/core/optimize/options.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#ifndef SEQUANT_CORE_OPTIMIZE_OPTIONS_HPP
#define SEQUANT_CORE_OPTIMIZE_OPTIONS_HPP

#include <cstddef>
#include <functional>

namespace sequant {

class Index;

/// Cost metric to optimize for in single-term and top-level optimize routines.
enum class OptFor { Flops, Memsize };

/// Whether to reorder summands so terms with shared intermediates appear
/// closer to each other.
enum class ReorderSum { Reorder, NoReorder };

/// A type-erased provider mapping an Index to its extent. Used by the public
/// optimize() API. Callers reaching for the templated opt::single_term_opt
/// overloads (constrained by \ref opt::has_index_extent) should pass the
/// callable directly instead of wrapping it here — that keeps the cost
/// function's call site inlineable, whereas a value of this alias goes
/// through std::function's type-erased dispatch on every Index lookup.
using index_to_extent_t = std::function<std::size_t(Index const&)>;

/// Options that control behavior of \ref sequant::optimize.
struct OptimizeOptions {
/// Cost metric to minimize.
OptFor opt_for = OptFor::Flops;

/// Whether to reorder summands so terms with shared intermediates appear
/// closer to each other.
ReorderSum reorder = ReorderSum::Reorder;

/// Caller-supplied Index to extent provider. If empty, defaults to
/// \c IndexSpace::approximate_size().
index_to_extent_t idx_to_extent = {};
};

} // namespace sequant

#endif // SEQUANT_CORE_OPTIMIZE_OPTIONS_HPP
Loading