diff --git a/pyproject.toml b/pyproject.toml
index e13772b3c..7dab28666 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,9 +15,8 @@ authors = [
 requires-python = ">=3.10"
 dependencies = [
     "torch>=2.3.0",  # Problems before 2.4.0, especially with autogram.
-    "quadprog>=0.1.9, != 0.1.10",  # Doesn't work before 0.1.9, 0.1.10 is yanked
     "numpy>=1.21.2",  # Does not work before 1.21. No python 3.10 wheel before 1.21.2.
-    "qpsolvers>=1.0.1",  # Does not work before 1.0.1
+    "proxsuite>=0.7.2",
 ]
 classifiers = [
     "Development Status :: 4 - Beta",
@@ -101,8 +100,7 @@ plot = [
 lower_bounds = [
     "torch==2.3.0",
     "numpy==1.21.2",
-    "quadprog==0.1.9",
-    "qpsolvers==1.0.1",
+    "proxsuite==0.7.2",
 ]
 
 [project.optional-dependencies]
diff --git a/src/torchjd/aggregation/_dualproj.py b/src/torchjd/aggregation/_dualproj.py
index e379f1276..f8bc973e3 100644
--- a/src/torchjd/aggregation/_dualproj.py
+++ b/src/torchjd/aggregation/_dualproj.py
@@ -32,7 +32,7 @@ def __init__(
         pref_vector: Tensor | None = None,
         norm_eps: float = 0.0001,
         reg_eps: float = 0.0001,
-        solver: SUPPORTED_SOLVER = "quadprog",
+        solver: SUPPORTED_SOLVER = "proxsuite",
     ) -> None:
         super().__init__()
         self.pref_vector = pref_vector
@@ -102,7 +102,7 @@ def __init__(
         pref_vector: Tensor | None = None,
         norm_eps: float = 0.0001,
         reg_eps: float = 0.0001,
-        solver: SUPPORTED_SOLVER = "quadprog",
+        solver: SUPPORTED_SOLVER = "proxsuite",
     ) -> None:
         self._solver: SUPPORTED_SOLVER = solver
 
diff --git a/src/torchjd/aggregation/_upgrad.py b/src/torchjd/aggregation/_upgrad.py
index c1e4807e3..5e11d320c 100644
--- a/src/torchjd/aggregation/_upgrad.py
+++ b/src/torchjd/aggregation/_upgrad.py
@@ -33,7 +33,7 @@ def __init__(
         pref_vector: Tensor | None = None,
         norm_eps: float = 0.0001,
         reg_eps: float = 0.0001,
-        solver: SUPPORTED_SOLVER = "quadprog",
+        solver: SUPPORTED_SOLVER = "proxsuite",
     ) -> None:
         super().__init__()
         self.pref_vector = pref_vector
@@ -105,7 +105,7 @@ def __init__(
         pref_vector: Tensor | None = None,
         norm_eps: float = 0.0001,
         reg_eps: float = 0.0001,
-        solver: SUPPORTED_SOLVER = "quadprog",
+        solver: SUPPORTED_SOLVER = "proxsuite",
     ) -> None:
         self._solver: SUPPORTED_SOLVER = solver
 
diff --git a/src/torchjd/aggregation/_utils/dual_cone.py b/src/torchjd/aggregation/_utils/dual_cone.py
index b076366be..ae25bdbd8 100644
--- a/src/torchjd/aggregation/_utils/dual_cone.py
+++ b/src/torchjd/aggregation/_utils/dual_cone.py
@@ -1,11 +1,12 @@
+import os
 from typing import Literal, TypeAlias
 
 import numpy as np
 import torch
-from qpsolvers import solve_qp
+from proxsuite import proxqp
 from torch import Tensor
 
-SUPPORTED_SOLVER: TypeAlias = Literal["quadprog"]
+SUPPORTED_SOLVER: TypeAlias = Literal["proxsuite"]
 
 
 def project_weights(U: Tensor, G: Tensor, solver: SUPPORTED_SOLVER) -> Tensor:
@@ -19,44 +20,79 @@ def project_weights(U: Tensor, G: Tensor, solver: SUPPORTED_SOLVER) -> Tensor:
     :return: A tensor of projection weights with the same shape as `U`.
     """
 
-    G_ = _to_array(G)
-    U_ = _to_array(U)
-
-    W = np.apply_along_axis(lambda u: _project_weight_vector(u, G_, solver), axis=-1, arr=U_)
+    original_shape = U.shape
+    m = G.shape[0]
+    U_flat = U.reshape(-1, m)  # [nBatch, m]
 
-    return torch.as_tensor(W, device=G.device, dtype=G.dtype)
+    W = _project_weight_vector_batch(U_flat, G, solver)
+    return W.reshape(original_shape)
 
 
-def _project_weight_vector(u: np.ndarray, G: np.ndarray, solver: SUPPORTED_SOLVER) -> np.ndarray:
-    r"""
-    Computes the weights `w` of the projection of `J^T u` onto the dual cone of the rows of `J`,
-    given `G = J J^T` and `u`. In other words, this computes the `w` that satisfies
-    `\pi_J(J^T u) = J^T w`, with `\pi_J` defined in Equation 3 of [1].
-
-    By Proposition 1 of [1], this is equivalent to solving for `v` the following quadratic program:
-        minimize        v^T G v
-        subject to      u \preceq v
+# TODO: should merge docstrings appropriately
 
-    Reference:
-    [1] `Jacobian Descent For Multi-Objective Optimization <https://arxiv.org/pdf/2406.16232>`_.
+# def _project_weight_vector(u: np.ndarray, G: np.ndarray, solver: SUPPORTED_SOLVER) -> np.ndarray:
+#     r"""
+#     Computes the weights `w` of the projection of `J^T u` onto the dual cone of the rows of `J`,
+#     given `G = J J^T` and `u`. In other words, this computes the `w` that satisfies
+#     `\pi_J(J^T u) = J^T w`, with `\pi_J` defined in Equation 3 of [1].
 
-    :param u: The vector of weights `u` of shape `[m]` corresponding to the vector `J^T u` to
-        project.
-    :param G: The Gramian matrix of `J`, equal to `J J^T`, and of shape `[m, m]`. It must be
-        symmetric and positive definite.
-    :param solver: The quadratic programming solver to use.
-    """
+#     By Proposition 1 of [1], this is equivalent to solving for `v` the following quadratic program:
+#         minimize        v^T G v
+#         subject to      u \preceq v
 
-    m = G.shape[0]
-    w = solve_qp(G, np.zeros(m), -np.eye(m), -u, solver=solver)
+#     Reference:
+#     [1] `Jacobian Descent For Multi-Objective Optimization <https://arxiv.org/pdf/2406.16232>`_.
 
-    if w is None:  # This may happen when G has large values.
-        raise ValueError("Failed to solve the quadratic programming problem.")
+#     :param u: The vector of weights `u` of shape `[m]` corresponding to the vector `J^T u` to
+#         project.
+#     :param G: The Gramian matrix of `J`, equal to `J J^T`, and of shape `[m, m]`. It must be
+#         symmetric and positive definite.
+#     :param solver: The quadratic programming solver to use.
+#     """
+#     ...
 
-    return w
+@torch.no_grad()
+def _project_weight_vector_batch(U: Tensor, G: Tensor, _solver: SUPPORTED_SOLVER) -> Tensor:
+    r"""
+    Solves the batch of quadratic programs minimizing `v^T G v` subject to `u_i \preceq v_i` for
+    each row `u_i` of `U`.
 
-def _to_array(tensor: Tensor) -> np.ndarray:
-    """Transforms a tensor into a numpy array with float64 dtype."""
+    :param U: The tensor of vectors `u_i` of shape `[n, m]`.
+    :param G: The Gramian matrix of shape `[m, m]`. It must be symmetric and positive definite.
+    :param _solver: The quadratic programming solver to use (currently unused; proxsuite is the only option).
+    :return: A tensor of projection weights of shape `[n, m]`.
+    """
 
-    return tensor.cpu().detach().numpy().astype(np.float64)
+    n, m = U.shape
+    device = U.device
+    dtype = U.dtype
+
+    Q_np = G.cpu().to(dtype=torch.float64).numpy()
+    p_np = np.zeros(m, dtype=np.float64)
+    C_np = -np.eye(m, dtype=np.float64)
+    lb_np = np.full(m, -1e20, dtype=np.float64)
+    ub_np = (-U.cpu().to(dtype=torch.float64)).numpy()
+
+    batch_qps = proxqp.dense.BatchQP()
+    default_rho = 5.0e-5
+
+    for i in range(n):
+        qp = batch_qps.init_qp_in_place(m, 0, m)
+        qp.settings.primal_infeasibility_solving = False
+        qp.settings.max_iter = 1000
+        qp.settings.max_iter_in = 100
+        qp.settings.default_rho = default_rho
+        qp.settings.refactor_rho_threshold = default_rho
+        qp.settings.eps_abs = 1e-9
+        qp.init(H=Q_np, g=p_np, A=None, b=None, C=C_np, l=lb_np, u=ub_np[i], rho=default_rho)
+
+    num_threads = max(1, (os.cpu_count() or 2) // 2)
+    proxqp.dense.solve_in_parallel(num_threads=num_threads, qps=batch_qps)
+
+    zhats_np = np.empty((n, m), dtype=np.float64)
+    for i in range(n):
+        zhats_np[i] = batch_qps.get(i).results.x
+
+    return torch.from_numpy(zhats_np).to(device=device, dtype=dtype)
diff --git a/tests/unit/aggregation/_utils/test_dual_cone.py b/tests/unit/aggregation/_utils/test_dual_cone.py
index 68a8a75d7..0fa3e8a0c 100644
--- a/tests/unit/aggregation/_utils/test_dual_cone.py
+++ b/tests/unit/aggregation/_utils/test_dual_cone.py
@@ -1,10 +1,9 @@
-import numpy as np
 import torch
-from pytest import mark, raises
+from pytest import mark
 from torch.testing import assert_close
 from utils.tensors import rand_, randn_
 
-from torchjd.aggregation._utils.dual_cone import _project_weight_vector, project_weights
+from torchjd.aggregation._utils.dual_cone import project_weights
 
 
 @mark.parametrize("shape", [(5, 7), (9, 37), (2, 14), (32, 114), (50, 100)])
@@ -34,7 +33,7 @@ def test_solution_weights(shape: tuple[int, int]) -> None:
     G = J @ J.T
     u = rand_(shape[0])
 
-    w = project_weights(u, G, "quadprog")
+    w = project_weights(u, G, "proxsuite")
     dual_gap = w - u
 
     # Dual feasibility
@@ -63,8 +62,8 @@ def test_scale_invariant(shape: tuple[int, int], scaling: float) -> None:
     G = J @ J.T
     u = rand_(shape[0])
 
-    w = project_weights(u, G, "quadprog")
-    w_scaled = project_weights(u, scaling * G, "quadprog")
+    w = project_weights(u, G, "proxsuite")
+    w_scaled = project_weights(u, scaling * G, "proxsuite")
 
     assert_close(w_scaled, w)
 
@@ -82,16 +81,16 @@ def test_tensorization_shape(shape: tuple[int, ...]) -> None:
 
     G = matrix @ matrix.T
 
-    W_tensor = project_weights(U_tensor, G, "quadprog")
-    W_matrix = project_weights(U_matrix, G, "quadprog")
+    W_tensor = project_weights(U_tensor, G, "proxsuite")
+    W_matrix = project_weights(U_matrix, G, "proxsuite")
 
     assert_close(W_matrix.reshape(shape), W_tensor)
 
 
-def test_project_weight_vector_failure() -> None:
-    """Tests that `_project_weight_vector` raises an error when the input G has too large values."""
+# def test_project_weight_vector_failure() -> None:
+#     """Tests that `_project_weight_vector` raises an error when the input G has too large values."""
 
-    large_J = np.random.randn(10, 100) * 1e5
-    large_G = large_J @ large_J.T
-    with raises(ValueError):
-        _project_weight_vector(np.ones(10), large_G, "quadprog")
+#     large_J = np.random.randn(10, 100) * 1e5
+#     large_G = large_J @ large_J.T
+#     with raises(ValueError):
+#         _project_weight_vector(np.ones(10), large_G, "proxsuite")
diff --git a/tests/unit/aggregation/test_dualproj.py b/tests/unit/aggregation/test_dualproj.py
index 34fe8d462..c8bea16ea 100644
--- a/tests/unit/aggregation/test_dualproj.py
+++ b/tests/unit/aggregation/test_dualproj.py
@@ -47,9 +47,9 @@ def test_non_differentiable(aggregator: DualProj, matrix: Tensor) -> None:
 
 
 def test_representations() -> None:
-    A = DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="quadprog")
+    A = DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="proxsuite")
     assert (
-        repr(A) == "DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='quadprog')"
+        repr(A) == "DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='proxsuite')"
     )
     assert str(A) == "DualProj"
 
@@ -57,11 +57,11 @@ def test_representations() -> None:
         pref_vector=torch.tensor([1.0, 2.0, 3.0], device="cpu"),
         norm_eps=0.0001,
         reg_eps=0.0001,
-        solver="quadprog",
+        solver="proxsuite",
     )
     assert (
         repr(A) == "DualProj(pref_vector=tensor([1., 2., 3.]), norm_eps=0.0001, reg_eps=0.0001, "
-        "solver='quadprog')"
+        "solver='proxsuite')"
     )
     assert str(A) == "DualProj([1., 2., 3.])"
 
diff --git a/tests/unit/aggregation/test_pcgrad.py b/tests/unit/aggregation/test_pcgrad.py
index b776071d3..b9c4cf630 100644
--- a/tests/unit/aggregation/test_pcgrad.py
+++ b/tests/unit/aggregation/test_pcgrad.py
@@ -55,7 +55,7 @@ def test_equivalence_upgrad_sum_two_rows(shape: tuple[int, int]) -> None:
         ones_((2,)),
         norm_eps=0.0,
         reg_eps=0.0,
-        solver="quadprog",
+        solver="proxsuite",
     )
     result = pc_grad_weighting(gramian)
 
diff --git a/tests/unit/aggregation/test_upgrad.py b/tests/unit/aggregation/test_upgrad.py
index 075680a02..b2f7df4e0 100644
--- a/tests/unit/aggregation/test_upgrad.py
+++ b/tests/unit/aggregation/test_upgrad.py
@@ -53,19 +53,21 @@ def test_non_differentiable(aggregator: UPGrad, matrix: Tensor) -> None:
 
 
 def test_representations() -> None:
-    A = UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="quadprog")
-    assert repr(A) == "UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='quadprog')"
+    A = UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="proxsuite")
+    assert (
+        repr(A) == "UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='proxsuite')"
+    )
     assert str(A) == "UPGrad"
 
     A = UPGrad(
         pref_vector=torch.tensor([1.0, 2.0, 3.0], device="cpu"),
         norm_eps=0.0001,
         reg_eps=0.0001,
-        solver="quadprog",
+        solver="proxsuite",
     )
     assert (
         repr(A) == "UPGrad(pref_vector=tensor([1., 2., 3.]), norm_eps=0.0001, reg_eps=0.0001, "
-        "solver='quadprog')"
+        "solver='proxsuite')"
     )
     assert str(A) == "UPGrad([1., 2., 3.])"
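
Below is a minimal usage sketch of the proxsuite-backed `project_weights` introduced in this diff. The random Jacobian, its shape, and the tolerance in the final check are illustrative assumptions; only the `project_weights(u, G, "proxsuite")` call and the quadratic program it solves come from the changed code.

    import torch

    from torchjd.aggregation._utils.dual_cone import project_weights

    J = torch.randn(5, 7)  # hypothetical Jacobian with m = 5 objectives
    G = J @ J.T  # Gramian J J^T, symmetric positive (semi-)definite
    u = torch.rand(5)  # weights of the vector J^T u to project onto the dual cone

    # Solves min_v v^T G v subject to u <= v (elementwise); leading batch dimensions of u also work.
    w = project_weights(u, G, "proxsuite")
    assert bool(torch.all(w >= u - 1e-6))  # primal feasibility, up to solver tolerance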