From 7573e4c5501699b317ed504450b699e0bef040b2 Mon Sep 17 00:00:00 2001
From: Pierre Quinton
Date: Thu, 7 May 2026 13:25:07 +0200
Subject: [PATCH 1/5] add proxsuite

---
 pyproject.toml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e13772b3..7dab2866 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,9 +15,8 @@ authors = [
 requires-python = ">=3.10"
 dependencies = [
     "torch>=2.3.0",  # Problems before 2.4.0, especially with autogram.
-    "quadprog>=0.1.9, != 0.1.10",  # Doesn't work before 0.1.9, 0.1.10 is yanked
     "numpy>=1.21.2",  # Does not work before 1.21. No python 3.10 wheel before 1.21.2.
-    "qpsolvers>=1.0.1",  # Does not work before 1.0.1
+    "proxsuite>=0.7.2",
 ]
 classifiers = [
     "Development Status :: 4 - Beta",
@@ -101,8 +100,7 @@ plot = [
 lower_bounds = [
     "torch==2.3.0",
     "numpy==1.21.2",
-    "quadprog==0.1.9",
-    "qpsolvers==1.0.1",
+    "proxsuite==0.7.2",
 ]

 [project.optional-dependencies]

From 69e42a3dd3f62a2c578f434955ab9f5690dec6ac Mon Sep 17 00:00:00 2001
From: Pierre Quinton
Date: Thu, 7 May 2026 13:47:40 +0200
Subject: [PATCH 2/5] Translate from qpsolvers to proxsuite (done by opencode)

---
 src/torchjd/aggregation/_dualproj.py          |  4 +-
 src/torchjd/aggregation/_upgrad.py            |  4 +-
 src/torchjd/aggregation/_utils/dual_cone.py   | 79 +++++++++++--------
 .../unit/aggregation/_utils/test_dual_cone.py | 12 +--
 tests/unit/aggregation/test_dualproj.py       |  8 +-
 tests/unit/aggregation/test_pcgrad.py         |  2 +-
 tests/unit/aggregation/test_upgrad.py         | 10 ++-
 7 files changed, 69 insertions(+), 50 deletions(-)

diff --git a/src/torchjd/aggregation/_dualproj.py b/src/torchjd/aggregation/_dualproj.py
index acb87d2f..62f1f894 100644
--- a/src/torchjd/aggregation/_dualproj.py
+++ b/src/torchjd/aggregation/_dualproj.py
@@ -30,7 +30,7 @@ def __init__(
         pref_vector: Tensor | None = None,
         norm_eps: float = 0.0001,
         reg_eps: float = 0.0001,
-        solver: SUPPORTED_SOLVER = "quadprog",
+        solver: SUPPORTED_SOLVER = "proxsuite",
     ) -> None:
         super().__init__()
         self.pref_vector = pref_vector
@@ -100,7 +100,7 @@ def __init__(
         pref_vector: Tensor | None = None,
         norm_eps: float = 0.0001,
         reg_eps: float = 0.0001,
-        solver: SUPPORTED_SOLVER = "quadprog",
+        solver: SUPPORTED_SOLVER = "proxsuite",
     ) -> None:
         self._solver: SUPPORTED_SOLVER = solver

diff --git a/src/torchjd/aggregation/_upgrad.py b/src/torchjd/aggregation/_upgrad.py
index 68689829..914a8aa9 100644
--- a/src/torchjd/aggregation/_upgrad.py
+++ b/src/torchjd/aggregation/_upgrad.py
@@ -31,7 +31,7 @@ def __init__(
         pref_vector: Tensor | None = None,
         norm_eps: float = 0.0001,
         reg_eps: float = 0.0001,
-        solver: SUPPORTED_SOLVER = "quadprog",
+        solver: SUPPORTED_SOLVER = "proxsuite",
     ) -> None:
         super().__init__()
         self.pref_vector = pref_vector
@@ -103,7 +103,7 @@ def __init__(
         pref_vector: Tensor | None = None,
         norm_eps: float = 0.0001,
         reg_eps: float = 0.0001,
-        solver: SUPPORTED_SOLVER = "quadprog",
+        solver: SUPPORTED_SOLVER = "proxsuite",
     ) -> None:
         self._solver: SUPPORTED_SOLVER = solver

diff --git a/src/torchjd/aggregation/_utils/dual_cone.py b/src/torchjd/aggregation/_utils/dual_cone.py
index b076366b..53756b05 100644
--- a/src/torchjd/aggregation/_utils/dual_cone.py
+++ b/src/torchjd/aggregation/_utils/dual_cone.py
@@ -1,11 +1,10 @@
 from typing import Literal, TypeAlias

-import numpy as np
 import torch
-from qpsolvers import solve_qp
+from proxsuite.torch.qplayer import QPFunction
 from torch import Tensor

-SUPPORTED_SOLVER: TypeAlias = Literal["quadprog"]
+SUPPORTED_SOLVER: TypeAlias = Literal["proxsuite"]


 def project_weights(U: Tensor, G: Tensor, solver: SUPPORTED_SOLVER) -> Tensor:
@@ -19,44 +18,62 @@ def project_weights(U: Tensor, G: Tensor, solver: SUPPORTED_SOLVER) -> Tensor:
     :return: A tensor of projection weights with the same shape as `U`.
     """

-    G_ = _to_array(G)
-    U_ = _to_array(U)
+    original_shape = U.shape
+    m = G.shape[0]
+    U_flat = U.reshape(-1, m)  # [nBatch, m]

-    W = np.apply_along_axis(lambda u: _project_weight_vector(u, G_, solver), axis=-1, arr=U_)
+    W = _project_weight_vector_batch(U_flat, G, solver)

-    return torch.as_tensor(W, device=G.device, dtype=G.dtype)
+    return W.reshape(original_shape)


-def _project_weight_vector(u: np.ndarray, G: np.ndarray, solver: SUPPORTED_SOLVER) -> np.ndarray:
-    r"""
-    Computes the weights `w` of the projection of `J^T u` onto the dual cone of the rows of `J`,
-    given `G = J J^T` and `u`. In other words, this computes the `w` that satisfies
-    `\pi_J(J^T u) = J^T w`, with `\pi_J` defined in Equation 3 of [1].
+# TODO: should merge docstrings appropriately

-    By Proposition 1 of [1], this is equivalent to solving for `v` the following quadratic program:
-        minimize        v^T G v
-        subject to      u \preceq v
+# def _project_weight_vector(u: np.ndarray, G: np.ndarray, solver: SUPPORTED_SOLVER) -> np.ndarray:
+#     r"""
+#     Computes the weights `w` of the projection of `J^T u` onto the dual cone of the rows of `J`,
+#     given `G = J J^T` and `u`. In other words, this computes the `w` that satisfies
+#     `\pi_J(J^T u) = J^T w`, with `\pi_J` defined in Equation 3 of [1].

-    Reference:
-    [1] `Jacobian Descent For Multi-Objective Optimization `_.
+#     By Proposition 1 of [1], this is equivalent to solving for `v` the following quadratic program:
+#         minimize        v^T G v
+#         subject to      u \preceq v

-    :param u: The vector of weights `u` of shape `[m]` corresponding to the vector `J^T u` to
-        project.
-    :param G: The Gramian matrix of `J`, equal to `J J^T`, and of shape `[m, m]`. It must be
-        symmetric and positive definite.
-    :param solver: The quadratic programming solver to use.
-    """
+#     Reference:
+#     [1] `Jacobian Descent For Multi-Objective Optimization `_.
+
+#     :param u: The vector of weights `u` of shape `[m]` corresponding to the vector `J^T u` to
+#         project.
+#     :param G: The Gramian matrix of `J`, equal to `J J^T`, and of shape `[m, m]`. It must be
+#         symmetric and positive definite.
+#     :param solver: The quadratic programming solver to use.
+#     """
+#     ...

-    m = G.shape[0]
-    w = solve_qp(G, np.zeros(m), -np.eye(m), -u, solver=solver)
-    if w is None:  # This may happen when G has large values.
-        raise ValueError("Failed to solve the quadratic programming problem.")
+@torch.no_grad()
+def _project_weight_vector_batch(U: Tensor, G: Tensor, _solver: SUPPORTED_SOLVER) -> Tensor:
+    r"""
+    Solves the batch of quadratic programs minimizing `v^T G v` subject to `u_i \preceq v_i` for
+    each row `u_i` of `U`.
+
+    :param U: The tensor of vectors `u_i` of shape `[n, m]`.
+    :param G: The Gramian matrix of shape `[m, m]`. It must be symmetric and positive definite.
+    :param _solver: The quadratic programming solver to use (unused: proxsuite is the only one supported).
+    :return: A tensor of projection weights of shape `[n, m]`.
+ """ - return w + _, m = U.shape + device = U.device + dtype = U.dtype + Q = G.cpu().to(dtype=torch.float64) + p = torch.zeros(m, dtype=torch.float64) + C = -torch.eye(m, dtype=torch.float64) + lb = torch.full((m,), -1e20, dtype=torch.float64) + ub = -U.cpu().to(dtype=torch.float64) -def _to_array(tensor: Tensor) -> np.ndarray: - """Transforms a tensor into a numpy array with float64 dtype.""" + solver_fn = QPFunction(structural_feasibility=True) + zhats, _, _ = solver_fn(Q, p, torch.Tensor(), torch.Tensor(), C, lb, ub) - return tensor.cpu().detach().numpy().astype(np.float64) + return zhats.to(device=device, dtype=dtype) diff --git a/tests/unit/aggregation/_utils/test_dual_cone.py b/tests/unit/aggregation/_utils/test_dual_cone.py index 68a8a75d..2ada2ded 100644 --- a/tests/unit/aggregation/_utils/test_dual_cone.py +++ b/tests/unit/aggregation/_utils/test_dual_cone.py @@ -34,7 +34,7 @@ def test_solution_weights(shape: tuple[int, int]) -> None: G = J @ J.T u = rand_(shape[0]) - w = project_weights(u, G, "quadprog") + w = project_weights(u, G, "proxsuite") dual_gap = w - u # Dual feasibility @@ -63,8 +63,8 @@ def test_scale_invariant(shape: tuple[int, int], scaling: float) -> None: G = J @ J.T u = rand_(shape[0]) - w = project_weights(u, G, "quadprog") - w_scaled = project_weights(u, scaling * G, "quadprog") + w = project_weights(u, G, "proxsuite") + w_scaled = project_weights(u, scaling * G, "proxsuite") assert_close(w_scaled, w) @@ -82,8 +82,8 @@ def test_tensorization_shape(shape: tuple[int, ...]) -> None: G = matrix @ matrix.T - W_tensor = project_weights(U_tensor, G, "quadprog") - W_matrix = project_weights(U_matrix, G, "quadprog") + W_tensor = project_weights(U_tensor, G, "proxsuite") + W_matrix = project_weights(U_matrix, G, "proxsuite") assert_close(W_matrix.reshape(shape), W_tensor) @@ -94,4 +94,4 @@ def test_project_weight_vector_failure() -> None: large_J = np.random.randn(10, 100) * 1e5 large_G = large_J @ large_J.T with raises(ValueError): - _project_weight_vector(np.ones(10), large_G, "quadprog") + _project_weight_vector(np.ones(10), large_G, "proxsuite") diff --git a/tests/unit/aggregation/test_dualproj.py b/tests/unit/aggregation/test_dualproj.py index 34fe8d46..c8bea16e 100644 --- a/tests/unit/aggregation/test_dualproj.py +++ b/tests/unit/aggregation/test_dualproj.py @@ -47,9 +47,9 @@ def test_non_differentiable(aggregator: DualProj, matrix: Tensor) -> None: def test_representations() -> None: - A = DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="quadprog") + A = DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="proxsuite") assert ( - repr(A) == "DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='quadprog')" + repr(A) == "DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='proxsuite')" ) assert str(A) == "DualProj" @@ -57,11 +57,11 @@ def test_representations() -> None: pref_vector=torch.tensor([1.0, 2.0, 3.0], device="cpu"), norm_eps=0.0001, reg_eps=0.0001, - solver="quadprog", + solver="proxsuite", ) assert ( repr(A) == "DualProj(pref_vector=tensor([1., 2., 3.]), norm_eps=0.0001, reg_eps=0.0001, " - "solver='quadprog')" + "solver='proxsuite')" ) assert str(A) == "DualProj([1., 2., 3.])" diff --git a/tests/unit/aggregation/test_pcgrad.py b/tests/unit/aggregation/test_pcgrad.py index b776071d..b9c4cf63 100644 --- a/tests/unit/aggregation/test_pcgrad.py +++ b/tests/unit/aggregation/test_pcgrad.py @@ -55,7 +55,7 @@ def test_equivalence_upgrad_sum_two_rows(shape: tuple[int, int]) -> None: ones_((2,)), 
         norm_eps=0.0,
         reg_eps=0.0,
-        solver="quadprog",
+        solver="proxsuite",
     )
     result = pc_grad_weighting(gramian)

diff --git a/tests/unit/aggregation/test_upgrad.py b/tests/unit/aggregation/test_upgrad.py
index 075680a0..b2f7df4e 100644
--- a/tests/unit/aggregation/test_upgrad.py
+++ b/tests/unit/aggregation/test_upgrad.py
@@ -53,19 +53,21 @@ def test_non_differentiable(aggregator: UPGrad, matrix: Tensor) -> None:


 def test_representations() -> None:
-    A = UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="quadprog")
-    assert repr(A) == "UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='quadprog')"
+    A = UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="proxsuite")
+    assert (
+        repr(A) == "UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='proxsuite')"
+    )
     assert str(A) == "UPGrad"

     A = UPGrad(
         pref_vector=torch.tensor([1.0, 2.0, 3.0], device="cpu"),
         norm_eps=0.0001,
         reg_eps=0.0001,
-        solver="quadprog",
+        solver="proxsuite",
     )
     assert (
         repr(A) == "UPGrad(pref_vector=tensor([1., 2., 3.]), norm_eps=0.0001, reg_eps=0.0001, "
-        "solver='quadprog')"
+        "solver='proxsuite')"
     )
     assert str(A) == "UPGrad([1., 2., 3.])"

From 952e6171224507dd8f102b721cc3efc3a6d24b0d Mon Sep 17 00:00:00 2001
From: Pierre Quinton
Date: Thu, 7 May 2026 14:04:35 +0200
Subject: [PATCH 3/5] remove `_project_weight_vector`

---
 tests/unit/aggregation/_utils/test_dual_cone.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/tests/unit/aggregation/_utils/test_dual_cone.py b/tests/unit/aggregation/_utils/test_dual_cone.py
index 2ada2ded..0fa3e8a0 100644
--- a/tests/unit/aggregation/_utils/test_dual_cone.py
+++ b/tests/unit/aggregation/_utils/test_dual_cone.py
@@ -1,10 +1,9 @@
-import numpy as np
 import torch
-from pytest import mark, raises
+from pytest import mark
 from torch.testing import assert_close
 from utils.tensors import rand_, randn_

-from torchjd.aggregation._utils.dual_cone import _project_weight_vector, project_weights
+from torchjd.aggregation._utils.dual_cone import project_weights


 @mark.parametrize("shape", [(5, 7), (9, 37), (2, 14), (32, 114), (50, 100)])
@@ -88,10 +87,10 @@ def test_tensorization_shape(shape: tuple[int, ...]) -> None:
     assert_close(W_matrix.reshape(shape), W_tensor)


-def test_project_weight_vector_failure() -> None:
-    """Tests that `_project_weight_vector` raises an error when the input G has too large values."""
+# def test_project_weight_vector_failure() -> None:
+#     """Tests that `_project_weight_vector` raises an error when the input G has too large values."""

-    large_J = np.random.randn(10, 100) * 1e5
-    large_G = large_J @ large_J.T
-    with raises(ValueError):
-        _project_weight_vector(np.ones(10), large_G, "proxsuite")
+#     large_J = np.random.randn(10, 100) * 1e5
+#     large_G = large_J @ large_J.T
+#     with raises(ValueError):
+#         _project_weight_vector(np.ones(10), large_G, "proxsuite")

From b1b8c8dbef16433e6aa74dfb02024cbf0ee3305e Mon Sep 17 00:00:00 2001
From: Pierre Quinton
Date: Fri, 8 May 2026 09:24:30 +0200
Subject: [PATCH 4/5] Change from qplayer to BatchQP (lower level)

---
 src/torchjd/aggregation/_utils/dual_cone.py | 38 +++++++++++++++------
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/src/torchjd/aggregation/_utils/dual_cone.py b/src/torchjd/aggregation/_utils/dual_cone.py
index 53756b05..2f4d2693 100644
--- a/src/torchjd/aggregation/_utils/dual_cone.py
+++ b/src/torchjd/aggregation/_utils/dual_cone.py
@@ -1,7 +1,8 @@
 from typing import Literal, TypeAlias

+import numpy as np
 import torch
-from proxsuite.torch.qplayer import QPFunction
+from proxsuite import proxqp
 from torch import Tensor

 SUPPORTED_SOLVER: TypeAlias = Literal["proxsuite"]
@@ -63,17 +64,34 @@ def _project_weight_vector_batch(U: Tensor, G: Tensor, _solver: SUPPORTED_SOLVER
     :return: A tensor of projection weights of shape `[n, m]`.
     """

-    _, m = U.shape
+    n, m = U.shape
     device = U.device
     dtype = U.dtype
-    Q = G.cpu().to(dtype=torch.float64)
-    p = torch.zeros(m, dtype=torch.float64)
-    C = -torch.eye(m, dtype=torch.float64)
-    lb = torch.full((m,), -1e20, dtype=torch.float64)
-    ub = -U.cpu().to(dtype=torch.float64)
-
-    solver_fn = QPFunction(structural_feasibility=True)
-    zhats, _, _ = solver_fn(Q, p, torch.Tensor(), torch.Tensor(), C, lb, ub)
+    Q_np = G.cpu().to(dtype=torch.float64).numpy()
+    p_np = np.zeros(m, dtype=np.float64)
+    C_np = -np.eye(m, dtype=np.float64)
+    lb_np = np.full(m, -1e20, dtype=np.float64)
+    ub_np = (-U.cpu().to(dtype=torch.float64)).numpy()
+
+    batch_qps = proxqp.dense.BatchQP()
+    default_rho = 5.0e-5
+
+    for i in range(n):
+        qp = batch_qps.init_qp_in_place(m, 0, m)
+        qp.settings.primal_infeasibility_solving = False
+        qp.settings.max_iter = 1000
+        qp.settings.max_iter_in = 100
+        qp.settings.default_rho = default_rho
+        qp.settings.refactor_rho_threshold = default_rho
+        qp.settings.eps_abs = 1e-9
+        qp.init(H=Q_np, g=p_np, A=None, b=None, C=C_np, l=lb_np, u=ub_np[i], rho=default_rho)
+
+    for i in range(n):
+        batch_qps.get(i).solve()
+
+    zhats = torch.empty((n, m), dtype=torch.float64)
+    for i in range(n):
+        zhats[i] = torch.from_numpy(batch_qps.get(i).results.x)

     return zhats.to(device=device, dtype=dtype)

From 8968715a44e46ab3422a1899c2347bd1481beafa Mon Sep 17 00:00:00 2001
From: Pierre Quinton
Date: Sun, 10 May 2026 11:41:28 +0200
Subject: [PATCH 5/5] Make proxsuite run QPs in parallel, and cast the result
 to a tensor at the end

---
 src/torchjd/aggregation/_utils/dual_cone.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/torchjd/aggregation/_utils/dual_cone.py b/src/torchjd/aggregation/_utils/dual_cone.py
index 2f4d2693..ae25bdbd 100644
--- a/src/torchjd/aggregation/_utils/dual_cone.py
+++ b/src/torchjd/aggregation/_utils/dual_cone.py
@@ -1,3 +1,4 @@
+import os
 from typing import Literal, TypeAlias

 import numpy as np
@@ -87,11 +88,11 @@ def _project_weight_vector_batch(U: Tensor, G: Tensor, _solver: SUPPORTED_SOLVER
         qp.settings.eps_abs = 1e-9
         qp.init(H=Q_np, g=p_np, A=None, b=None, C=C_np, l=lb_np, u=ub_np[i], rho=default_rho)

-    for i in range(n):
-        batch_qps.get(i).solve()
+    num_threads = max(1, (os.cpu_count() or 2) // 2)
+    proxqp.dense.solve_in_parallel(num_threads=num_threads, qps=batch_qps)

-    zhats = torch.empty((n, m), dtype=torch.float64)
+    zhats_np = np.empty((n, m), dtype=np.float64)
     for i in range(n):
-        zhats[i] = torch.from_numpy(batch_qps.get(i).results.x)
+        zhats_np[i] = batch_qps.get(i).results.x

-    return zhats.to(device=device, dtype=dtype)
+    return torch.from_numpy(zhats_np).to(device=device, dtype=dtype)
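
To sanity-check the end state of this series, here is a minimal standalone sketch (not part of the patches themselves). It mirrors the `BatchQP` / `solve_in_parallel` usage added in PATCH 4 and PATCH 5, and checks dual feasibility (`w >= u`, the property visible in `test_solution_weights`) plus complementary slackness (`(w - u)^T G w = 0`, which characterizes the projection). The helper name `project_weights_sketch`, the random data, and the tolerances are illustrative assumptions, not code from the repository.

import os

import numpy as np
import torch
from proxsuite import proxqp


def project_weights_sketch(U: torch.Tensor, G: torch.Tensor) -> torch.Tensor:
    """For each row u of U, solve: minimize v^T G v subject to u <= v."""
    n, m = U.shape
    Q_np = G.cpu().to(dtype=torch.float64).numpy()
    p_np = np.zeros(m, dtype=np.float64)
    C_np = -np.eye(m, dtype=np.float64)  # the inequality C v <= -u encodes u <= v
    lb_np = np.full(m, -1e20, dtype=np.float64)  # effectively no lower bound on C v
    ub_np = (-U.cpu().to(dtype=torch.float64)).numpy()

    batch_qps = proxqp.dense.BatchQP()
    for i in range(n):
        qp = batch_qps.init_qp_in_place(m, 0, m)  # m variables, 0 equalities, m inequalities
        qp.settings.eps_abs = 1e-9  # PATCH 4 also tunes max_iter, default_rho, etc.
        qp.init(H=Q_np, g=p_np, A=None, b=None, C=C_np, l=lb_np, u=ub_np[i], rho=5.0e-5)

    num_threads = max(1, (os.cpu_count() or 2) // 2)
    proxqp.dense.solve_in_parallel(num_threads=num_threads, qps=batch_qps)

    W_np = np.empty((n, m), dtype=np.float64)
    for i in range(n):
        W_np[i] = batch_qps.get(i).results.x
    return torch.from_numpy(W_np).to(device=U.device, dtype=U.dtype)


# Illustrative data: a random Jacobian J gives a positive definite Gramian G = J J^T.
J = torch.randn(5, 7, dtype=torch.float64)
G = J @ J.T
U = torch.rand(3, 5, dtype=torch.float64)
W = project_weights_sketch(U, G)
assert torch.all(W >= U - 1e-6)  # dual feasibility: w >= u
slack = ((W - U) * (W @ G)).sum(dim=1)  # complementary slackness: (w - u)^T G w ~= 0
assert torch.allclose(slack, torch.zeros_like(slack), atol=1e-5)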