From 7573e4c5501699b317ed504450b699e0bef040b2 Mon Sep 17 00:00:00 2001
From: Pierre Quinton
Date: Thu, 7 May 2026 13:25:07 +0200
Subject: [PATCH 1/5] add proxsuite

---
 pyproject.toml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e13772b3..7dab2866 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,9 +15,8 @@ authors = [
 requires-python = ">=3.10"
 dependencies = [
     "torch>=2.3.0",  # Problems before 2.4.0, especially with autogram.
-    "quadprog>=0.1.9, != 0.1.10",  # Doesn't work before 0.1.9, 0.1.10 is yanked
     "numpy>=1.21.2",  # Does not work before 1.21. No python 3.10 wheel before 1.21.2.
-    "qpsolvers>=1.0.1",  # Does not work before 1.0.1
+    "proxsuite>=0.7.2",
 ]
 classifiers = [
     "Development Status :: 4 - Beta",
@@ -101,8 +100,7 @@ plot = [
 lower_bounds = [
     "torch==2.3.0",
     "numpy==1.21.2",
-    "quadprog==0.1.9",
-    "qpsolvers==1.0.1",
+    "proxsuite==0.7.2",
 ]

 [project.optional-dependencies]

From 69e42a3dd3f62a2c578f434955ab9f5690dec6ac Mon Sep 17 00:00:00 2001
From: Pierre Quinton
Date: Thu, 7 May 2026 13:47:40 +0200
Subject: [PATCH 2/5] Translate from qpsolvers to proxsuite (done by opencode)

---
 src/torchjd/aggregation/_dualproj.py          |  4 +-
 src/torchjd/aggregation/_upgrad.py            |  4 +-
 src/torchjd/aggregation/_utils/dual_cone.py   | 79 +++++++++++--------
 .../unit/aggregation/_utils/test_dual_cone.py | 12 +--
 tests/unit/aggregation/test_dualproj.py       |  8 +-
 tests/unit/aggregation/test_pcgrad.py         |  2 +-
 tests/unit/aggregation/test_upgrad.py         | 10 ++-
 7 files changed, 69 insertions(+), 50 deletions(-)

diff --git a/src/torchjd/aggregation/_dualproj.py b/src/torchjd/aggregation/_dualproj.py
index acb87d2f..62f1f894 100644
--- a/src/torchjd/aggregation/_dualproj.py
+++ b/src/torchjd/aggregation/_dualproj.py
@@ -30,7 +30,7 @@ def __init__(
         pref_vector: Tensor | None = None,
         norm_eps: float = 0.0001,
         reg_eps: float = 0.0001,
-        solver: SUPPORTED_SOLVER = "quadprog",
+        solver: SUPPORTED_SOLVER = "proxsuite",
     ) -> None:
         super().__init__()
         self.pref_vector = pref_vector
@@ -100,7 +100,7 @@ def __init__(
         pref_vector: Tensor | None = None,
         norm_eps: float = 0.0001,
         reg_eps: float = 0.0001,
-        solver: SUPPORTED_SOLVER = "quadprog",
+        solver: SUPPORTED_SOLVER = "proxsuite",
     ) -> None:
         self._solver: SUPPORTED_SOLVER = solver

diff --git a/src/torchjd/aggregation/_upgrad.py b/src/torchjd/aggregation/_upgrad.py
index 68689829..914a8aa9 100644
--- a/src/torchjd/aggregation/_upgrad.py
+++ b/src/torchjd/aggregation/_upgrad.py
@@ -31,7 +31,7 @@ def __init__(
         pref_vector: Tensor | None = None,
         norm_eps: float = 0.0001,
         reg_eps: float = 0.0001,
-        solver: SUPPORTED_SOLVER = "quadprog",
+        solver: SUPPORTED_SOLVER = "proxsuite",
     ) -> None:
         super().__init__()
         self.pref_vector = pref_vector
@@ -103,7 +103,7 @@ def __init__(
         pref_vector: Tensor | None = None,
         norm_eps: float = 0.0001,
         reg_eps: float = 0.0001,
-        solver: SUPPORTED_SOLVER = "quadprog",
+        solver: SUPPORTED_SOLVER = "proxsuite",
     ) -> None:
         self._solver: SUPPORTED_SOLVER = solver

diff --git a/src/torchjd/aggregation/_utils/dual_cone.py b/src/torchjd/aggregation/_utils/dual_cone.py
index b076366b..53756b05 100644
--- a/src/torchjd/aggregation/_utils/dual_cone.py
+++ b/src/torchjd/aggregation/_utils/dual_cone.py
@@ -1,11 +1,10 @@
 from typing import Literal, TypeAlias

-import numpy as np
 import torch
-from qpsolvers import solve_qp
+from proxsuite.torch.qplayer import QPFunction
 from torch import Tensor

-SUPPORTED_SOLVER: TypeAlias = Literal["quadprog"]
+SUPPORTED_SOLVER: TypeAlias = Literal["proxsuite"]


 def project_weights(U: Tensor, G: Tensor, solver: SUPPORTED_SOLVER) -> Tensor:
@@ -19,44 +18,62 @@ def project_weights(U: Tensor, G: Tensor, solver: SUPPORTED_SOLVER) -> Tensor:
     :return: A tensor of projection weights with the same shape as `U`.
     """

-    G_ = _to_array(G)
-    U_ = _to_array(U)
+    original_shape = U.shape
+    m = G.shape[0]
+    U_flat = U.reshape(-1, m)  # [nBatch, m]

-    W = np.apply_along_axis(lambda u: _project_weight_vector(u, G_, solver), axis=-1, arr=U_)
+    W = _project_weight_vector_batch(U_flat, G, solver)

-    return torch.as_tensor(W, device=G.device, dtype=G.dtype)
+    return W.reshape(original_shape)


-def _project_weight_vector(u: np.ndarray, G: np.ndarray, solver: SUPPORTED_SOLVER) -> np.ndarray:
-    r"""
-    Computes the weights `w` of the projection of `J^T u` onto the dual cone of the rows of `J`,
-    given `G = J J^T` and `u`. In other words, this computes the `w` that satisfies
-    `\pi_J(J^T u) = J^T w`, with `\pi_J` defined in Equation 3 of [1].
+# TODO: should merge docstrings appropriately

-    By Proposition 1 of [1], this is equivalent to solving for `v` the following quadratic program:
-        minimize        v^T G v
-        subject to      u \preceq v
+# def _project_weight_vector(u: np.ndarray, G: np.ndarray, solver: SUPPORTED_SOLVER) -> np.ndarray:
+#     r"""
+#     Computes the weights `w` of the projection of `J^T u` onto the dual cone of the rows of `J`,
+#     given `G = J J^T` and `u`. In other words, this computes the `w` that satisfies
+#     `\pi_J(J^T u) = J^T w`, with `\pi_J` defined in Equation 3 of [1].

-    Reference:
-    [1] `Jacobian Descent For Multi-Objective Optimization `_.
+#     By Proposition 1 of [1], this is equivalent to solving for `v` the following quadratic program:
+#         minimize        v^T G v
+#         subject to      u \preceq v

-    :param u: The vector of weights `u` of shape `[m]` corresponding to the vector `J^T u` to
-        project.
-    :param G: The Gramian matrix of `J`, equal to `J J^T`, and of shape `[m, m]`. It must be
-        symmetric and positive definite.
-    :param solver: The quadratic programming solver to use.
-    """
+#     Reference:
+#     [1] `Jacobian Descent For Multi-Objective Optimization `_.
+
+#     :param u: The vector of weights `u` of shape `[m]` corresponding to the vector `J^T u` to
+#         project.
+#     :param G: The Gramian matrix of `J`, equal to `J J^T`, and of shape `[m, m]`. It must be
+#         symmetric and positive definite.
+#     :param solver: The quadratic programming solver to use.
+#     """
+#     ...

-    m = G.shape[0]
-    w = solve_qp(G, np.zeros(m), -np.eye(m), -u, solver=solver)
-    if w is None:  # This may happen when G has large values.
-        raise ValueError("Failed to solve the quadratic programming problem.")
+@torch.no_grad()
+def _project_weight_vector_batch(U: Tensor, G: Tensor, _solver: SUPPORTED_SOLVER) -> Tensor:
+    r"""
+    Solves the batch of quadratic programs minimizing `v^T G v` subject to `u_i \preceq v_i` for
+    each row `u_i` of `U`.
+
+    :param U: The tensor of vectors `u_i` of shape `[n, m]`.
+    :param G: The Gramian matrix of shape `[m, m]`. It must be symmetric and positive definite.
+    :param _solver: The quadratic programming solver to use (unused: proxsuite is the only one supported).
+    :return: A tensor of projection weights of shape `[n, m]`.
+ """ - return w + _, m = U.shape + device = U.device + dtype = U.dtype + Q = G.cpu().to(dtype=torch.float64) + p = torch.zeros(m, dtype=torch.float64) + C = -torch.eye(m, dtype=torch.float64) + lb = torch.full((m,), -1e20, dtype=torch.float64) + ub = -U.cpu().to(dtype=torch.float64) -def _to_array(tensor: Tensor) -> np.ndarray: - """Transforms a tensor into a numpy array with float64 dtype.""" + solver_fn = QPFunction(structural_feasibility=True) + zhats, _, _ = solver_fn(Q, p, torch.Tensor(), torch.Tensor(), C, lb, ub) - return tensor.cpu().detach().numpy().astype(np.float64) + return zhats.to(device=device, dtype=dtype) diff --git a/tests/unit/aggregation/_utils/test_dual_cone.py b/tests/unit/aggregation/_utils/test_dual_cone.py index 68a8a75d..2ada2ded 100644 --- a/tests/unit/aggregation/_utils/test_dual_cone.py +++ b/tests/unit/aggregation/_utils/test_dual_cone.py @@ -34,7 +34,7 @@ def test_solution_weights(shape: tuple[int, int]) -> None: G = J @ J.T u = rand_(shape[0]) - w = project_weights(u, G, "quadprog") + w = project_weights(u, G, "proxsuite") dual_gap = w - u # Dual feasibility @@ -63,8 +63,8 @@ def test_scale_invariant(shape: tuple[int, int], scaling: float) -> None: G = J @ J.T u = rand_(shape[0]) - w = project_weights(u, G, "quadprog") - w_scaled = project_weights(u, scaling * G, "quadprog") + w = project_weights(u, G, "proxsuite") + w_scaled = project_weights(u, scaling * G, "proxsuite") assert_close(w_scaled, w) @@ -82,8 +82,8 @@ def test_tensorization_shape(shape: tuple[int, ...]) -> None: G = matrix @ matrix.T - W_tensor = project_weights(U_tensor, G, "quadprog") - W_matrix = project_weights(U_matrix, G, "quadprog") + W_tensor = project_weights(U_tensor, G, "proxsuite") + W_matrix = project_weights(U_matrix, G, "proxsuite") assert_close(W_matrix.reshape(shape), W_tensor) @@ -94,4 +94,4 @@ def test_project_weight_vector_failure() -> None: large_J = np.random.randn(10, 100) * 1e5 large_G = large_J @ large_J.T with raises(ValueError): - _project_weight_vector(np.ones(10), large_G, "quadprog") + _project_weight_vector(np.ones(10), large_G, "proxsuite") diff --git a/tests/unit/aggregation/test_dualproj.py b/tests/unit/aggregation/test_dualproj.py index 34fe8d46..c8bea16e 100644 --- a/tests/unit/aggregation/test_dualproj.py +++ b/tests/unit/aggregation/test_dualproj.py @@ -47,9 +47,9 @@ def test_non_differentiable(aggregator: DualProj, matrix: Tensor) -> None: def test_representations() -> None: - A = DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="quadprog") + A = DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="proxsuite") assert ( - repr(A) == "DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='quadprog')" + repr(A) == "DualProj(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='proxsuite')" ) assert str(A) == "DualProj" @@ -57,11 +57,11 @@ def test_representations() -> None: pref_vector=torch.tensor([1.0, 2.0, 3.0], device="cpu"), norm_eps=0.0001, reg_eps=0.0001, - solver="quadprog", + solver="proxsuite", ) assert ( repr(A) == "DualProj(pref_vector=tensor([1., 2., 3.]), norm_eps=0.0001, reg_eps=0.0001, " - "solver='quadprog')" + "solver='proxsuite')" ) assert str(A) == "DualProj([1., 2., 3.])" diff --git a/tests/unit/aggregation/test_pcgrad.py b/tests/unit/aggregation/test_pcgrad.py index b776071d..b9c4cf63 100644 --- a/tests/unit/aggregation/test_pcgrad.py +++ b/tests/unit/aggregation/test_pcgrad.py @@ -55,7 +55,7 @@ def test_equivalence_upgrad_sum_two_rows(shape: tuple[int, int]) -> None: ones_((2,)), 
         norm_eps=0.0,
         reg_eps=0.0,
-        solver="quadprog",
+        solver="proxsuite",
     )
     result = pc_grad_weighting(gramian)

diff --git a/tests/unit/aggregation/test_upgrad.py b/tests/unit/aggregation/test_upgrad.py
index 075680a0..b2f7df4e 100644
--- a/tests/unit/aggregation/test_upgrad.py
+++ b/tests/unit/aggregation/test_upgrad.py
@@ -53,19 +53,21 @@ def test_non_differentiable(aggregator: UPGrad, matrix: Tensor) -> None:


 def test_representations() -> None:
-    A = UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="quadprog")
-    assert repr(A) == "UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='quadprog')"
+    A = UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver="proxsuite")
+    assert (
+        repr(A) == "UPGrad(pref_vector=None, norm_eps=0.0001, reg_eps=0.0001, solver='proxsuite')"
+    )
     assert str(A) == "UPGrad"

     A = UPGrad(
         pref_vector=torch.tensor([1.0, 2.0, 3.0], device="cpu"),
         norm_eps=0.0001,
         reg_eps=0.0001,
-        solver="quadprog",
+        solver="proxsuite",
     )
     assert (
         repr(A) == "UPGrad(pref_vector=tensor([1., 2., 3.]), norm_eps=0.0001, reg_eps=0.0001, "
-        "solver='quadprog')"
+        "solver='proxsuite')"
     )
     assert str(A) == "UPGrad([1., 2., 3.])"

From 952e6171224507dd8f102b721cc3efc3a6d24b0d Mon Sep 17 00:00:00 2001
From: Pierre Quinton
Date: Thu, 7 May 2026 14:04:35 +0200
Subject: [PATCH 3/5] remove `_project_weight_vector`

---
 tests/unit/aggregation/_utils/test_dual_cone.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/tests/unit/aggregation/_utils/test_dual_cone.py b/tests/unit/aggregation/_utils/test_dual_cone.py
index 2ada2ded..0fa3e8a0 100644
--- a/tests/unit/aggregation/_utils/test_dual_cone.py
+++ b/tests/unit/aggregation/_utils/test_dual_cone.py
@@ -1,10 +1,9 @@
-import numpy as np
 import torch
-from pytest import mark, raises
+from pytest import mark
 from torch.testing import assert_close
 from utils.tensors import rand_, randn_

-from torchjd.aggregation._utils.dual_cone import _project_weight_vector, project_weights
+from torchjd.aggregation._utils.dual_cone import project_weights


 @mark.parametrize("shape", [(5, 7), (9, 37), (2, 14), (32, 114), (50, 100)])
@@ -88,10 +87,10 @@ def test_tensorization_shape(shape: tuple[int, ...]) -> None:
     assert_close(W_matrix.reshape(shape), W_tensor)


-def test_project_weight_vector_failure() -> None:
-    """Tests that `_project_weight_vector` raises an error when the input G has too large values."""
+# def test_project_weight_vector_failure() -> None:
+#     """Tests that `_project_weight_vector` raises an error when the input G has too large values."""

-    large_J = np.random.randn(10, 100) * 1e5
-    large_G = large_J @ large_J.T
-    with raises(ValueError):
-        _project_weight_vector(np.ones(10), large_G, "proxsuite")
+#     large_J = np.random.randn(10, 100) * 1e5
+#     large_G = large_J @ large_J.T
+#     with raises(ValueError):
+#         _project_weight_vector(np.ones(10), large_G, "proxsuite")

From b1b8c8dbef16433e6aa74dfb02024cbf0ee3305e Mon Sep 17 00:00:00 2001
From: Pierre Quinton
Date: Fri, 8 May 2026 09:24:30 +0200
Subject: [PATCH 4/5] Change from qplayer to BatchQP (lower level)

---
 src/torchjd/aggregation/_utils/dual_cone.py | 38 +++++++++++++++------
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/src/torchjd/aggregation/_utils/dual_cone.py b/src/torchjd/aggregation/_utils/dual_cone.py
index 53756b05..2f4d2693 100644
--- a/src/torchjd/aggregation/_utils/dual_cone.py
+++ b/src/torchjd/aggregation/_utils/dual_cone.py
@@ -1,7 +1,8 @@
 from typing import Literal, TypeAlias

+import numpy as np
 import torch
-from proxsuite.torch.qplayer import QPFunction
+from proxsuite import proxqp
 from torch import Tensor

 SUPPORTED_SOLVER: TypeAlias = Literal["proxsuite"]
@@ -63,17 +64,34 @@ def _project_weight_vector_batch(U: Tensor, G: Tensor, _solver: SUPPORTED_SOLVER
     :return: A tensor of projection weights of shape `[n, m]`.
     """

-    _, m = U.shape
+    n, m = U.shape
     device = U.device
     dtype = U.dtype
-    Q = G.cpu().to(dtype=torch.float64)
-    p = torch.zeros(m, dtype=torch.float64)
-    C = -torch.eye(m, dtype=torch.float64)
-    lb = torch.full((m,), -1e20, dtype=torch.float64)
-    ub = -U.cpu().to(dtype=torch.float64)
-
-    solver_fn = QPFunction(structural_feasibility=True)
-    zhats, _, _ = solver_fn(Q, p, torch.Tensor(), torch.Tensor(), C, lb, ub)
+    Q_np = G.cpu().to(dtype=torch.float64).numpy()
+    p_np = np.zeros(m, dtype=np.float64)
+    C_np = -np.eye(m, dtype=np.float64)
+    lb_np = np.full(m, -1e20, dtype=np.float64)
+    ub_np = (-U.cpu().to(dtype=torch.float64)).numpy()
+
+    batch_qps = proxqp.dense.BatchQP()
+    default_rho = 5.0e-5
+
+    for i in range(n):
+        qp = batch_qps.init_qp_in_place(m, 0, m)
+        qp.settings.primal_infeasibility_solving = False
+        qp.settings.max_iter = 1000
+        qp.settings.max_iter_in = 100
+        qp.settings.default_rho = default_rho
+        qp.settings.refactor_rho_threshold = default_rho
+        qp.settings.eps_abs = 1e-9
+        qp.init(H=Q_np, g=p_np, A=None, b=None, C=C_np, l=lb_np, u=ub_np[i], rho=default_rho)
+
+    for i in range(n):
+        batch_qps.get(i).solve()
+
+    zhats = torch.empty((n, m), dtype=torch.float64)
+    for i in range(n):
+        zhats[i] = torch.from_numpy(batch_qps.get(i).results.x)

     return zhats.to(device=device, dtype=dtype)

From 8968715a44e46ab3422a1899c2347bd1481beafa Mon Sep 17 00:00:00 2001
From: Pierre Quinton
Date: Sun, 10 May 2026 11:41:28 +0200
Subject: [PATCH 5/5] Make proxsuite run QPs in parallel, and cast the result
 to a tensor at the end

---
 src/torchjd/aggregation/_utils/dual_cone.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/torchjd/aggregation/_utils/dual_cone.py b/src/torchjd/aggregation/_utils/dual_cone.py
index 2f4d2693..ae25bdbd 100644
--- a/src/torchjd/aggregation/_utils/dual_cone.py
+++ b/src/torchjd/aggregation/_utils/dual_cone.py
@@ -1,3 +1,4 @@
+import os
 from typing import Literal, TypeAlias

 import numpy as np
@@ -87,11 +88,11 @@ def _project_weight_vector_batch(U: Tensor, G: Tensor, _solver: SUPPORTED_SOLVER
         qp.settings.eps_abs = 1e-9
         qp.init(H=Q_np, g=p_np, A=None, b=None, C=C_np, l=lb_np, u=ub_np[i], rho=default_rho)

-    for i in range(n):
-        batch_qps.get(i).solve()
+    num_threads = max(1, (os.cpu_count() or 2) // 2)
+    proxqp.dense.solve_in_parallel(num_threads=num_threads, qps=batch_qps)

-    zhats = torch.empty((n, m), dtype=torch.float64)
+    zhats_np = np.empty((n, m), dtype=np.float64)
     for i in range(n):
-        zhats[i] = torch.from_numpy(batch_qps.get(i).results.x)
+        zhats_np[i] = batch_qps.get(i).results.x

-    return zhats.to(device=device, dtype=dtype)
+    return torch.from_numpy(zhats_np).to(device=device, dtype=dtype)
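
To sanity-check the end state of this series, here is a minimal standalone sketch (not part of the patches themselves). It mirrors the `BatchQP` / `solve_in_parallel` usage added in PATCH 4 and PATCH 5, and checks dual feasibility (`w >= u`, the property visible in `test_solution_weights`) plus complementary slackness (`(w - u)^T G w = 0`, which characterizes the projection). The helper name `project_weights_sketch`, the random data, and the tolerances are illustrative assumptions, not code from the repository.

import os

import numpy as np
import torch
from proxsuite import proxqp


def project_weights_sketch(U: torch.Tensor, G: torch.Tensor) -> torch.Tensor:
    """For each row u of U, solve: minimize v^T G v subject to u <= v."""
    n, m = U.shape
    Q_np = G.cpu().to(dtype=torch.float64).numpy()
    p_np = np.zeros(m, dtype=np.float64)
    C_np = -np.eye(m, dtype=np.float64)  # the inequality C v <= -u encodes u <= v
    lb_np = np.full(m, -1e20, dtype=np.float64)  # effectively no lower bound on C v
    ub_np = (-U.cpu().to(dtype=torch.float64)).numpy()

    batch_qps = proxqp.dense.BatchQP()
    for i in range(n):
        qp = batch_qps.init_qp_in_place(m, 0, m)  # m variables, 0 equalities, m inequalities
        qp.settings.eps_abs = 1e-9  # PATCH 4 also tunes max_iter, default_rho, etc.
        qp.init(H=Q_np, g=p_np, A=None, b=None, C=C_np, l=lb_np, u=ub_np[i], rho=5.0e-5)

    num_threads = max(1, (os.cpu_count() or 2) // 2)
    proxqp.dense.solve_in_parallel(num_threads=num_threads, qps=batch_qps)

    W_np = np.empty((n, m), dtype=np.float64)
    for i in range(n):
        W_np[i] = batch_qps.get(i).results.x
    return torch.from_numpy(W_np).to(device=U.device, dtype=U.dtype)


# Illustrative data: a random Jacobian J gives a positive definite Gramian G = J J^T.
J = torch.randn(5, 7, dtype=torch.float64)
G = J @ J.T
U = torch.rand(3, 5, dtype=torch.float64)
W = project_weights_sketch(U, G)
assert torch.all(W >= U - 1e-6)  # dual feasibility: w >= u
slack = ((W - U) * (W @ G)).sum(dim=1)  # complementary slackness: (w - u)^T G w ~= 0
assert torch.allclose(slack, torch.zeros_like(slack), atol=1e-5)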