From b758b9f3424e6599987b93a140e2513c9ea76a9b Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 11 Feb 2026 04:12:07 -0800 Subject: [PATCH 01/44] fix timer issues --- cpp/src/dual_simplex/branch_and_bound.cpp | 90 +++++++---- cpp/src/dual_simplex/cuts.cpp | 142 ++++++++++++------ cpp/src/dual_simplex/cuts.hpp | 49 +++--- cpp/src/dual_simplex/pseudo_costs.cpp | 15 +- cpp/src/mip/solver.cu | 2 + .../unit_tests/presolve_test.cu | 2 - 6 files changed, 204 insertions(+), 96 deletions(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index e74f69d13..176ff719d 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -1989,12 +1989,9 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (root_status == lp_status_t::INFEASIBLE) { settings_.log.printf("MIP Infeasible\n"); - // FIXME: rarely dual simplex detects infeasible whereas it is feasible. - // to add a small safety net, check if there is a primal solution already. - // Uncomment this if the issue with cost266-UUE is resolved - // if (settings.heuristic_preemption_callback != nullptr) { - // settings.heuristic_preemption_callback(); - // } + if (settings_.heuristic_preemption_callback != nullptr) { + settings_.heuristic_preemption_callback(); + } return mip_status_t::INFEASIBLE; } if (root_status == lp_status_t::UNBOUNDED) { @@ -2074,9 +2071,19 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut f_t last_upper_bound = std::numeric_limits::infinity(); f_t last_objective = root_objective_; f_t root_relax_objective = root_objective_; + auto stop_for_time_limit = [&]() -> bool { + const f_t elapsed = toc(exploration_stats_.start_time); + if (elapsed > settings_.time_limit) { + solver_status_ = mip_status_t::TIME_LIMIT; + set_final_solution(solution, root_objective_); + return true; + } + return false; + }; i_t cut_pool_size = 0; for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) { + if (stop_for_time_limit()) { return solver_status_; } if (num_fractional == 0) { set_solution_at_root(solution, cut_info); return mip_status_t::OPTIMAL; @@ -2094,6 +2101,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut #endif // Generate cuts and add them to the cut pool + if (stop_for_time_limit()) { return solver_status_; } f_t cut_start_time = tic(); cut_generation.generate_cuts(original_lp_, settings_, @@ -2103,21 +2111,27 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut basis_update, root_relax_soln_.x, basic_list, - nonbasic_list); + nonbasic_list, + exploration_stats_.start_time); + if (stop_for_time_limit()) { return solver_status_; } f_t cut_generation_time = toc(cut_start_time); if (cut_generation_time > 1.0) { settings_.log.debug("Cut generation time %.2f seconds\n", cut_generation_time); } // Score the cuts + if (stop_for_time_limit()) { return solver_status_; } f_t score_start_time = tic(); - cut_pool.score_cuts(root_relax_soln_.x); + cut_pool.score_cuts(root_relax_soln_.x, exploration_stats_.start_time); + if (stop_for_time_limit()) { return solver_status_; } f_t score_time = toc(score_start_time); if (score_time > 1.0) { settings_.log.debug("Cut scoring time %.2f seconds\n", score_time); } // Get the best cuts from the cut pool csr_matrix_t cuts_to_add(0, original_lp_.num_cols, 0); std::vector cut_rhs; std::vector cut_types; - i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs, cut_types); + i_t num_cuts = + cut_pool.get_best_cuts(cuts_to_add, cut_rhs, cut_types, exploration_stats_.start_time); + if (stop_for_time_limit()) { return solver_status_; } if (num_cuts == 0) { break; } cut_info.record_cut_types(cut_types); #ifdef PRINT_CUT_POOL_TYPES @@ -2150,6 +2164,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut cuts_to_add.m + original_lp_.num_rows); lp_settings.log.log = false; + if (stop_for_time_limit()) { return solver_status_; } f_t add_cuts_start_time = tic(); mutex_original_lp_.lock(); i_t add_cuts_status = add_cuts(settings_, @@ -2162,14 +2177,20 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut basic_list, nonbasic_list, root_vstatus_, - edge_norms_); + edge_norms_, + exploration_stats_.start_time); var_types_.resize(original_lp_.num_cols, variable_type_t::CONTINUOUS); mutex_original_lp_.unlock(); + if (stop_for_time_limit()) { return solver_status_; } f_t add_cuts_time = toc(add_cuts_start_time); if (add_cuts_time > 1.0) { settings_.log.debug("Add cuts time %.2f seconds\n", add_cuts_time); } - if (add_cuts_status != 0) { + if (add_cuts_status == -2) { + solver_status_ = mip_status_t::TIME_LIMIT; + set_final_solution(solution, root_objective_); + return solver_status_; + } else if (add_cuts_status != 0) { settings_.log.printf("Failed to add cuts\n"); return mip_status_t::NUMERICAL; } @@ -2196,6 +2217,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut #endif original_lp_.A.to_compressed_row(Arow_); + if (stop_for_time_limit()) { return solver_status_; } f_t node_presolve_start_time = tic(); bounds_strengthening_t node_presolve(original_lp_, Arow_, row_sense, var_types_); std::vector new_lower = original_lp_.lower; @@ -2206,6 +2228,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut original_lp_.lower = new_lower; original_lp_.upper = new_upper; mutex_original_lp_.unlock(); + if (stop_for_time_limit()) { return solver_status_; } f_t node_presolve_time = toc(node_presolve_start_time); if (node_presolve_time > 1.0) { settings_.log.debug("Node presolve time %.2f seconds\n", node_presolve_time); @@ -2218,8 +2241,9 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut i_t iter = 0; bool initialize_basis = false; lp_settings.concurrent_halt = NULL; - f_t dual_phase2_start_time = tic(); - dual::status_t cut_status = dual_phase2_with_advanced_basis(2, + if (stop_for_time_limit()) { return solver_status_; } + f_t dual_phase2_start_time = tic(); + dual::status_t cut_status = dual_phase2_with_advanced_basis(2, 0, initialize_basis, exploration_stats_.start_time, @@ -2238,6 +2262,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (dual_phase2_time > 1.0) { settings_.log.debug("Dual phase2 time %.2f seconds\n", dual_phase2_time); } + if (stop_for_time_limit()) { return solver_status_; } if (cut_status == dual::status_t::TIME_LIMIT) { solver_status_ = mip_status_t::TIME_LIMIT; set_final_solution(solution, root_objective_); @@ -2245,6 +2270,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } if (cut_status != dual::status_t::OPTIMAL) { + if (stop_for_time_limit()) { return solver_status_; } settings_.log.printf("Numerical issue at root node. Resolving from scratch\n"); lp_status_t scratch_status = solve_linear_program_with_advanced_basis(original_lp_, @@ -2256,6 +2282,11 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut nonbasic_list, root_vstatus_, edge_norms_); + if (stop_for_time_limit() || scratch_status == lp_status_t::TIME_LIMIT) { + solver_status_ = mip_status_t::TIME_LIMIT; + set_final_solution(solution, root_objective_); + return solver_status_; + } if (scratch_status == lp_status_t::OPTIMAL) { // We recovered cut_status = convert_lp_status_to_dual_status(scratch_status); @@ -2267,23 +2298,26 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } } + if (stop_for_time_limit()) { return solver_status_; } f_t remove_cuts_start_time = tic(); mutex_original_lp_.lock(); - remove_cuts(original_lp_, - settings_, - Arow_, - new_slacks_, - original_rows, - var_types_, - root_vstatus_, - edge_norms_, - root_relax_soln_.x, - root_relax_soln_.y, - root_relax_soln_.z, - basic_list, - nonbasic_list, - basis_update); + i_t remove_cuts_status = remove_cuts(original_lp_, + settings_, + Arow_, + new_slacks_, + original_rows, + var_types_, + root_vstatus_, + edge_norms_, + root_relax_soln_.x, + root_relax_soln_.y, + root_relax_soln_.z, + basic_list, + nonbasic_list, + basis_update, + exploration_stats_.start_time); mutex_original_lp_.unlock(); + if (stop_for_time_limit()) { return solver_status_; } f_t remove_cuts_time = toc(remove_cuts_start_time); if (remove_cuts_time > 1.0) { settings_.log.debug("Remove cuts time %.2f seconds\n", remove_cuts_time); @@ -2335,7 +2369,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms_); pc_.resize(original_lp_.num_cols); - { + if (toc(exploration_stats_.start_time) < settings_.time_limit) { raft::common::nvtx::range scope_sb("BB::strong_branching"); strong_branching(original_problem_, original_lp_, diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 0bb4e1744..ced10be89 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -95,7 +95,7 @@ f_t cut_pool_t::cut_orthogonality(i_t i, i_t j) } template -void cut_pool_t::score_cuts(std::vector& x_relax) +void cut_pool_t::score_cuts(std::vector& x_relax, f_t start_time) { const f_t min_cut_distance = 1e-4; cut_distances_.resize(cut_storage_.m, 0.0); @@ -103,6 +103,11 @@ void cut_pool_t::score_cuts(std::vector& x_relax) const bool verbose = false; for (i_t i = 0; i < cut_storage_.m; i++) { + if (toc(start_time) >= settings_.time_limit) { + best_cuts_.clear(); + scored_cuts_ = 0; + return; + } f_t violation; f_t cut_dist = cut_distance(i, x_relax, violation, cut_norms_[i]); cut_distances_[i] = cut_dist <= min_cut_distance ? 0.0 : cut_dist; @@ -133,6 +138,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax) } while (scored_cuts_ < max_cuts && !sorted_indices.empty()) { + if (toc(start_time) >= settings_.time_limit) { return; } const i_t i = sorted_indices.back(); sorted_indices.pop_back(); @@ -141,6 +147,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax) f_t cut_ortho = 1.0; const i_t best_cuts_size = best_cuts_.size(); for (i_t k = 0; k < best_cuts_size; k++) { + if (toc(start_time) >= settings_.time_limit) { return; } const i_t j = best_cuts_[k]; cut_ortho = std::min(cut_ortho, cut_orthogonality(i, j)); } @@ -154,7 +161,8 @@ void cut_pool_t::score_cuts(std::vector& x_relax) template i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, std::vector& best_rhs, - std::vector& best_cut_types) + std::vector& best_cut_types, + f_t start_time) { best_cuts.m = 0; best_cuts.n = original_vars_; @@ -168,7 +176,9 @@ i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, best_cut_types.clear(); best_cut_types.reserve(scored_cuts_); - for (i_t i : best_cuts_) { + for (i_t k = 0; k < static_cast(best_cuts_.size()); ++k) { + if (toc(start_time) >= settings_.time_limit) { break; } + const i_t i = best_cuts_[k]; sparse_vector_t cut(cut_storage_, i); cut.negate(); best_cuts.append_row(cut); @@ -559,33 +569,46 @@ void cut_generation_t::generate_cuts(const lp_problem_t& lp, basis_update_mpf_t& basis_update, const std::vector& xstar, const std::vector& basic_list, - const std::vector& nonbasic_list) + const std::vector& nonbasic_list, + f_t start_time) { + if (toc(start_time) >= settings.time_limit) { return; } + // Generate Gomory and CG Cuts if (settings.mixed_integer_gomory_cuts != 0 || settings.strong_chvatal_gomory_cuts != 0) { f_t cut_start_time = tic(); - generate_gomory_cuts( - lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list); + generate_gomory_cuts(lp, + settings, + Arow, + new_slacks, + var_types, + basis_update, + xstar, + basic_list, + nonbasic_list, + start_time); f_t cut_generation_time = toc(cut_start_time); if (cut_generation_time > 1.0) { settings.log.debug("Gomory and CG cut generation time %.2f seconds\n", cut_generation_time); } } + if (toc(start_time) >= settings.time_limit) { return; } // Generate Knapsack cuts if (settings.knapsack_cuts != 0) { f_t cut_start_time = tic(); - generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar); + generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar, start_time); f_t cut_generation_time = toc(cut_start_time); if (cut_generation_time > 1.0) { settings.log.debug("Knapsack cut generation time %.2f seconds\n", cut_generation_time); } } + if (toc(start_time) >= settings.time_limit) { return; } // Generate MIR and CG cuts if (settings.mir_cuts != 0 || settings.strong_chvatal_gomory_cuts != 0) { f_t cut_start_time = tic(); - generate_mir_cuts(lp, settings, Arow, new_slacks, var_types, xstar); + generate_mir_cuts(lp, settings, Arow, new_slacks, var_types, xstar, start_time); f_t cut_generation_time = toc(cut_start_time); if (cut_generation_time > 1.0) { settings.log.debug("MIR and CG cut generation time %.2f seconds\n", cut_generation_time); @@ -600,10 +623,12 @@ void cut_generation_t::generate_knapsack_cuts( csr_matrix_t& Arow, const std::vector& new_slacks, const std::vector& var_types, - const std::vector& xstar) + const std::vector& xstar, + f_t start_time) { if (knapsack_generation_.num_knapsack_constraints() > 0) { for (i_t knapsack_row : knapsack_generation_.get_knapsack_constraints()) { + if (toc(start_time) >= settings.time_limit) { return; } sparse_vector_t cut(lp.num_cols, 0); f_t cut_rhs; i_t knapsack_status = knapsack_generation_.generate_knapsack_cuts( @@ -620,8 +645,10 @@ void cut_generation_t::generate_mir_cuts( csr_matrix_t& Arow, const std::vector& new_slacks, const std::vector& var_types, - const std::vector& xstar) + const std::vector& xstar, + f_t start_time) { + if (toc(start_time) >= settings.time_limit) { return; } f_t mir_start_time = tic(); mixed_integer_rounding_cut_t mir(lp, settings, new_slacks, xstar); strong_cg_cut_t cg(lp, var_types, xstar); @@ -639,6 +666,7 @@ void cut_generation_t::generate_mir_cuts( // Compute initial scores for all rows std::vector score(lp.num_rows, 0.0); for (i_t i = 0; i < lp.num_rows; i++) { + if (toc(start_time) >= settings.time_limit) { return; } const i_t row_start = Arow.row_start[i]; const i_t row_end = Arow.row_start[i + 1]; @@ -688,6 +716,7 @@ void cut_generation_t::generate_mir_cuts( const i_t max_cuts = std::min(lp.num_rows, 1000); f_t work_estimate = 0.0; for (i_t h = 0; h < max_cuts; h++) { + if (toc(start_time) >= settings.time_limit) { return; } // Get the row with the highest score const i_t i = sorted_indices.back(); sorted_indices.pop_back(); @@ -768,6 +797,7 @@ void cut_generation_t::generate_mir_cuts( work_estimate += lp.num_cols; while (!add_cut && num_aggregated < max_aggregated) { + if (toc(start_time) >= settings.time_limit) { return; } sparse_vector_t transformed_inequality; inequality.squeeze(transformed_inequality); f_t transformed_rhs = inequality_rhs; @@ -979,13 +1009,15 @@ void cut_generation_t::generate_gomory_cuts( basis_update_mpf_t& basis_update, const std::vector& xstar, const std::vector& basic_list, - const std::vector& nonbasic_list) + const std::vector& nonbasic_list, + f_t start_time) { tableau_equality_t tableau(lp, basis_update, nonbasic_list); mixed_integer_rounding_cut_t mir(lp, settings, new_slacks, xstar); strong_cg_cut_t cg(lp, var_types, xstar); for (i_t i = 0; i < lp.num_rows; i++) { + if (toc(start_time) >= settings.time_limit) { return; } sparse_vector_t inequality(lp.num_cols, 0); f_t inequality_rhs; const i_t j = basic_list[i]; @@ -2312,9 +2344,13 @@ i_t add_cuts(const simplex_solver_settings_t& settings, std::vector& basic_list, std::vector& nonbasic_list, std::vector& vstatus, - std::vector& edge_norms) + std::vector& edge_norms, + f_t start_time) { + constexpr i_t time_limit_status = -2; + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } + // Given a set of cuts: C*x <= d that are currently violated // by the current solution x* (i.e. C*x* > d), this function // adds the cuts into the LP and solves again. @@ -2342,6 +2378,7 @@ i_t add_cuts(const simplex_solver_settings_t& settings, settings.log.debug("Original lp rows %d\n", lp.num_rows); settings.log.debug("Original lp cols %d\n", lp.num_cols); + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } csr_matrix_t new_A_row(lp.num_rows, lp.num_cols, 1); lp.A.to_compressed_row(new_A_row); @@ -2351,6 +2388,7 @@ i_t add_cuts(const simplex_solver_settings_t& settings, assert(append_status == 0); } + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } csc_matrix_t new_A_col(lp.num_rows + p, lp.num_cols, 1); new_A_row.to_compressed_col(new_A_col); @@ -2405,6 +2443,7 @@ i_t add_cuts(const simplex_solver_settings_t& settings, settings.log.debug("Done adding rhs\n"); // Construct C_B = C(:, basic_list) + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } std::vector C_col_degree(lp.num_cols, 0); i_t cuts_nz = cuts.row_start[p]; for (i_t q = 0; q < cuts_nz; q++) { @@ -2434,6 +2473,7 @@ i_t add_cuts(const simplex_solver_settings_t& settings, } settings.log.debug("Done estimating C_B_nz\n"); + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } csr_matrix_t C_B(p, num_basic, C_B_nz); nz = 0; for (i_t i = 0; i < p; i++) { @@ -2458,6 +2498,7 @@ i_t add_cuts(const simplex_solver_settings_t& settings, settings.log.debug("C_B rows %d cols %d nz %d\n", C_B.m, C_B.n, nz); // Adjust the basis update to include the new cuts + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } basis_update.append_cuts(C_B); basic_list.resize(lp.num_rows, 0); @@ -2495,25 +2536,30 @@ i_t add_cuts(const simplex_solver_settings_t& settings, solution.y.resize(lp.num_rows, 0.0); solution.z.resize(lp.num_cols, 0.0); + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } return 0; } template -void remove_cuts(lp_problem_t& lp, - const simplex_solver_settings_t& settings, - csr_matrix_t& Arow, - std::vector& new_slacks, - i_t original_rows, - std::vector& var_types, - std::vector& vstatus, - std::vector& edge_norms, - std::vector& x, - std::vector& y, - std::vector& z, - std::vector& basic_list, - std::vector& nonbasic_list, - basis_update_mpf_t& basis_update) +i_t remove_cuts(lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csr_matrix_t& Arow, + std::vector& new_slacks, + i_t original_rows, + std::vector& var_types, + std::vector& vstatus, + std::vector& edge_norms, + std::vector& x, + std::vector& y, + std::vector& z, + std::vector& basic_list, + std::vector& nonbasic_list, + basis_update_mpf_t& basis_update, + f_t start_time) { + constexpr i_t time_limit_status = -2; + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } + std::vector cuts_to_remove; cuts_to_remove.reserve(lp.num_rows - original_rows); std::vector slacks_to_remove; @@ -2522,6 +2568,7 @@ void remove_cuts(lp_problem_t& lp, std::vector is_slack(lp.num_cols, 0); for (i_t j : new_slacks) { + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } is_slack[j] = 1; #ifdef CHECK_SLACKS // Check that slack column length is 1 @@ -2536,12 +2583,14 @@ void remove_cuts(lp_problem_t& lp, } for (i_t k = original_rows; k < lp.num_rows; k++) { + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } if (std::abs(y[k]) < dual_tol) { const i_t row_start = Arow.row_start[k]; const i_t row_end = Arow.row_start[k + 1]; i_t last_slack = -1; const f_t slack_tol = 1e-3; for (i_t p = row_start; p < row_end; p++) { + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } const i_t j = Arow.j[p]; if (is_slack[j]) { if (vstatus[j] == variable_status_t::BASIC && x[j] > slack_tol) { last_slack = j; } @@ -2555,6 +2604,7 @@ void remove_cuts(lp_problem_t& lp, } if (cuts_to_remove.size() > 0) { + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } std::vector marked_rows(lp.num_rows, 0); for (i_t i : cuts_to_remove) { marked_rows[i] = 1; @@ -2568,6 +2618,7 @@ void remove_cuts(lp_problem_t& lp, std::vector new_solution_y(lp.num_rows - cuts_to_remove.size()); i_t h = 0; for (i_t i = 0; i < lp.num_rows; i++) { + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } if (!marked_rows[i]) { new_rhs[h] = lp.rhs[i]; new_solution_y[h] = y[i]; @@ -2594,6 +2645,7 @@ void remove_cuts(lp_problem_t& lp, std::vector new_is_slacks(lp.num_cols - slacks_to_remove.size(), 0); h = 0; for (i_t k = 0; k < lp.num_cols; k++) { + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } if (!marked_cols[k]) { new_objective[h] = lp.objective[k]; new_lower[h] = lp.lower[k]; @@ -2641,10 +2693,14 @@ void remove_cuts(lp_problem_t& lp, lp.num_cols, lp.A.col_start[lp.A.n]); + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } basis_update.resize(lp.num_rows); basis_update.refactor_basis( lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus); } + + if (toc(start_time) >= settings.time_limit) { return time_limit_status; } + return 0; } template @@ -2789,22 +2845,24 @@ template int add_cuts(const simplex_solver_settings_t& settings, std::vector& basic_list, std::vector& nonbasic_list, std::vector& vstatus, - std::vector& edge_norms); - -template void remove_cuts(lp_problem_t& lp, - const simplex_solver_settings_t& settings, - csr_matrix_t& Arow, - std::vector& new_slacks, - int original_rows, - std::vector& var_types, - std::vector& vstatus, - std::vector& edge_norms, - std::vector& x, - std::vector& y, - std::vector& z, - std::vector& basic_list, - std::vector& nonbasic_list, - basis_update_mpf_t& basis_update); + std::vector& edge_norms, + double start_time); + +template int remove_cuts(lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csr_matrix_t& Arow, + std::vector& new_slacks, + int original_rows, + std::vector& var_types, + std::vector& vstatus, + std::vector& edge_norms, + std::vector& x, + std::vector& y, + std::vector& z, + std::vector& basic_list, + std::vector& nonbasic_list, + basis_update_mpf_t& basis_update, + double start_time); template void read_saved_solution_for_cut_verification( const lp_problem_t& lp, diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp index a4a36d75b..17f5e032a 100644 --- a/cpp/src/dual_simplex/cuts.hpp +++ b/cpp/src/dual_simplex/cuts.hpp @@ -138,12 +138,13 @@ class cut_pool_t { // cut'*xstart < rhs void add_cut(cut_type_t cut_type, const sparse_vector_t& cut, f_t rhs); - void score_cuts(std::vector& x_relax); + void score_cuts(std::vector& x_relax, f_t start_time); // We return the cuts in the form best_cuts*x <= best_rhs i_t get_best_cuts(csr_matrix_t& best_cuts, std::vector& best_rhs, - std::vector& best_cut_types); + std::vector& best_cut_types, + f_t start_time); void age_cuts(); @@ -239,7 +240,8 @@ class cut_generation_t { basis_update_mpf_t& basis_update, const std::vector& xstar, const std::vector& basic_list, - const std::vector& nonbasic_list); + const std::vector& nonbasic_list, + f_t start_time); private: // Generate all mixed integer gomory cuts @@ -251,7 +253,8 @@ class cut_generation_t { basis_update_mpf_t& basis_update, const std::vector& xstar, const std::vector& basic_list, - const std::vector& nonbasic_list); + const std::vector& nonbasic_list, + f_t start_time); // Generate all mixed integer rounding cuts void generate_mir_cuts(const lp_problem_t& lp, @@ -259,7 +262,8 @@ class cut_generation_t { csr_matrix_t& Arow, const std::vector& new_slacks, const std::vector& var_types, - const std::vector& xstar); + const std::vector& xstar, + f_t start_time); // Generate all knapsack cuts void generate_knapsack_cuts(const lp_problem_t& lp, @@ -267,7 +271,8 @@ class cut_generation_t { csr_matrix_t& Arow, const std::vector& new_slacks, const std::vector& var_types, - const std::vector& xstar); + const std::vector& xstar, + f_t start_time); cut_pool_t& cut_pool_; knapsack_generation_t knapsack_generation_; @@ -458,22 +463,24 @@ i_t add_cuts(const simplex_solver_settings_t& settings, std::vector& basic_list, std::vector& nonbasic_list, std::vector& vstatus, - std::vector& edge_norms); + std::vector& edge_norms, + f_t start_time); template -void remove_cuts(lp_problem_t& lp, - const simplex_solver_settings_t& settings, - csr_matrix_t& Arow, - std::vector& new_slacks, - i_t original_rows, - std::vector& var_types, - std::vector& vstatus, - std::vector& edge_norms, - std::vector& x, - std::vector& y, - std::vector& z, - std::vector& basic_list, - std::vector& nonbasic_list, - basis_update_mpf_t& basis_update); +i_t remove_cuts(lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csr_matrix_t& Arow, + std::vector& new_slacks, + i_t original_rows, + std::vector& var_types, + std::vector& vstatus, + std::vector& edge_norms, + std::vector& x, + std::vector& y, + std::vector& z, + std::vector& basic_list, + std::vector& nonbasic_list, + basis_update_mpf_t& basis_update, + f_t start_time); } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp index de8994e80..40949391e 100644 --- a/cpp/src/dual_simplex/pseudo_costs.cpp +++ b/cpp/src/dual_simplex/pseudo_costs.cpp @@ -313,6 +313,9 @@ void strong_branching(const user_problem_t& original_problem, pc.strong_branch_up.assign(fractional.size(), 0); pc.num_strong_branches_completed = 0; + const f_t elapsed_time = toc(start_time); + if (elapsed_time > settings.time_limit) { return; } + if (settings.mip_batch_pdlp_strong_branching) { settings.log.printf("Batch PDLP strong branching enabled\n"); @@ -333,9 +336,15 @@ void strong_branching(const user_problem_t& original_problem, fraction_values.push_back(original_root_soln_x[j]); } - const auto mps_model = simplex_problem_to_mps_data_model(original_problem); - const auto solutions = - batch_pdlp_solve(original_problem.handle_ptr, mps_model, fractional, fraction_values); + const auto mps_model = simplex_problem_to_mps_data_model(original_problem); + const f_t batch_elapsed_time = toc(start_time); + const f_t batch_remaining_time = + std::max(static_cast(0.0), settings.time_limit - batch_elapsed_time); + if (batch_remaining_time <= 0.0) { return; } + pdlp_solver_settings_t pdlp_settings; + pdlp_settings.time_limit = batch_remaining_time; + const auto solutions = batch_pdlp_solve( + original_problem.handle_ptr, mps_model, fractional, fraction_values, pdlp_settings); f_t batch_pdlp_strong_branching_time = toc(start_batch); // Find max iteration on how many are done accross the batch diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 21fa93bfa..201cf3b62 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -310,9 +310,11 @@ solution_t mip_solver_t::run_solver() if (!is_feasible.value(sol.handle_ptr->get_stream())) { CUOPT_LOG_ERROR( "Solution is not feasible due to variable bounds, returning infeasible solution!"); + context.stats.total_solve_time = timer_.elapsed_time(); context.problem_ptr->post_process_solution(sol); return sol; } + context.stats.total_solve_time = timer_.elapsed_time(); context.problem_ptr->post_process_solution(sol); dm.rins.stop_rins(); return sol; diff --git a/cpp/tests/linear_programming/unit_tests/presolve_test.cu b/cpp/tests/linear_programming/unit_tests/presolve_test.cu index 3fd05d97f..77a26cff7 100644 --- a/cpp/tests/linear_programming/unit_tests/presolve_test.cu +++ b/cpp/tests/linear_programming/unit_tests/presolve_test.cu @@ -166,7 +166,6 @@ TEST(pslp_presolve, postsolve_accuracy_afiro) TEST(pslp_presolve, postsolve_dual_accuracy_afiro) { const raft::handle_t handle_{}; - constexpr double tolerance = 1e-5; auto path = make_path_absolute("linear_programming/afiro_original.mps"); auto mps_data_model = cuopt::mps_parser::parse_mps(path, true); @@ -351,7 +350,6 @@ TEST(pslp_presolve, postsolve_reduced_costs) TEST(pslp_presolve, postsolve_multiple_problems) { const raft::handle_t handle_{}; - constexpr double tolerance = 1e-4; std::vector> instances{ {"afiro_original", -464.75314}, From 0392a6247a4402e0fca3ce3c82e97bff9f6456ae Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Wed, 11 Feb 2026 15:30:43 +0000 Subject: [PATCH 02/44] disable jobserver flag when not actually using jobserver --- build.sh | 10 +++++++++- cpp/CMakeLists.txt | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/build.sh b/build.sh index b5c35f510..f790ae16d 100755 --- a/build.sh +++ b/build.sh @@ -360,6 +360,13 @@ fi ################################################################################ # Configure, build, and install libcuopt if buildAll || hasArg libcuopt; then + # Enable nvcc --jobserver only when building through the Makefile, + # which starts a GNU Make jobserver that nvcc can participate in. + if hasArg -n; then + USE_NVCC_JOBSERVER=ON + else + USE_NVCC_JOBSERVER=OFF + fi mkdir -p "${LIBCUOPT_BUILD_DIR}" cd "${LIBCUOPT_BUILD_DIR}" cmake -DDEFINE_ASSERT=${DEFINE_ASSERT} \ @@ -381,12 +388,13 @@ if buildAll || hasArg libcuopt; then -DWRITE_FATBIN=${WRITE_FATBIN} \ -DHOST_LINEINFO=${HOST_LINEINFO} \ -DPARALLEL_LEVEL="${PARALLEL_LEVEL}" \ + -DUSE_NVCC_JOBSERVER="${USE_NVCC_JOBSERVER}" \ -DINSTALL_TARGET="${INSTALL_TARGET}" \ "${CACHE_ARGS[@]}" \ "${EXTRA_CMAKE_ARGS[@]}" \ "${REPODIR}"/cpp JFLAG="${PARALLEL_LEVEL:+-j${PARALLEL_LEVEL}}" - if hasArg -n; then + if [ "${USE_NVCC_JOBSERVER}" = "ON" ]; then # Manual make invocation to start its jobserver make ${JFLAG} -C "${REPODIR}/cpp" LIBCUOPT_BUILD_DIR="${LIBCUOPT_BUILD_DIR}" VERBOSE_FLAG="${VERBOSE_FLAG}" PARALLEL_LEVEL="${PARALLEL_LEVEL}" ninja-build else diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index af7b7c1c3..aa377b315 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -153,11 +153,11 @@ if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.9 AND CMAKE_CUDA_COMPILE endif() list(APPEND CUOPT_CUDA_FLAGS -fopenmp) -# Add jobserver flags for parallel compilation if PARALLEL_LEVEL is set +# Add parallel compilation flags if PARALLEL_LEVEL is set if(PARALLEL_LEVEL AND NOT "${PARALLEL_LEVEL}" STREQUAL "") message(STATUS "Enabling nvcc parallel compilation support") list(APPEND CUOPT_CUDA_FLAGS --threads=0 --split-compile=0) - if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) + if(USE_NVCC_JOBSERVER AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) message(STATUS "Enabling nvcc jobserver support (NVCC >= 13.0)") list(APPEND CUOPT_CUDA_FLAGS --jobserver) endif() From ee544770a1cfbf7642e527957ffdea40664c2da1 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Wed, 11 Feb 2026 15:58:10 +0000 Subject: [PATCH 03/44] disable jobserver unless explicitely requested --- build.sh | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/build.sh b/build.sh index f790ae16d..927acd664 100755 --- a/build.sh +++ b/build.sh @@ -362,11 +362,7 @@ fi if buildAll || hasArg libcuopt; then # Enable nvcc --jobserver only when building through the Makefile, # which starts a GNU Make jobserver that nvcc can participate in. - if hasArg -n; then - USE_NVCC_JOBSERVER=ON - else - USE_NVCC_JOBSERVER=OFF - fi + USE_NVCC_JOBSERVER=${USE_NVCC_JOBSERVER:-OFF} mkdir -p "${LIBCUOPT_BUILD_DIR}" cd "${LIBCUOPT_BUILD_DIR}" cmake -DDEFINE_ASSERT=${DEFINE_ASSERT} \ From f876fc09db2fe5b6c03612d8a85a863a470b2838 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Wed, 11 Feb 2026 16:23:22 +0000 Subject: [PATCH 04/44] better workaround fix build --- build.sh | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/build.sh b/build.sh index 927acd664..b5c35f510 100755 --- a/build.sh +++ b/build.sh @@ -360,9 +360,6 @@ fi ################################################################################ # Configure, build, and install libcuopt if buildAll || hasArg libcuopt; then - # Enable nvcc --jobserver only when building through the Makefile, - # which starts a GNU Make jobserver that nvcc can participate in. - USE_NVCC_JOBSERVER=${USE_NVCC_JOBSERVER:-OFF} mkdir -p "${LIBCUOPT_BUILD_DIR}" cd "${LIBCUOPT_BUILD_DIR}" cmake -DDEFINE_ASSERT=${DEFINE_ASSERT} \ @@ -384,13 +381,12 @@ if buildAll || hasArg libcuopt; then -DWRITE_FATBIN=${WRITE_FATBIN} \ -DHOST_LINEINFO=${HOST_LINEINFO} \ -DPARALLEL_LEVEL="${PARALLEL_LEVEL}" \ - -DUSE_NVCC_JOBSERVER="${USE_NVCC_JOBSERVER}" \ -DINSTALL_TARGET="${INSTALL_TARGET}" \ "${CACHE_ARGS[@]}" \ "${EXTRA_CMAKE_ARGS[@]}" \ "${REPODIR}"/cpp JFLAG="${PARALLEL_LEVEL:+-j${PARALLEL_LEVEL}}" - if [ "${USE_NVCC_JOBSERVER}" = "ON" ]; then + if hasArg -n; then # Manual make invocation to start its jobserver make ${JFLAG} -C "${REPODIR}/cpp" LIBCUOPT_BUILD_DIR="${LIBCUOPT_BUILD_DIR}" VERBOSE_FLAG="${VERBOSE_FLAG}" PARALLEL_LEVEL="${PARALLEL_LEVEL}" ninja-build else From 4bbf743b0f4de3c51c515518dfcb85611c67f3fe Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Mon, 16 Feb 2026 05:16:39 -0800 Subject: [PATCH 05/44] scaling test --- cpp/include/cuopt/linear_programming/mip/solver_settings.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 6d32cd5ed..60534a36f 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -107,7 +107,7 @@ class mip_solver_settings_t { /** Initial primal solutions */ std::vector>> initial_solutions; - bool mip_scaling = false; + bool mip_scaling = true; presolver_t presolver{presolver_t::Default}; /** * @brief Determinism mode for MIP solver. From 7de08d2dfd124bf5fba7faa3ea391c3cf08c85f8 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Tue, 17 Feb 2026 02:45:54 -0800 Subject: [PATCH 06/44] add timers to right_looking_lu and refactoring the basis --- cpp/src/branch_and_bound/branch_and_bound.cpp | 18 +++++- cpp/src/cuts/cuts.cpp | 6 +- cpp/src/dual_simplex/basis_solves.cpp | 20 ++++-- cpp/src/dual_simplex/basis_solves.hpp | 3 +- cpp/src/dual_simplex/basis_updates.cpp | 11 +++- cpp/src/dual_simplex/basis_updates.hpp | 3 +- cpp/src/dual_simplex/crossover.cpp | 61 +++++++++++-------- cpp/src/dual_simplex/phase2.cpp | 31 +++++++--- cpp/src/dual_simplex/presolve.cpp | 2 + cpp/src/dual_simplex/primal.cpp | 19 ++++-- cpp/src/dual_simplex/right_looking_lu.cpp | 20 +++--- cpp/src/dual_simplex/right_looking_lu.hpp | 3 +- cpp/src/dual_simplex/solve.cpp | 2 + cpp/src/dual_simplex/types.hpp | 2 + 14 files changed, 134 insertions(+), 67 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 842845b0d..bd06dd3a0 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1888,8 +1888,13 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( original_lp_.upper, basic_list, nonbasic_list, - crossover_vstatus_); - if (refactor_status != 0) { + crossover_vstatus_, + exploration_stats_.start_time); + if (refactor_status == TIME_LIMIT_RETURN) { + root_status = lp_status_t::TIME_LIMIT; + } else if (refactor_status == CONCURRENT_HALT_RETURN) { + root_status = lp_status_t::TIME_LIMIT; + } else if (refactor_status != 0) { settings_.log.printf("Failed to refactor basis. %d deficient columns.\n", refactor_status); assert(refactor_status == 0); root_status = lp_status_t::NUMERICAL_ISSUES; @@ -1901,6 +1906,15 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( user_objective = root_crossover_soln_.user_objective; iter = root_crossover_soln_.iterations; solver_name = "Barrier/PDLP and Crossover"; + } else if (crossover_status == crossover_status_t::TIME_LIMIT || + toc(exploration_stats_.start_time) > settings_.time_limit) { + set_root_concurrent_halt(1); + root_status = root_status_future.get(); + set_root_concurrent_halt(0); + root_status = lp_status_t::TIME_LIMIT; + user_objective = root_relax_soln_.user_objective; + iter = root_relax_soln_.iterations; + solver_name = "Dual Simplex"; } else { root_status = root_status_future.get(); user_objective = root_relax_soln_.user_objective; diff --git a/cpp/src/cuts/cuts.cpp b/cpp/src/cuts/cuts.cpp index aae893b22..2d98badb8 100644 --- a/cpp/src/cuts/cuts.cpp +++ b/cpp/src/cuts/cuts.cpp @@ -2697,8 +2697,10 @@ i_t remove_cuts(lp_problem_t& lp, if (toc(start_time) >= settings.time_limit) { return time_limit_status; } basis_update.resize(lp.num_rows); - basis_update.refactor_basis( - lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus); + i_t refactor_status = basis_update.refactor_basis( + lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus, start_time); + if (refactor_status == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } + if (refactor_status == TIME_LIMIT_RETURN) { return time_limit_status; } } if (toc(start_time) >= settings.time_limit) { return time_limit_status; } diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp index 17f997f4a..9a8cfc143 100644 --- a/cpp/src/dual_simplex/basis_solves.cpp +++ b/cpp/src/dual_simplex/basis_solves.cpp @@ -165,7 +165,8 @@ i_t factorize_basis(const csc_matrix_t& A, std::vector& pinv, std::vector& q, std::vector& deficient, - std::vector& slacks_needed) + std::vector& slacks_needed, + f_t start_time) { raft::common::nvtx::range scope("LU::factorize_basis"); const i_t m = basic_list.size(); @@ -363,11 +364,12 @@ i_t factorize_basis(const csc_matrix_t& A, S_col_perm, SL, SU, - S_perm_inv); + S_perm_inv, + start_time); if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { - settings.log.printf("Concurrent halt\n"); return CONCURRENT_HALT_RETURN; } + if (Srank < 0) { return Srank; } if (Srank != Sdim) { // Get the rank deficient columns deficient.clear(); @@ -568,7 +570,13 @@ i_t factorize_basis(const csc_matrix_t& A, } q.resize(m); f_t fact_start = tic(); - rank = right_looking_lu(A, settings, medium_tol, basic_list, q, L, U, pinv); + rank = right_looking_lu(A, settings, medium_tol, basic_list, q, L, U, pinv, start_time); + if (rank < 0) { + if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + return CONCURRENT_HALT_RETURN; + } + return rank; + } inverse_permutation(pinv, p); if (rank != m) { // Get the rank deficient columns @@ -584,7 +592,6 @@ i_t factorize_basis(const csc_matrix_t& A, } } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { - settings.log.printf("Concurrent halt\n"); return CONCURRENT_HALT_RETURN; } if (verbose) { @@ -874,7 +881,8 @@ template int factorize_basis(const csc_matrix_t& A, std::vector& pinv, std::vector& q, std::vector& deficient, - std::vector& slacks_needed); + std::vector& slacks_needed, + double start_time); template int basis_repair(const csc_matrix_t& A, const simplex_solver_settings_t& settings, diff --git a/cpp/src/dual_simplex/basis_solves.hpp b/cpp/src/dual_simplex/basis_solves.hpp index 59b4725e4..13227a832 100644 --- a/cpp/src/dual_simplex/basis_solves.hpp +++ b/cpp/src/dual_simplex/basis_solves.hpp @@ -36,7 +36,8 @@ i_t factorize_basis(const csc_matrix_t& A, std::vector& pinv, std::vector& q, std::vector& deficient, - std::vector& slacks_need); + std::vector& slacks_need, + f_t start_time); // Repair the basis by bringing in slacks template diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 71dce2e39..5f8ef4934 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -2264,7 +2264,8 @@ int basis_update_mpf_t::refactor_basis( const std::vector& upper, std::vector& basic_list, std::vector& nonbasic_list, - std::vector& vstatus) + std::vector& vstatus, + f_t start_time) { raft::common::nvtx::range scope("LU::refactor_basis"); std::vector deficient; @@ -2282,8 +2283,10 @@ int basis_update_mpf_t::refactor_basis( inverse_row_permutation_, q, deficient, - slacks_needed); + slacks_needed, + start_time); if (status == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } + if (status == TIME_LIMIT_RETURN) { return TIME_LIMIT_RETURN; } if (status == -1) { settings.log.debug("Initial factorization failed\n"); basis_repair(A, @@ -2323,8 +2326,10 @@ int basis_update_mpf_t::refactor_basis( inverse_row_permutation_, q, deficient, - slacks_needed); + slacks_needed, + start_time); if (status == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } + if (status == TIME_LIMIT_RETURN) { return TIME_LIMIT_RETURN; } if (status == -1) { #ifdef CHECK_L_FACTOR if (L0_.check_matrix() == -1) { settings.log.printf("Bad L after basis repair\n"); } diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index d9783053f..6fd7b13f3 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -382,7 +382,8 @@ class basis_update_mpf_t { const std::vector& upper, std::vector& basic_list, std::vector& nonbasic_list, - std::vector& vstatus); + std::vector& vstatus, + f_t start_time); void set_refactor_frequency(i_t new_frequency) { refactor_frequency_ = new_frequency; } diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 597628e73..65da8d45c 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -25,7 +25,7 @@ namespace { crossover_status_t return_to_status(int status) { - if (status == -1) { + if (status == TIME_LIMIT_RETURN) { return crossover_status_t::TIME_LIMIT; } else if (status == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; @@ -505,10 +505,12 @@ i_t dual_push(const lp_problem_t& lp, std::vector q(m); std::vector deficient; std::vector slacks_needed; - i_t rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + i_t rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; + } else if (rank < 0) { + return rank; } else if (rank != m) { settings.log.printf("Failed to factorize basis. rank %d m %d\n", rank, m); basis_repair(lp.A, @@ -521,13 +523,12 @@ i_t dual_push(const lp_problem_t& lp, nonbasic_list, superbasic_list, vstatus); - rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; - } else if (rank == -1) { - settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); - return -1; + } else if (rank < 0) { + return rank; } else { settings.log.printf("Basis repaired\n"); } @@ -560,7 +561,7 @@ i_t dual_push(const lp_problem_t& lp, } if (toc(start_time) > settings.time_limit) { settings.log.printf("Crossover time exceeded\n"); - return -1; + return TIME_LIMIT_RETURN; } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { settings.log.printf("Concurrent halt\n"); @@ -860,10 +861,12 @@ i_t primal_push(const lp_problem_t& lp, std::vector q(m); std::vector deficient; std::vector slacks_needed; - i_t rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + i_t rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; + } else if (rank < 0) { + return rank; } else if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); basis_repair(lp.A, @@ -879,13 +882,12 @@ i_t primal_push(const lp_problem_t& lp, // We need to be careful. As basis_repair may have changed the superbasic list find_primal_superbasic_variables( lp, settings, solution, solution, vstatus, nonbasic_list, superbasic_list); - rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; - } else if (rank == -1) { - settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); - return -1; + } else if (rank < 0) { + return rank; } else { settings.log.debug("Basis repaired\n"); } @@ -915,7 +917,7 @@ i_t primal_push(const lp_problem_t& lp, if (toc(start_time) > settings.time_limit) { settings.log.printf("Crossover time limit exceeded\n"); - return -1; + return TIME_LIMIT_RETURN; } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { settings.log.printf("Concurrent halt\n"); @@ -1223,8 +1225,10 @@ crossover_status_t crossover(const lp_problem_t& lp, std::vector deficient; std::vector slacks_needed; - rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); if (rank == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; } + if (rank < 0) { return return_to_status(rank); } if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); basis_repair(lp.A, @@ -1237,12 +1241,13 @@ crossover_status_t crossover(const lp_problem_t& lp, nonbasic_list, superbasic_list, vstatus); - rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); if (rank == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; - } else if (rank == -1) { + } else if (rank < 0) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); - return crossover_status_t::NUMERICAL_ISSUES; + return return_to_status(rank); } else { settings.log.debug("Basis repaired\n"); } @@ -1392,10 +1397,12 @@ crossover_status_t crossover(const lp_problem_t& lp, nonbasic_list.clear(); superbasic_list.clear(); get_basis_from_vstatus(m, vstatus, basic_list, nonbasic_list, superbasic_list); - rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); if (rank == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; + } else if (rank < 0) { + return return_to_status(rank); } else if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); basis_repair(lp.A, @@ -1408,13 +1415,13 @@ crossover_status_t crossover(const lp_problem_t& lp, nonbasic_list, superbasic_list, vstatus); - rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); if (rank == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; - } else if (rank == -1) { + } else if (rank < 0) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); - return crossover_status_t::NUMERICAL_ISSUES; + return return_to_status(rank); } else { settings.log.debug("Basis repaired\n"); } diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 1e057a16b..25d1c5fbe 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -2504,10 +2504,11 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, assert(superbasic_list.size() == 0); assert(nonbasic_list.size() == n - m); - if (ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > - 0) { - return dual::status_t::NUMERICAL; - } + i_t refactor_status = ft.refactor_basis( + lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus, start_time); + if (refactor_status == CONCURRENT_HALT_RETURN) { return dual::status_t::CONCURRENT_LIMIT; } + if (refactor_status == TIME_LIMIT_RETURN) { return dual::status_t::TIME_LIMIT; } + if (refactor_status > 0) { return dual::status_t::NUMERICAL; } if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } } @@ -3371,15 +3372,24 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, PHASE2_NVTX_RANGE("DualSimplex::refactorization"); num_refactors++; bool should_recompute_x = false; - if (ft.refactor_basis( - lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > 0) { + i_t refactor_status = ft.refactor_basis( + lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus, start_time); + if (refactor_status == CONCURRENT_HALT_RETURN) { return dual::status_t::CONCURRENT_LIMIT; } + if (refactor_status == TIME_LIMIT_RETURN) { return dual::status_t::TIME_LIMIT; } + if (refactor_status > 0) { should_recompute_x = true; settings.log.printf("Failed to factorize basis. Iteration %d\n", iter); if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } - i_t count = 0; - i_t deficient_size; - while ((deficient_size = ft.refactor_basis( - lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus)) > 0) { + i_t count = 0; + i_t deficient_size = 0; + while (true) { + deficient_size = ft.refactor_basis( + lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus, start_time); + if (deficient_size == CONCURRENT_HALT_RETURN) { + return dual::status_t::CONCURRENT_LIMIT; + } + if (deficient_size == TIME_LIMIT_RETURN) { return dual::status_t::TIME_LIMIT; } + if (deficient_size <= 0) { break; } settings.log.printf("Failed to repair basis. Iteration %d. %d deficient columns.\n", iter, static_cast(deficient_size)); @@ -3390,6 +3400,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, count++; if (count > 10) { return dual::status_t::NUMERICAL; } } + if (deficient_size < 0) { return dual::status_t::NUMERICAL; } settings.log.printf("Successfully repaired basis. Iteration %d\n", iter); } diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 8d4a533e9..f4602ac56 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -445,6 +445,7 @@ i_t find_dependent_rows(lp_problem_t& problem, i_t pivots = right_looking_lu_row_permutation_only(C, settings, 1e-13, tic(), q, pinv); if (pivots == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } + if (pivots == TIME_LIMIT_RETURN) { return TIME_LIMIT_RETURN; } if (pivots < m) { settings.log.printf("Found %d dependent rows\n", m - pivots); const i_t num_dependent = m - pivots; @@ -1101,6 +1102,7 @@ i_t presolve(const lp_problem_t& original, f_t dependent_row_start = tic(); const i_t independent_rows = find_dependent_rows(problem, settings, dependent_rows, infeasible); if (independent_rows == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } + if (independent_rows == TIME_LIMIT_RETURN) { return TIME_LIMIT_RETURN; } if (infeasible != kOk) { settings.log.printf("Found problem infeasible in presolve\n"); return -1; diff --git a/cpp/src/dual_simplex/primal.cpp b/cpp/src/dual_simplex/primal.cpp index 98f5f4193..2d0944542 100644 --- a/cpp/src/dual_simplex/primal.cpp +++ b/cpp/src/dual_simplex/primal.cpp @@ -294,10 +294,15 @@ primal::status_t primal_phase2(i_t phase, std::vector q(m); std::vector deficient; std::vector slacks_needed; - i_t rank = - factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + i_t rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); if (rank == CONCURRENT_HALT_RETURN) { return primal::status_t::CONCURRENT_LIMIT; + } else if (rank == TIME_LIMIT_RETURN) { + return primal::status_t::TIME_LIMIT; + } else if (rank < 0) { + return toc(start_time) > settings.time_limit ? primal::status_t::TIME_LIMIT + : primal::status_t::NUMERICAL; } else if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); basis_repair(lp.A, @@ -310,12 +315,16 @@ primal::status_t primal_phase2(i_t phase, nonbasic_list, superbasic_list, vstatus); - rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); + rank = factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); if (rank == CONCURRENT_HALT_RETURN) { return primal::status_t::CONCURRENT_LIMIT; - } else if (rank == -1) { + } else if (rank == TIME_LIMIT_RETURN) { + return primal::status_t::TIME_LIMIT; + } else if (rank < 0) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); - return primal::status_t::NUMERICAL; + return toc(start_time) > settings.time_limit ? primal::status_t::TIME_LIMIT + : primal::status_t::NUMERICAL; } else { settings.log.debug("Basis repaired\n"); } diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index cb9834705..4c55e1f19 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -580,7 +580,8 @@ i_t right_looking_lu(const csc_matrix_t& A, VectorI& q, csc_matrix_t& L, csc_matrix_t& U, - VectorI& pinv) + VectorI& pinv, + f_t start_time) { raft::common::nvtx::range scope("LU::right_looking_lu"); const i_t n = column_list.size(); @@ -634,7 +635,10 @@ i_t right_looking_lu(const csc_matrix_t& A, i_t pivots = 0; for (i_t k = 0; k < n; ++k) { - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return -1; } + if (toc(start_time) > settings.time_limit) { return TIME_LIMIT_RETURN; } + if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + return CONCURRENT_HALT_RETURN; + } // Find pivot that satisfies // abs(pivot) >= abstol, // abs(pivot) >= threshold_tol * max abs[pivot column] @@ -1114,11 +1118,7 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, toc(factorization_start_time)); last_print = tic(); } - if (toc(factorization_start_time) > settings.time_limit) { - settings.log.printf("Right-looking LU factorization time exceeded\n"); - return -1; - } - + if (toc(start_time) > settings.time_limit) { return TIME_LIMIT_RETURN; } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { settings.log.printf("Concurrent halt\n"); return CONCURRENT_HALT_RETURN; @@ -1157,7 +1157,8 @@ template int right_looking_lu>( std::vector& q, csc_matrix_t& L, csc_matrix_t& U, - std::vector& pinv); + std::vector& pinv, + double start_time); template int right_looking_lu>( const csc_matrix_t& A, @@ -1167,7 +1168,8 @@ template int right_looking_lu>( ins_vector& q, csc_matrix_t& L, csc_matrix_t& U, - ins_vector& pinv); + ins_vector& pinv, + double start_time); template int right_looking_lu_row_permutation_only( const csc_matrix_t& A, diff --git a/cpp/src/dual_simplex/right_looking_lu.hpp b/cpp/src/dual_simplex/right_looking_lu.hpp index 179fff01b..3f83da42f 100644 --- a/cpp/src/dual_simplex/right_looking_lu.hpp +++ b/cpp/src/dual_simplex/right_looking_lu.hpp @@ -22,7 +22,8 @@ i_t right_looking_lu(const csc_matrix_t& A, VectorI& q, csc_matrix_t& L, csc_matrix_t& U, - VectorI& pinv); + VectorI& pinv, + f_t start_time); template i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 1e825b5e4..42f99a629 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -154,6 +154,7 @@ lp_status_t solve_linear_program_with_advanced_basis( ok = presolve(original_lp, settings, presolved_lp, presolve_info); } if (ok == CONCURRENT_HALT_RETURN) { return lp_status_t::CONCURRENT_LIMIT; } + if (ok == TIME_LIMIT_RETURN) { return lp_status_t::TIME_LIMIT; } if (ok == -1) { return lp_status_t::INFEASIBLE; } constexpr bool write_out_matlab = false; @@ -349,6 +350,7 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us lp_problem_t presolved_lp(user_problem.handle_ptr, 1, 1, 1); const i_t ok = presolve(original_lp, barrier_settings, presolved_lp, presolve_info); if (ok == CONCURRENT_HALT_RETURN) { return lp_status_t::CONCURRENT_LIMIT; } + if (ok == TIME_LIMIT_RETURN) { return lp_status_t::TIME_LIMIT; } if (ok == -1) { return lp_status_t::INFEASIBLE; } // Apply columns scaling to the presolve LP diff --git a/cpp/src/dual_simplex/types.hpp b/cpp/src/dual_simplex/types.hpp index 9de33ed3b..ea46a1f67 100644 --- a/cpp/src/dual_simplex/types.hpp +++ b/cpp/src/dual_simplex/types.hpp @@ -21,5 +21,7 @@ constexpr float64_t inf = std::numeric_limits::infinity(); // We return this constant to signal that a concurrent halt has occurred #define CONCURRENT_HALT_RETURN -2 +// We return this constant to signal that a time limit has occurred +#define TIME_LIMIT_RETURN -3 } // namespace cuopt::linear_programming::dual_simplex From 942de9c5937debdecf58ba4cd27139670629fa21 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Tue, 17 Feb 2026 02:56:56 -0800 Subject: [PATCH 07/44] remove timers from cuts --- cpp/src/branch_and_bound/branch_and_bound.cpp | 17 ++-- cpp/src/cuts/cuts.cpp | 79 +++---------------- cpp/src/cuts/cuts.hpp | 20 ++--- 3 files changed, 25 insertions(+), 91 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index bd06dd3a0..33a67acbf 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -2139,8 +2139,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut basis_update, root_relax_soln_.x, basic_list, - nonbasic_list, - exploration_stats_.start_time); + nonbasic_list); if (stop_for_time_limit()) { return solver_status_; } f_t cut_generation_time = toc(cut_start_time); if (cut_generation_time > 1.0) { @@ -2149,7 +2148,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut // Score the cuts if (stop_for_time_limit()) { return solver_status_; } f_t score_start_time = tic(); - cut_pool.score_cuts(root_relax_soln_.x, exploration_stats_.start_time); + cut_pool.score_cuts(root_relax_soln_.x); if (stop_for_time_limit()) { return solver_status_; } f_t score_time = toc(score_start_time); if (score_time > 1.0) { settings_.log.debug("Cut scoring time %.2f seconds\n", score_time); } @@ -2157,8 +2156,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut csr_matrix_t cuts_to_add(0, original_lp_.num_cols, 0); std::vector cut_rhs; std::vector cut_types; - i_t num_cuts = - cut_pool.get_best_cuts(cuts_to_add, cut_rhs, cut_types, exploration_stats_.start_time); + i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs, cut_types); if (stop_for_time_limit()) { return solver_status_; } if (num_cuts == 0) { break; } cut_info.record_cut_types(cut_types); @@ -2205,8 +2203,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut basic_list, nonbasic_list, root_vstatus_, - edge_norms_, - exploration_stats_.start_time); + edge_norms_); var_types_.resize(original_lp_.num_cols, variable_type_t::CONTINUOUS); mutex_original_lp_.unlock(); if (stop_for_time_limit()) { return solver_status_; } @@ -2214,11 +2211,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (add_cuts_time > 1.0) { settings_.log.debug("Add cuts time %.2f seconds\n", add_cuts_time); } - if (add_cuts_status == -2) { - solver_status_ = mip_status_t::TIME_LIMIT; - set_final_solution(solution, root_objective_); - return solver_status_; - } else if (add_cuts_status != 0) { + if (add_cuts_status != 0) { settings_.log.printf("Failed to add cuts\n"); return mip_status_t::NUMERICAL; } diff --git a/cpp/src/cuts/cuts.cpp b/cpp/src/cuts/cuts.cpp index 2d98badb8..724233b1f 100644 --- a/cpp/src/cuts/cuts.cpp +++ b/cpp/src/cuts/cuts.cpp @@ -97,7 +97,7 @@ f_t cut_pool_t::cut_orthogonality(i_t i, i_t j) } template -void cut_pool_t::score_cuts(std::vector& x_relax, f_t start_time) +void cut_pool_t::score_cuts(std::vector& x_relax) { const f_t min_cut_distance = 1e-4; cut_distances_.resize(cut_storage_.m, 0.0); @@ -105,11 +105,6 @@ void cut_pool_t::score_cuts(std::vector& x_relax, f_t start_time) const bool verbose = false; for (i_t i = 0; i < cut_storage_.m; i++) { - if (toc(start_time) >= settings_.time_limit) { - best_cuts_.clear(); - scored_cuts_ = 0; - return; - } f_t violation; f_t cut_dist = cut_distance(i, x_relax, violation, cut_norms_[i]); cut_distances_[i] = cut_dist <= min_cut_distance ? 0.0 : cut_dist; @@ -140,7 +135,6 @@ void cut_pool_t::score_cuts(std::vector& x_relax, f_t start_time) } while (scored_cuts_ < max_cuts && !sorted_indices.empty()) { - if (toc(start_time) >= settings_.time_limit) { return; } const i_t i = sorted_indices.back(); sorted_indices.pop_back(); @@ -149,7 +143,6 @@ void cut_pool_t::score_cuts(std::vector& x_relax, f_t start_time) f_t cut_ortho = 1.0; const i_t best_cuts_size = best_cuts_.size(); for (i_t k = 0; k < best_cuts_size; k++) { - if (toc(start_time) >= settings_.time_limit) { return; } const i_t j = best_cuts_[k]; cut_ortho = std::min(cut_ortho, cut_orthogonality(i, j)); } @@ -163,8 +156,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax, f_t start_time) template i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, std::vector& best_rhs, - std::vector& best_cut_types, - f_t start_time) + std::vector& best_cut_types) { best_cuts.m = 0; best_cuts.n = original_vars_; @@ -179,7 +171,6 @@ i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, best_cut_types.reserve(scored_cuts_); for (i_t k = 0; k < static_cast(best_cuts_.size()); ++k) { - if (toc(start_time) >= settings_.time_limit) { break; } const i_t i = best_cuts_[k]; sparse_vector_t cut(cut_storage_, i); cut.negate(); @@ -571,46 +562,33 @@ void cut_generation_t::generate_cuts(const lp_problem_t& lp, basis_update_mpf_t& basis_update, const std::vector& xstar, const std::vector& basic_list, - const std::vector& nonbasic_list, - f_t start_time) + const std::vector& nonbasic_list) { - if (toc(start_time) >= settings.time_limit) { return; } - // Generate Gomory and CG Cuts if (settings.mixed_integer_gomory_cuts != 0 || settings.strong_chvatal_gomory_cuts != 0) { f_t cut_start_time = tic(); - generate_gomory_cuts(lp, - settings, - Arow, - new_slacks, - var_types, - basis_update, - xstar, - basic_list, - nonbasic_list, - start_time); + generate_gomory_cuts( + lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list); f_t cut_generation_time = toc(cut_start_time); if (cut_generation_time > 1.0) { settings.log.debug("Gomory and CG cut generation time %.2f seconds\n", cut_generation_time); } } - if (toc(start_time) >= settings.time_limit) { return; } // Generate Knapsack cuts if (settings.knapsack_cuts != 0) { f_t cut_start_time = tic(); - generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar, start_time); + generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar); f_t cut_generation_time = toc(cut_start_time); if (cut_generation_time > 1.0) { settings.log.debug("Knapsack cut generation time %.2f seconds\n", cut_generation_time); } } - if (toc(start_time) >= settings.time_limit) { return; } // Generate MIR and CG cuts if (settings.mir_cuts != 0 || settings.strong_chvatal_gomory_cuts != 0) { f_t cut_start_time = tic(); - generate_mir_cuts(lp, settings, Arow, new_slacks, var_types, xstar, start_time); + generate_mir_cuts(lp, settings, Arow, new_slacks, var_types, xstar); f_t cut_generation_time = toc(cut_start_time); if (cut_generation_time > 1.0) { settings.log.debug("MIR and CG cut generation time %.2f seconds\n", cut_generation_time); @@ -625,12 +603,10 @@ void cut_generation_t::generate_knapsack_cuts( csr_matrix_t& Arow, const std::vector& new_slacks, const std::vector& var_types, - const std::vector& xstar, - f_t start_time) + const std::vector& xstar) { if (knapsack_generation_.num_knapsack_constraints() > 0) { for (i_t knapsack_row : knapsack_generation_.get_knapsack_constraints()) { - if (toc(start_time) >= settings.time_limit) { return; } sparse_vector_t cut(lp.num_cols, 0); f_t cut_rhs; i_t knapsack_status = knapsack_generation_.generate_knapsack_cuts( @@ -647,10 +623,8 @@ void cut_generation_t::generate_mir_cuts( csr_matrix_t& Arow, const std::vector& new_slacks, const std::vector& var_types, - const std::vector& xstar, - f_t start_time) + const std::vector& xstar) { - if (toc(start_time) >= settings.time_limit) { return; } f_t mir_start_time = tic(); mixed_integer_rounding_cut_t mir(lp, settings, new_slacks, xstar); strong_cg_cut_t cg(lp, var_types, xstar); @@ -668,7 +642,6 @@ void cut_generation_t::generate_mir_cuts( // Compute initial scores for all rows std::vector score(lp.num_rows, 0.0); for (i_t i = 0; i < lp.num_rows; i++) { - if (toc(start_time) >= settings.time_limit) { return; } const i_t row_start = Arow.row_start[i]; const i_t row_end = Arow.row_start[i + 1]; @@ -718,7 +691,6 @@ void cut_generation_t::generate_mir_cuts( const i_t max_cuts = std::min(lp.num_rows, 1000); f_t work_estimate = 0.0; for (i_t h = 0; h < max_cuts; h++) { - if (toc(start_time) >= settings.time_limit) { return; } // Get the row with the highest score const i_t i = sorted_indices.back(); sorted_indices.pop_back(); @@ -799,7 +771,6 @@ void cut_generation_t::generate_mir_cuts( work_estimate += lp.num_cols; while (!add_cut && num_aggregated < max_aggregated) { - if (toc(start_time) >= settings.time_limit) { return; } sparse_vector_t transformed_inequality; inequality.squeeze(transformed_inequality); f_t transformed_rhs = inequality_rhs; @@ -1011,15 +982,13 @@ void cut_generation_t::generate_gomory_cuts( basis_update_mpf_t& basis_update, const std::vector& xstar, const std::vector& basic_list, - const std::vector& nonbasic_list, - f_t start_time) + const std::vector& nonbasic_list) { tableau_equality_t tableau(lp, basis_update, nonbasic_list); mixed_integer_rounding_cut_t mir(lp, settings, new_slacks, xstar); strong_cg_cut_t cg(lp, var_types, xstar); for (i_t i = 0; i < lp.num_rows; i++) { - if (toc(start_time) >= settings.time_limit) { return; } sparse_vector_t inequality(lp.num_cols, 0); f_t inequality_rhs; const i_t j = basic_list[i]; @@ -2346,13 +2315,9 @@ i_t add_cuts(const simplex_solver_settings_t& settings, std::vector& basic_list, std::vector& nonbasic_list, std::vector& vstatus, - std::vector& edge_norms, - f_t start_time) + std::vector& edge_norms) { - constexpr i_t time_limit_status = -2; - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } - // Given a set of cuts: C*x <= d that are currently violated // by the current solution x* (i.e. C*x* > d), this function // adds the cuts into the LP and solves again. @@ -2380,7 +2345,6 @@ i_t add_cuts(const simplex_solver_settings_t& settings, settings.log.debug("Original lp rows %d\n", lp.num_rows); settings.log.debug("Original lp cols %d\n", lp.num_cols); - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } csr_matrix_t new_A_row(lp.num_rows, lp.num_cols, 1); lp.A.to_compressed_row(new_A_row); @@ -2390,7 +2354,6 @@ i_t add_cuts(const simplex_solver_settings_t& settings, assert(append_status == 0); } - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } csc_matrix_t new_A_col(lp.num_rows + p, lp.num_cols, 1); new_A_row.to_compressed_col(new_A_col); @@ -2445,7 +2408,6 @@ i_t add_cuts(const simplex_solver_settings_t& settings, settings.log.debug("Done adding rhs\n"); // Construct C_B = C(:, basic_list) - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } std::vector C_col_degree(lp.num_cols, 0); i_t cuts_nz = cuts.row_start[p]; for (i_t q = 0; q < cuts_nz; q++) { @@ -2475,7 +2437,6 @@ i_t add_cuts(const simplex_solver_settings_t& settings, } settings.log.debug("Done estimating C_B_nz\n"); - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } csr_matrix_t C_B(p, num_basic, C_B_nz); nz = 0; for (i_t i = 0; i < p; i++) { @@ -2500,7 +2461,6 @@ i_t add_cuts(const simplex_solver_settings_t& settings, settings.log.debug("C_B rows %d cols %d nz %d\n", C_B.m, C_B.n, nz); // Adjust the basis update to include the new cuts - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } basis_update.append_cuts(C_B); basic_list.resize(lp.num_rows, 0); @@ -2538,7 +2498,6 @@ i_t add_cuts(const simplex_solver_settings_t& settings, solution.y.resize(lp.num_rows, 0.0); solution.z.resize(lp.num_cols, 0.0); - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } return 0; } @@ -2559,9 +2518,6 @@ i_t remove_cuts(lp_problem_t& lp, basis_update_mpf_t& basis_update, f_t start_time) { - constexpr i_t time_limit_status = -2; - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } - std::vector cuts_to_remove; cuts_to_remove.reserve(lp.num_rows - original_rows); std::vector slacks_to_remove; @@ -2570,7 +2526,6 @@ i_t remove_cuts(lp_problem_t& lp, std::vector is_slack(lp.num_cols, 0); for (i_t j : new_slacks) { - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } is_slack[j] = 1; #ifdef CHECK_SLACKS // Check that slack column length is 1 @@ -2585,14 +2540,12 @@ i_t remove_cuts(lp_problem_t& lp, } for (i_t k = original_rows; k < lp.num_rows; k++) { - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } if (std::abs(y[k]) < dual_tol) { const i_t row_start = Arow.row_start[k]; const i_t row_end = Arow.row_start[k + 1]; i_t last_slack = -1; const f_t slack_tol = 1e-3; for (i_t p = row_start; p < row_end; p++) { - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } const i_t j = Arow.j[p]; if (is_slack[j]) { if (vstatus[j] == variable_status_t::BASIC && x[j] > slack_tol) { last_slack = j; } @@ -2606,7 +2559,6 @@ i_t remove_cuts(lp_problem_t& lp, } if (cuts_to_remove.size() > 0) { - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } std::vector marked_rows(lp.num_rows, 0); for (i_t i : cuts_to_remove) { marked_rows[i] = 1; @@ -2620,7 +2572,6 @@ i_t remove_cuts(lp_problem_t& lp, std::vector new_solution_y(lp.num_rows - cuts_to_remove.size()); i_t h = 0; for (i_t i = 0; i < lp.num_rows; i++) { - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } if (!marked_rows[i]) { new_rhs[h] = lp.rhs[i]; new_solution_y[h] = y[i]; @@ -2647,7 +2598,6 @@ i_t remove_cuts(lp_problem_t& lp, std::vector new_is_slacks(lp.num_cols - slacks_to_remove.size(), 0); h = 0; for (i_t k = 0; k < lp.num_cols; k++) { - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } if (!marked_cols[k]) { new_objective[h] = lp.objective[k]; new_lower[h] = lp.lower[k]; @@ -2695,15 +2645,13 @@ i_t remove_cuts(lp_problem_t& lp, lp.num_cols, lp.A.col_start[lp.A.n]); - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } basis_update.resize(lp.num_rows); i_t refactor_status = basis_update.refactor_basis( lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus, start_time); if (refactor_status == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } - if (refactor_status == TIME_LIMIT_RETURN) { return time_limit_status; } + if (refactor_status == TIME_LIMIT_RETURN) { return TIME_LIMIT_RETURN; } } - if (toc(start_time) >= settings.time_limit) { return time_limit_status; } return 0; } @@ -2849,8 +2797,7 @@ template int add_cuts(const simplex_solver_settings_t& settings, std::vector& basic_list, std::vector& nonbasic_list, std::vector& vstatus, - std::vector& edge_norms, - double start_time); + std::vector& edge_norms); template int remove_cuts(lp_problem_t& lp, const simplex_solver_settings_t& settings, diff --git a/cpp/src/cuts/cuts.hpp b/cpp/src/cuts/cuts.hpp index 17f5e032a..96df6240b 100644 --- a/cpp/src/cuts/cuts.hpp +++ b/cpp/src/cuts/cuts.hpp @@ -138,13 +138,12 @@ class cut_pool_t { // cut'*xstart < rhs void add_cut(cut_type_t cut_type, const sparse_vector_t& cut, f_t rhs); - void score_cuts(std::vector& x_relax, f_t start_time); + void score_cuts(std::vector& x_relax); // We return the cuts in the form best_cuts*x <= best_rhs i_t get_best_cuts(csr_matrix_t& best_cuts, std::vector& best_rhs, - std::vector& best_cut_types, - f_t start_time); + std::vector& best_cut_types); void age_cuts(); @@ -240,8 +239,7 @@ class cut_generation_t { basis_update_mpf_t& basis_update, const std::vector& xstar, const std::vector& basic_list, - const std::vector& nonbasic_list, - f_t start_time); + const std::vector& nonbasic_list); private: // Generate all mixed integer gomory cuts @@ -253,8 +251,7 @@ class cut_generation_t { basis_update_mpf_t& basis_update, const std::vector& xstar, const std::vector& basic_list, - const std::vector& nonbasic_list, - f_t start_time); + const std::vector& nonbasic_list); // Generate all mixed integer rounding cuts void generate_mir_cuts(const lp_problem_t& lp, @@ -262,8 +259,7 @@ class cut_generation_t { csr_matrix_t& Arow, const std::vector& new_slacks, const std::vector& var_types, - const std::vector& xstar, - f_t start_time); + const std::vector& xstar); // Generate all knapsack cuts void generate_knapsack_cuts(const lp_problem_t& lp, @@ -271,8 +267,7 @@ class cut_generation_t { csr_matrix_t& Arow, const std::vector& new_slacks, const std::vector& var_types, - const std::vector& xstar, - f_t start_time); + const std::vector& xstar); cut_pool_t& cut_pool_; knapsack_generation_t knapsack_generation_; @@ -463,8 +458,7 @@ i_t add_cuts(const simplex_solver_settings_t& settings, std::vector& basic_list, std::vector& nonbasic_list, std::vector& vstatus, - std::vector& edge_norms, - f_t start_time); + std::vector& edge_norms); template i_t remove_cuts(lp_problem_t& lp, From 0b944d1e01661ae4c39eff7036324f329ee6895a Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Tue, 17 Feb 2026 04:17:26 -0800 Subject: [PATCH 08/44] convert lambda to function and remove unnecessary checks --- cpp/src/branch_and_bound/branch_and_bound.cpp | 57 +++++++------------ cpp/src/branch_and_bound/branch_and_bound.hpp | 2 + 2 files changed, 22 insertions(+), 37 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 33a67acbf..6624e42e8 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1946,6 +1946,18 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( return root_status; } +template +bool branch_and_bound_t::stop_for_time_limit(mip_solution_t& solution) +{ + const f_t elapsed = toc(exploration_stats_.start_time); + if (elapsed > settings_.time_limit) { + solver_status_ = mip_status_t::TIME_LIMIT; + set_final_solution(solution, root_objective_); + return true; + } + return false; +}; + template mip_status_t branch_and_bound_t::solve(mip_solution_t& solution) { @@ -2099,19 +2111,10 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut f_t last_upper_bound = std::numeric_limits::infinity(); f_t last_objective = root_objective_; f_t root_relax_objective = root_objective_; - auto stop_for_time_limit = [&]() -> bool { - const f_t elapsed = toc(exploration_stats_.start_time); - if (elapsed > settings_.time_limit) { - solver_status_ = mip_status_t::TIME_LIMIT; - set_final_solution(solution, root_objective_); - return true; - } - return false; - }; i_t cut_pool_size = 0; for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) { - if (stop_for_time_limit()) { return solver_status_; } + if (stop_for_time_limit(solution)) { return solver_status_; } if (num_fractional == 0) { set_solution_at_root(solution, cut_info); return mip_status_t::OPTIMAL; @@ -2128,8 +2131,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } #endif - // Generate cuts and add them to the cut pool - if (stop_for_time_limit()) { return solver_status_; } f_t cut_start_time = tic(); cut_generation.generate_cuts(original_lp_, settings_, @@ -2140,16 +2141,14 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut root_relax_soln_.x, basic_list, nonbasic_list); - if (stop_for_time_limit()) { return solver_status_; } f_t cut_generation_time = toc(cut_start_time); if (cut_generation_time > 1.0) { settings_.log.debug("Cut generation time %.2f seconds\n", cut_generation_time); } // Score the cuts - if (stop_for_time_limit()) { return solver_status_; } f_t score_start_time = tic(); cut_pool.score_cuts(root_relax_soln_.x); - if (stop_for_time_limit()) { return solver_status_; } + if (stop_for_time_limit(solution)) { return solver_status_; } f_t score_time = toc(score_start_time); if (score_time > 1.0) { settings_.log.debug("Cut scoring time %.2f seconds\n", score_time); } // Get the best cuts from the cut pool @@ -2157,7 +2156,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut std::vector cut_rhs; std::vector cut_types; i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs, cut_types); - if (stop_for_time_limit()) { return solver_status_; } if (num_cuts == 0) { break; } cut_info.record_cut_types(cut_types); #ifdef PRINT_CUT_POOL_TYPES @@ -2190,7 +2188,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut cuts_to_add.m + original_lp_.num_rows); lp_settings.log.log = false; - if (stop_for_time_limit()) { return solver_status_; } f_t add_cuts_start_time = tic(); mutex_original_lp_.lock(); i_t add_cuts_status = add_cuts(settings_, @@ -2206,7 +2203,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut edge_norms_); var_types_.resize(original_lp_.num_cols, variable_type_t::CONTINUOUS); mutex_original_lp_.unlock(); - if (stop_for_time_limit()) { return solver_status_; } f_t add_cuts_time = toc(add_cuts_start_time); if (add_cuts_time > 1.0) { settings_.log.debug("Add cuts time %.2f seconds\n", add_cuts_time); @@ -2238,7 +2234,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut #endif original_lp_.A.to_compressed_row(Arow_); - if (stop_for_time_limit()) { return solver_status_; } f_t node_presolve_start_time = tic(); bounds_strengthening_t node_presolve(original_lp_, Arow_, row_sense, var_types_); std::vector new_lower = original_lp_.lower; @@ -2249,7 +2244,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut original_lp_.lower = new_lower; original_lp_.upper = new_upper; mutex_original_lp_.unlock(); - if (stop_for_time_limit()) { return solver_status_; } f_t node_presolve_time = toc(node_presolve_start_time); if (node_presolve_time > 1.0) { settings_.log.debug("Node presolve time %.2f seconds\n", node_presolve_time); @@ -2262,9 +2256,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut i_t iter = 0; bool initialize_basis = false; lp_settings.concurrent_halt = NULL; - if (stop_for_time_limit()) { return solver_status_; } - f_t dual_phase2_start_time = tic(); - dual::status_t cut_status = dual_phase2_with_advanced_basis(2, + f_t dual_phase2_start_time = tic(); + dual::status_t cut_status = dual_phase2_with_advanced_basis(2, 0, initialize_basis, exploration_stats_.start_time, @@ -2283,15 +2276,10 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (dual_phase2_time > 1.0) { settings_.log.debug("Dual phase2 time %.2f seconds\n", dual_phase2_time); } - if (stop_for_time_limit()) { return solver_status_; } - if (cut_status == dual::status_t::TIME_LIMIT) { - solver_status_ = mip_status_t::TIME_LIMIT; - set_final_solution(solution, root_objective_); - return solver_status_; - } + + if (stop_for_time_limit(solution)) { return solver_status_; } if (cut_status != dual::status_t::OPTIMAL) { - if (stop_for_time_limit()) { return solver_status_; } settings_.log.printf("Numerical issue at root node. Resolving from scratch\n"); lp_status_t scratch_status = solve_linear_program_with_advanced_basis(original_lp_, @@ -2303,11 +2291,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut nonbasic_list, root_vstatus_, edge_norms_); - if (stop_for_time_limit() || scratch_status == lp_status_t::TIME_LIMIT) { - solver_status_ = mip_status_t::TIME_LIMIT; - set_final_solution(solution, root_objective_); - return solver_status_; - } + if (stop_for_time_limit(solution)) { return solver_status_; } if (scratch_status == lp_status_t::OPTIMAL) { // We recovered cut_status = convert_lp_status_to_dual_status(scratch_status); @@ -2319,7 +2303,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } } - if (stop_for_time_limit()) { return solver_status_; } f_t remove_cuts_start_time = tic(); mutex_original_lp_.lock(); i_t remove_cuts_status = remove_cuts(original_lp_, @@ -2338,7 +2321,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut basis_update, exploration_stats_.start_time); mutex_original_lp_.unlock(); - if (stop_for_time_limit()) { return solver_status_; } + if (stop_for_time_limit(solution)) { return solver_status_; } f_t remove_cuts_time = toc(remove_cuts_start_time); if (remove_cuts_time > 1.0) { settings_.log.debug("Remove cuts time %.2f seconds\n", remove_cuts_time); diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 84ee986c4..a13d5cedc 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -106,6 +106,8 @@ class branch_and_bound_t { void set_concurrent_lp_root_solve(bool enable) { enable_concurrent_lp_root_solve_ = enable; } + bool stop_for_time_limit(mip_solution_t& solution); + // Repair a low-quality solution from the heuristics. bool repair_solution(const std::vector& leaf_edge_norms, const std::vector& potential_solution, From 71b7f2f41247b6b0bcec382457f7cfeb694cc6b7 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Tue, 17 Feb 2026 08:01:00 -0800 Subject: [PATCH 09/44] fix thrust changes --- cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu index bc12fb360..31916e2c3 100644 --- a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu @@ -1995,14 +1995,14 @@ void pdlp_restart_strategy_t::solve_bound_constrained_trust_region( f_t* end = threshold_.data() + primal_size_h_ + dual_size_h_; auto highest_negInf_primal = thrust::find(handle_ptr_->get_thrust_policy(), - thrust::make_reverse_iterator(thrust::device_ptr(end)), - thrust::make_reverse_iterator(thrust::device_ptr(start)), + cuda::std::reverse_iterator(thrust::device_ptr(end)), + cuda::std::reverse_iterator(thrust::device_ptr(start)), -std::numeric_limits::infinity()); // Set ranges accordingly i_t index_start_primal = 0; i_t index_end_primal = primal_size_h_ + dual_size_h_; - if (highest_negInf_primal != thrust::make_reverse_iterator(thrust::device_ptr(start))) { + if (highest_negInf_primal != cuda::std::reverse_iterator(thrust::device_ptr(start))) { cuopt_assert(device_to_host_value(thrust::raw_pointer_cast(&*highest_negInf_primal)) == -std::numeric_limits::infinity(), "Incorrect primal reverse iterator"); From 82b2d64408abcaced475e47fda80df332ff48d9c Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 18 Feb 2026 02:12:13 -0800 Subject: [PATCH 10/44] handle review comments --- cpp/src/branch_and_bound/branch_and_bound.cpp | 10 ++++++++-- cpp/src/cuts/cuts.cpp | 3 +-- cpp/src/dual_simplex/crossover.cpp | 9 ++------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 6624e42e8..8c32f5494 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -1893,7 +1894,7 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( if (refactor_status == TIME_LIMIT_RETURN) { root_status = lp_status_t::TIME_LIMIT; } else if (refactor_status == CONCURRENT_HALT_RETURN) { - root_status = lp_status_t::TIME_LIMIT; + root_status = lp_status_t::CONCURRENT_LIMIT; } else if (refactor_status != 0) { settings_.log.printf("Failed to refactor basis. %d deficient columns.\n", refactor_status); assert(refactor_status == 0); @@ -2373,7 +2374,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms_); pc_.resize(original_lp_.num_cols); - if (toc(exploration_stats_.start_time) < settings_.time_limit) { + if (toc(exploration_stats_.start_time) >= settings_.time_limit) { + solver_status_ = mip_status_t::TIME_LIMIT; + set_final_solution(solution, root_objective_); + return solver_status_; + } + { raft::common::nvtx::range scope_sb("BB::strong_branching"); strong_branching(original_problem_, original_lp_, diff --git a/cpp/src/cuts/cuts.cpp b/cpp/src/cuts/cuts.cpp index 724233b1f..fab6297de 100644 --- a/cpp/src/cuts/cuts.cpp +++ b/cpp/src/cuts/cuts.cpp @@ -170,8 +170,7 @@ i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, best_cut_types.clear(); best_cut_types.reserve(scored_cuts_); - for (i_t k = 0; k < static_cast(best_cuts_.size()); ++k) { - const i_t i = best_cuts_[k]; + for (i_t i : best_cuts_) { sparse_vector_t cut(cut_storage_, i); cut.negate(); best_cuts.append_row(cut); diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 65da8d45c..626b15035 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -1227,7 +1227,6 @@ crossover_status_t crossover(const lp_problem_t& lp, rank = factorize_basis( lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); - if (rank == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; } if (rank < 0) { return return_to_status(rank); } if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); @@ -1399,9 +1398,7 @@ crossover_status_t crossover(const lp_problem_t& lp, get_basis_from_vstatus(m, vstatus, basic_list, nonbasic_list, superbasic_list); rank = factorize_basis( lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); - if (rank == CONCURRENT_HALT_RETURN) { - return crossover_status_t::CONCURRENT_LIMIT; - } else if (rank < 0) { + if (rank < 0) { return return_to_status(rank); } else if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); @@ -1417,9 +1414,7 @@ crossover_status_t crossover(const lp_problem_t& lp, vstatus); rank = factorize_basis( lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); - if (rank == CONCURRENT_HALT_RETURN) { - return crossover_status_t::CONCURRENT_LIMIT; - } else if (rank < 0) { + if (rank < 0) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); return return_to_status(rank); } else { From 0c811732b97d9e945e843c8eaa4647b0172126ae Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 18 Feb 2026 06:04:46 -0800 Subject: [PATCH 11/44] handle review comments --- cpp/src/branch_and_bound/branch_and_bound.cpp | 36 +++++++++---------- cpp/src/cuts/cuts.cpp | 8 ++--- cpp/src/cuts/cuts.hpp | 4 +-- cpp/src/dual_simplex/basis_solves.cpp | 6 ++-- cpp/src/dual_simplex/right_looking_lu.cpp | 14 ++++---- cpp/src/dual_simplex/right_looking_lu.hpp | 4 +-- 6 files changed, 35 insertions(+), 37 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 8c32f5494..30b913e6e 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -1957,7 +1956,7 @@ bool branch_and_bound_t::stop_for_time_limit(mip_solution_t& return true; } return false; -}; +} template mip_status_t branch_and_bound_t::solve(mip_solution_t& solution) @@ -2115,7 +2114,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut i_t cut_pool_size = 0; for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) { - if (stop_for_time_limit(solution)) { return solver_status_; } if (num_fractional == 0) { set_solution_at_root(solution, cut_info); return mip_status_t::OPTIMAL; @@ -2132,6 +2130,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } #endif + // Generate cuts and add them to the cut pool f_t cut_start_time = tic(); cut_generation.generate_cuts(original_lp_, settings_, @@ -2306,23 +2305,22 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut f_t remove_cuts_start_time = tic(); mutex_original_lp_.lock(); - i_t remove_cuts_status = remove_cuts(original_lp_, - settings_, - Arow_, - new_slacks_, - original_rows, - var_types_, - root_vstatus_, - edge_norms_, - root_relax_soln_.x, - root_relax_soln_.y, - root_relax_soln_.z, - basic_list, - nonbasic_list, - basis_update, - exploration_stats_.start_time); + remove_cuts(original_lp_, + settings_, + exploration_stats_.start_time, + Arow_, + new_slacks_, + original_rows, + var_types_, + root_vstatus_, + edge_norms_, + root_relax_soln_.x, + root_relax_soln_.y, + root_relax_soln_.z, + basic_list, + nonbasic_list, + basis_update); mutex_original_lp_.unlock(); - if (stop_for_time_limit(solution)) { return solver_status_; } f_t remove_cuts_time = toc(remove_cuts_start_time); if (remove_cuts_time > 1.0) { settings_.log.debug("Remove cuts time %.2f seconds\n", remove_cuts_time); diff --git a/cpp/src/cuts/cuts.cpp b/cpp/src/cuts/cuts.cpp index fab6297de..bbb4d1f15 100644 --- a/cpp/src/cuts/cuts.cpp +++ b/cpp/src/cuts/cuts.cpp @@ -2503,6 +2503,7 @@ i_t add_cuts(const simplex_solver_settings_t& settings, template i_t remove_cuts(lp_problem_t& lp, const simplex_solver_settings_t& settings, + f_t start_time, csr_matrix_t& Arow, std::vector& new_slacks, i_t original_rows, @@ -2514,8 +2515,7 @@ i_t remove_cuts(lp_problem_t& lp, std::vector& z, std::vector& basic_list, std::vector& nonbasic_list, - basis_update_mpf_t& basis_update, - f_t start_time) + basis_update_mpf_t& basis_update) { std::vector cuts_to_remove; cuts_to_remove.reserve(lp.num_rows - original_rows); @@ -2800,6 +2800,7 @@ template int add_cuts(const simplex_solver_settings_t& settings, template int remove_cuts(lp_problem_t& lp, const simplex_solver_settings_t& settings, + double start_time, csr_matrix_t& Arow, std::vector& new_slacks, int original_rows, @@ -2811,8 +2812,7 @@ template int remove_cuts(lp_problem_t& lp, std::vector& z, std::vector& basic_list, std::vector& nonbasic_list, - basis_update_mpf_t& basis_update, - double start_time); + basis_update_mpf_t& basis_update); template void read_saved_solution_for_cut_verification( const lp_problem_t& lp, diff --git a/cpp/src/cuts/cuts.hpp b/cpp/src/cuts/cuts.hpp index 96df6240b..4f55e96e4 100644 --- a/cpp/src/cuts/cuts.hpp +++ b/cpp/src/cuts/cuts.hpp @@ -463,6 +463,7 @@ i_t add_cuts(const simplex_solver_settings_t& settings, template i_t remove_cuts(lp_problem_t& lp, const simplex_solver_settings_t& settings, + f_t start_time, csr_matrix_t& Arow, std::vector& new_slacks, i_t original_rows, @@ -474,7 +475,6 @@ i_t remove_cuts(lp_problem_t& lp, std::vector& z, std::vector& basic_list, std::vector& nonbasic_list, - basis_update_mpf_t& basis_update, - f_t start_time); + basis_update_mpf_t& basis_update); } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp index 9a8cfc143..1c17fc557 100644 --- a/cpp/src/dual_simplex/basis_solves.cpp +++ b/cpp/src/dual_simplex/basis_solves.cpp @@ -361,11 +361,11 @@ i_t factorize_basis(const csc_matrix_t& A, settings, settings.threshold_partial_pivoting_tol, identity, + start_time, S_col_perm, SL, SU, - S_perm_inv, - start_time); + S_perm_inv); if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return CONCURRENT_HALT_RETURN; } @@ -570,7 +570,7 @@ i_t factorize_basis(const csc_matrix_t& A, } q.resize(m); f_t fact_start = tic(); - rank = right_looking_lu(A, settings, medium_tol, basic_list, q, L, U, pinv, start_time); + rank = right_looking_lu(A, settings, medium_tol, basic_list, start_time, q, L, U, pinv); if (rank < 0) { if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return CONCURRENT_HALT_RETURN; diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 4c55e1f19..0f3a9bc2b 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -577,11 +577,11 @@ i_t right_looking_lu(const csc_matrix_t& A, const simplex_solver_settings_t& settings, f_t tol, const VectorI& column_list, + f_t start_time, VectorI& q, csc_matrix_t& L, csc_matrix_t& U, - VectorI& pinv, - f_t start_time) + VectorI& pinv) { raft::common::nvtx::range scope("LU::right_looking_lu"); const i_t n = column_list.size(); @@ -635,10 +635,10 @@ i_t right_looking_lu(const csc_matrix_t& A, i_t pivots = 0; for (i_t k = 0; k < n; ++k) { - if (toc(start_time) > settings.time_limit) { return TIME_LIMIT_RETURN; } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return CONCURRENT_HALT_RETURN; } + if (toc(start_time) > settings.time_limit) { return TIME_LIMIT_RETURN; } // Find pivot that satisfies // abs(pivot) >= abstol, // abs(pivot) >= threshold_tol * max abs[pivot column] @@ -1154,22 +1154,22 @@ template int right_looking_lu>( const simplex_solver_settings_t& settings, double tol, const std::vector& column_list, + double start_time, std::vector& q, csc_matrix_t& L, csc_matrix_t& U, - std::vector& pinv, - double start_time); + std::vector& pinv); template int right_looking_lu>( const csc_matrix_t& A, const simplex_solver_settings_t& settings, double tol, const ins_vector& column_list, + double start_time, ins_vector& q, csc_matrix_t& L, csc_matrix_t& U, - ins_vector& pinv, - double start_time); + ins_vector& pinv); template int right_looking_lu_row_permutation_only( const csc_matrix_t& A, diff --git a/cpp/src/dual_simplex/right_looking_lu.hpp b/cpp/src/dual_simplex/right_looking_lu.hpp index 3f83da42f..02883ef75 100644 --- a/cpp/src/dual_simplex/right_looking_lu.hpp +++ b/cpp/src/dual_simplex/right_looking_lu.hpp @@ -19,11 +19,11 @@ i_t right_looking_lu(const csc_matrix_t& A, const simplex_solver_settings_t& settings, f_t tol, const VectorI& column_list, + f_t start_time, VectorI& q, csc_matrix_t& L, csc_matrix_t& U, - VectorI& pinv, - f_t start_time); + VectorI& pinv); template i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, From 8f93926a87e0d45b4378d72a93325ab7e6a78ea3 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 18 Feb 2026 06:14:50 -0800 Subject: [PATCH 12/44] revert scaling --- cpp/include/cuopt/linear_programming/mip/solver_settings.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 60534a36f..6d32cd5ed 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -107,7 +107,7 @@ class mip_solver_settings_t { /** Initial primal solutions */ std::vector>> initial_solutions; - bool mip_scaling = true; + bool mip_scaling = false; presolver_t presolver{presolver_t::Default}; /** * @brief Determinism mode for MIP solver. From 80e265079de3afd3af56af1c24f782f1e82fdda2 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 18 Feb 2026 08:13:27 -0800 Subject: [PATCH 13/44] initial row scaling for mip --- cpp/src/mip_heuristics/CMakeLists.txt | 1 + .../diversity/diversity_manager.cu | 1 + .../mip_heuristics/diversity/population.cu | 11 +- .../diversity/recombiners/sub_mip.cuh | 17 +- .../mip_heuristics/mip_scaling_strategy.cu | 265 ++++++++++++++++++ .../mip_heuristics/mip_scaling_strategy.cuh | 31 ++ cpp/src/mip_heuristics/solve.cu | 52 +--- cpp/src/mip_heuristics/solver.cu | 2 +- cpp/src/mip_heuristics/solver.cuh | 2 +- cpp/src/mip_heuristics/solver_context.cuh | 6 +- cpp/tests/mip/feasibility_jump_tests.cu | 12 +- cpp/tests/mip/load_balancing_test.cu | 13 +- cpp/tests/mip/multi_probe_test.cu | 14 +- 13 files changed, 325 insertions(+), 102 deletions(-) create mode 100644 cpp/src/mip_heuristics/mip_scaling_strategy.cu create mode 100644 cpp/src/mip_heuristics/mip_scaling_strategy.cuh diff --git a/cpp/src/mip_heuristics/CMakeLists.txt b/cpp/src/mip_heuristics/CMakeLists.txt index 538e3c49a..410736654 100644 --- a/cpp/src/mip_heuristics/CMakeLists.txt +++ b/cpp/src/mip_heuristics/CMakeLists.txt @@ -18,6 +18,7 @@ set(MIP_LP_NECESSARY_FILES # Files that are MIP-specific and not needed for pure LP set(MIP_NON_LP_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/mip_scaling_strategy.cu ${CMAKE_CURRENT_SOURCE_DIR}/solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/solver.cu ${CMAKE_CURRENT_SOURCE_DIR}/diversity/assignment_hash_map.cu diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 25b021ac3..649b4889e 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -215,6 +215,7 @@ bool diversity_manager_t::run_presolve(f_t time_limit) if (!check_bounds_sanity(*problem_ptr)) { return false; } } } + if (context.settings.mip_scaling) { context.scaling.scale_problem(); } stats.presolve_time = presolve_timer.elapsed_time(); lp_optimal_solution.resize(problem_ptr->n_variables, problem_ptr->handle_ptr->get_stream()); lp_dual_optimal_solution.resize(problem_ptr->n_constraints, diff --git a/cpp/src/mip_heuristics/diversity/population.cu b/cpp/src/mip_heuristics/diversity/population.cu index c2138c91c..4629028cd 100644 --- a/cpp/src/mip_heuristics/diversity/population.cu +++ b/cpp/src/mip_heuristics/diversity/population.cu @@ -267,10 +267,6 @@ void population_t::invoke_get_solution_callback( f_t user_bound = context.stats.get_solution_bound(); solution_t temp_sol(sol); problem_ptr->post_process_assignment(temp_sol.assignment); - if (context.settings.mip_scaling) { - rmm::device_uvector dummy(0, temp_sol.handle_ptr->get_stream()); - context.scaling.unscale_solutions(temp_sol.assignment, dummy); - } if (problem_ptr->has_papilo_presolve_data()) { problem_ptr->papilo_uncrush_assignment(temp_sol.assignment); } @@ -310,10 +306,8 @@ void population_t::run_solution_callbacks(solution_t& sol) invoke_get_solution_callback(sol, get_sol_callback); } } - // save the best objective here, because we might not have been able to return the solution to - // the user because of the unscaling that causes infeasibility. - // This prevents an issue of repaired, or a fully feasible solution being reported in the call - // back in next run. + // Save the best objective here even if callback handling later exits early. + // This prevents older solutions from being reported as "new best" in subsequent callbacks. best_feasible_objective = sol.get_objective(); } @@ -346,7 +340,6 @@ void population_t::run_solution_callbacks(solution_t& sol) incumbent_assignment.size(), sol.handle_ptr->get_stream()); - if (context.settings.mip_scaling) { context.scaling.scale_solutions(incumbent_assignment); } bool is_valid = problem_ptr->pre_process_assignment(incumbent_assignment); if (!is_valid) { return; } cuopt_assert(outside_sol.assignment.size() == incumbent_assignment.size(), diff --git a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh index b2f7f8006..02c79be15 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh @@ -71,18 +71,7 @@ class sub_mip_recombiner_t : public recombiner_t { "n_vars_from_guiding %d n_vars_from_other %d", n_vars_from_guiding, n_vars_from_other); this->compute_vars_to_fix(offspring, vars_to_fix, n_vars_from_other, n_vars_from_guiding); auto [fixed_problem, fixed_assignment, variable_map] = offspring.fix_variables(vars_to_fix); - // TODO ask Akif and Alice if this is ok - pdlp_initial_scaling_strategy_t scaling( - fixed_problem.handle_ptr, - fixed_problem, - context.settings.hyper_params.default_l_inf_ruiz_iterations, - (f_t)context.settings.hyper_params.default_alpha_pock_chambolle_rescaling, - fixed_problem.reverse_coefficients, - fixed_problem.reverse_offsets, - fixed_problem.reverse_constraints, - nullptr, - context.settings.hyper_params, - true); + mip_scaling_strategy_t scaling(fixed_problem); scaling.scale_problem(); fixed_problem.presolve_data.reset_additional_vars(fixed_problem, offspring.handle_ptr); fixed_problem.presolve_data.initialize_var_mapping(fixed_problem, offspring.handle_ptr); @@ -141,8 +130,6 @@ class sub_mip_recombiner_t : public recombiner_t { offspring.handle_ptr->sync_stream(); } if (solution_vector.size() > 0) { - rmm::device_uvector dummy(0, offspring.handle_ptr->get_stream()); - scaling.unscale_solutions(fixed_assignment, dummy); // unfix the assignment on given result no matter if it is feasible offspring.unfix_variables(fixed_assignment, variable_map); offspring @@ -174,8 +161,6 @@ class sub_mip_recombiner_t : public recombiner_t { solution.size(), offspring.handle_ptr->get_stream()); fixed_problem.post_process_assignment(fixed_assignment, false); - rmm::device_uvector dummy(0, offspring.handle_ptr->get_stream()); - scaling.unscale_solutions(fixed_assignment, dummy); sol.unfix_variables(fixed_assignment, variable_map); sol.clamp_within_bounds(); // Scaling might bring some very slight variable bound violations sol.compute_feasibility(); diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu new file mode 100644 index 000000000..9523128d4 --- /dev/null +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -0,0 +1,265 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include +#include + +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming::detail { +namespace { + +constexpr int scaling_threads = 256; +constexpr int row_scaling_num_iterations = 3; +constexpr int row_scaling_k_min = -20; +constexpr int row_scaling_k_max = 20; +constexpr double nearest_pow2_mantissa_threshold = 0.7071067811865476; + +constexpr double big_m_abs_threshold = 1.0e4; +constexpr double big_m_ratio_threshold = 1.0e4; + +inline int get_num_blocks(size_t n) +{ + if (n == 0) { return 1; } + size_t blocks = (n + scaling_threads - 1) / scaling_threads; + return static_cast(std::min(65535, std::max(1, blocks))); +} + +template +__global__ void analyze_rows_for_scaling_kernel( + const typename problem_t::view_t op_problem, f_t* row_inf_norm, i_t* row_skip_scaling) +{ + for (i_t row = blockIdx.x * blockDim.x + threadIdx.x; row < op_problem.n_constraints; + row += blockDim.x * gridDim.x) { + const auto [row_start, row_end] = op_problem.range_for_constraint(row); + f_t row_norm = f_t(0); + f_t row_min_nonzero = std::numeric_limits::infinity(); + i_t row_nonzero_count = 0; + for (i_t idx = row_start; idx < row_end; ++idx) { + f_t abs_value = raft::abs(op_problem.coefficients[idx]); + if (abs_value > row_norm) { row_norm = abs_value; } + if (abs_value > f_t(0)) { + row_min_nonzero = abs_value < row_min_nonzero ? abs_value : row_min_nonzero; + row_nonzero_count++; + } + } + row_inf_norm[row] = row_norm; + + bool skip_big_m_scaling = false; + if (row_nonzero_count >= 2 && row_min_nonzero < std::numeric_limits::infinity()) { + const f_t row_ratio = row_norm / row_min_nonzero; + skip_big_m_scaling = row_norm >= static_cast(big_m_abs_threshold) && + row_ratio >= static_cast(big_m_ratio_threshold); + } + row_skip_scaling[row] = skip_big_m_scaling ? i_t(1) : i_t(0); + } +} + +template +__global__ void compute_row_inf_norm_kernel(const typename problem_t::view_t op_problem, + f_t* row_inf_norm) +{ + for (i_t row = blockIdx.x * blockDim.x + threadIdx.x; row < op_problem.n_constraints; + row += blockDim.x * gridDim.x) { + const auto [row_start, row_end] = op_problem.range_for_constraint(row); + f_t row_norm = f_t(0); + for (i_t idx = row_start; idx < row_end; ++idx) { + const f_t abs_value = raft::abs(op_problem.coefficients[idx]); + if (abs_value > row_norm) { row_norm = abs_value; } + } + row_inf_norm[row] = row_norm; + } +} + +template +__global__ void compute_row_log2_and_active_kernel(const f_t* row_inf_norm, + const i_t* row_skip_scaling, + i_t n_rows, + double* row_log2_values, + i_t* row_active) +{ + for (i_t row = blockIdx.x * blockDim.x + threadIdx.x; row < n_rows; + row += blockDim.x * gridDim.x) { + if (row_skip_scaling[row] || row_inf_norm[row] <= f_t(0)) { + row_log2_values[row] = 0.0; + row_active[row] = i_t(0); + continue; + } + row_log2_values[row] = ::log2(static_cast(row_inf_norm[row])); + row_active[row] = i_t(1); + } +} + +template +__global__ void compute_iteration_scaling_kernel(const f_t* row_inf_norm, + const i_t* row_skip_scaling, + i_t n_rows, + f_t target_norm, + f_t* iteration_scaling) +{ + for (i_t row = blockIdx.x * blockDim.x + threadIdx.x; row < n_rows; + row += blockDim.x * gridDim.x) { + if (row_skip_scaling[row] || row_inf_norm[row] <= f_t(0) || target_norm <= f_t(0)) { + iteration_scaling[row] = f_t(1); + continue; + } + const f_t desired_scaling = target_norm / row_inf_norm[row]; + if (!::isfinite(desired_scaling) || desired_scaling <= f_t(0)) { + iteration_scaling[row] = f_t(1); + continue; + } + + int exponent = 0; + f_t mantissa = ::frexp(desired_scaling, &exponent); // desired_scaling = mantissa * 2^exponent + int k = mantissa >= static_cast(nearest_pow2_mantissa_threshold) ? exponent : exponent - 1; + if (k < row_scaling_k_min) { k = row_scaling_k_min; } + if (k > row_scaling_k_max) { k = row_scaling_k_max; } + iteration_scaling[row] = ::ldexp(f_t(1), k); + } +} + +template +__global__ void apply_row_scaling_kernel(const typename problem_t::view_t op_problem, + const f_t* constraint_scaling) +{ + for (i_t row = blockIdx.x * blockDim.x + threadIdx.x; row < op_problem.n_constraints; + row += blockDim.x * gridDim.x) { + const f_t scaling = constraint_scaling[row]; + const auto [row_start, row_end] = op_problem.range_for_constraint(row); + for (i_t idx = row_start; idx < row_end; ++idx) { + op_problem.coefficients[idx] *= scaling; + } + op_problem.constraint_lower_bounds[row] *= scaling; + op_problem.constraint_upper_bounds[row] *= scaling; + } +} + +template +__global__ void apply_row_scaling_transpose_kernel( + const typename problem_t::view_t op_problem, const f_t* constraint_scaling) +{ + for (i_t var = blockIdx.x * blockDim.x + threadIdx.x; var < op_problem.n_variables; + var += blockDim.x * gridDim.x) { + const auto [start, end] = op_problem.reverse_range_for_var(var); + for (i_t idx = start; idx < end; ++idx) { + i_t row = op_problem.reverse_constraints[idx]; + op_problem.reverse_coefficients[idx] *= constraint_scaling[row]; + } + } +} + +} // namespace + +template +mip_scaling_strategy_t::mip_scaling_strategy_t(problem_t& op_problem_scaled) + : handle_ptr_(op_problem_scaled.handle_ptr), + stream_view_(handle_ptr_->get_stream()), + op_problem_scaled_(op_problem_scaled) +{ +} + +template +void mip_scaling_strategy_t::scale_problem() +{ + raft::common::nvtx::range fun_scope("mip_scale_problem"); + + const i_t n_rows = op_problem_scaled_.n_constraints; + const i_t n_cols = op_problem_scaled_.n_variables; + + if (n_rows == 0) { + op_problem_scaled_.is_scaled_ = true; + return; + } + + auto problem_view = op_problem_scaled_.view(); + int row_scaling_blocks = get_num_blocks(static_cast(n_rows)); + int transpose_num_blocks = get_num_blocks(static_cast(n_cols)); + + rmm::device_uvector row_inf_norm(static_cast(n_rows), stream_view_); + rmm::device_uvector row_skip_scaling(static_cast(n_rows), stream_view_); + rmm::device_uvector iteration_scaling(static_cast(n_rows), stream_view_); + rmm::device_uvector row_log2_values(static_cast(n_rows), stream_view_); + rmm::device_uvector row_active(static_cast(n_rows), stream_view_); + + analyze_rows_for_scaling_kernel + <<>>( + problem_view, row_inf_norm.data(), row_skip_scaling.data()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + for (int iteration = 0; iteration < row_scaling_num_iterations; ++iteration) { + compute_row_inf_norm_kernel + <<>>(problem_view, row_inf_norm.data()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + compute_row_log2_and_active_kernel + <<>>(row_inf_norm.data(), + row_skip_scaling.data(), + n_rows, + row_log2_values.data(), + row_active.data()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + double log2_sum = thrust::reduce(handle_ptr_->get_thrust_policy(), + row_log2_values.begin(), + row_log2_values.end(), + 0.0, + thrust::plus()); + i_t active_row_count = thrust::reduce(handle_ptr_->get_thrust_policy(), + row_active.begin(), + row_active.end(), + i_t(0), + thrust::plus()); + if (active_row_count == 0) { break; } + + f_t target_norm = static_cast(::exp2(log2_sum / static_cast(active_row_count))); + if (!::isfinite(target_norm) || target_norm <= f_t(0)) { break; } + + compute_iteration_scaling_kernel + <<>>(row_inf_norm.data(), + row_skip_scaling.data(), + n_rows, + target_norm, + iteration_scaling.data()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + apply_row_scaling_kernel<<>>( + problem_view, iteration_scaling.data()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + + if (n_cols > 0) { + apply_row_scaling_transpose_kernel + <<>>(problem_view, + iteration_scaling.data()); + RAFT_CUDA_TRY(cudaPeekAtLastError()); + } + } + + op_problem_scaled_.is_scaled_ = true; +} + +#define INSTANTIATE(F_TYPE) template class mip_scaling_strategy_t; + +#if MIP_INSTANTIATE_FLOAT +INSTANTIATE(float) +#endif + +#if MIP_INSTANTIATE_DOUBLE +INSTANTIATE(double) +#endif + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cuh b/cpp/src/mip_heuristics/mip_scaling_strategy.cuh new file mode 100644 index 000000000..51fe4187c --- /dev/null +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cuh @@ -0,0 +1,31 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include + +#include + +#include + +namespace cuopt::linear_programming::detail { + +template +class mip_scaling_strategy_t { + public: + explicit mip_scaling_strategy_t(problem_t& op_problem_scaled); + + void scale_problem(); + + private: + raft::handle_t const* handle_ptr_{nullptr}; + rmm::cuda_stream_view stream_view_; + problem_t& op_problem_scaled_; +}; + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 1d29cba4d..0b21c83a9 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -8,13 +8,13 @@ #include #include +#include #include #include #include #include #include -#include #include #include #include @@ -26,7 +26,6 @@ #include #include -#include #include #include @@ -57,12 +56,7 @@ mip_solution_t run_mip(detail::problem_t& problem, timer_t& timer) { raft::common::nvtx::range fun_scope("run_mip"); - auto constexpr const running_mip = true; - // TODO ask Akif and Alice how was this passed down? - auto hyper_params = settings.hyper_params; - hyper_params.update_primal_weight_on_initial_solution = false; - hyper_params.update_step_size_on_initial_solution = true; if (settings.get_mip_callbacks().size() > 0) { auto callback_num_variables = problem.original_problem_ptr->get_n_variables(); if (problem.has_papilo_presolve_data()) { @@ -126,47 +120,27 @@ mip_solution_t run_mip(detail::problem_t& problem, "Size mismatch"); cuopt_assert(problem.original_problem_ptr->get_n_constraints() == scaled_problem.n_constraints, "Size mismatch"); - detail::pdlp_initial_scaling_strategy_t scaling( - scaled_problem.handle_ptr, - scaled_problem, - hyper_params.default_l_inf_ruiz_iterations, - (f_t)hyper_params.default_alpha_pock_chambolle_rescaling, - scaled_problem.reverse_coefficients, - scaled_problem.reverse_offsets, - scaled_problem.reverse_constraints, - nullptr, - hyper_params, - running_mip); - - cuopt_func_call(auto saved_problem = scaled_problem); - if (settings.mip_scaling) { - scaling.scale_problem(); - if (settings.initial_solutions.size() > 0) { - for (const auto& initial_solution : settings.initial_solutions) { - scaling.scale_primal(*initial_solution); - } - } - } + detail::mip_scaling_strategy_t scaling(scaled_problem); + // only call preprocess on scaled problem, so we can compute feasibility on the original problem scaled_problem.preprocess_problem(); - // cuopt_func_call((check_scaled_problem(scaled_problem, saved_problem))); detail::trivial_presolve(scaled_problem); detail::mip_solver_t solver(scaled_problem, settings, scaling, timer); - auto scaled_sol = solver.run_solver(); - bool is_feasible_before_scaling = scaled_sol.get_feasible(); - scaled_sol.problem_ptr = &problem; - if (settings.mip_scaling) { scaling.unscale_solutions(scaled_sol); } - // at this point we need to compute the feasibility on the original problem not the presolved one - bool is_feasible_after_unscaling = scaled_sol.compute_feasibility(); - if (!scaled_problem.empty && is_feasible_before_scaling != is_feasible_after_unscaling) { + auto scaled_sol = solver.run_solver(); + bool is_feasible_on_scaled_problem = scaled_sol.get_feasible(); + scaled_sol.problem_ptr = &problem; + bool is_feasible_on_original_problem = scaled_sol.compute_feasibility(); + if (!scaled_problem.empty && is_feasible_on_scaled_problem != is_feasible_on_original_problem) { CUOPT_LOG_WARN( - "The feasibility does not match on scaled and unscaled problems. To overcome this issue, " + "The feasibility does not match on scaled and original problems. To overcome this issue, " "please provide a more numerically stable problem."); } - auto sol = scaled_sol.get_solution( - is_feasible_before_scaling || is_feasible_after_unscaling, solver.get_solver_stats(), false); + auto sol = + scaled_sol.get_solution(is_feasible_on_scaled_problem || is_feasible_on_original_problem, + solver.get_solver_stats(), + false); int hidesol = std::getenv("CUOPT_MIP_HIDE_SOLUTION") ? atoi(std::getenv("CUOPT_MIP_HIDE_SOLUTION")) : 0; diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index 6300114c0..483d46d19 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -41,7 +41,7 @@ static void init_handler(const raft::handle_t* handle_ptr) template mip_solver_t::mip_solver_t(const problem_t& op_problem, const mip_solver_settings_t& solver_settings, - pdlp_initial_scaling_strategy_t& scaling, + mip_scaling_strategy_t& scaling, timer_t timer) : op_problem_(op_problem), solver_settings_(solver_settings), diff --git a/cpp/src/mip_heuristics/solver.cuh b/cpp/src/mip_heuristics/solver.cuh index 1b5fe1724..ace58022d 100644 --- a/cpp/src/mip_heuristics/solver.cuh +++ b/cpp/src/mip_heuristics/solver.cuh @@ -20,7 +20,7 @@ class mip_solver_t { public: explicit mip_solver_t(const problem_t& op_problem, const mip_solver_settings_t& solver_settings, - pdlp_initial_scaling_strategy_t& scaling, + mip_scaling_strategy_t& scaling, timer_t timer); solution_t run_solver(); diff --git a/cpp/src/mip_heuristics/solver_context.cuh b/cpp/src/mip_heuristics/solver_context.cuh index baac1dd9d..d05367a35 100644 --- a/cpp/src/mip_heuristics/solver_context.cuh +++ b/cpp/src/mip_heuristics/solver_context.cuh @@ -7,9 +7,9 @@ #include +#include #include #include -#include #include #include @@ -35,7 +35,7 @@ struct mip_solver_context_t { explicit mip_solver_context_t(raft::handle_t const* handle_ptr_, problem_t* problem_ptr_, mip_solver_settings_t settings_, - pdlp_initial_scaling_strategy_t& scaling) + mip_scaling_strategy_t& scaling) : handle_ptr(handle_ptr_), problem_ptr(problem_ptr_), settings(settings_), scaling(scaling) { cuopt_assert(problem_ptr != nullptr, "problem_ptr is nullptr"); @@ -53,7 +53,7 @@ struct mip_solver_context_t { diversity_manager_t* diversity_manager_ptr{nullptr}; std::atomic preempt_heuristic_solver_ = false; const mip_solver_settings_t settings; - pdlp_initial_scaling_strategy_t& scaling; + mip_scaling_strategy_t& scaling; solver_stats_t stats; // Work limit context for tracking work units in deterministic mode (shared across all timers in // GPU heuristic loop) diff --git a/cpp/tests/mip/feasibility_jump_tests.cu b/cpp/tests/mip/feasibility_jump_tests.cu index baa3e9b80..4e8a51852 100644 --- a/cpp/tests/mip/feasibility_jump_tests.cu +++ b/cpp/tests/mip/feasibility_jump_tests.cu @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -77,16 +78,7 @@ static fj_state_t run_fj(std::string test_instance, // run the problem constructor of MIP, so that we do bounds standardization detail::problem_t problem(op_problem); problem.preprocess_problem(); - detail::pdhg_solver_t pdhg_solver(problem.handle_ptr, problem); - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 10, - 1.0, - pdhg_solver, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - true); + detail::mip_scaling_strategy_t scaling(problem); auto settings = mip_solver_settings_t{}; settings.time_limit = 30.; diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu index 5e2f08007..1f825a26f 100644 --- a/cpp/tests/mip/load_balancing_test.cu +++ b/cpp/tests/mip/load_balancing_test.cu @@ -9,11 +9,11 @@ #include "mip_utils.cuh" #include +#include #include #include #include #include -#include #include #include #include @@ -128,16 +128,7 @@ void test_multi_probe(std::string path) problem_checking_t::check_problem_representation(op_problem); detail::problem_t problem(op_problem); mip_solver_settings_t default_settings{}; - detail::pdhg_solver_t pdhg_solver(problem.handle_ptr, problem); - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 10, - 1.0, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - nullptr, - true); + detail::mip_scaling_strategy_t scaling(problem); detail::mip_solver_t solver(problem, default_settings, scaling, cuopt::timer_t(0)); detail::load_balanced_problem_t lb_problem(problem); detail::load_balanced_bounds_presolve_t lb_prs(lb_problem, solver.context); diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index 073c15348..8f415e878 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -9,10 +9,10 @@ #include "mip_utils.cuh" #include +#include #include #include #include -#include #include #include #include @@ -150,17 +150,7 @@ void test_multi_probe(std::string path) problem_checking_t::check_problem_representation(op_problem); detail::problem_t problem(op_problem); mip_solver_settings_t default_settings{}; - pdlp_hyper_params::pdlp_hyper_params_t hyper_params{}; - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 10, - 1.0, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - nullptr, - hyper_params, - true); + detail::mip_scaling_strategy_t scaling(problem); detail::mip_solver_t solver(problem, default_settings, scaling, cuopt::timer_t(0)); detail::bound_presolve_t bnd_prb_0(solver.context); detail::bound_presolve_t bnd_prb_1(solver.context); From f021ba34e50a6ccf47046a0b2b330c9418f3fa77 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 18 Feb 2026 21:57:28 -0800 Subject: [PATCH 14/44] fix pdlp issues and finalize lp scaling --- .../mip/solver_settings.hpp | 2 +- .../diversity/diversity_manager.cu | 1 + .../mip_heuristics/mip_scaling_strategy.cu | 13 +++++ .../mip_heuristics/relaxed_lp/relaxed_lp.cu | 1 - .../initial_scaling.cu | 11 +++++ cpp/src/pdlp/solve.cu | 48 +++++++++++++++++-- 6 files changed, 70 insertions(+), 6 deletions(-) diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 6d32cd5ed..60534a36f 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -107,7 +107,7 @@ class mip_solver_settings_t { /** Initial primal solutions */ std::vector>> initial_solutions; - bool mip_scaling = false; + bool mip_scaling = true; presolver_t presolver{presolver_t::Default}; /** * @brief Determinism mode for MIP solver. diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 649b4889e..edf124b92 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -405,6 +405,7 @@ solution_t diversity_manager_t::run_solver() pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; pdlp_settings.num_gpus = context.settings.num_gpus; pdlp_settings.presolver = presolver_t::None; + set_pdlp_solver_mode(pdlp_settings); timer_t lp_timer(lp_time_limit); auto lp_result = solve_lp_with_method(*problem_ptr, pdlp_settings, lp_timer); diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu index 9523128d4..84b5d1da2 100644 --- a/cpp/src/mip_heuristics/mip_scaling_strategy.cu +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -200,6 +201,17 @@ void mip_scaling_strategy_t::scale_problem() <<>>( problem_view, row_inf_norm.data(), row_skip_scaling.data()); RAFT_CUDA_TRY(cudaPeekAtLastError()); + i_t skipped_big_m_rows = thrust::reduce(handle_ptr_->get_thrust_policy(), + row_skip_scaling.begin(), + row_skip_scaling.end(), + i_t(0), + thrust::plus()); + + CUOPT_LOG_INFO("MIP row scaling start: rows=%d cols=%d iterations=%d skip_big_m_rows=%d", + n_rows, + n_cols, + row_scaling_num_iterations, + skipped_big_m_rows); for (int iteration = 0; iteration < row_scaling_num_iterations; ++iteration) { compute_row_inf_norm_kernel @@ -250,6 +262,7 @@ void mip_scaling_strategy_t::scale_problem() } op_problem_scaled_.is_scaled_ = true; + CUOPT_LOG_INFO("MIP row scaling completed"); } #define INSTANTIATE(F_TYPE) template class mip_scaling_strategy_t; diff --git a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu index 235745c60..ea1bbd109 100644 --- a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu +++ b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu @@ -8,7 +8,6 @@ #include "relaxed_lp.cuh" #include -#include #include #include #include diff --git a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu index 031cd9c3b..da7edb99d 100644 --- a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu +++ b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -417,6 +418,15 @@ template void pdlp_initial_scaling_strategy_t::scale_problem() { raft::common::nvtx::range fun_scope("scale_problem"); + CUOPT_LOG_INFO( + "PDLP initial scaling start: rows=%d cols=%d ruiz=%d pock_chambolle=%d " + "bound_objective_rescaling=%d running_mip=%d", + dual_size_h_, + primal_size_h_, + static_cast(hyper_params_.do_ruiz_scaling), + static_cast(hyper_params_.do_pock_chambolle_scaling), + static_cast(hyper_params_.bound_objective_rescaling && !running_mip_), + static_cast(running_mip_)); // scale A i_t number_of_blocks = op_problem_scaled_.n_constraints / block_size; @@ -565,6 +575,7 @@ void pdlp_initial_scaling_strategy_t::scale_problem() if (!running_mip_) { scale_solutions(pdhg_solver_ptr_->get_primal_solution(), pdhg_solver_ptr_->get_dual_solution()); } + CUOPT_LOG_INFO("PDLP initial scaling completed"); } template diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index a2766be98..a9474dc48 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -950,6 +950,35 @@ optimization_problem_solution_t run_concurrent( global_concurrent_halt = 0; settings_pdlp.concurrent_halt = &global_concurrent_halt; + detail::problem_t* concurrent_problem = &problem; + std::unique_ptr> pre_scaled_problem_ptr; + std::unique_ptr> concurrent_scaling_ptr; + + // For MIP concurrent solves (PDLP + Barrier), pre-scale once with PDLP scaling and run both + // methods on the same scaled problem. + if (settings.inside_mip) { + pre_scaled_problem_ptr = + std::make_unique>(problem, false /* no_deep_copy */); + concurrent_scaling_ptr = std::make_unique>( + problem.handle_ptr, + *pre_scaled_problem_ptr, + settings_pdlp.hyper_params.default_l_inf_ruiz_iterations, + static_cast(settings_pdlp.hyper_params.default_alpha_pock_chambolle_rescaling), + pre_scaled_problem_ptr->reverse_coefficients, + pre_scaled_problem_ptr->reverse_offsets, + pre_scaled_problem_ptr->reverse_constraints, + nullptr, + settings_pdlp.hyper_params, + true); + concurrent_scaling_ptr->scale_problem(); + concurrent_problem = pre_scaled_problem_ptr.get(); + + // Avoid scaling twice on the PDLP branch once the shared pre-scaling has been applied. + settings_pdlp.hyper_params.do_ruiz_scaling = false; + settings_pdlp.hyper_params.do_pock_chambolle_scaling = false; + settings_pdlp.hyper_params.bound_objective_rescaling = false; + } + // Make sure allocations are done on the original stream problem.handle_ptr->sync_stream(); @@ -965,7 +994,7 @@ optimization_problem_solution_t run_concurrent( // Otherwise, CUDA API calls to the problem stream may occur in both threads and throw graph // capture off dual_simplex::user_problem_t dual_simplex_problem = - cuopt_problem_to_simplex_problem(problem.handle_ptr, problem); + cuopt_problem_to_simplex_problem(problem.handle_ptr, *concurrent_problem); // Create a thread for dual simplex std::unique_ptr< std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t>> @@ -1010,7 +1039,7 @@ optimization_problem_solution_t run_concurrent( CUOPT_LOG_DEBUG("PDLP device: %d", raft::device_setter::get_current_device()); } // Run pdlp in the main thread - auto sol_pdlp = run_pdlp(problem, settings_pdlp, timer, is_batch_mode); + auto sol_pdlp = run_pdlp(*concurrent_problem, settings_pdlp, timer, is_batch_mode); // Wait for dual simplex thread to finish if (!settings.inside_mip) { dual_simplex_thread.join(); } @@ -1020,7 +1049,7 @@ optimization_problem_solution_t run_concurrent( // copy the dual simplex solution to the device auto sol_dual_simplex = !settings.inside_mip - ? convert_dual_simplex_sol(problem, + ? convert_dual_simplex_sol(*concurrent_problem, std::get<0>(*sol_dual_simplex_ptr), std::get<1>(*sol_dual_simplex_ptr), std::get<2>(*sol_dual_simplex_ptr), @@ -1031,7 +1060,7 @@ optimization_problem_solution_t run_concurrent( problem.handle_ptr->get_stream()}; // copy the barrier solution to the device - auto sol_barrier = convert_dual_simplex_sol(problem, + auto sol_barrier = convert_dual_simplex_sol(*concurrent_problem, std::get<0>(*sol_barrier_ptr), std::get<1>(*sol_barrier_ptr), std::get<2>(*sol_barrier_ptr), @@ -1039,6 +1068,17 @@ optimization_problem_solution_t run_concurrent( std::get<4>(*sol_barrier_ptr), 1); + if (concurrent_scaling_ptr) { + concurrent_scaling_ptr->unscale_solutions( + sol_pdlp.get_primal_solution(), sol_pdlp.get_dual_solution(), sol_pdlp.get_reduced_cost()); + concurrent_scaling_ptr->unscale_solutions(sol_dual_simplex.get_primal_solution(), + sol_dual_simplex.get_dual_solution(), + sol_dual_simplex.get_reduced_cost()); + concurrent_scaling_ptr->unscale_solutions(sol_barrier.get_primal_solution(), + sol_barrier.get_dual_solution(), + sol_barrier.get_reduced_cost()); + } + f_t end_time = timer.elapsed_time(); CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "Concurrent time: %.3fs", end_time); // Check status to see if we should return the pdlp solution or the dual simplex solution From 609c578c19312fc5c284892841120e8637f1145e Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 18 Feb 2026 22:15:36 -0800 Subject: [PATCH 15/44] move timer with inout parameters --- cpp/src/branch_and_bound/branch_and_bound.cpp | 54 +++++-------------- cpp/src/cuts/cuts.cpp | 2 +- cpp/src/dual_simplex/basis_solves.cpp | 8 +-- cpp/src/dual_simplex/basis_solves.hpp | 4 +- cpp/src/dual_simplex/basis_updates.cpp | 12 ++--- cpp/src/dual_simplex/basis_updates.hpp | 4 +- cpp/src/dual_simplex/crossover.cpp | 16 +++--- cpp/src/dual_simplex/phase2.cpp | 6 +-- cpp/src/dual_simplex/primal.cpp | 4 +- 9 files changed, 42 insertions(+), 68 deletions(-) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 30b913e6e..054c7e23a 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -1886,15 +1886,11 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( root_crossover_settings, original_lp_.lower, original_lp_.upper, + exploration_stats_.start_time, basic_list, nonbasic_list, - crossover_vstatus_, - exploration_stats_.start_time); - if (refactor_status == TIME_LIMIT_RETURN) { - root_status = lp_status_t::TIME_LIMIT; - } else if (refactor_status == CONCURRENT_HALT_RETURN) { - root_status = lp_status_t::CONCURRENT_LIMIT; - } else if (refactor_status != 0) { + crossover_vstatus_); + if (refactor_status != 0) { settings_.log.printf("Failed to refactor basis. %d deficient columns.\n", refactor_status); assert(refactor_status == 0); root_status = lp_status_t::NUMERICAL_ISSUES; @@ -1906,15 +1902,6 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( user_objective = root_crossover_soln_.user_objective; iter = root_crossover_soln_.iterations; solver_name = "Barrier/PDLP and Crossover"; - } else if (crossover_status == crossover_status_t::TIME_LIMIT || - toc(exploration_stats_.start_time) > settings_.time_limit) { - set_root_concurrent_halt(1); - root_status = root_status_future.get(); - set_root_concurrent_halt(0); - root_status = lp_status_t::TIME_LIMIT; - user_objective = root_relax_soln_.user_objective; - iter = root_relax_soln_.iterations; - solver_name = "Dual Simplex"; } else { root_status = root_status_future.get(); user_objective = root_relax_soln_.user_objective; @@ -1946,18 +1933,6 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( return root_status; } -template -bool branch_and_bound_t::stop_for_time_limit(mip_solution_t& solution) -{ - const f_t elapsed = toc(exploration_stats_.start_time); - if (elapsed > settings_.time_limit) { - solver_status_ = mip_status_t::TIME_LIMIT; - set_final_solution(solution, root_objective_); - return true; - } - return false; -} - template mip_status_t branch_and_bound_t::solve(mip_solution_t& solution) { @@ -2029,9 +2004,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (root_status == lp_status_t::INFEASIBLE) { settings_.log.printf("MIP Infeasible\n"); - if (settings_.heuristic_preemption_callback != nullptr) { - settings_.heuristic_preemption_callback(); - } + // FIXME: rarely dual simplex detects infeasible whereas it is feasible. + // to add a small safety net, check if there is a primal solution already. + // Uncomment this if the issue with cost266-UUE is resolved + // if (settings.heuristic_preemption_callback != nullptr) { + // settings.heuristic_preemption_callback(); + // } return mip_status_t::INFEASIBLE; } if (root_status == lp_status_t::UNBOUNDED) { @@ -2148,7 +2126,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut // Score the cuts f_t score_start_time = tic(); cut_pool.score_cuts(root_relax_soln_.x); - if (stop_for_time_limit(solution)) { return solver_status_; } f_t score_time = toc(score_start_time); if (score_time > 1.0) { settings_.log.debug("Cut scoring time %.2f seconds\n", score_time); } // Get the best cuts from the cut pool @@ -2276,8 +2253,11 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (dual_phase2_time > 1.0) { settings_.log.debug("Dual phase2 time %.2f seconds\n", dual_phase2_time); } - - if (stop_for_time_limit(solution)) { return solver_status_; } + if (cut_status == dual::status_t::TIME_LIMIT) { + solver_status_ = mip_status_t::TIME_LIMIT; + set_final_solution(solution, root_objective_); + return solver_status_; + } if (cut_status != dual::status_t::OPTIMAL) { settings_.log.printf("Numerical issue at root node. Resolving from scratch\n"); @@ -2291,7 +2271,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut nonbasic_list, root_vstatus_, edge_norms_); - if (stop_for_time_limit(solution)) { return solver_status_; } if (scratch_status == lp_status_t::OPTIMAL) { // We recovered cut_status = convert_lp_status_to_dual_status(scratch_status); @@ -2372,11 +2351,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms_); pc_.resize(original_lp_.num_cols); - if (toc(exploration_stats_.start_time) >= settings_.time_limit) { - solver_status_ = mip_status_t::TIME_LIMIT; - set_final_solution(solution, root_objective_); - return solver_status_; - } { raft::common::nvtx::range scope_sb("BB::strong_branching"); strong_branching(original_problem_, diff --git a/cpp/src/cuts/cuts.cpp b/cpp/src/cuts/cuts.cpp index bbb4d1f15..ad1d34f7a 100644 --- a/cpp/src/cuts/cuts.cpp +++ b/cpp/src/cuts/cuts.cpp @@ -2646,7 +2646,7 @@ i_t remove_cuts(lp_problem_t& lp, basis_update.resize(lp.num_rows); i_t refactor_status = basis_update.refactor_basis( - lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus, start_time); + lp.A, settings, lp.lower, lp.upper, start_time, basic_list, nonbasic_list, vstatus); if (refactor_status == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } if (refactor_status == TIME_LIMIT_RETURN) { return TIME_LIMIT_RETURN; } } diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp index 1c17fc557..2189dd57b 100644 --- a/cpp/src/dual_simplex/basis_solves.cpp +++ b/cpp/src/dual_simplex/basis_solves.cpp @@ -159,14 +159,14 @@ template i_t factorize_basis(const csc_matrix_t& A, const simplex_solver_settings_t& settings, const std::vector& basic_list, + f_t start_time, csc_matrix_t& L, csc_matrix_t& U, std::vector& p, std::vector& pinv, std::vector& q, std::vector& deficient, - std::vector& slacks_needed, - f_t start_time) + std::vector& slacks_needed) { raft::common::nvtx::range scope("LU::factorize_basis"); const i_t m = basic_list.size(); @@ -875,14 +875,14 @@ template void get_basis_from_vstatus(int m, template int factorize_basis(const csc_matrix_t& A, const simplex_solver_settings_t& settings, const std::vector& basis_list, + double start_time, csc_matrix_t& L, csc_matrix_t& U, std::vector& p, std::vector& pinv, std::vector& q, std::vector& deficient, - std::vector& slacks_needed, - double start_time); + std::vector& slacks_needed); template int basis_repair(const csc_matrix_t& A, const simplex_solver_settings_t& settings, diff --git a/cpp/src/dual_simplex/basis_solves.hpp b/cpp/src/dual_simplex/basis_solves.hpp index 13227a832..3050e791e 100644 --- a/cpp/src/dual_simplex/basis_solves.hpp +++ b/cpp/src/dual_simplex/basis_solves.hpp @@ -30,14 +30,14 @@ template i_t factorize_basis(const csc_matrix_t& A, const simplex_solver_settings_t& settings, const std::vector& basis_list, + f_t start_time, csc_matrix_t& L, csc_matrix_t& U, std::vector& p, std::vector& pinv, std::vector& q, std::vector& deficient, - std::vector& slacks_need, - f_t start_time); + std::vector& slacks_need); // Repair the basis by bringing in slacks template diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 5f8ef4934..7814d57e7 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -2262,10 +2262,10 @@ int basis_update_mpf_t::refactor_basis( const simplex_solver_settings_t& settings, const std::vector& lower, const std::vector& upper, + f_t start_time, std::vector& basic_list, std::vector& nonbasic_list, - std::vector& vstatus, - f_t start_time) + std::vector& vstatus) { raft::common::nvtx::range scope("LU::refactor_basis"); std::vector deficient; @@ -2277,14 +2277,14 @@ int basis_update_mpf_t::refactor_basis( i_t status = factorize_basis(A, settings, basic_list, + start_time, L0_, U0_, row_permutation_, inverse_row_permutation_, q, deficient, - slacks_needed, - start_time); + slacks_needed); if (status == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } if (status == TIME_LIMIT_RETURN) { return TIME_LIMIT_RETURN; } if (status == -1) { @@ -2320,14 +2320,14 @@ int basis_update_mpf_t::refactor_basis( status = factorize_basis(A, settings, basic_list, + start_time, L0_, U0_, row_permutation_, inverse_row_permutation_, q, deficient, - slacks_needed, - start_time); + slacks_needed); if (status == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } if (status == TIME_LIMIT_RETURN) { return TIME_LIMIT_RETURN; } if (status == -1) { diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index 6fd7b13f3..8c4c6da29 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -380,10 +380,10 @@ class basis_update_mpf_t { const simplex_solver_settings_t& settings, const std::vector& lower, const std::vector& upper, + f_t start_time, std::vector& basic_list, std::vector& nonbasic_list, - std::vector& vstatus, - f_t start_time); + std::vector& vstatus); void set_refactor_frequency(i_t new_frequency) { refactor_frequency_ = new_frequency; } diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 626b15035..b044a8678 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -506,7 +506,7 @@ i_t dual_push(const lp_problem_t& lp, std::vector deficient; std::vector slacks_needed; i_t rank = factorize_basis( - lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); + lp.A, settings, basic_list, start_time, L, U, p, pinv, q, deficient, slacks_needed); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } else if (rank < 0) { @@ -524,7 +524,7 @@ i_t dual_push(const lp_problem_t& lp, superbasic_list, vstatus); rank = factorize_basis( - lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); + lp.A, settings, basic_list, start_time, L, U, p, pinv, q, deficient, slacks_needed); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } else if (rank < 0) { @@ -862,7 +862,7 @@ i_t primal_push(const lp_problem_t& lp, std::vector deficient; std::vector slacks_needed; i_t rank = factorize_basis( - lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); + lp.A, settings, basic_list, start_time, L, U, p, pinv, q, deficient, slacks_needed); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } else if (rank < 0) { @@ -883,7 +883,7 @@ i_t primal_push(const lp_problem_t& lp, find_primal_superbasic_variables( lp, settings, solution, solution, vstatus, nonbasic_list, superbasic_list); rank = factorize_basis( - lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); + lp.A, settings, basic_list, start_time, L, U, p, pinv, q, deficient, slacks_needed); if (rank == CONCURRENT_HALT_RETURN) { return CONCURRENT_HALT_RETURN; } else if (rank < 0) { @@ -1226,7 +1226,7 @@ crossover_status_t crossover(const lp_problem_t& lp, std::vector slacks_needed; rank = factorize_basis( - lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); + lp.A, settings, basic_list, start_time, L, U, p, pinv, q, deficient, slacks_needed); if (rank < 0) { return return_to_status(rank); } if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); @@ -1241,7 +1241,7 @@ crossover_status_t crossover(const lp_problem_t& lp, superbasic_list, vstatus); rank = factorize_basis( - lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); + lp.A, settings, basic_list, start_time, L, U, p, pinv, q, deficient, slacks_needed); if (rank == CONCURRENT_HALT_RETURN) { return crossover_status_t::CONCURRENT_LIMIT; } else if (rank < 0) { @@ -1397,7 +1397,7 @@ crossover_status_t crossover(const lp_problem_t& lp, superbasic_list.clear(); get_basis_from_vstatus(m, vstatus, basic_list, nonbasic_list, superbasic_list); rank = factorize_basis( - lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); + lp.A, settings, basic_list, start_time, L, U, p, pinv, q, deficient, slacks_needed); if (rank < 0) { return return_to_status(rank); } else if (rank != m) { @@ -1413,7 +1413,7 @@ crossover_status_t crossover(const lp_problem_t& lp, superbasic_list, vstatus); rank = factorize_basis( - lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); + lp.A, settings, basic_list, start_time, L, U, p, pinv, q, deficient, slacks_needed); if (rank < 0) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); return return_to_status(rank); diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 25d1c5fbe..5de12cc8c 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -2505,7 +2505,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, assert(nonbasic_list.size() == n - m); i_t refactor_status = ft.refactor_basis( - lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus, start_time); + lp.A, settings, lp.lower, lp.upper, start_time, basic_list, nonbasic_list, vstatus); if (refactor_status == CONCURRENT_HALT_RETURN) { return dual::status_t::CONCURRENT_LIMIT; } if (refactor_status == TIME_LIMIT_RETURN) { return dual::status_t::TIME_LIMIT; } if (refactor_status > 0) { return dual::status_t::NUMERICAL; } @@ -3373,7 +3373,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, num_refactors++; bool should_recompute_x = false; i_t refactor_status = ft.refactor_basis( - lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus, start_time); + lp.A, settings, lp.lower, lp.upper, start_time, basic_list, nonbasic_list, vstatus); if (refactor_status == CONCURRENT_HALT_RETURN) { return dual::status_t::CONCURRENT_LIMIT; } if (refactor_status == TIME_LIMIT_RETURN) { return dual::status_t::TIME_LIMIT; } if (refactor_status > 0) { @@ -3384,7 +3384,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, i_t deficient_size = 0; while (true) { deficient_size = ft.refactor_basis( - lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus, start_time); + lp.A, settings, lp.lower, lp.upper, start_time, basic_list, nonbasic_list, vstatus); if (deficient_size == CONCURRENT_HALT_RETURN) { return dual::status_t::CONCURRENT_LIMIT; } diff --git a/cpp/src/dual_simplex/primal.cpp b/cpp/src/dual_simplex/primal.cpp index 2d0944542..628fadcbe 100644 --- a/cpp/src/dual_simplex/primal.cpp +++ b/cpp/src/dual_simplex/primal.cpp @@ -295,7 +295,7 @@ primal::status_t primal_phase2(i_t phase, std::vector deficient; std::vector slacks_needed; i_t rank = factorize_basis( - lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); + lp.A, settings, basic_list, start_time, L, U, p, pinv, q, deficient, slacks_needed); if (rank == CONCURRENT_HALT_RETURN) { return primal::status_t::CONCURRENT_LIMIT; } else if (rank == TIME_LIMIT_RETURN) { @@ -316,7 +316,7 @@ primal::status_t primal_phase2(i_t phase, superbasic_list, vstatus); rank = factorize_basis( - lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed, start_time); + lp.A, settings, basic_list, start_time, L, U, p, pinv, q, deficient, slacks_needed); if (rank == CONCURRENT_HALT_RETURN) { return primal::status_t::CONCURRENT_LIMIT; } else if (rank == TIME_LIMIT_RETURN) { From 0c54ecfb3045025bdfb6325fddccf40e8f7532e9 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 18 Feb 2026 22:38:23 -0800 Subject: [PATCH 16/44] fix merge conflicts --- cpp/src/dual_simplex/crossover.cpp | 9 ++++++--- cpp/src/dual_simplex/primal.cpp | 3 ++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 931f515e7..988c9c50a 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -533,7 +533,8 @@ i_t dual_push(const lp_problem_t& lp, basic_list, nonbasic_list, superbasic_list, - vstatus); + vstatus, + work_estimate); rank = factorize_basis(lp.A, settings, basic_list, @@ -1293,7 +1294,8 @@ crossover_status_t crossover(const lp_problem_t& lp, basic_list, nonbasic_list, superbasic_list, - vstatus); + vstatus, + work_estimate); rank = factorize_basis(lp.A, settings, basic_list, @@ -1485,7 +1487,8 @@ crossover_status_t crossover(const lp_problem_t& lp, basic_list, nonbasic_list, superbasic_list, - vstatus); + vstatus, + work_estimate); rank = factorize_basis(lp.A, settings, basic_list, diff --git a/cpp/src/dual_simplex/primal.cpp b/cpp/src/dual_simplex/primal.cpp index b50e674c2..d4c6743dc 100644 --- a/cpp/src/dual_simplex/primal.cpp +++ b/cpp/src/dual_simplex/primal.cpp @@ -325,7 +325,8 @@ primal::status_t primal_phase2(i_t phase, basic_list, nonbasic_list, superbasic_list, - vstatus); + vstatus, + work_estimate); rank = factorize_basis(lp.A, settings, basic_list, From fc414e748f06a7058ff91c77affa69648a08e0d3 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 18 Feb 2026 23:06:59 -0800 Subject: [PATCH 17/44] revert cmake comment --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index aa377b315..6a684be70 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -153,7 +153,7 @@ if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.9 AND CMAKE_CUDA_COMPILE endif() list(APPEND CUOPT_CUDA_FLAGS -fopenmp) -# Add parallel compilation flags if PARALLEL_LEVEL is set +# Add jobserver flags for parallel compilation if PARALLEL_LEVEL is set if(PARALLEL_LEVEL AND NOT "${PARALLEL_LEVEL}" STREQUAL "") message(STATUS "Enabling nvcc parallel compilation support") list(APPEND CUOPT_CUDA_FLAGS --threads=0 --split-compile=0) From aa0b795fb3d5ff12737b3cff7ba3297485e1b53f Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Fri, 20 Feb 2026 02:25:13 -0800 Subject: [PATCH 18/44] root node scaling --- .../diversity/diversity_manager.cu | 9 + .../mip_heuristics/mip_scaling_strategy.cu | 461 +++++++++++------- .../initial_scaling.cu | 28 +- .../initial_scaling.cuh | 1 - cpp/src/pdlp/solve.cu | 35 +- 5 files changed, 321 insertions(+), 213 deletions(-) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index edf124b92..c5b43b1e2 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -407,6 +407,15 @@ solution_t diversity_manager_t::run_solver() pdlp_settings.presolver = presolver_t::None; set_pdlp_solver_mode(pdlp_settings); + bool enable_concurrent_root_pdlp_scaling = true; + + if (!enable_concurrent_root_pdlp_scaling) { + // Keep this toggle local to concurrent-root LP solve (not part of solver settings API). + pdlp_settings.hyper_params.do_ruiz_scaling = false; + pdlp_settings.hyper_params.do_pock_chambolle_scaling = false; + pdlp_settings.hyper_params.bound_objective_rescaling = false; + } + timer_t lp_timer(lp_time_limit); auto lp_result = solve_lp_with_method(*problem_ptr, pdlp_settings, lp_timer); diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu index 84b5d1da2..665641dee 100644 --- a/cpp/src/mip_heuristics/mip_scaling_strategy.cu +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -12,20 +12,31 @@ #include #include +#include + +#include +#include +#include #include -#include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include +#include #include namespace cuopt::linear_programming::detail { namespace { -constexpr int scaling_threads = 256; constexpr int row_scaling_num_iterations = 3; constexpr int row_scaling_k_min = -20; constexpr int row_scaling_k_max = 20; @@ -34,134 +45,132 @@ constexpr double nearest_pow2_mantissa_threshold = 0.7071067811865476; constexpr double big_m_abs_threshold = 1.0e4; constexpr double big_m_ratio_threshold = 1.0e4; -inline int get_num_blocks(size_t n) -{ - if (n == 0) { return 1; } - size_t blocks = (n + scaling_threads - 1) / scaling_threads; - return static_cast(std::min(65535, std::max(1, blocks))); -} - -template -__global__ void analyze_rows_for_scaling_kernel( - const typename problem_t::view_t op_problem, f_t* row_inf_norm, i_t* row_skip_scaling) -{ - for (i_t row = blockIdx.x * blockDim.x + threadIdx.x; row < op_problem.n_constraints; - row += blockDim.x * gridDim.x) { - const auto [row_start, row_end] = op_problem.range_for_constraint(row); - f_t row_norm = f_t(0); - f_t row_min_nonzero = std::numeric_limits::infinity(); - i_t row_nonzero_count = 0; - for (i_t idx = row_start; idx < row_end; ++idx) { - f_t abs_value = raft::abs(op_problem.coefficients[idx]); - if (abs_value > row_norm) { row_norm = abs_value; } - if (abs_value > f_t(0)) { - row_min_nonzero = abs_value < row_min_nonzero ? abs_value : row_min_nonzero; - row_nonzero_count++; - } - } - row_inf_norm[row] = row_norm; - - bool skip_big_m_scaling = false; - if (row_nonzero_count >= 2 && row_min_nonzero < std::numeric_limits::infinity()) { - const f_t row_ratio = row_norm / row_min_nonzero; - skip_big_m_scaling = row_norm >= static_cast(big_m_abs_threshold) && - row_ratio >= static_cast(big_m_ratio_threshold); - } - row_skip_scaling[row] = skip_big_m_scaling ? i_t(1) : i_t(0); +template +struct abs_value_transform_t { + __device__ f_t operator()(f_t value) const { return raft::abs(value); } +}; + +template +struct nonzero_abs_or_inf_transform_t { + __device__ f_t operator()(f_t value) const + { + const f_t abs_value = raft::abs(value); + return abs_value > f_t(0) ? abs_value : std::numeric_limits::infinity(); } -} +}; template -__global__ void compute_row_inf_norm_kernel(const typename problem_t::view_t op_problem, - f_t* row_inf_norm) -{ - for (i_t row = blockIdx.x * blockDim.x + threadIdx.x; row < op_problem.n_constraints; - row += blockDim.x * gridDim.x) { - const auto [row_start, row_end] = op_problem.range_for_constraint(row); - f_t row_norm = f_t(0); - for (i_t idx = row_start; idx < row_end; ++idx) { - const f_t abs_value = raft::abs(op_problem.coefficients[idx]); - if (abs_value > row_norm) { row_norm = abs_value; } - } - row_inf_norm[row] = row_norm; +struct nonzero_count_transform_t { + __device__ i_t operator()(f_t value) const { return raft::abs(value) > f_t(0) ? i_t(1) : i_t(0); } +}; + +template +struct max_op_t { + __host__ __device__ t_t operator()(const t_t& lhs, const t_t& rhs) const + { + return lhs > rhs ? lhs : rhs; } -} +}; -template -__global__ void compute_row_log2_and_active_kernel(const f_t* row_inf_norm, - const i_t* row_skip_scaling, - i_t n_rows, - double* row_log2_values, - i_t* row_active) -{ - for (i_t row = blockIdx.x * blockDim.x + threadIdx.x; row < n_rows; - row += blockDim.x * gridDim.x) { - if (row_skip_scaling[row] || row_inf_norm[row] <= f_t(0)) { - row_log2_values[row] = 0.0; - row_active[row] = i_t(0); - continue; - } - row_log2_values[row] = ::log2(static_cast(row_inf_norm[row])); - row_active[row] = i_t(1); +template +struct min_op_t { + __host__ __device__ t_t operator()(const t_t& lhs, const t_t& rhs) const + { + return lhs < rhs ? lhs : rhs; } -} +}; template -__global__ void compute_iteration_scaling_kernel(const f_t* row_inf_norm, - const i_t* row_skip_scaling, - i_t n_rows, - f_t target_norm, - f_t* iteration_scaling) +void compute_row_inf_norm_with_cub(const problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_inf_norm, + rmm::cuda_stream_view stream_view) { - for (i_t row = blockIdx.x * blockDim.x + threadIdx.x; row < n_rows; - row += blockDim.x * gridDim.x) { - if (row_skip_scaling[row] || row_inf_norm[row] <= f_t(0) || target_norm <= f_t(0)) { - iteration_scaling[row] = f_t(1); - continue; - } - const f_t desired_scaling = target_norm / row_inf_norm[row]; - if (!::isfinite(desired_scaling) || desired_scaling <= f_t(0)) { - iteration_scaling[row] = f_t(1); - continue; - } - - int exponent = 0; - f_t mantissa = ::frexp(desired_scaling, &exponent); // desired_scaling = mantissa * 2^exponent - int k = mantissa >= static_cast(nearest_pow2_mantissa_threshold) ? exponent : exponent - 1; - if (k < row_scaling_k_min) { k = row_scaling_k_min; } - if (k > row_scaling_k_max) { k = row_scaling_k_max; } - iteration_scaling[row] = ::ldexp(f_t(1), k); - } + auto coeff_abs_iter = + thrust::make_transform_iterator(op_problem.coefficients.data(), abs_value_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + temp_storage_bytes, + coeff_abs_iter, + row_inf_norm.data(), + op_problem.n_constraints, + op_problem.offsets.data(), + op_problem.offsets.data() + 1, + max_op_t{}, + f_t(0), + stream_view)); } template -__global__ void apply_row_scaling_kernel(const typename problem_t::view_t op_problem, - const f_t* constraint_scaling) +void compute_big_m_skip_rows_with_cub(const problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_inf_norm, + rmm::device_uvector& row_min_nonzero, + rmm::device_uvector& row_nonzero_count, + rmm::device_uvector& row_skip_scaling) { - for (i_t row = blockIdx.x * blockDim.x + threadIdx.x; row < op_problem.n_constraints; - row += blockDim.x * gridDim.x) { - const f_t scaling = constraint_scaling[row]; - const auto [row_start, row_end] = op_problem.range_for_constraint(row); - for (i_t idx = row_start; idx < row_end; ++idx) { - op_problem.coefficients[idx] *= scaling; - } - op_problem.constraint_lower_bounds[row] *= scaling; - op_problem.constraint_upper_bounds[row] *= scaling; - } -} + auto coeff_abs_iter = + thrust::make_transform_iterator(op_problem.coefficients.data(), abs_value_transform_t{}); + auto coeff_nonzero_min_iter = thrust::make_transform_iterator( + op_problem.coefficients.data(), nonzero_abs_or_inf_transform_t{}); + auto coeff_nonzero_count_iter = thrust::make_transform_iterator( + op_problem.coefficients.data(), nonzero_count_transform_t{}); + + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + temp_storage_bytes, + coeff_abs_iter, + row_inf_norm.data(), + op_problem.n_constraints, + op_problem.offsets.data(), + op_problem.offsets.data() + 1, + max_op_t{}, + f_t(0), + op_problem.handle_ptr->get_stream())); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + temp_storage_bytes, + coeff_nonzero_min_iter, + row_min_nonzero.data(), + op_problem.n_constraints, + op_problem.offsets.data(), + op_problem.offsets.data() + 1, + min_op_t{}, + std::numeric_limits::infinity(), + op_problem.handle_ptr->get_stream())); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + temp_storage_bytes, + coeff_nonzero_count_iter, + row_nonzero_count.data(), + op_problem.n_constraints, + op_problem.offsets.data(), + op_problem.offsets.data() + 1, + thrust::plus{}, + i_t(0), + op_problem.handle_ptr->get_stream())); + + auto row_begin = thrust::make_zip_iterator( + thrust::make_tuple(row_inf_norm.begin(), row_min_nonzero.begin(), row_nonzero_count.begin())); + auto row_end = thrust::make_zip_iterator( + thrust::make_tuple(row_inf_norm.end(), row_min_nonzero.end(), row_nonzero_count.end())); + thrust::transform( + op_problem.handle_ptr->get_thrust_policy(), + row_begin, + row_end, + row_skip_scaling.begin(), + [] __device__(auto row_info) -> i_t { + const f_t row_norm = thrust::get<0>(row_info); + const f_t row_min_non_zero = thrust::get<1>(row_info); + const i_t row_non_zero_size = thrust::get<2>(row_info); + if (row_non_zero_size < i_t(2) || row_min_non_zero >= std::numeric_limits::infinity()) { + return i_t(0); + } -template -__global__ void apply_row_scaling_transpose_kernel( - const typename problem_t::view_t op_problem, const f_t* constraint_scaling) -{ - for (i_t var = blockIdx.x * blockDim.x + threadIdx.x; var < op_problem.n_variables; - var += blockDim.x * gridDim.x) { - const auto [start, end] = op_problem.reverse_range_for_var(var); - for (i_t idx = start; idx < end; ++idx) { - i_t row = op_problem.reverse_constraints[idx]; - op_problem.reverse_coefficients[idx] *= constraint_scaling[row]; - } - } + const f_t row_ratio = row_norm / row_min_non_zero; + return row_norm >= static_cast(big_m_abs_threshold) && + row_ratio >= static_cast(big_m_ratio_threshold) + ? i_t(1) + : i_t(0); + }); } } // namespace @@ -181,31 +190,99 @@ void mip_scaling_strategy_t::scale_problem() const i_t n_rows = op_problem_scaled_.n_constraints; const i_t n_cols = op_problem_scaled_.n_variables; + const i_t nnz = op_problem_scaled_.nnz; if (n_rows == 0) { op_problem_scaled_.is_scaled_ = true; return; } - auto problem_view = op_problem_scaled_.view(); - int row_scaling_blocks = get_num_blocks(static_cast(n_rows)); - int transpose_num_blocks = get_num_blocks(static_cast(n_cols)); - rmm::device_uvector row_inf_norm(static_cast(n_rows), stream_view_); + rmm::device_uvector row_min_nonzero(static_cast(n_rows), stream_view_); + rmm::device_uvector row_nonzero_count(static_cast(n_rows), stream_view_); rmm::device_uvector row_skip_scaling(static_cast(n_rows), stream_view_); rmm::device_uvector iteration_scaling(static_cast(n_rows), stream_view_); - rmm::device_uvector row_log2_values(static_cast(n_rows), stream_view_); - rmm::device_uvector row_active(static_cast(n_rows), stream_view_); - - analyze_rows_for_scaling_kernel - <<>>( - problem_view, row_inf_norm.data(), row_skip_scaling.data()); - RAFT_CUDA_TRY(cudaPeekAtLastError()); - i_t skipped_big_m_rows = thrust::reduce(handle_ptr_->get_thrust_policy(), - row_skip_scaling.begin(), - row_skip_scaling.end(), - i_t(0), - thrust::plus()); + rmm::device_uvector coefficient_row_index(static_cast(nnz), stream_view_); + + if (nnz > 0) { + thrust::upper_bound(handle_ptr_->get_thrust_policy(), + op_problem_scaled_.offsets.begin(), + op_problem_scaled_.offsets.end(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(nnz), + coefficient_row_index.begin()); + thrust::transform( + handle_ptr_->get_thrust_policy(), + coefficient_row_index.begin(), + coefficient_row_index.end(), + coefficient_row_index.begin(), + [] __device__(i_t row_upper_bound_idx) -> i_t { return row_upper_bound_idx - 1; }); + } + + size_t temp_storage_bytes = 0; + size_t current_required_bytes = 0; + if (nnz > 0) { + auto coeff_abs_iter = thrust::make_transform_iterator(op_problem_scaled_.coefficients.data(), + abs_value_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_abs_iter, + row_inf_norm.data(), + n_rows, + op_problem_scaled_.offsets.data(), + op_problem_scaled_.offsets.data() + 1, + max_op_t{}, + f_t(0), + stream_view_)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + + auto coeff_nonzero_min_iter = thrust::make_transform_iterator( + op_problem_scaled_.coefficients.data(), nonzero_abs_or_inf_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_nonzero_min_iter, + row_min_nonzero.data(), + n_rows, + op_problem_scaled_.offsets.data(), + op_problem_scaled_.offsets.data() + 1, + min_op_t{}, + std::numeric_limits::infinity(), + stream_view_)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + + auto coeff_nonzero_count_iter = thrust::make_transform_iterator( + op_problem_scaled_.coefficients.data(), nonzero_count_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_nonzero_count_iter, + row_nonzero_count.data(), + n_rows, + op_problem_scaled_.offsets.data(), + op_problem_scaled_.offsets.data() + 1, + thrust::plus{}, + i_t(0), + stream_view_)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + } + + rmm::device_uvector temp_storage(temp_storage_bytes, stream_view_); + if (nnz > 0) { + compute_big_m_skip_rows_with_cub(op_problem_scaled_, + temp_storage, + temp_storage_bytes, + row_inf_norm, + row_min_nonzero, + row_nonzero_count, + row_skip_scaling); + } else { + thrust::fill( + handle_ptr_->get_thrust_policy(), row_inf_norm.begin(), row_inf_norm.end(), f_t(0)); + thrust::fill( + handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(0)); + } + + i_t skipped_big_m_rows = thrust::count( + handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(1)); CUOPT_LOG_INFO("MIP row scaling start: rows=%d cols=%d iterations=%d skip_big_m_rows=%d", n_rows, @@ -214,53 +291,95 @@ void mip_scaling_strategy_t::scale_problem() skipped_big_m_rows); for (int iteration = 0; iteration < row_scaling_num_iterations; ++iteration) { - compute_row_inf_norm_kernel - <<>>(problem_view, row_inf_norm.data()); - RAFT_CUDA_TRY(cudaPeekAtLastError()); - - compute_row_log2_and_active_kernel - <<>>(row_inf_norm.data(), - row_skip_scaling.data(), - n_rows, - row_log2_values.data(), - row_active.data()); - RAFT_CUDA_TRY(cudaPeekAtLastError()); - - double log2_sum = thrust::reduce(handle_ptr_->get_thrust_policy(), - row_log2_values.begin(), - row_log2_values.end(), - 0.0, - thrust::plus()); - i_t active_row_count = thrust::reduce(handle_ptr_->get_thrust_policy(), - row_active.begin(), - row_active.end(), - i_t(0), - thrust::plus()); + if (nnz > 0) { + compute_row_inf_norm_with_cub( + op_problem_scaled_, temp_storage, temp_storage_bytes, row_inf_norm, stream_view_); + } else { + thrust::fill( + handle_ptr_->get_thrust_policy(), row_inf_norm.begin(), row_inf_norm.end(), f_t(0)); + } + + const double log2_sum = thrust::transform_reduce( + handle_ptr_->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.begin(), row_skip_scaling.begin())), + thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.end(), row_skip_scaling.end())), + [] __device__(auto row_info) -> double { + const f_t row_norm = thrust::get<0>(row_info); + const i_t skip_row = thrust::get<1>(row_info); + if (skip_row || row_norm <= f_t(0)) { return 0.0; } + return ::log2(static_cast(row_norm)); + }, + 0.0, + thrust::plus{}); + const i_t active_row_count = thrust::transform_reduce( + handle_ptr_->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.begin(), row_skip_scaling.begin())), + thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.end(), row_skip_scaling.end())), + [] __device__(auto row_info) -> i_t { + const f_t row_norm = thrust::get<0>(row_info); + const i_t skip_row = thrust::get<1>(row_info); + return (!skip_row && row_norm > f_t(0)) ? i_t(1) : i_t(0); + }, + i_t(0), + thrust::plus{}); if (active_row_count == 0) { break; } f_t target_norm = static_cast(::exp2(log2_sum / static_cast(active_row_count))); if (!::isfinite(target_norm) || target_norm <= f_t(0)) { break; } - compute_iteration_scaling_kernel - <<>>(row_inf_norm.data(), - row_skip_scaling.data(), - n_rows, - target_norm, - iteration_scaling.data()); - RAFT_CUDA_TRY(cudaPeekAtLastError()); - - apply_row_scaling_kernel<<>>( - problem_view, iteration_scaling.data()); - RAFT_CUDA_TRY(cudaPeekAtLastError()); - - if (n_cols > 0) { - apply_row_scaling_transpose_kernel - <<>>(problem_view, - iteration_scaling.data()); - RAFT_CUDA_TRY(cudaPeekAtLastError()); + thrust::transform( + handle_ptr_->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.begin(), row_skip_scaling.begin())), + thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.end(), row_skip_scaling.end())), + iteration_scaling.begin(), + [target_norm] __device__(auto row_info) -> f_t { + const f_t row_norm = thrust::get<0>(row_info); + const i_t skip_row = thrust::get<1>(row_info); + if (skip_row || row_norm <= f_t(0) || target_norm <= f_t(0)) { return f_t(1); } + + const f_t desired_scaling = target_norm / row_norm; + if (!::isfinite(desired_scaling) || desired_scaling <= f_t(0)) { return f_t(1); } + + int exponent = 0; + const f_t mantissa = + ::frexp(desired_scaling, &exponent); // desired_scaling = mantissa * 2^exponent + int k = + mantissa >= static_cast(nearest_pow2_mantissa_threshold) ? exponent : exponent - 1; + if (k < row_scaling_k_min) { k = row_scaling_k_min; } + if (k > row_scaling_k_max) { k = row_scaling_k_max; } + return ::ldexp(f_t(1), k); + }); + + if (nnz > 0) { + thrust::transform( + handle_ptr_->get_thrust_policy(), + op_problem_scaled_.coefficients.begin(), + op_problem_scaled_.coefficients.end(), + thrust::make_permutation_iterator(iteration_scaling.begin(), coefficient_row_index.begin()), + op_problem_scaled_.coefficients.begin(), + thrust::multiplies{}); } + + thrust::transform(handle_ptr_->get_thrust_policy(), + op_problem_scaled_.constraint_lower_bounds.begin(), + op_problem_scaled_.constraint_lower_bounds.end(), + iteration_scaling.begin(), + op_problem_scaled_.constraint_lower_bounds.begin(), + thrust::multiplies{}); + thrust::transform(handle_ptr_->get_thrust_policy(), + op_problem_scaled_.constraint_upper_bounds.begin(), + op_problem_scaled_.constraint_upper_bounds.end(), + iteration_scaling.begin(), + op_problem_scaled_.constraint_upper_bounds.begin(), + thrust::multiplies{}); + + // Delay A^T updates until after all row-scaling iterations are complete. + // Rebuilding transpose from the final scaled A guarantees exact consistency for + // downstream checks (e.g., concurrent root PDLP path). } + op_problem_scaled_.compute_transpose_of_problem(); + op_problem_scaled_.check_problem_representation(true, true); op_problem_scaled_.is_scaled_ = true; CUOPT_LOG_INFO("MIP row scaling completed"); } diff --git a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu index c103c3399..663620c54 100644 --- a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu +++ b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu @@ -14,9 +14,8 @@ #include #include #include -#include -#include +#include #include #include #include @@ -196,8 +195,6 @@ void pdlp_initial_scaling_strategy_t::ruiz_inf_scaling(i_t number_of_r op_problem_scaled_.view(), this->view()); RAFT_CUDA_TRY(cudaPeekAtLastError()); - if (running_mip_) { reset_integer_variables(); } - raft::linalg::binaryOp(cummulative_constraint_matrix_scaling_.data(), cummulative_constraint_matrix_scaling_.data(), iteration_constraint_matrix_scaling_.data(), @@ -220,17 +217,6 @@ void pdlp_initial_scaling_strategy_t::ruiz_inf_scaling(i_t number_of_r } } -template -void pdlp_initial_scaling_strategy_t::reset_integer_variables() -{ - thrust::scatter( - handle_ptr_->get_thrust_policy(), - thrust::make_constant_iterator(1), - thrust::make_constant_iterator(1) + op_problem_scaled_.integer_indices.size(), - op_problem_scaled_.integer_indices.begin(), - iteration_variable_scaling_.begin()); -} - template __global__ void pock_chambolle_scaling_kernel_row( const typename problem_t::view_t op_problem, @@ -346,8 +332,6 @@ void pdlp_initial_scaling_strategy_t::pock_chambolle_scaling(f_t alpha A_T_indices_.data()); RAFT_CUDA_TRY(cudaPeekAtLastError()); - if (running_mip_) { reset_integer_variables(); } - // divide the sqrt of the vectors of the sums from above to the respective scaling vectors // (only if sqrt(sum)>0) raft::linalg::binaryOp(cummulative_constraint_matrix_scaling_.data(), @@ -418,15 +402,6 @@ template void pdlp_initial_scaling_strategy_t::scale_problem() { raft::common::nvtx::range fun_scope("scale_problem"); - CUOPT_LOG_INFO( - "PDLP initial scaling start: rows=%d cols=%d ruiz=%d pock_chambolle=%d " - "bound_objective_rescaling=%d running_mip=%d", - dual_size_h_, - primal_size_h_, - static_cast(hyper_params_.do_ruiz_scaling), - static_cast(hyper_params_.do_pock_chambolle_scaling), - static_cast(hyper_params_.bound_objective_rescaling && !running_mip_), - static_cast(running_mip_)); // scale A i_t number_of_blocks = op_problem_scaled_.n_constraints / block_size; @@ -575,7 +550,6 @@ void pdlp_initial_scaling_strategy_t::scale_problem() if (!running_mip_) { scale_solutions(pdhg_solver_ptr_->get_primal_solution(), pdhg_solver_ptr_->get_dual_solution()); } - CUOPT_LOG_INFO("PDLP initial scaling completed"); } template diff --git a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cuh b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cuh index 5a3dcfaca..b8ca2fed2 100644 --- a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cuh +++ b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cuh @@ -88,7 +88,6 @@ class pdlp_initial_scaling_strategy_t { void compute_scaling_vectors(i_t number_of_ruiz_iterations, f_t alpha); void ruiz_inf_scaling(i_t number_of_ruiz_iterations); void pock_chambolle_scaling(f_t alpha); - void reset_integer_variables(); raft::handle_t const* handle_ptr_{nullptr}; rmm::cuda_stream_view stream_view_; diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index f98edbbd6..c903f50cb 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -956,9 +956,12 @@ optimization_problem_solution_t run_concurrent( // For MIP concurrent solves (PDLP + Barrier), pre-scale once with PDLP scaling and run both // methods on the same scaled problem. - if (settings.inside_mip) { - pre_scaled_problem_ptr = - std::make_unique>(problem, false /* no_deep_copy */); + const bool apply_shared_scaling = + settings.inside_mip && (settings_pdlp.hyper_params.do_ruiz_scaling || + settings_pdlp.hyper_params.do_pock_chambolle_scaling || + settings_pdlp.hyper_params.bound_objective_rescaling); + if (apply_shared_scaling) { + pre_scaled_problem_ptr = std::make_unique>(problem); concurrent_scaling_ptr = std::make_unique>( problem.handle_ptr, *pre_scaled_problem_ptr, @@ -971,6 +974,8 @@ optimization_problem_solution_t run_concurrent( settings_pdlp.hyper_params, true); concurrent_scaling_ptr->scale_problem(); + // Keep CSR/transpose exactly consistent for strict debug validity checks. + pre_scaled_problem_ptr->compute_transpose_of_problem(); concurrent_problem = pre_scaled_problem_ptr.get(); // Avoid scaling twice on the PDLP branch once the shared pre-scaling has been applied. @@ -990,11 +995,17 @@ optimization_problem_solution_t run_concurrent( device_count > 1, error_type_t::RuntimeError, "Multi-GPU mode requires at least 2 GPUs"); } - // Initialize the dual simplex structures before we run PDLP. + // Initialize simplex/barrier structures before we run PDLP. // Otherwise, CUDA API calls to the problem stream may occur in both threads and throw graph - // capture off + // capture off. + // Keep dual simplex on the original unscaled model to preserve vertex solution semantics. dual_simplex::user_problem_t dual_simplex_problem = - cuopt_problem_to_simplex_problem(problem.handle_ptr, *concurrent_problem); + cuopt_problem_to_simplex_problem(problem.handle_ptr, problem); + // Barrier shares the same model as PDLP (scaled in MIP concurrent mode, original otherwise). + dual_simplex::user_problem_t barrier_problem = + apply_shared_scaling + ? cuopt_problem_to_simplex_problem(problem.handle_ptr, *concurrent_problem) + : dual_simplex_problem; // Create a thread for dual simplex std::unique_ptr< std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t>> @@ -1007,7 +1018,6 @@ optimization_problem_solution_t run_concurrent( std::ref(sol_dual_simplex_ptr), std::ref(timer)); } - dual_simplex::user_problem_t barrier_problem = dual_simplex_problem; // Create a thread for barrier std::unique_ptr< std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t>> @@ -1016,10 +1026,10 @@ optimization_problem_solution_t run_concurrent( auto call_barrier_thread = [&]() { rmm::cuda_stream_view barrier_stream = rmm::cuda_stream_per_thread; auto barrier_handle = raft::handle_t(barrier_stream); - auto barrier_problem = dual_simplex_problem; - barrier_problem.handle_ptr = &barrier_handle; + auto barrier_problem_local = barrier_problem; + barrier_problem_local.handle_ptr = &barrier_handle; - run_barrier_thread(std::ref(barrier_problem), + run_barrier_thread(std::ref(barrier_problem_local), std::ref(settings_pdlp), std::ref(sol_barrier_ptr), std::ref(timer)); @@ -1049,7 +1059,7 @@ optimization_problem_solution_t run_concurrent( // copy the dual simplex solution to the device auto sol_dual_simplex = !settings.inside_mip - ? convert_dual_simplex_sol(*concurrent_problem, + ? convert_dual_simplex_sol(problem, std::get<0>(*sol_dual_simplex_ptr), std::get<1>(*sol_dual_simplex_ptr), std::get<2>(*sol_dual_simplex_ptr), @@ -1071,9 +1081,6 @@ optimization_problem_solution_t run_concurrent( if (concurrent_scaling_ptr) { concurrent_scaling_ptr->unscale_solutions( sol_pdlp.get_primal_solution(), sol_pdlp.get_dual_solution(), sol_pdlp.get_reduced_cost()); - concurrent_scaling_ptr->unscale_solutions(sol_dual_simplex.get_primal_solution(), - sol_dual_simplex.get_dual_solution(), - sol_dual_simplex.get_reduced_cost()); concurrent_scaling_ptr->unscale_solutions(sol_barrier.get_primal_solution(), sol_barrier.get_dual_solution(), sol_barrier.get_reduced_cost()); From 1c03794c0e8cb8b9333ed2b7043cac7ebe57325d Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Fri, 20 Feb 2026 02:27:18 -0800 Subject: [PATCH 19/44] sscaling off --- cpp/src/mip_heuristics/diversity/diversity_manager.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index c5b43b1e2..c86f9a661 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -407,7 +407,7 @@ solution_t diversity_manager_t::run_solver() pdlp_settings.presolver = presolver_t::None; set_pdlp_solver_mode(pdlp_settings); - bool enable_concurrent_root_pdlp_scaling = true; + bool enable_concurrent_root_pdlp_scaling = false; if (!enable_concurrent_root_pdlp_scaling) { // Keep this toggle local to concurrent-root LP solve (not part of solver settings API). From e849c70837d592ee512a5aefe3c4b56ad67b803f Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Fri, 20 Feb 2026 04:25:19 -0800 Subject: [PATCH 20/44] correct mip gap computation --- cpp/src/mip_heuristics/utils.cuh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/mip_heuristics/utils.cuh b/cpp/src/mip_heuristics/utils.cuh index 33712635e..ffadc1f51 100644 --- a/cpp/src/mip_heuristics/utils.cuh +++ b/cpp/src/mip_heuristics/utils.cuh @@ -339,8 +339,9 @@ static void inline run_device_lambda(const rmm::cuda_stream_view& stream, Func f template f_t compute_rel_mip_gap(f_t user_obj, f_t solution_bound) { - if (user_obj == 0.0) { - return solution_bound == 0.0 ? 0.0 : std::numeric_limits::infinity(); + if (integer_equal(user_obj, 0.0, 1e-6)) { + return integer_equal(solution_bound, 0.0, 1e-6) ? 0.0 + : std::numeric_limits::infinity(); } return std::abs(user_obj - solution_bound) / std::abs(user_obj); } From ea09141264bf79904a43636fd10ab6369d5bb39f Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Sun, 22 Feb 2026 23:30:08 -0800 Subject: [PATCH 21/44] wip --- cpp/src/mip_heuristics/mip_scaling_strategy.cu | 12 ++++++++---- cpp/src/pdlp/solve.cu | 3 +++ .../utils/linear_programming/data_definition.py | 5 ----- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu index 665641dee..47837dab8 100644 --- a/cpp/src/mip_heuristics/mip_scaling_strategy.cu +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -89,8 +89,9 @@ void compute_row_inf_norm_with_cub(const problem_t& op_problem, { auto coeff_abs_iter = thrust::make_transform_iterator(op_problem.coefficients.data(), abs_value_transform_t{}); + size_t current_bytes = temp_storage_bytes; RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), - temp_storage_bytes, + current_bytes, coeff_abs_iter, row_inf_norm.data(), op_problem.n_constraints, @@ -117,8 +118,9 @@ void compute_big_m_skip_rows_with_cub(const problem_t& op_problem, auto coeff_nonzero_count_iter = thrust::make_transform_iterator( op_problem.coefficients.data(), nonzero_count_transform_t{}); + size_t max_bytes = temp_storage_bytes; RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), - temp_storage_bytes, + max_bytes, coeff_abs_iter, row_inf_norm.data(), op_problem.n_constraints, @@ -127,8 +129,9 @@ void compute_big_m_skip_rows_with_cub(const problem_t& op_problem, max_op_t{}, f_t(0), op_problem.handle_ptr->get_stream())); + size_t min_bytes = temp_storage_bytes; RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), - temp_storage_bytes, + min_bytes, coeff_nonzero_min_iter, row_min_nonzero.data(), op_problem.n_constraints, @@ -137,8 +140,9 @@ void compute_big_m_skip_rows_with_cub(const problem_t& op_problem, min_op_t{}, std::numeric_limits::infinity(), op_problem.handle_ptr->get_stream())); + size_t count_bytes = temp_storage_bytes; RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), - temp_storage_bytes, + count_bytes, coeff_nonzero_count_iter, row_nonzero_count.data(), op_problem.n_constraints, diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index c903f50cb..ca960ee54 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -976,6 +976,9 @@ optimization_problem_solution_t run_concurrent( concurrent_scaling_ptr->scale_problem(); // Keep CSR/transpose exactly consistent for strict debug validity checks. pre_scaled_problem_ptr->compute_transpose_of_problem(); + // Keep combined bounds consistent with scaled lower/upper bounds. + detail::combine_constraint_bounds(*pre_scaled_problem_ptr, + pre_scaled_problem_ptr->combined_bounds); concurrent_problem = pre_scaled_problem_ptr.get(); // Avoid scaling twice on the PDLP branch once the shared pre-scaling has been applied. diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py index 59ea62089..8412c745b 100644 --- a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py +++ b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py @@ -450,11 +450,6 @@ class SolverConfig(BaseModel): description="Set True to run heuristics only, False to run " "heuristics and branch and bound for MILP", ) - mip_batch_pdlp_strong_branching: Optional[int] = Field( - default=0, - description="Set 1 to enable batch PDLP strong branching " - "in the MIP solver, 0 to disable.", - ) num_cpu_threads: Optional[int] = Field( default=None, description="Set the number of CPU threads to use for branch and bound.", # noqa From adfbf7d6dfb604c493497144b4dd388e234b83c3 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Sun, 22 Feb 2026 23:58:02 -0800 Subject: [PATCH 22/44] with scaling --- cpp/src/mip_heuristics/diversity/diversity_manager.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index c86f9a661..c5b43b1e2 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -407,7 +407,7 @@ solution_t diversity_manager_t::run_solver() pdlp_settings.presolver = presolver_t::None; set_pdlp_solver_mode(pdlp_settings); - bool enable_concurrent_root_pdlp_scaling = false; + bool enable_concurrent_root_pdlp_scaling = true; if (!enable_concurrent_root_pdlp_scaling) { // Keep this toggle local to concurrent-root LP solve (not part of solver settings API). From 92b165eb2f81b73c3f5a3239079547a9912a3597 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Mon, 23 Feb 2026 04:41:22 -0800 Subject: [PATCH 23/44] make pdlp/barrier scaling default --- .../diversity/diversity_manager.cu | 9 - .../mip_heuristics/mip_scaling_strategy.cu | 253 +++++++++--------- cpp/src/pdlp/solve.cu | 8 +- 3 files changed, 122 insertions(+), 148 deletions(-) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index c5b43b1e2..edf124b92 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -407,15 +407,6 @@ solution_t diversity_manager_t::run_solver() pdlp_settings.presolver = presolver_t::None; set_pdlp_solver_mode(pdlp_settings); - bool enable_concurrent_root_pdlp_scaling = true; - - if (!enable_concurrent_root_pdlp_scaling) { - // Keep this toggle local to concurrent-root LP solve (not part of solver settings API). - pdlp_settings.hyper_params.do_ruiz_scaling = false; - pdlp_settings.hyper_params.do_pock_chambolle_scaling = false; - pdlp_settings.hyper_params.bound_objective_rescaling = false; - } - timer_t lp_timer(lp_time_limit); auto lp_result = solve_lp_with_method(*problem_ptr, pdlp_settings, lp_timer); diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu index 47837dab8..ef5d453cc 100644 --- a/cpp/src/mip_heuristics/mip_scaling_strategy.cu +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -35,7 +35,6 @@ #include namespace cuopt::linear_programming::detail { -namespace { constexpr int row_scaling_num_iterations = 3; constexpr int row_scaling_k_min = -20; @@ -81,11 +80,11 @@ struct min_op_t { }; template -void compute_row_inf_norm_with_cub(const problem_t& op_problem, - rmm::device_uvector& temp_storage, - size_t temp_storage_bytes, - rmm::device_uvector& row_inf_norm, - rmm::cuda_stream_view stream_view) +void compute_row_inf_norm(const problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_inf_norm, + rmm::cuda_stream_view stream_view) { auto coeff_abs_iter = thrust::make_transform_iterator(op_problem.coefficients.data(), abs_value_transform_t{}); @@ -103,13 +102,13 @@ void compute_row_inf_norm_with_cub(const problem_t& op_problem, } template -void compute_big_m_skip_rows_with_cub(const problem_t& op_problem, - rmm::device_uvector& temp_storage, - size_t temp_storage_bytes, - rmm::device_uvector& row_inf_norm, - rmm::device_uvector& row_min_nonzero, - rmm::device_uvector& row_nonzero_count, - rmm::device_uvector& row_skip_scaling) +void compute_big_m_skip_rows(const problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_inf_norm, + rmm::device_uvector& row_min_nonzero, + rmm::device_uvector& row_nonzero_count, + rmm::device_uvector& row_skip_scaling) { auto coeff_abs_iter = thrust::make_transform_iterator(op_problem.coefficients.data(), abs_value_transform_t{}); @@ -177,8 +176,6 @@ void compute_big_m_skip_rows_with_cub(const problem_t& op_problem, }); } -} // namespace - template mip_scaling_strategy_t::mip_scaling_strategy_t(problem_t& op_problem_scaled) : handle_ptr_(op_problem_scaled.handle_ptr), @@ -187,6 +184,62 @@ mip_scaling_strategy_t::mip_scaling_strategy_t(problem_t& op { } +template +size_t dry_run_cub(const problem_t& op_problem, + i_t n_rows, + rmm::device_uvector& row_inf_norm, + rmm::device_uvector& row_min_nonzero, + rmm::device_uvector& row_nonzero_count, + rmm::cuda_stream_view stream_view) +{ + size_t temp_storage_bytes = 0; + size_t current_required_bytes = 0; + + auto coeff_abs_iter = + thrust::make_transform_iterator(op_problem.coefficients.data(), abs_value_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_abs_iter, + row_inf_norm.data(), + n_rows, + op_problem.offsets.data(), + op_problem.offsets.data() + 1, + max_op_t{}, + f_t(0), + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + + auto coeff_nonzero_min_iter = thrust::make_transform_iterator( + op_problem.coefficients.data(), nonzero_abs_or_inf_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_nonzero_min_iter, + row_min_nonzero.data(), + n_rows, + op_problem.offsets.data(), + op_problem.offsets.data() + 1, + min_op_t{}, + std::numeric_limits::infinity(), + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + + auto coeff_nonzero_count_iter = thrust::make_transform_iterator( + op_problem.coefficients.data(), nonzero_count_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_nonzero_count_iter, + row_nonzero_count.data(), + n_rows, + op_problem.offsets.data(), + op_problem.offsets.data() + 1, + thrust::plus{}, + i_t(0), + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + + return temp_storage_bytes; +} + template void mip_scaling_strategy_t::scale_problem() { @@ -196,7 +249,7 @@ void mip_scaling_strategy_t::scale_problem() const i_t n_cols = op_problem_scaled_.n_variables; const i_t nnz = op_problem_scaled_.nnz; - if (n_rows == 0) { + if (n_rows == 0 || nnz <= 0) { op_problem_scaled_.is_scaled_ = true; return; } @@ -208,82 +261,30 @@ void mip_scaling_strategy_t::scale_problem() rmm::device_uvector iteration_scaling(static_cast(n_rows), stream_view_); rmm::device_uvector coefficient_row_index(static_cast(nnz), stream_view_); - if (nnz > 0) { - thrust::upper_bound(handle_ptr_->get_thrust_policy(), - op_problem_scaled_.offsets.begin(), - op_problem_scaled_.offsets.end(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(nnz), - coefficient_row_index.begin()); - thrust::transform( - handle_ptr_->get_thrust_policy(), - coefficient_row_index.begin(), - coefficient_row_index.end(), - coefficient_row_index.begin(), - [] __device__(i_t row_upper_bound_idx) -> i_t { return row_upper_bound_idx - 1; }); - } + thrust::upper_bound(handle_ptr_->get_thrust_policy(), + op_problem_scaled_.offsets.begin(), + op_problem_scaled_.offsets.end(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(nnz), + coefficient_row_index.begin()); + thrust::transform( + handle_ptr_->get_thrust_policy(), + coefficient_row_index.begin(), + coefficient_row_index.end(), + coefficient_row_index.begin(), + [] __device__(i_t row_upper_bound_idx) -> i_t { return row_upper_bound_idx - 1; }); - size_t temp_storage_bytes = 0; - size_t current_required_bytes = 0; - if (nnz > 0) { - auto coeff_abs_iter = thrust::make_transform_iterator(op_problem_scaled_.coefficients.data(), - abs_value_transform_t{}); - RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, - current_required_bytes, - coeff_abs_iter, - row_inf_norm.data(), - n_rows, - op_problem_scaled_.offsets.data(), - op_problem_scaled_.offsets.data() + 1, - max_op_t{}, - f_t(0), - stream_view_)); - temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); - - auto coeff_nonzero_min_iter = thrust::make_transform_iterator( - op_problem_scaled_.coefficients.data(), nonzero_abs_or_inf_transform_t{}); - RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, - current_required_bytes, - coeff_nonzero_min_iter, - row_min_nonzero.data(), - n_rows, - op_problem_scaled_.offsets.data(), - op_problem_scaled_.offsets.data() + 1, - min_op_t{}, - std::numeric_limits::infinity(), - stream_view_)); - temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); - - auto coeff_nonzero_count_iter = thrust::make_transform_iterator( - op_problem_scaled_.coefficients.data(), nonzero_count_transform_t{}); - RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, - current_required_bytes, - coeff_nonzero_count_iter, - row_nonzero_count.data(), - n_rows, - op_problem_scaled_.offsets.data(), - op_problem_scaled_.offsets.data() + 1, - thrust::plus{}, - i_t(0), - stream_view_)); - temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); - } + size_t temp_storage_bytes = dry_run_cub( + op_problem_scaled_, n_rows, row_inf_norm, row_min_nonzero, row_nonzero_count, stream_view_); rmm::device_uvector temp_storage(temp_storage_bytes, stream_view_); - if (nnz > 0) { - compute_big_m_skip_rows_with_cub(op_problem_scaled_, - temp_storage, - temp_storage_bytes, - row_inf_norm, - row_min_nonzero, - row_nonzero_count, - row_skip_scaling); - } else { - thrust::fill( - handle_ptr_->get_thrust_policy(), row_inf_norm.begin(), row_inf_norm.end(), f_t(0)); - thrust::fill( - handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(0)); - } + compute_big_m_skip_rows(op_problem_scaled_, + temp_storage, + temp_storage_bytes, + row_inf_norm, + row_min_nonzero, + row_nonzero_count, + row_skip_scaling); i_t skipped_big_m_rows = thrust::count( handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(1)); @@ -295,41 +296,31 @@ void mip_scaling_strategy_t::scale_problem() skipped_big_m_rows); for (int iteration = 0; iteration < row_scaling_num_iterations; ++iteration) { - if (nnz > 0) { - compute_row_inf_norm_with_cub( - op_problem_scaled_, temp_storage, temp_storage_bytes, row_inf_norm, stream_view_); - } else { - thrust::fill( - handle_ptr_->get_thrust_policy(), row_inf_norm.begin(), row_inf_norm.end(), f_t(0)); - } - - const double log2_sum = thrust::transform_reduce( - handle_ptr_->get_thrust_policy(), - thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.begin(), row_skip_scaling.begin())), - thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.end(), row_skip_scaling.end())), - [] __device__(auto row_info) -> double { - const f_t row_norm = thrust::get<0>(row_info); - const i_t skip_row = thrust::get<1>(row_info); - if (skip_row || row_norm <= f_t(0)) { return 0.0; } - return ::log2(static_cast(row_norm)); - }, - 0.0, - thrust::plus{}); - const i_t active_row_count = thrust::transform_reduce( + compute_row_inf_norm( + op_problem_scaled_, temp_storage, temp_storage_bytes, row_inf_norm, stream_view_); + + using sum_count_t = thrust::tuple; + auto log2_sum_and_count = thrust::transform_reduce( handle_ptr_->get_thrust_policy(), thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.begin(), row_skip_scaling.begin())), thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.end(), row_skip_scaling.end())), - [] __device__(auto row_info) -> i_t { + [] __device__(auto row_info) -> sum_count_t { const f_t row_norm = thrust::get<0>(row_info); const i_t skip_row = thrust::get<1>(row_info); - return (!skip_row && row_norm > f_t(0)) ? i_t(1) : i_t(0); + if (skip_row || row_norm == f_t(0)) { return {0.0, 0.0}; } + return {log2(static_cast(row_norm)), 1.0}; }, - i_t(0), - thrust::plus{}); + sum_count_t{0.0, 0.0}, + [] __device__(sum_count_t a, sum_count_t b) -> sum_count_t { + return {thrust::get<0>(a) + thrust::get<0>(b), thrust::get<1>(a) + thrust::get<1>(b)}; + }); + const double log2_sum = thrust::get<0>(log2_sum_and_count); + const i_t active_row_count = static_cast(thrust::get<1>(log2_sum_and_count)); if (active_row_count == 0) { break; } - f_t target_norm = static_cast(::exp2(log2_sum / static_cast(active_row_count))); - if (!::isfinite(target_norm) || target_norm <= f_t(0)) { break; } + f_t target_norm = static_cast(exp2(log2_sum / static_cast(active_row_count))); + cuopt_assert(isfinite(target_norm), "target_norm must be finite"); + cuopt_assert(target_norm > f_t(0), "target_norm must be positive"); thrust::transform( handle_ptr_->get_thrust_policy(), @@ -339,30 +330,29 @@ void mip_scaling_strategy_t::scale_problem() [target_norm] __device__(auto row_info) -> f_t { const f_t row_norm = thrust::get<0>(row_info); const i_t skip_row = thrust::get<1>(row_info); - if (skip_row || row_norm <= f_t(0) || target_norm <= f_t(0)) { return f_t(1); } + if (skip_row || row_norm == f_t(0)) { return f_t(1); } const f_t desired_scaling = target_norm / row_norm; - if (!::isfinite(desired_scaling) || desired_scaling <= f_t(0)) { return f_t(1); } + if (!isfinite(desired_scaling) || desired_scaling <= f_t(0)) { return f_t(1); } int exponent = 0; const f_t mantissa = - ::frexp(desired_scaling, &exponent); // desired_scaling = mantissa * 2^exponent + frexp(desired_scaling, &exponent); // desired_scaling = mantissa * 2^exponent int k = mantissa >= static_cast(nearest_pow2_mantissa_threshold) ? exponent : exponent - 1; + // clamp it so we don't overscale. range [1e-6,1e6] if (k < row_scaling_k_min) { k = row_scaling_k_min; } if (k > row_scaling_k_max) { k = row_scaling_k_max; } - return ::ldexp(f_t(1), k); + return ldexp(f_t(1), k); }); - if (nnz > 0) { - thrust::transform( - handle_ptr_->get_thrust_policy(), - op_problem_scaled_.coefficients.begin(), - op_problem_scaled_.coefficients.end(), - thrust::make_permutation_iterator(iteration_scaling.begin(), coefficient_row_index.begin()), - op_problem_scaled_.coefficients.begin(), - thrust::multiplies{}); - } + thrust::transform( + handle_ptr_->get_thrust_policy(), + op_problem_scaled_.coefficients.begin(), + op_problem_scaled_.coefficients.end(), + thrust::make_permutation_iterator(iteration_scaling.begin(), coefficient_row_index.begin()), + op_problem_scaled_.coefficients.begin(), + thrust::multiplies{}); thrust::transform(handle_ptr_->get_thrust_policy(), op_problem_scaled_.constraint_lower_bounds.begin(), @@ -376,13 +366,10 @@ void mip_scaling_strategy_t::scale_problem() iteration_scaling.begin(), op_problem_scaled_.constraint_upper_bounds.begin(), thrust::multiplies{}); - - // Delay A^T updates until after all row-scaling iterations are complete. - // Rebuilding transpose from the final scaled A guarantees exact consistency for - // downstream checks (e.g., concurrent root PDLP path). } op_problem_scaled_.compute_transpose_of_problem(); + combine_constraint_bounds(op_problem_scaled_, op_problem_scaled_.combined_bounds); op_problem_scaled_.check_problem_representation(true, true); op_problem_scaled_.is_scaled_ = true; CUOPT_LOG_INFO("MIP row scaling completed"); diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index ca960ee54..691ec1693 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -956,11 +956,7 @@ optimization_problem_solution_t run_concurrent( // For MIP concurrent solves (PDLP + Barrier), pre-scale once with PDLP scaling and run both // methods on the same scaled problem. - const bool apply_shared_scaling = - settings.inside_mip && (settings_pdlp.hyper_params.do_ruiz_scaling || - settings_pdlp.hyper_params.do_pock_chambolle_scaling || - settings_pdlp.hyper_params.bound_objective_rescaling); - if (apply_shared_scaling) { + if (settings.inside_mip) { pre_scaled_problem_ptr = std::make_unique>(problem); concurrent_scaling_ptr = std::make_unique>( problem.handle_ptr, @@ -1006,7 +1002,7 @@ optimization_problem_solution_t run_concurrent( cuopt_problem_to_simplex_problem(problem.handle_ptr, problem); // Barrier shares the same model as PDLP (scaled in MIP concurrent mode, original otherwise). dual_simplex::user_problem_t barrier_problem = - apply_shared_scaling + settings.inside_mip ? cuopt_problem_to_simplex_problem(problem.handle_ptr, *concurrent_problem) : dual_simplex_problem; // Create a thread for dual simplex From 2b080ccab49c179e05bd691d81405bc1af3c0974 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Mon, 23 Feb 2026 04:43:31 -0800 Subject: [PATCH 24/44] fix compule error --- cpp/src/mip_heuristics/mip_scaling_strategy.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu index ef5d453cc..1fd60d595 100644 --- a/cpp/src/mip_heuristics/mip_scaling_strategy.cu +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -7,6 +7,7 @@ #include #include +#include #include #include From 7a61d1333df0c172d7d59280eb09af1a5ae1a1ee Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Mon, 23 Feb 2026 07:10:59 -0800 Subject: [PATCH 25/44] skip mip scaling --- benchmarks/linear_programming/cuopt/run_mip.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index 51d1b4a43..47b68559a 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -209,6 +209,7 @@ int run_single_file(std::string file_path, settings.tolerances.absolute_tolerance = 1e-6; settings.presolver = cuopt::linear_programming::presolver_t::Default; settings.reliability_branching = reliability_branching; + settings.mip_scaling = false; settings.seed = 42; cuopt::linear_programming::benchmark_info_t benchmark_info; settings.benchmark_info_ptr = &benchmark_info; From 0ee834484bb651971811a78897006f90a5aae9c9 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Tue, 24 Feb 2026 03:35:16 -0800 Subject: [PATCH 26/44] without skipping big M --- cpp/src/mip_heuristics/mip_scaling_strategy.cu | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu index 1fd60d595..ec9c0ddfd 100644 --- a/cpp/src/mip_heuristics/mip_scaling_strategy.cu +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -279,13 +279,13 @@ void mip_scaling_strategy_t::scale_problem() op_problem_scaled_, n_rows, row_inf_norm, row_min_nonzero, row_nonzero_count, stream_view_); rmm::device_uvector temp_storage(temp_storage_bytes, stream_view_); - compute_big_m_skip_rows(op_problem_scaled_, - temp_storage, - temp_storage_bytes, - row_inf_norm, - row_min_nonzero, - row_nonzero_count, - row_skip_scaling); + // compute_big_m_skip_rows(op_problem_scaled_, + // temp_storage, + // temp_storage_bytes, + // row_inf_norm, + // row_min_nonzero, + // row_nonzero_count, + // row_skip_scaling); i_t skipped_big_m_rows = thrust::count( handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(1)); From 2a500f48e1b3f348c12c7b789f673d5188418ac8 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Tue, 24 Feb 2026 07:49:43 -0800 Subject: [PATCH 27/44] mip scaling with skipping big M --- benchmarks/linear_programming/cuopt/run_mip.cpp | 2 +- cpp/src/mip_heuristics/mip_scaling_strategy.cu | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index 47b68559a..01e8dc994 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -209,7 +209,7 @@ int run_single_file(std::string file_path, settings.tolerances.absolute_tolerance = 1e-6; settings.presolver = cuopt::linear_programming::presolver_t::Default; settings.reliability_branching = reliability_branching; - settings.mip_scaling = false; + settings.mip_scaling = true; settings.seed = 42; cuopt::linear_programming::benchmark_info_t benchmark_info; settings.benchmark_info_ptr = &benchmark_info; diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu index ec9c0ddfd..720f5b87f 100644 --- a/cpp/src/mip_heuristics/mip_scaling_strategy.cu +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -10,7 +10,6 @@ #include #include -#include #include #include From d0c5a425ca1cc24f9b4cb984bc0090b6a4c38755 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Tue, 24 Feb 2026 16:09:51 +0000 Subject: [PATCH 28/44] fix thrust build + more timer checks --- .../diversity/diversity_manager.cu | 26 ++++++++++--------- cpp/src/mip_heuristics/solve.cu | 5 ++++ cpp/src/mip_heuristics/solver.cu | 16 ++++++++++++ cpp/src/pdlp/swap_and_resize_helper.cuh | 1 + cpp/src/pdlp/utils.cuh | 1 + cpp/src/utilities/copy_helpers.hpp | 1 + cpp/src/utilities/cuda_helpers.cuh | 1 + 7 files changed, 39 insertions(+), 12 deletions(-) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 25b021ac3..4fa0d3f4a 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -201,18 +201,20 @@ bool diversity_manager_t::run_presolve(f_t time_limit) compute_probing_cache(ls.constraint_prop.bounds_update, *problem_ptr, probing_timer); if (problem_is_infeasible) { return false; } } - const bool remap_cache_ids = true; - trivial_presolve(*problem_ptr, remap_cache_ids); - if (!problem_ptr->empty && !check_bounds_sanity(*problem_ptr)) { return false; } - // May overconstrain if Papilo presolve has been run before - if (context.settings.presolver == presolver_t::None) { - if (!problem_ptr->empty) { - // do the resizing no-matter what, bounds presolve might not change the bounds but initial - // trivial presolve might have - ls.constraint_prop.bounds_update.resize(*problem_ptr); - ls.constraint_prop.conditional_bounds_update.update_constraint_bounds( - *problem_ptr, ls.constraint_prop.bounds_update); - if (!check_bounds_sanity(*problem_ptr)) { return false; } + if (!presolve_timer.check_time_limit()) { + const bool remap_cache_ids = true; + trivial_presolve(*problem_ptr, remap_cache_ids); + if (!problem_ptr->empty && !check_bounds_sanity(*problem_ptr)) { return false; } + // May overconstrain if Papilo presolve has been run before + if (context.settings.presolver == presolver_t::None) { + if (!problem_ptr->empty) { + // do the resizing no-matter what, bounds presolve might not change the bounds but initial + // trivial presolve might have + ls.constraint_prop.bounds_update.resize(*problem_ptr); + ls.constraint_prop.conditional_bounds_update.update_constraint_bounds( + *problem_ptr, ls.constraint_prop.bounds_update); + if (!check_bounds_sanity(*problem_ptr)) { return false; } + } } } stats.presolve_time = presolve_timer.elapsed_time(); diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 41e272992..3c6ee21a4 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -153,6 +153,11 @@ mip_solution_t run_mip(detail::problem_t& problem, detail::trivial_presolve(scaled_problem); detail::mip_solver_t solver(scaled_problem, settings, scaling, timer); + if (timer.check_time_limit()) { + CUOPT_LOG_INFO("Time limit reached before main solve"); + detail::solution_t sol(problem); + return sol.get_solution(false, solver.get_solver_stats(), false); + } auto scaled_sol = solver.run_solver(); bool is_feasible_before_scaling = scaled_sol.get_feasible(); scaled_sol.problem_ptr = &problem; diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index 3e2251171..235d4500d 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -134,6 +134,14 @@ solution_t mip_solver_t::run_solver() return sol; } + if (timer_.check_time_limit()) { + CUOPT_LOG_INFO("Time limit reached after presolve"); + solution_t sol(*context.problem_ptr); + context.stats.total_solve_time = timer_.elapsed_time(); + context.problem_ptr->post_process_solution(sol); + return sol; + } + // if the problem was reduced to a LP: run concurrent LP if (run_presolve && context.problem_ptr->n_integer_vars == 0) { CUOPT_LOG_INFO("Problem reduced to a LP, running concurrent LP"); @@ -285,6 +293,14 @@ solution_t mip_solver_t::run_solver() std::placeholders::_5, std::placeholders::_6); + if (timer_.check_time_limit()) { + CUOPT_LOG_INFO("Time limit reached during B&B setup"); + solution_t sol(*context.problem_ptr); + context.stats.total_solve_time = timer_.elapsed_time(); + context.problem_ptr->post_process_solution(sol); + return sol; + } + // Fork a thread for branch and bound // std::async and std::future allow us to get the return value of bb::solve() // without having to manually manage the thread diff --git a/cpp/src/pdlp/swap_and_resize_helper.cuh b/cpp/src/pdlp/swap_and_resize_helper.cuh index 0d4e2a6d0..6ed05df24 100644 --- a/cpp/src/pdlp/swap_and_resize_helper.cuh +++ b/cpp/src/pdlp/swap_and_resize_helper.cuh @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/pdlp/utils.cuh b/cpp/src/pdlp/utils.cuh index 33625f768..138c9c2ab 100644 --- a/cpp/src/pdlp/utils.cuh +++ b/cpp/src/pdlp/utils.cuh @@ -25,6 +25,7 @@ #include #include #include +#include namespace cuopt::linear_programming::detail { diff --git a/cpp/src/utilities/copy_helpers.hpp b/cpp/src/utilities/copy_helpers.hpp index 943ce463a..36a465905 100644 --- a/cpp/src/utilities/copy_helpers.hpp +++ b/cpp/src/utilities/copy_helpers.hpp @@ -15,6 +15,7 @@ #include #include +#include #include #include diff --git a/cpp/src/utilities/cuda_helpers.cuh b/cpp/src/utilities/cuda_helpers.cuh index ae50e9967..946099648 100644 --- a/cpp/src/utilities/cuda_helpers.cuh +++ b/cpp/src/utilities/cuda_helpers.cuh @@ -10,6 +10,7 @@ #include #include +#include #include #include #include From 84f8fb320e70a78d5b4432a2a96b816b32b0f64f Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Tue, 24 Feb 2026 08:31:29 -0800 Subject: [PATCH 29/44] fix headers --- cpp/src/mip_heuristics/solve.cu | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 5860a3a62..2282909ee 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -143,10 +143,8 @@ mip_solution_t run_mip(detail::problem_t& problem, "please provide a more numerically stable problem."); } - auto sol = - scaled_sol.get_solution(is_feasible_on_scaled_problem || is_feasible_on_original_problem, - solver.get_solver_stats(), - false); + auto sol = scaled_sol.get_solution( + is_feasible_after_unscaling || is_feasible_before_scaling, solver.get_solver_stats(), false); int hidesol = std::getenv("CUOPT_MIP_HIDE_SOLUTION") ? atoi(std::getenv("CUOPT_MIP_HIDE_SOLUTION")) : 0; From 67240f57035bde789b1e5512c116c00fd46c5baf Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Tue, 24 Feb 2026 16:51:09 +0000 Subject: [PATCH 30/44] review comment --- cpp/src/mip_heuristics/solve.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 3c6ee21a4..44f0e7468 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -156,7 +156,9 @@ mip_solution_t run_mip(detail::problem_t& problem, if (timer.check_time_limit()) { CUOPT_LOG_INFO("Time limit reached before main solve"); detail::solution_t sol(problem); - return sol.get_solution(false, solver.get_solver_stats(), false); + auto stats = solver.get_solver_stats(); + stats.total_solve_time = timer.elapsed_time(); + return sol.get_solution(false, stats, false); } auto scaled_sol = solver.run_solver(); bool is_feasible_before_scaling = scaled_sol.get_feasible(); From a15424fc250b6e649b8b0b0015641d9771c9b426 Mon Sep 17 00:00:00 2001 From: Alice Boucher Date: Tue, 24 Feb 2026 17:02:33 +0000 Subject: [PATCH 31/44] fix thrust solve --- cpp/src/routing/utilities/check_input.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/routing/utilities/check_input.cu b/cpp/src/routing/utilities/check_input.cu index b2f035ee5..e902f2d46 100644 --- a/cpp/src/routing/utilities/check_input.cu +++ b/cpp/src/routing/utilities/check_input.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -17,6 +17,7 @@ #include #include #include +#include #include #include From 4a1c6721ed0b619ebc2b3367ed5531e8b4042f7c Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Tue, 24 Feb 2026 10:20:48 -0800 Subject: [PATCH 32/44] remove nvtx --- cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu index 3a1ea7b63..323165090 100644 --- a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu +++ b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu @@ -15,7 +15,6 @@ #include #include -#include #include #include #include From 7045ae727e40778f94f4216c4c0736af095773b4 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 25 Feb 2026 01:40:58 -0800 Subject: [PATCH 33/44] fix init issues --- cpp/src/mip_heuristics/mip_scaling_strategy.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu index 720f5b87f..76a19cc27 100644 --- a/cpp/src/mip_heuristics/mip_scaling_strategy.cu +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -258,6 +258,8 @@ void mip_scaling_strategy_t::scale_problem() rmm::device_uvector row_min_nonzero(static_cast(n_rows), stream_view_); rmm::device_uvector row_nonzero_count(static_cast(n_rows), stream_view_); rmm::device_uvector row_skip_scaling(static_cast(n_rows), stream_view_); + thrust::fill( + handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(0)); rmm::device_uvector iteration_scaling(static_cast(n_rows), stream_view_); rmm::device_uvector coefficient_row_index(static_cast(nnz), stream_view_); From 80ac99d9d24433cf2be009b2e593444d65dff6a7 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 25 Feb 2026 01:41:29 -0800 Subject: [PATCH 34/44] don't skip big m --- cpp/src/mip_heuristics/mip_scaling_strategy.cu | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu index 76a19cc27..ed4294d9c 100644 --- a/cpp/src/mip_heuristics/mip_scaling_strategy.cu +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -280,13 +280,13 @@ void mip_scaling_strategy_t::scale_problem() op_problem_scaled_, n_rows, row_inf_norm, row_min_nonzero, row_nonzero_count, stream_view_); rmm::device_uvector temp_storage(temp_storage_bytes, stream_view_); - // compute_big_m_skip_rows(op_problem_scaled_, - // temp_storage, - // temp_storage_bytes, - // row_inf_norm, - // row_min_nonzero, - // row_nonzero_count, - // row_skip_scaling); + compute_big_m_skip_rows(op_problem_scaled_, + temp_storage, + temp_storage_bytes, + row_inf_norm, + row_min_nonzero, + row_nonzero_count, + row_skip_scaling); i_t skipped_big_m_rows = thrust::count( handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(1)); From 8eaebba181a26968ef92372405c17af12d73a9a2 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 25 Feb 2026 04:30:04 -0800 Subject: [PATCH 35/44] do scaling beforehand --- cpp/src/mip_heuristics/diversity/diversity_manager.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 4ef291be8..31f362918 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -177,6 +177,7 @@ bool diversity_manager_t::run_presolve(f_t time_limit) raft::common::nvtx::range fun_scope("run_presolve"); CUOPT_LOG_INFO("Running presolve!"); timer_t presolve_timer(time_limit); + if (context.settings.mip_scaling) { context.scaling.scale_problem(); } auto term_crit = ls.constraint_prop.bounds_update.solve(*problem_ptr); if (ls.constraint_prop.bounds_update.infeas_constraints_count > 0) { stats.presolve_time = timer.elapsed_time(); @@ -217,7 +218,6 @@ bool diversity_manager_t::run_presolve(f_t time_limit) } } } - if (context.settings.mip_scaling) { context.scaling.scale_problem(); } stats.presolve_time = presolve_timer.elapsed_time(); lp_optimal_solution.resize(problem_ptr->n_variables, problem_ptr->handle_ptr->get_stream()); lp_dual_optimal_solution.resize(problem_ptr->n_constraints, From 6b2a63157573c0fd956e5d5991c9150260be72c9 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 25 Feb 2026 07:27:20 -0800 Subject: [PATCH 36/44] without big M --- cpp/src/mip_heuristics/mip_scaling_strategy.cu | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu index ed4294d9c..76a19cc27 100644 --- a/cpp/src/mip_heuristics/mip_scaling_strategy.cu +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -280,13 +280,13 @@ void mip_scaling_strategy_t::scale_problem() op_problem_scaled_, n_rows, row_inf_norm, row_min_nonzero, row_nonzero_count, stream_view_); rmm::device_uvector temp_storage(temp_storage_bytes, stream_view_); - compute_big_m_skip_rows(op_problem_scaled_, - temp_storage, - temp_storage_bytes, - row_inf_norm, - row_min_nonzero, - row_nonzero_count, - row_skip_scaling); + // compute_big_m_skip_rows(op_problem_scaled_, + // temp_storage, + // temp_storage_bytes, + // row_inf_norm, + // row_min_nonzero, + // row_nonzero_count, + // row_skip_scaling); i_t skipped_big_m_rows = thrust::count( handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(1)); From cfcf0ab98a05ed821f64b696f09b412d95366132 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Fri, 27 Feb 2026 02:37:40 -0800 Subject: [PATCH 37/44] try with mip scaling --- .../diversity/diversity_manager.cu | 10 +- .../mip_heuristics/mip_scaling_strategy.cu | 145 +++++++++++++----- 2 files changed, 115 insertions(+), 40 deletions(-) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 31f362918..8e1f0e24e 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -391,14 +391,16 @@ solution_t diversity_manager_t::run_solver() } else if (!fj_only_run) { convert_greater_to_less(*problem_ptr); - f_t tolerance_divisor = - problem_ptr->tolerances.absolute_tolerance / problem_ptr->tolerances.relative_tolerance; - if (tolerance_divisor == 0) { tolerance_divisor = 1; } f_t absolute_tolerance = context.settings.tolerances.absolute_tolerance; + f_t tolerance_divisor = 100.; pdlp_solver_settings_t pdlp_settings{}; - pdlp_settings.tolerances.relative_primal_tolerance = absolute_tolerance / tolerance_divisor; + pdlp_settings.tolerances.absolute_dual_tolerance = absolute_tolerance; pdlp_settings.tolerances.relative_dual_tolerance = absolute_tolerance / tolerance_divisor; + pdlp_settings.tolerances.absolute_primal_tolerance = absolute_tolerance; + pdlp_settings.tolerances.relative_primal_tolerance = absolute_tolerance / tolerance_divisor; + pdlp_settings.tolerances.absolute_gap_tolerance = absolute_tolerance; + pdlp_settings.tolerances.relative_gap_tolerance = absolute_tolerance / tolerance_divisor; pdlp_settings.time_limit = lp_time_limit; pdlp_settings.first_primal_feasible = false; pdlp_settings.concurrent_halt = &global_concurrent_halt; diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu index 76a19cc27..7bc31a9e0 100644 --- a/cpp/src/mip_heuristics/mip_scaling_strategy.cu +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -36,10 +36,14 @@ namespace cuopt::linear_programming::detail { -constexpr int row_scaling_num_iterations = 3; -constexpr int row_scaling_k_min = -20; -constexpr int row_scaling_k_max = 20; -constexpr double nearest_pow2_mantissa_threshold = 0.7071067811865476; +constexpr int row_scaling_max_iterations = 8; +constexpr int row_scaling_k_min = -20; +constexpr int row_scaling_k_max = 20; +constexpr int row_scaling_big_m_soft_k_min = -4; +constexpr int row_scaling_big_m_soft_k_max = 0; +constexpr double row_scaling_spread_rel_tol = 1.0e-2; +constexpr double nearest_pow2_mantissa_threshold = 0.7071067811865476; +constexpr double min_abs_objective_coefficient_threshold = 1.0e-4; constexpr double big_m_abs_threshold = 1.0e4; constexpr double big_m_ratio_threshold = 1.0e4; @@ -176,6 +180,41 @@ void compute_big_m_skip_rows(const problem_t& op_problem, }); } +template +void scale_objective(problem_t& op_problem) +{ + const f_t min_abs_objective_coefficient = + thrust::transform_reduce(op_problem.handle_ptr->get_thrust_policy(), + op_problem.objective_coefficients.begin(), + op_problem.objective_coefficients.end(), + nonzero_abs_or_inf_transform_t{}, + std::numeric_limits::infinity(), + min_op_t{}); + + if (!std::isfinite(min_abs_objective_coefficient) || min_abs_objective_coefficient <= f_t(0) || + min_abs_objective_coefficient >= static_cast(min_abs_objective_coefficient_threshold)) { + return; + } + + const f_t obj_scaling_coefficient = + static_cast(min_abs_objective_coefficient_threshold) / min_abs_objective_coefficient; + if (!std::isfinite(obj_scaling_coefficient) || obj_scaling_coefficient <= f_t(1)) { return; } + + thrust::transform(op_problem.handle_ptr->get_thrust_policy(), + op_problem.objective_coefficients.begin(), + op_problem.objective_coefficients.end(), + op_problem.objective_coefficients.begin(), + [obj_scaling_coefficient] __device__(f_t objective_coefficient) -> f_t { + return objective_coefficient * obj_scaling_coefficient; + }); + op_problem.presolve_data.objective_scaling_factor /= obj_scaling_coefficient; + op_problem.presolve_data.objective_offset *= obj_scaling_coefficient; + + CUOPT_LOG_INFO("MIP objective scaling applied: min_abs_coeff=%g scale=%g", + static_cast(min_abs_objective_coefficient), + static_cast(obj_scaling_coefficient)); +} + template mip_scaling_strategy_t::mip_scaling_strategy_t(problem_t& op_problem_scaled) : handle_ptr_(op_problem_scaled.handle_ptr), @@ -249,6 +288,8 @@ void mip_scaling_strategy_t::scale_problem() const i_t n_cols = op_problem_scaled_.n_variables; const i_t nnz = op_problem_scaled_.nnz; + scale_objective(op_problem_scaled_); + if (n_rows == 0 || nnz <= 0) { op_problem_scaled_.is_scaled_ = true; return; @@ -280,45 +321,65 @@ void mip_scaling_strategy_t::scale_problem() op_problem_scaled_, n_rows, row_inf_norm, row_min_nonzero, row_nonzero_count, stream_view_); rmm::device_uvector temp_storage(temp_storage_bytes, stream_view_); - // compute_big_m_skip_rows(op_problem_scaled_, - // temp_storage, - // temp_storage_bytes, - // row_inf_norm, - // row_min_nonzero, - // row_nonzero_count, - // row_skip_scaling); - - i_t skipped_big_m_rows = thrust::count( + compute_big_m_skip_rows(op_problem_scaled_, + temp_storage, + temp_storage_bytes, + row_inf_norm, + row_min_nonzero, + row_nonzero_count, + row_skip_scaling); + + i_t big_m_rows = thrust::count( handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(1)); - CUOPT_LOG_INFO("MIP row scaling start: rows=%d cols=%d iterations=%d skip_big_m_rows=%d", + CUOPT_LOG_INFO("MIP row scaling start: rows=%d cols=%d max_iterations=%d soft_big_m_rows=%d", n_rows, n_cols, - row_scaling_num_iterations, - skipped_big_m_rows); + row_scaling_max_iterations, + big_m_rows); - for (int iteration = 0; iteration < row_scaling_num_iterations; ++iteration) { + double previous_row_log2_spread = std::numeric_limits::infinity(); + for (int iteration = 0; iteration < row_scaling_max_iterations; ++iteration) { compute_row_inf_norm( op_problem_scaled_, temp_storage, temp_storage_bytes, row_inf_norm, stream_view_); - using sum_count_t = thrust::tuple; - auto log2_sum_and_count = thrust::transform_reduce( + using row_stats_t = thrust::tuple; + auto row_log2_stats = thrust::transform_reduce( handle_ptr_->get_thrust_policy(), - thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.begin(), row_skip_scaling.begin())), - thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.end(), row_skip_scaling.end())), - [] __device__(auto row_info) -> sum_count_t { - const f_t row_norm = thrust::get<0>(row_info); - const i_t skip_row = thrust::get<1>(row_info); - if (skip_row || row_norm == f_t(0)) { return {0.0, 0.0}; } - return {log2(static_cast(row_norm)), 1.0}; + row_inf_norm.begin(), + row_inf_norm.end(), + [] __device__(f_t row_norm) -> row_stats_t { + if (row_norm == f_t(0)) { + return {0.0, + 0.0, + std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}; + } + const double row_log2 = log2(static_cast(row_norm)); + return {row_log2, 1.0, row_log2, row_log2}; }, - sum_count_t{0.0, 0.0}, - [] __device__(sum_count_t a, sum_count_t b) -> sum_count_t { - return {thrust::get<0>(a) + thrust::get<0>(b), thrust::get<1>(a) + thrust::get<1>(b)}; + row_stats_t{0.0, + 0.0, + std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}, + [] __device__(row_stats_t a, row_stats_t b) -> row_stats_t { + return {thrust::get<0>(a) + thrust::get<0>(b), + thrust::get<1>(a) + thrust::get<1>(b), + min_op_t{}(thrust::get<2>(a), thrust::get<2>(b)), + max_op_t{}(thrust::get<3>(a), thrust::get<3>(b))}; }); - const double log2_sum = thrust::get<0>(log2_sum_and_count); - const i_t active_row_count = static_cast(thrust::get<1>(log2_sum_and_count)); + const double log2_sum = thrust::get<0>(row_log2_stats); + const i_t active_row_count = static_cast(thrust::get<1>(row_log2_stats)); if (active_row_count == 0) { break; } + const double row_log2_spread = thrust::get<3>(row_log2_stats) - thrust::get<2>(row_log2_stats); + if (std::isfinite(previous_row_log2_spread)) { + const double spread_improvement = previous_row_log2_spread - row_log2_spread; + if (spread_improvement <= + row_scaling_spread_rel_tol * std::max(1.0, previous_row_log2_spread)) { + break; + } + } + previous_row_log2_spread = row_log2_spread; f_t target_norm = static_cast(exp2(log2_sum / static_cast(active_row_count))); cuopt_assert(isfinite(target_norm), "target_norm must be finite"); @@ -331,8 +392,8 @@ void mip_scaling_strategy_t::scale_problem() iteration_scaling.begin(), [target_norm] __device__(auto row_info) -> f_t { const f_t row_norm = thrust::get<0>(row_info); - const i_t skip_row = thrust::get<1>(row_info); - if (skip_row || row_norm == f_t(0)) { return f_t(1); } + const i_t is_big_m = thrust::get<1>(row_info); + if (row_norm == f_t(0)) { return f_t(1); } const f_t desired_scaling = target_norm / row_norm; if (!isfinite(desired_scaling) || desired_scaling <= f_t(0)) { return f_t(1); } @@ -342,12 +403,24 @@ void mip_scaling_strategy_t::scale_problem() frexp(desired_scaling, &exponent); // desired_scaling = mantissa * 2^exponent int k = mantissa >= static_cast(nearest_pow2_mantissa_threshold) ? exponent : exponent - 1; - // clamp it so we don't overscale. range [1e-6,1e6] - if (k < row_scaling_k_min) { k = row_scaling_k_min; } - if (k > row_scaling_k_max) { k = row_scaling_k_max; } + // Clamp to avoid overscaling, and softly limit scaling on big-M rows. + if (is_big_m) { + if (k < row_scaling_big_m_soft_k_min) { k = row_scaling_big_m_soft_k_min; } + if (k > row_scaling_big_m_soft_k_max) { k = row_scaling_big_m_soft_k_max; } + } else { + if (k < row_scaling_k_min) { k = row_scaling_k_min; } + if (k > row_scaling_k_max) { k = row_scaling_k_max; } + } return ldexp(f_t(1), k); }); + i_t scaled_rows = + thrust::count_if(handle_ptr_->get_thrust_policy(), + iteration_scaling.begin(), + iteration_scaling.end(), + [] __device__(f_t row_scale) -> bool { return row_scale != f_t(1); }); + if (scaled_rows == 0) { break; } + thrust::transform( handle_ptr_->get_thrust_policy(), op_problem_scaled_.coefficients.begin(), From 1db603c092a7d4d039e111540d19edd542f6b026 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Fri, 27 Feb 2026 02:41:36 -0800 Subject: [PATCH 38/44] with assertS --- cpp/src/mip_heuristics/diversity/diversity_manager.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 8e1f0e24e..13c0b1028 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -178,6 +178,7 @@ bool diversity_manager_t::run_presolve(f_t time_limit) CUOPT_LOG_INFO("Running presolve!"); timer_t presolve_timer(time_limit); if (context.settings.mip_scaling) { context.scaling.scale_problem(); } + auto term_crit = ls.constraint_prop.bounds_update.solve(*problem_ptr); if (ls.constraint_prop.bounds_update.infeas_constraints_count > 0) { stats.presolve_time = timer.elapsed_time(); From 0f39c06ed7ce8da2cf8e9abd007a8a7f109301c0 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Fri, 27 Feb 2026 04:47:12 -0800 Subject: [PATCH 39/44] try without any lp scaling --- .../diversity/diversity_manager.cu | 1 - cpp/src/pdlp/solve.cu | 144 +++++++++++------- 2 files changed, 90 insertions(+), 55 deletions(-) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 13c0b1028..9cfd767fd 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -410,7 +410,6 @@ solution_t diversity_manager_t::run_solver() pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; pdlp_settings.num_gpus = context.settings.num_gpus; pdlp_settings.presolver = presolver_t::None; - set_pdlp_solver_mode(pdlp_settings); timer_t lp_timer(lp_time_limit); auto lp_result = solve_lp_with_method(*problem_ptr, pdlp_settings, lp_timer); diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 691ec1693..9d4ab483f 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -24,6 +24,10 @@ #include #include +#include +#include +#include +#include #include #include #include @@ -45,6 +49,8 @@ #include #include +#include + #include // For std::thread #define CUOPT_LOG_CONDITIONAL_INFO(condition, ...) \ @@ -950,39 +956,6 @@ optimization_problem_solution_t run_concurrent( global_concurrent_halt = 0; settings_pdlp.concurrent_halt = &global_concurrent_halt; - detail::problem_t* concurrent_problem = &problem; - std::unique_ptr> pre_scaled_problem_ptr; - std::unique_ptr> concurrent_scaling_ptr; - - // For MIP concurrent solves (PDLP + Barrier), pre-scale once with PDLP scaling and run both - // methods on the same scaled problem. - if (settings.inside_mip) { - pre_scaled_problem_ptr = std::make_unique>(problem); - concurrent_scaling_ptr = std::make_unique>( - problem.handle_ptr, - *pre_scaled_problem_ptr, - settings_pdlp.hyper_params.default_l_inf_ruiz_iterations, - static_cast(settings_pdlp.hyper_params.default_alpha_pock_chambolle_rescaling), - pre_scaled_problem_ptr->reverse_coefficients, - pre_scaled_problem_ptr->reverse_offsets, - pre_scaled_problem_ptr->reverse_constraints, - nullptr, - settings_pdlp.hyper_params, - true); - concurrent_scaling_ptr->scale_problem(); - // Keep CSR/transpose exactly consistent for strict debug validity checks. - pre_scaled_problem_ptr->compute_transpose_of_problem(); - // Keep combined bounds consistent with scaled lower/upper bounds. - detail::combine_constraint_bounds(*pre_scaled_problem_ptr, - pre_scaled_problem_ptr->combined_bounds); - concurrent_problem = pre_scaled_problem_ptr.get(); - - // Avoid scaling twice on the PDLP branch once the shared pre-scaling has been applied. - settings_pdlp.hyper_params.do_ruiz_scaling = false; - settings_pdlp.hyper_params.do_pock_chambolle_scaling = false; - settings_pdlp.hyper_params.bound_objective_rescaling = false; - } - // Make sure allocations are done on the original stream problem.handle_ptr->sync_stream(); @@ -994,17 +967,11 @@ optimization_problem_solution_t run_concurrent( device_count > 1, error_type_t::RuntimeError, "Multi-GPU mode requires at least 2 GPUs"); } - // Initialize simplex/barrier structures before we run PDLP. + // Initialize the dual simplex structures before we run PDLP. // Otherwise, CUDA API calls to the problem stream may occur in both threads and throw graph - // capture off. - // Keep dual simplex on the original unscaled model to preserve vertex solution semantics. + // capture off dual_simplex::user_problem_t dual_simplex_problem = cuopt_problem_to_simplex_problem(problem.handle_ptr, problem); - // Barrier shares the same model as PDLP (scaled in MIP concurrent mode, original otherwise). - dual_simplex::user_problem_t barrier_problem = - settings.inside_mip - ? cuopt_problem_to_simplex_problem(problem.handle_ptr, *concurrent_problem) - : dual_simplex_problem; // Create a thread for dual simplex std::unique_ptr< std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t>> @@ -1017,6 +984,7 @@ optimization_problem_solution_t run_concurrent( std::ref(sol_dual_simplex_ptr), std::ref(timer)); } + dual_simplex::user_problem_t barrier_problem = dual_simplex_problem; // Create a thread for barrier std::unique_ptr< std::tuple, dual_simplex::lp_status_t, f_t, f_t, f_t>> @@ -1025,10 +993,10 @@ optimization_problem_solution_t run_concurrent( auto call_barrier_thread = [&]() { rmm::cuda_stream_view barrier_stream = rmm::cuda_stream_per_thread; auto barrier_handle = raft::handle_t(barrier_stream); - auto barrier_problem_local = barrier_problem; - barrier_problem_local.handle_ptr = &barrier_handle; + auto barrier_problem = dual_simplex_problem; + barrier_problem.handle_ptr = &barrier_handle; - run_barrier_thread(std::ref(barrier_problem_local), + run_barrier_thread(std::ref(barrier_problem), std::ref(settings_pdlp), std::ref(sol_barrier_ptr), std::ref(timer)); @@ -1048,7 +1016,7 @@ optimization_problem_solution_t run_concurrent( CUOPT_LOG_DEBUG("PDLP device: %d", raft::device_setter::get_current_device()); } // Run pdlp in the main thread - auto sol_pdlp = run_pdlp(*concurrent_problem, settings_pdlp, timer, is_batch_mode); + auto sol_pdlp = run_pdlp(problem, settings_pdlp, timer, is_batch_mode); // Wait for dual simplex thread to finish if (!settings.inside_mip) { dual_simplex_thread.join(); } @@ -1069,7 +1037,7 @@ optimization_problem_solution_t run_concurrent( problem.handle_ptr->get_stream()}; // copy the barrier solution to the device - auto sol_barrier = convert_dual_simplex_sol(*concurrent_problem, + auto sol_barrier = convert_dual_simplex_sol(problem, std::get<0>(*sol_barrier_ptr), std::get<1>(*sol_barrier_ptr), std::get<2>(*sol_barrier_ptr), @@ -1077,14 +1045,6 @@ optimization_problem_solution_t run_concurrent( std::get<4>(*sol_barrier_ptr), 1); - if (concurrent_scaling_ptr) { - concurrent_scaling_ptr->unscale_solutions( - sol_pdlp.get_primal_solution(), sol_pdlp.get_dual_solution(), sol_pdlp.get_reduced_cost()); - concurrent_scaling_ptr->unscale_solutions(sol_barrier.get_primal_solution(), - sol_barrier.get_dual_solution(), - sol_barrier.get_reduced_cost()); - } - f_t end_time = timer.elapsed_time(); CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "Concurrent time: %.3fs", end_time); // Check status to see if we should return the pdlp solution or the dual simplex solution @@ -1391,6 +1351,9 @@ template cuopt::linear_programming::optimization_problem_t mps_data_model_to_optimization_problem( raft::handle_t const* handle_ptr, const cuopt::mps_parser::mps_data_model_t& data_model) { + cuopt_expects(handle_ptr != nullptr, + error_type_t::ValidationError, + "handle_ptr must not be null for GPU-backed problem construction"); cuopt::linear_programming::optimization_problem_t op_problem(handle_ptr); op_problem.set_maximize(data_model.get_sense()); @@ -1481,6 +1444,72 @@ optimization_problem_solution_t solve_lp( return solve_lp(op_problem, settings, problem_checking, use_pdlp_solver_mode); } +// ============================================================================ +// Interface-based solve overloads with remote execution support +// ============================================================================ + +template +std::unique_ptr> solve_lp( + optimization_problem_interface_t* problem_interface, + pdlp_solver_settings_t const& settings, + bool problem_checking, + bool use_pdlp_solver_mode, + bool is_batch_mode) +{ + // Check if remote execution is enabled + if (is_remote_execution_enabled()) { + cuopt_expects(!is_batch_mode, + error_type_t::ValidationError, + "Batch mode with remote execution is not supported via this entry point. " + "Use solve_batch_remote() instead."); + CUOPT_LOG_INFO("Remote LP solve requested"); + return problem_interface->solve_lp_remote(settings, problem_checking, use_pdlp_solver_mode); + } else { + // Local execution - convert to optimization_problem_t and call original solve_lp + CUOPT_LOG_INFO("Local LP solve"); + + // Check if this is a CPU problem (test mode: CUOPT_USE_CPU_MEM_FOR_LOCAL=true) + auto* cpu_prob = dynamic_cast*>(problem_interface); + if (cpu_prob != nullptr) { + CUOPT_LOG_INFO("Test mode: Converting CPU problem to GPU for local solve"); + + // Create CUDA resources for the conversion + rmm::cuda_stream stream; + raft::handle_t handle(stream); + + // Temporarily set the handle on the CPU problem so it can create GPU resources + cpu_prob->set_handle(&handle); + + // Convert CPU problem to GPU problem + auto op_problem = cpu_prob->to_optimization_problem(); + + // Clear the handle to avoid dangling pointer after this scope + cpu_prob->set_handle(nullptr); + + // Synchronize before solving to ensure conversion is complete + stream.synchronize(); + + // Solve on GPU + auto gpu_solution = solve_lp( + op_problem, settings, problem_checking, use_pdlp_solver_mode, is_batch_mode); + + // Ensure all GPU work from the solve is complete before to_cpu_solution() D2H copies. + stream.synchronize(); + + CUOPT_LOG_INFO("Test mode: Converting GPU solution back to CPU solution"); + gpu_lp_solution_t gpu_sol_interface(std::move(gpu_solution)); + return gpu_sol_interface.to_cpu_solution(); + } + + auto op_problem = problem_interface->to_optimization_problem(); + auto gpu_solution = solve_lp( + op_problem, settings, problem_checking, use_pdlp_solver_mode, is_batch_mode); + + // Wrap GPU solution in interface and return + return std::make_unique>(std::move(gpu_solution)); + } +} + #define INSTANTIATE(F_TYPE) \ template optimization_problem_solution_t solve_lp( \ optimization_problem_t& op_problem, \ @@ -1496,6 +1525,13 @@ optimization_problem_solution_t solve_lp( bool problem_checking, \ bool use_pdlp_solver_mode); \ \ + template std::unique_ptr> solve_lp( \ + optimization_problem_interface_t*, \ + pdlp_solver_settings_t const&, \ + bool, \ + bool, \ + bool); \ + \ template optimization_problem_solution_t solve_lp_with_method( \ detail::problem_t& problem, \ pdlp_solver_settings_t const& settings, \ From ca5b07e58026c0de06614194ee52c78ec1ef9a80 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 4 Mar 2026 03:47:12 -0800 Subject: [PATCH 40/44] scaling before presolve --- .../diversity/diversity_manager.cu | 1 - cpp/src/mip_heuristics/diversity/lns/rins.cu | 3 +- .../diversity/recombiners/sub_mip.cuh | 2 - .../mip_heuristics/mip_scaling_strategy.cu | 177 +++++++++--------- .../mip_heuristics/mip_scaling_strategy.cuh | 7 +- cpp/src/mip_heuristics/solve.cu | 6 +- cpp/src/mip_heuristics/solver.cu | 6 +- cpp/src/mip_heuristics/solver.cuh | 1 - cpp/src/mip_heuristics/solver_context.cuh | 7 +- cpp/tests/mip/multi_probe_test.cu | 3 +- 10 files changed, 105 insertions(+), 108 deletions(-) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 9cfd767fd..994800049 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -177,7 +177,6 @@ bool diversity_manager_t::run_presolve(f_t time_limit) raft::common::nvtx::range fun_scope("run_presolve"); CUOPT_LOG_INFO("Running presolve!"); timer_t presolve_timer(time_limit); - if (context.settings.mip_scaling) { context.scaling.scale_problem(); } auto term_crit = ls.constraint_prop.bounds_update.solve(*problem_ptr); if (ls.constraint_prop.bounds_update.infeas_constraints_count > 0) { diff --git a/cpp/src/mip_heuristics/diversity/lns/rins.cu b/cpp/src/mip_heuristics/diversity/lns/rins.cu index 7fd8533f8..8ecbb2b36 100644 --- a/cpp/src/mip_heuristics/diversity/lns/rins.cu +++ b/cpp/src/mip_heuristics/diversity/lns/rins.cu @@ -223,8 +223,7 @@ void rins_t::run_rins() std::vector> rins_solution_queue; - mip_solver_context_t fj_context( - &rins_handle, &fixed_problem, context.settings, context.scaling); + mip_solver_context_t fj_context(&rins_handle, &fixed_problem, context.settings); fj_t fj(fj_context); solution_t fj_solution(fixed_problem); fj_solution.copy_new_assignment(cuopt::host_copy(fixed_assignment, rins_handle.get_stream())); diff --git a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh index 02c79be15..7f0d9fc4a 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh @@ -71,8 +71,6 @@ class sub_mip_recombiner_t : public recombiner_t { "n_vars_from_guiding %d n_vars_from_other %d", n_vars_from_guiding, n_vars_from_other); this->compute_vars_to_fix(offspring, vars_to_fix, n_vars_from_other, n_vars_from_guiding); auto [fixed_problem, fixed_assignment, variable_map] = offspring.fix_variables(vars_to_fix); - mip_scaling_strategy_t scaling(fixed_problem); - scaling.scale_problem(); fixed_problem.presolve_data.reset_additional_vars(fixed_problem, offspring.handle_ptr); fixed_problem.presolve_data.initialize_var_mapping(fixed_problem, offspring.handle_ptr); trivial_presolve(fixed_problem); diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu index 7bc31a9e0..ff89cfc88 100644 --- a/cpp/src/mip_heuristics/mip_scaling_strategy.cu +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -67,100 +67,107 @@ struct nonzero_count_transform_t { __device__ i_t operator()(f_t value) const { return raft::abs(value) > f_t(0) ? i_t(1) : i_t(0); } }; -template +template struct max_op_t { - __host__ __device__ t_t operator()(const t_t& lhs, const t_t& rhs) const + __host__ __device__ item_t operator()(const item_t& lhs, const item_t& rhs) const { return lhs > rhs ? lhs : rhs; } }; -template +template struct min_op_t { - __host__ __device__ t_t operator()(const t_t& lhs, const t_t& rhs) const + __host__ __device__ item_t operator()(const item_t& lhs, const item_t& rhs) const { return lhs < rhs ? lhs : rhs; } }; template -void compute_row_inf_norm(const problem_t& op_problem, - rmm::device_uvector& temp_storage, - size_t temp_storage_bytes, - rmm::device_uvector& row_inf_norm, - rmm::cuda_stream_view stream_view) +void compute_row_inf_norm( + const cuopt::linear_programming::optimization_problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_inf_norm, + rmm::cuda_stream_view stream_view) { + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); auto coeff_abs_iter = - thrust::make_transform_iterator(op_problem.coefficients.data(), abs_value_transform_t{}); + thrust::make_transform_iterator(matrix_values.data(), abs_value_transform_t{}); size_t current_bytes = temp_storage_bytes; RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), current_bytes, coeff_abs_iter, row_inf_norm.data(), - op_problem.n_constraints, - op_problem.offsets.data(), - op_problem.offsets.data() + 1, + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, max_op_t{}, f_t(0), stream_view)); } template -void compute_big_m_skip_rows(const problem_t& op_problem, - rmm::device_uvector& temp_storage, - size_t temp_storage_bytes, - rmm::device_uvector& row_inf_norm, - rmm::device_uvector& row_min_nonzero, - rmm::device_uvector& row_nonzero_count, - rmm::device_uvector& row_skip_scaling) +void compute_big_m_skip_rows( + const cuopt::linear_programming::optimization_problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_inf_norm, + rmm::device_uvector& row_min_nonzero, + rmm::device_uvector& row_nonzero_count, + rmm::device_uvector& row_skip_scaling) { + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + const auto stream_view = op_problem.get_handle_ptr()->get_stream(); auto coeff_abs_iter = - thrust::make_transform_iterator(op_problem.coefficients.data(), abs_value_transform_t{}); - auto coeff_nonzero_min_iter = thrust::make_transform_iterator( - op_problem.coefficients.data(), nonzero_abs_or_inf_transform_t{}); - auto coeff_nonzero_count_iter = thrust::make_transform_iterator( - op_problem.coefficients.data(), nonzero_count_transform_t{}); + thrust::make_transform_iterator(matrix_values.data(), abs_value_transform_t{}); + auto coeff_nonzero_min_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_abs_or_inf_transform_t{}); + auto coeff_nonzero_count_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_count_transform_t{}); size_t max_bytes = temp_storage_bytes; RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), max_bytes, coeff_abs_iter, row_inf_norm.data(), - op_problem.n_constraints, - op_problem.offsets.data(), - op_problem.offsets.data() + 1, + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, max_op_t{}, f_t(0), - op_problem.handle_ptr->get_stream())); + stream_view)); size_t min_bytes = temp_storage_bytes; RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), min_bytes, coeff_nonzero_min_iter, row_min_nonzero.data(), - op_problem.n_constraints, - op_problem.offsets.data(), - op_problem.offsets.data() + 1, + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, min_op_t{}, std::numeric_limits::infinity(), - op_problem.handle_ptr->get_stream())); + stream_view)); size_t count_bytes = temp_storage_bytes; RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), count_bytes, coeff_nonzero_count_iter, row_nonzero_count.data(), - op_problem.n_constraints, - op_problem.offsets.data(), - op_problem.offsets.data() + 1, + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, thrust::plus{}, i_t(0), - op_problem.handle_ptr->get_stream())); + stream_view)); auto row_begin = thrust::make_zip_iterator( thrust::make_tuple(row_inf_norm.begin(), row_min_nonzero.begin(), row_nonzero_count.begin())); auto row_end = thrust::make_zip_iterator( thrust::make_tuple(row_inf_norm.end(), row_min_nonzero.end(), row_nonzero_count.end())); thrust::transform( - op_problem.handle_ptr->get_thrust_policy(), + op_problem.get_handle_ptr()->get_thrust_policy(), row_begin, row_end, row_skip_scaling.begin(), @@ -181,12 +188,13 @@ void compute_big_m_skip_rows(const problem_t& op_problem, } template -void scale_objective(problem_t& op_problem) +void scale_objective(cuopt::linear_programming::optimization_problem_t& op_problem) { + auto& objective_coefficients = op_problem.get_objective_coefficients(); const f_t min_abs_objective_coefficient = - thrust::transform_reduce(op_problem.handle_ptr->get_thrust_policy(), - op_problem.objective_coefficients.begin(), - op_problem.objective_coefficients.end(), + thrust::transform_reduce(op_problem.get_handle_ptr()->get_thrust_policy(), + objective_coefficients.begin(), + objective_coefficients.end(), nonzero_abs_or_inf_transform_t{}, std::numeric_limits::infinity(), min_op_t{}); @@ -200,15 +208,16 @@ void scale_objective(problem_t& op_problem) static_cast(min_abs_objective_coefficient_threshold) / min_abs_objective_coefficient; if (!std::isfinite(obj_scaling_coefficient) || obj_scaling_coefficient <= f_t(1)) { return; } - thrust::transform(op_problem.handle_ptr->get_thrust_policy(), - op_problem.objective_coefficients.begin(), - op_problem.objective_coefficients.end(), - op_problem.objective_coefficients.begin(), + thrust::transform(op_problem.get_handle_ptr()->get_thrust_policy(), + objective_coefficients.begin(), + objective_coefficients.end(), + objective_coefficients.begin(), [obj_scaling_coefficient] __device__(f_t objective_coefficient) -> f_t { return objective_coefficient * obj_scaling_coefficient; }); - op_problem.presolve_data.objective_scaling_factor /= obj_scaling_coefficient; - op_problem.presolve_data.objective_offset *= obj_scaling_coefficient; + op_problem.set_objective_scaling_factor(op_problem.get_objective_scaling_factor() / + obj_scaling_coefficient); + op_problem.set_objective_offset(op_problem.get_objective_offset() * obj_scaling_coefficient); CUOPT_LOG_INFO("MIP objective scaling applied: min_abs_coeff=%g scale=%g", static_cast(min_abs_objective_coefficient), @@ -216,61 +225,64 @@ void scale_objective(problem_t& op_problem) } template -mip_scaling_strategy_t::mip_scaling_strategy_t(problem_t& op_problem_scaled) - : handle_ptr_(op_problem_scaled.handle_ptr), +mip_scaling_strategy_t::mip_scaling_strategy_t( + typename mip_scaling_strategy_t::optimization_problem_type_t& op_problem_scaled) + : handle_ptr_(op_problem_scaled.get_handle_ptr()), stream_view_(handle_ptr_->get_stream()), op_problem_scaled_(op_problem_scaled) { } template -size_t dry_run_cub(const problem_t& op_problem, +size_t dry_run_cub(const cuopt::linear_programming::optimization_problem_t& op_problem, i_t n_rows, rmm::device_uvector& row_inf_norm, rmm::device_uvector& row_min_nonzero, rmm::device_uvector& row_nonzero_count, rmm::cuda_stream_view stream_view) { + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); size_t temp_storage_bytes = 0; size_t current_required_bytes = 0; auto coeff_abs_iter = - thrust::make_transform_iterator(op_problem.coefficients.data(), abs_value_transform_t{}); + thrust::make_transform_iterator(matrix_values.data(), abs_value_transform_t{}); RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, current_required_bytes, coeff_abs_iter, row_inf_norm.data(), n_rows, - op_problem.offsets.data(), - op_problem.offsets.data() + 1, + matrix_offsets.data(), + matrix_offsets.data() + 1, max_op_t{}, f_t(0), stream_view)); temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); - auto coeff_nonzero_min_iter = thrust::make_transform_iterator( - op_problem.coefficients.data(), nonzero_abs_or_inf_transform_t{}); + auto coeff_nonzero_min_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_abs_or_inf_transform_t{}); RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, current_required_bytes, coeff_nonzero_min_iter, row_min_nonzero.data(), n_rows, - op_problem.offsets.data(), - op_problem.offsets.data() + 1, + matrix_offsets.data(), + matrix_offsets.data() + 1, min_op_t{}, std::numeric_limits::infinity(), stream_view)); temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); - auto coeff_nonzero_count_iter = thrust::make_transform_iterator( - op_problem.coefficients.data(), nonzero_count_transform_t{}); + auto coeff_nonzero_count_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_count_transform_t{}); RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, current_required_bytes, coeff_nonzero_count_iter, row_nonzero_count.data(), n_rows, - op_problem.offsets.data(), - op_problem.offsets.data() + 1, + matrix_offsets.data(), + matrix_offsets.data() + 1, thrust::plus{}, i_t(0), stream_view)); @@ -284,16 +296,17 @@ void mip_scaling_strategy_t::scale_problem() { raft::common::nvtx::range fun_scope("mip_scale_problem"); - const i_t n_rows = op_problem_scaled_.n_constraints; - const i_t n_cols = op_problem_scaled_.n_variables; - const i_t nnz = op_problem_scaled_.nnz; + auto& matrix_values = op_problem_scaled_.get_constraint_matrix_values(); + auto& matrix_offsets = op_problem_scaled_.get_constraint_matrix_offsets(); + auto& constraint_lower_bounds = op_problem_scaled_.get_constraint_lower_bounds(); + auto& constraint_upper_bounds = op_problem_scaled_.get_constraint_upper_bounds(); + const i_t n_rows = op_problem_scaled_.get_n_constraints(); + const i_t n_cols = op_problem_scaled_.get_n_variables(); + const i_t nnz = op_problem_scaled_.get_nnz(); scale_objective(op_problem_scaled_); - if (n_rows == 0 || nnz <= 0) { - op_problem_scaled_.is_scaled_ = true; - return; - } + if (n_rows == 0 || nnz <= 0) { return; } rmm::device_uvector row_inf_norm(static_cast(n_rows), stream_view_); rmm::device_uvector row_min_nonzero(static_cast(n_rows), stream_view_); @@ -305,8 +318,8 @@ void mip_scaling_strategy_t::scale_problem() rmm::device_uvector coefficient_row_index(static_cast(nnz), stream_view_); thrust::upper_bound(handle_ptr_->get_thrust_policy(), - op_problem_scaled_.offsets.begin(), - op_problem_scaled_.offsets.end(), + matrix_offsets.begin(), + matrix_offsets.end(), thrust::make_counting_iterator(0), thrust::make_counting_iterator(nnz), coefficient_row_index.begin()); @@ -423,30 +436,26 @@ void mip_scaling_strategy_t::scale_problem() thrust::transform( handle_ptr_->get_thrust_policy(), - op_problem_scaled_.coefficients.begin(), - op_problem_scaled_.coefficients.end(), + matrix_values.begin(), + matrix_values.end(), thrust::make_permutation_iterator(iteration_scaling.begin(), coefficient_row_index.begin()), - op_problem_scaled_.coefficients.begin(), + matrix_values.begin(), thrust::multiplies{}); thrust::transform(handle_ptr_->get_thrust_policy(), - op_problem_scaled_.constraint_lower_bounds.begin(), - op_problem_scaled_.constraint_lower_bounds.end(), + constraint_lower_bounds.begin(), + constraint_lower_bounds.end(), iteration_scaling.begin(), - op_problem_scaled_.constraint_lower_bounds.begin(), + constraint_lower_bounds.begin(), thrust::multiplies{}); thrust::transform(handle_ptr_->get_thrust_policy(), - op_problem_scaled_.constraint_upper_bounds.begin(), - op_problem_scaled_.constraint_upper_bounds.end(), + constraint_upper_bounds.begin(), + constraint_upper_bounds.end(), iteration_scaling.begin(), - op_problem_scaled_.constraint_upper_bounds.begin(), + constraint_upper_bounds.begin(), thrust::multiplies{}); } - op_problem_scaled_.compute_transpose_of_problem(); - combine_constraint_bounds(op_problem_scaled_, op_problem_scaled_.combined_bounds); - op_problem_scaled_.check_problem_representation(true, true); - op_problem_scaled_.is_scaled_ = true; CUOPT_LOG_INFO("MIP row scaling completed"); } diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cuh b/cpp/src/mip_heuristics/mip_scaling_strategy.cuh index 51fe4187c..0fc83b8cb 100644 --- a/cpp/src/mip_heuristics/mip_scaling_strategy.cuh +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cuh @@ -7,7 +7,7 @@ #pragma once -#include +#include #include @@ -18,14 +18,15 @@ namespace cuopt::linear_programming::detail { template class mip_scaling_strategy_t { public: - explicit mip_scaling_strategy_t(problem_t& op_problem_scaled); + using optimization_problem_type_t = cuopt::linear_programming::optimization_problem_t; + explicit mip_scaling_strategy_t(optimization_problem_type_t& op_problem_scaled); void scale_problem(); private: raft::handle_t const* handle_ptr_{nullptr}; rmm::cuda_stream_view stream_view_; - problem_t& op_problem_scaled_; + optimization_problem_type_t& op_problem_scaled_; }; } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index 6b91dfa67..042b1c46e 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -128,13 +128,12 @@ mip_solution_t run_mip(detail::problem_t& problem, "Size mismatch"); cuopt_assert(problem.original_problem_ptr->get_n_constraints() == scaled_problem.n_constraints, "Size mismatch"); - detail::mip_scaling_strategy_t scaling(scaled_problem); // only call preprocess on scaled problem, so we can compute feasibility on the original problem scaled_problem.preprocess_problem(); detail::trivial_presolve(scaled_problem); - detail::mip_solver_t solver(scaled_problem, settings, scaling, timer); + detail::mip_solver_t solver(scaled_problem, settings, timer); if (timer.check_time_limit()) { CUOPT_LOG_INFO("Time limit reached before main solve"); detail::solution_t sol(problem); @@ -215,7 +214,8 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, } auto timer = timer_t(time_limit); - + detail::mip_scaling_strategy_t scaling(op_problem); + scaling.scale_problem(); double presolve_time = 0.0; std::unique_ptr> presolver; std::optional> presolve_result; diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index e3608e099..1f263bd3a 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -41,14 +41,10 @@ static void init_handler(const raft::handle_t* handle_ptr) template mip_solver_t::mip_solver_t(const problem_t& op_problem, const mip_solver_settings_t& solver_settings, - mip_scaling_strategy_t& scaling, timer_t timer) : op_problem_(op_problem), solver_settings_(solver_settings), - context(op_problem.handle_ptr, - const_cast*>(&op_problem), - solver_settings, - scaling), + context(op_problem.handle_ptr, const_cast*>(&op_problem), solver_settings), timer_(timer) { init_handler(op_problem.handle_ptr); diff --git a/cpp/src/mip_heuristics/solver.cuh b/cpp/src/mip_heuristics/solver.cuh index ace58022d..9b9024a1d 100644 --- a/cpp/src/mip_heuristics/solver.cuh +++ b/cpp/src/mip_heuristics/solver.cuh @@ -20,7 +20,6 @@ class mip_solver_t { public: explicit mip_solver_t(const problem_t& op_problem, const mip_solver_settings_t& solver_settings, - mip_scaling_strategy_t& scaling, timer_t timer); solution_t run_solver(); diff --git a/cpp/src/mip_heuristics/solver_context.cuh b/cpp/src/mip_heuristics/solver_context.cuh index d05367a35..d68a19bb6 100644 --- a/cpp/src/mip_heuristics/solver_context.cuh +++ b/cpp/src/mip_heuristics/solver_context.cuh @@ -7,7 +7,6 @@ #include -#include #include #include #include @@ -34,9 +33,8 @@ template struct mip_solver_context_t { explicit mip_solver_context_t(raft::handle_t const* handle_ptr_, problem_t* problem_ptr_, - mip_solver_settings_t settings_, - mip_scaling_strategy_t& scaling) - : handle_ptr(handle_ptr_), problem_ptr(problem_ptr_), settings(settings_), scaling(scaling) + mip_solver_settings_t settings_) + : handle_ptr(handle_ptr_), problem_ptr(problem_ptr_), settings(settings_) { cuopt_assert(problem_ptr != nullptr, "problem_ptr is nullptr"); stats.set_solution_bound(problem_ptr->maximize ? std::numeric_limits::infinity() @@ -53,7 +51,6 @@ struct mip_solver_context_t { diversity_manager_t* diversity_manager_ptr{nullptr}; std::atomic preempt_heuristic_solver_ = false; const mip_solver_settings_t settings; - mip_scaling_strategy_t& scaling; solver_stats_t stats; // Work limit context for tracking work units in deterministic mode (shared across all timers in // GPU heuristic loop) diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index 8f415e878..003220de9 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -150,8 +150,7 @@ void test_multi_probe(std::string path) problem_checking_t::check_problem_representation(op_problem); detail::problem_t problem(op_problem); mip_solver_settings_t default_settings{}; - detail::mip_scaling_strategy_t scaling(problem); - detail::mip_solver_t solver(problem, default_settings, scaling, cuopt::timer_t(0)); + detail::mip_solver_t solver(problem, default_settings, cuopt::timer_t(0)); detail::bound_presolve_t bnd_prb_0(solver.context); detail::bound_presolve_t bnd_prb_1(solver.context); detail::multi_probe_t multi_probe_prs(solver.context); From 2f58450a216f2a91368dd53d2752e2461d36907d Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 4 Mar 2026 06:02:21 -0800 Subject: [PATCH 41/44] fix objective issues and pdlp solver mode --- cpp/src/mip_heuristics/diversity/diversity_manager.cu | 2 +- cpp/src/mip_heuristics/mip_scaling_strategy.cu | 1 - cpp/src/mip_heuristics/presolve/third_party_presolve.cpp | 3 +++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 994800049..19f25bb10 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -409,7 +409,7 @@ solution_t diversity_manager_t::run_solver() pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; pdlp_settings.num_gpus = context.settings.num_gpus; pdlp_settings.presolver = presolver_t::None; - + set_pdlp_solver_mode(pdlp_settings); timer_t lp_timer(lp_time_limit); auto lp_result = solve_lp_with_method(*problem_ptr, pdlp_settings, lp_timer); diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu index ff89cfc88..682a928b3 100644 --- a/cpp/src/mip_heuristics/mip_scaling_strategy.cu +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -217,7 +217,6 @@ void scale_objective(cuopt::linear_programming::optimization_problem_t }); op_problem.set_objective_scaling_factor(op_problem.get_objective_scaling_factor() / obj_scaling_coefficient); - op_problem.set_objective_offset(op_problem.get_objective_offset() * obj_scaling_coefficient); CUOPT_LOG_INFO("MIP objective scaling applied: min_abs_coeff=%g scale=%g", static_cast(min_abs_objective_coefficient), diff --git a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp index 5a89393a6..92b2f9f21 100644 --- a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp +++ b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp @@ -662,6 +662,9 @@ std::optional> third_party_presolve_t( papilo_problem, op_problem.get_handle_ptr(), category, maximize_); + opt_problem.set_problem_name(op_problem.get_problem_name()); + opt_problem.set_objective_offset(op_problem.get_objective_offset()); + opt_problem.set_objective_scaling_factor(op_problem.get_objective_scaling_factor()); auto col_flags = papilo_problem.getColFlags(); std::vector implied_integer_indices; for (size_t i = 0; i < col_flags.size(); i++) { From 071ec5df582926965ba65dc1004f69b5a721ef29 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 4 Mar 2026 06:03:54 -0800 Subject: [PATCH 42/44] with stable 3 --- cpp/src/mip_heuristics/diversity/diversity_manager.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 19f25bb10..8a397b3f1 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -406,7 +406,7 @@ solution_t diversity_manager_t::run_solver() pdlp_settings.concurrent_halt = &global_concurrent_halt; pdlp_settings.method = method_t::Concurrent; pdlp_settings.inside_mip = true; - pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; + pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; pdlp_settings.num_gpus = context.settings.num_gpus; pdlp_settings.presolver = presolver_t::None; set_pdlp_solver_mode(pdlp_settings); From a1030a79cd584f5c77ae609d039b48db920489ff Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 4 Mar 2026 08:48:43 -0800 Subject: [PATCH 43/44] fix issues solver mode 2 --- cpp/src/dual_simplex/user_problem.hpp | 2 +- cpp/src/mip_heuristics/diversity/diversity_manager.cu | 2 +- cpp/src/mip_heuristics/presolve/third_party_presolve.cpp | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/dual_simplex/user_problem.hpp b/cpp/src/dual_simplex/user_problem.hpp index f50a6d33a..73c4c391b 100644 --- a/cpp/src/dual_simplex/user_problem.hpp +++ b/cpp/src/dual_simplex/user_problem.hpp @@ -46,7 +46,7 @@ struct user_problem_t { std::vector row_names; std::vector col_names; f_t obj_constant; - f_t obj_scale; // 1.0 for min, -1.0 for max + f_t obj_scale; // positive for min, netagive for max bool objective_is_integral{false}; std::vector var_types; std::vector Q_offsets; diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 8a397b3f1..19f25bb10 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -406,7 +406,7 @@ solution_t diversity_manager_t::run_solver() pdlp_settings.concurrent_halt = &global_concurrent_halt; pdlp_settings.method = method_t::Concurrent; pdlp_settings.inside_mip = true; - pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; + pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; pdlp_settings.num_gpus = context.settings.num_gpus; pdlp_settings.presolver = presolver_t::None; set_pdlp_solver_mode(pdlp_settings); diff --git a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp index 92b2f9f21..8aebacd72 100644 --- a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp +++ b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp @@ -662,9 +662,10 @@ std::optional> third_party_presolve_t( papilo_problem, op_problem.get_handle_ptr(), category, maximize_); + // metadata from original optimization problem that is not filled opt_problem.set_problem_name(op_problem.get_problem_name()); - opt_problem.set_objective_offset(op_problem.get_objective_offset()); opt_problem.set_objective_scaling_factor(op_problem.get_objective_scaling_factor()); + // when an objective offset outside (e.g. from mps file), handle accordingly auto col_flags = papilo_problem.getColFlags(); std::vector implied_integer_indices; for (size_t i = 0; i < col_flags.size(); i++) { From abc5583ad87195eb8bad3d0d0781a6bf20dd1016 Mon Sep 17 00:00:00 2001 From: akifcorduk Date: Wed, 4 Mar 2026 08:49:05 -0800 Subject: [PATCH 44/44] fix issues solver mode 3 --- cpp/src/mip_heuristics/diversity/diversity_manager.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 19f25bb10..8a397b3f1 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -406,7 +406,7 @@ solution_t diversity_manager_t::run_solver() pdlp_settings.concurrent_halt = &global_concurrent_halt; pdlp_settings.method = method_t::Concurrent; pdlp_settings.inside_mip = true; - pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; + pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable3; pdlp_settings.num_gpus = context.settings.num_gpus; pdlp_settings.presolver = presolver_t::None; set_pdlp_solver_mode(pdlp_settings);