diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index 9b79dff8af..e2a244844b 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -209,6 +209,7 @@ int run_single_file(std::string file_path, settings.tolerances.absolute_tolerance = 1e-6; settings.presolver = cuopt::linear_programming::presolver_t::Default; settings.reliability_branching = reliability_branching; + settings.mip_scaling = true; settings.seed = 42; cuopt::linear_programming::benchmark_info_t benchmark_info; settings.benchmark_info_ptr = &benchmark_info; diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 6d32cd5ed9..60534a36fa 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -107,7 +107,7 @@ class mip_solver_settings_t { /** Initial primal solutions */ std::vector>> initial_solutions; - bool mip_scaling = false; + bool mip_scaling = true; presolver_t presolver{presolver_t::Default}; /** * @brief Determinism mode for MIP solver. 
diff --git a/cpp/src/dual_simplex/user_problem.hpp b/cpp/src/dual_simplex/user_problem.hpp index f50a6d33a5..73c4c391be 100644 --- a/cpp/src/dual_simplex/user_problem.hpp +++ b/cpp/src/dual_simplex/user_problem.hpp @@ -46,7 +46,7 @@ struct user_problem_t { std::vector row_names; std::vector col_names; f_t obj_constant; - f_t obj_scale; // 1.0 for min, -1.0 for max + f_t obj_scale; // positive for min, negative for max bool objective_is_integral{false}; std::vector var_types; std::vector Q_offsets; diff --git a/cpp/src/mip_heuristics/CMakeLists.txt b/cpp/src/mip_heuristics/CMakeLists.txt index 538e3c49ac..4107366546 100644 --- a/cpp/src/mip_heuristics/CMakeLists.txt +++ b/cpp/src/mip_heuristics/CMakeLists.txt @@ -18,6 +18,7 @@ set(MIP_LP_NECESSARY_FILES # Files that are MIP-specific and not needed for pure LP set(MIP_NON_LP_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/mip_scaling_strategy.cu ${CMAKE_CURRENT_SOURCE_DIR}/solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/solver.cu ${CMAKE_CURRENT_SOURCE_DIR}/diversity/assignment_hash_map.cu diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 4fa0d3f4ab..3a40039823 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -177,6 +177,7 @@ bool diversity_manager_t::run_presolve(f_t time_limit) raft::common::nvtx::range fun_scope("run_presolve"); CUOPT_LOG_INFO("Running presolve!"); timer_t presolve_timer(time_limit); + auto term_crit = ls.constraint_prop.bounds_update.solve(*problem_ptr); if (ls.constraint_prop.bounds_update.infeas_constraints_count > 0) { stats.presolve_time = timer.elapsed_time(); @@ -390,14 +391,14 @@ solution_t diversity_manager_t::run_solver() } else if (!fj_only_run) { convert_greater_to_less(*problem_ptr); - f_t tolerance_divisor = - problem_ptr->tolerances.absolute_tolerance / problem_ptr->tolerances.relative_tolerance; - if (tolerance_divisor == 0) { tolerance_divisor = 
1; } f_t absolute_tolerance = context.settings.tolerances.absolute_tolerance; + f_t tolerance_divisor = 100.; pdlp_solver_settings_t pdlp_settings{}; - pdlp_settings.tolerances.relative_primal_tolerance = absolute_tolerance / tolerance_divisor; + pdlp_settings.tolerances.absolute_dual_tolerance = absolute_tolerance; pdlp_settings.tolerances.relative_dual_tolerance = absolute_tolerance / tolerance_divisor; + pdlp_settings.tolerances.absolute_primal_tolerance = absolute_tolerance; + pdlp_settings.tolerances.relative_primal_tolerance = absolute_tolerance / tolerance_divisor; pdlp_settings.time_limit = lp_time_limit; pdlp_settings.first_primal_feasible = false; pdlp_settings.concurrent_halt = &global_concurrent_halt; @@ -406,7 +407,7 @@ solution_t diversity_manager_t::run_solver() pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; pdlp_settings.num_gpus = context.settings.num_gpus; pdlp_settings.presolver = presolver_t::None; - + set_pdlp_solver_mode(pdlp_settings); timer_t lp_timer(lp_time_limit); auto lp_result = solve_lp_with_method(*problem_ptr, pdlp_settings, lp_timer); diff --git a/cpp/src/mip_heuristics/diversity/lns/rins.cu b/cpp/src/mip_heuristics/diversity/lns/rins.cu index 7fd8533f82..8ecbb2b369 100644 --- a/cpp/src/mip_heuristics/diversity/lns/rins.cu +++ b/cpp/src/mip_heuristics/diversity/lns/rins.cu @@ -223,8 +223,7 @@ void rins_t::run_rins() std::vector> rins_solution_queue; - mip_solver_context_t fj_context( - &rins_handle, &fixed_problem, context.settings, context.scaling); + mip_solver_context_t fj_context(&rins_handle, &fixed_problem, context.settings); fj_t fj(fj_context); solution_t fj_solution(fixed_problem); fj_solution.copy_new_assignment(cuopt::host_copy(fixed_assignment, rins_handle.get_stream())); diff --git a/cpp/src/mip_heuristics/diversity/population.cu b/cpp/src/mip_heuristics/diversity/population.cu index bca87223d9..bb0fdd6d11 100644 --- a/cpp/src/mip_heuristics/diversity/population.cu +++ 
b/cpp/src/mip_heuristics/diversity/population.cu @@ -265,10 +265,6 @@ void population_t::invoke_get_solution_callback( f_t user_bound = context.stats.get_solution_bound(); solution_t temp_sol(sol); problem_ptr->post_process_assignment(temp_sol.assignment); - if (context.settings.mip_scaling) { - rmm::device_uvector dummy(0, temp_sol.handle_ptr->get_stream()); - context.scaling.unscale_solutions(temp_sol.assignment, dummy); - } if (problem_ptr->has_papilo_presolve_data()) { problem_ptr->papilo_uncrush_assignment(temp_sol.assignment); } @@ -308,10 +304,8 @@ void population_t::run_solution_callbacks(solution_t& sol) invoke_get_solution_callback(sol, get_sol_callback); } } - // save the best objective here, because we might not have been able to return the solution to - // the user because of the unscaling that causes infeasibility. - // This prevents an issue of repaired, or a fully feasible solution being reported in the call - // back in next run. + // Save the best objective here even if callback handling later exits early. + // This prevents older solutions from being reported as "new best" in subsequent callbacks. 
best_feasible_objective = sol.get_objective(); } @@ -344,7 +338,6 @@ void population_t::run_solution_callbacks(solution_t& sol) incumbent_assignment.size(), sol.handle_ptr->get_stream()); - if (context.settings.mip_scaling) { context.scaling.scale_solutions(incumbent_assignment); } bool is_valid = problem_ptr->pre_process_assignment(incumbent_assignment); if (!is_valid) { return; } cuopt_assert(outside_sol.assignment.size() == incumbent_assignment.size(), diff --git a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh index b2f7f80066..7f0d9fc4aa 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh @@ -71,19 +71,6 @@ class sub_mip_recombiner_t : public recombiner_t { "n_vars_from_guiding %d n_vars_from_other %d", n_vars_from_guiding, n_vars_from_other); this->compute_vars_to_fix(offspring, vars_to_fix, n_vars_from_other, n_vars_from_guiding); auto [fixed_problem, fixed_assignment, variable_map] = offspring.fix_variables(vars_to_fix); - // TODO ask Akif and Alice if this is ok - pdlp_initial_scaling_strategy_t scaling( - fixed_problem.handle_ptr, - fixed_problem, - context.settings.hyper_params.default_l_inf_ruiz_iterations, - (f_t)context.settings.hyper_params.default_alpha_pock_chambolle_rescaling, - fixed_problem.reverse_coefficients, - fixed_problem.reverse_offsets, - fixed_problem.reverse_constraints, - nullptr, - context.settings.hyper_params, - true); - scaling.scale_problem(); fixed_problem.presolve_data.reset_additional_vars(fixed_problem, offspring.handle_ptr); fixed_problem.presolve_data.initialize_var_mapping(fixed_problem, offspring.handle_ptr); trivial_presolve(fixed_problem); @@ -141,8 +128,6 @@ class sub_mip_recombiner_t : public recombiner_t { offspring.handle_ptr->sync_stream(); } if (solution_vector.size() > 0) { - rmm::device_uvector dummy(0, offspring.handle_ptr->get_stream()); - 
scaling.unscale_solutions(fixed_assignment, dummy); // unfix the assignment on given result no matter if it is feasible offspring.unfix_variables(fixed_assignment, variable_map); offspring @@ -174,8 +159,6 @@ class sub_mip_recombiner_t : public recombiner_t { solution.size(), offspring.handle_ptr->get_stream()); fixed_problem.post_process_assignment(fixed_assignment, false); - rmm::device_uvector dummy(0, offspring.handle_ptr->get_stream()); - scaling.unscale_solutions(fixed_assignment, dummy); sol.unfix_variables(fixed_assignment, variable_map); sol.clamp_within_bounds(); // Scaling might bring some very slight variable bound violations sol.compute_feasibility(); diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu new file mode 100644 index 0000000000..682a928b34 --- /dev/null +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -0,0 +1,471 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming::detail { + +constexpr int row_scaling_max_iterations = 8; +constexpr int row_scaling_k_min = -20; +constexpr int row_scaling_k_max = 20; +constexpr int row_scaling_big_m_soft_k_min = -4; +constexpr int row_scaling_big_m_soft_k_max = 0; +constexpr double row_scaling_spread_rel_tol = 1.0e-2; +constexpr double nearest_pow2_mantissa_threshold = 0.7071067811865476; +constexpr double min_abs_objective_coefficient_threshold = 1.0e-4; + +constexpr double big_m_abs_threshold = 1.0e4; +constexpr double big_m_ratio_threshold = 1.0e4; + +template +struct abs_value_transform_t { + __device__ f_t operator()(f_t value) const { return raft::abs(value); } +}; + +template +struct nonzero_abs_or_inf_transform_t { + __device__ f_t operator()(f_t value) const + { + const f_t abs_value = raft::abs(value); + return abs_value > f_t(0) ? abs_value : std::numeric_limits::infinity(); + } +}; + +template +struct nonzero_count_transform_t { + __device__ i_t operator()(f_t value) const { return raft::abs(value) > f_t(0) ? i_t(1) : i_t(0); } +}; + +template +struct max_op_t { + __host__ __device__ item_t operator()(const item_t& lhs, const item_t& rhs) const + { + return lhs > rhs ? lhs : rhs; + } +}; + +template +struct min_op_t { + __host__ __device__ item_t operator()(const item_t& lhs, const item_t& rhs) const + { + return lhs < rhs ? 
lhs : rhs; + } +}; + +template +void compute_row_inf_norm( + const cuopt::linear_programming::optimization_problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_inf_norm, + rmm::cuda_stream_view stream_view) +{ + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + auto coeff_abs_iter = + thrust::make_transform_iterator(matrix_values.data(), abs_value_transform_t{}); + size_t current_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + current_bytes, + coeff_abs_iter, + row_inf_norm.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + max_op_t{}, + f_t(0), + stream_view)); +} + +template +void compute_big_m_skip_rows( + const cuopt::linear_programming::optimization_problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_inf_norm, + rmm::device_uvector& row_min_nonzero, + rmm::device_uvector& row_nonzero_count, + rmm::device_uvector& row_skip_scaling) +{ + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + const auto stream_view = op_problem.get_handle_ptr()->get_stream(); + auto coeff_abs_iter = + thrust::make_transform_iterator(matrix_values.data(), abs_value_transform_t{}); + auto coeff_nonzero_min_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_abs_or_inf_transform_t{}); + auto coeff_nonzero_count_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_count_transform_t{}); + + size_t max_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + max_bytes, + coeff_abs_iter, + row_inf_norm.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 
1, + max_op_t{}, + f_t(0), + stream_view)); + size_t min_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + min_bytes, + coeff_nonzero_min_iter, + row_min_nonzero.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + min_op_t{}, + std::numeric_limits::infinity(), + stream_view)); + size_t count_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + count_bytes, + coeff_nonzero_count_iter, + row_nonzero_count.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + thrust::plus{}, + i_t(0), + stream_view)); + + auto row_begin = thrust::make_zip_iterator( + thrust::make_tuple(row_inf_norm.begin(), row_min_nonzero.begin(), row_nonzero_count.begin())); + auto row_end = thrust::make_zip_iterator( + thrust::make_tuple(row_inf_norm.end(), row_min_nonzero.end(), row_nonzero_count.end())); + thrust::transform( + op_problem.get_handle_ptr()->get_thrust_policy(), + row_begin, + row_end, + row_skip_scaling.begin(), + [] __device__(auto row_info) -> i_t { + const f_t row_norm = thrust::get<0>(row_info); + const f_t row_min_non_zero = thrust::get<1>(row_info); + const i_t row_non_zero_size = thrust::get<2>(row_info); + if (row_non_zero_size < i_t(2) || row_min_non_zero >= std::numeric_limits::infinity()) { + return i_t(0); + } + + const f_t row_ratio = row_norm / row_min_non_zero; + return row_norm >= static_cast(big_m_abs_threshold) && + row_ratio >= static_cast(big_m_ratio_threshold) + ? 
i_t(1) + : i_t(0); + }); +} + +template +void scale_objective(cuopt::linear_programming::optimization_problem_t& op_problem) +{ + auto& objective_coefficients = op_problem.get_objective_coefficients(); + const f_t min_abs_objective_coefficient = + thrust::transform_reduce(op_problem.get_handle_ptr()->get_thrust_policy(), + objective_coefficients.begin(), + objective_coefficients.end(), + nonzero_abs_or_inf_transform_t{}, + std::numeric_limits::infinity(), + min_op_t{}); + + if (!std::isfinite(min_abs_objective_coefficient) || min_abs_objective_coefficient <= f_t(0) || + min_abs_objective_coefficient >= static_cast(min_abs_objective_coefficient_threshold)) { + return; + } + + const f_t obj_scaling_coefficient = + static_cast(min_abs_objective_coefficient_threshold) / min_abs_objective_coefficient; + if (!std::isfinite(obj_scaling_coefficient) || obj_scaling_coefficient <= f_t(1)) { return; } + + thrust::transform(op_problem.get_handle_ptr()->get_thrust_policy(), + objective_coefficients.begin(), + objective_coefficients.end(), + objective_coefficients.begin(), + [obj_scaling_coefficient] __device__(f_t objective_coefficient) -> f_t { + return objective_coefficient * obj_scaling_coefficient; + }); + op_problem.set_objective_scaling_factor(op_problem.get_objective_scaling_factor() / + obj_scaling_coefficient); + + CUOPT_LOG_INFO("MIP objective scaling applied: min_abs_coeff=%g scale=%g", + static_cast(min_abs_objective_coefficient), + static_cast(obj_scaling_coefficient)); +} + +template +mip_scaling_strategy_t::mip_scaling_strategy_t( + typename mip_scaling_strategy_t::optimization_problem_type_t& op_problem_scaled) + : handle_ptr_(op_problem_scaled.get_handle_ptr()), + stream_view_(handle_ptr_->get_stream()), + op_problem_scaled_(op_problem_scaled) +{ +} + +template +size_t dry_run_cub(const cuopt::linear_programming::optimization_problem_t& op_problem, + i_t n_rows, + rmm::device_uvector& row_inf_norm, + rmm::device_uvector& row_min_nonzero, + rmm::device_uvector& 
row_nonzero_count, + rmm::cuda_stream_view stream_view) +{ + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + size_t temp_storage_bytes = 0; + size_t current_required_bytes = 0; + + auto coeff_abs_iter = + thrust::make_transform_iterator(matrix_values.data(), abs_value_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_abs_iter, + row_inf_norm.data(), + n_rows, + matrix_offsets.data(), + matrix_offsets.data() + 1, + max_op_t{}, + f_t(0), + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + + auto coeff_nonzero_min_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_abs_or_inf_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_nonzero_min_iter, + row_min_nonzero.data(), + n_rows, + matrix_offsets.data(), + matrix_offsets.data() + 1, + min_op_t{}, + std::numeric_limits::infinity(), + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + + auto coeff_nonzero_count_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_count_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_nonzero_count_iter, + row_nonzero_count.data(), + n_rows, + matrix_offsets.data(), + matrix_offsets.data() + 1, + thrust::plus{}, + i_t(0), + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + + return temp_storage_bytes; +} + +template +void mip_scaling_strategy_t::scale_problem() +{ + raft::common::nvtx::range fun_scope("mip_scale_problem"); + + auto& matrix_values = op_problem_scaled_.get_constraint_matrix_values(); + auto& matrix_offsets = op_problem_scaled_.get_constraint_matrix_offsets(); + auto& constraint_lower_bounds = 
op_problem_scaled_.get_constraint_lower_bounds(); + auto& constraint_upper_bounds = op_problem_scaled_.get_constraint_upper_bounds(); + const i_t n_rows = op_problem_scaled_.get_n_constraints(); + const i_t n_cols = op_problem_scaled_.get_n_variables(); + const i_t nnz = op_problem_scaled_.get_nnz(); + + scale_objective(op_problem_scaled_); + + if (n_rows == 0 || nnz <= 0) { return; } + + rmm::device_uvector row_inf_norm(static_cast(n_rows), stream_view_); + rmm::device_uvector row_min_nonzero(static_cast(n_rows), stream_view_); + rmm::device_uvector row_nonzero_count(static_cast(n_rows), stream_view_); + rmm::device_uvector row_skip_scaling(static_cast(n_rows), stream_view_); + thrust::fill( + handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(0)); + rmm::device_uvector iteration_scaling(static_cast(n_rows), stream_view_); + rmm::device_uvector coefficient_row_index(static_cast(nnz), stream_view_); + + thrust::upper_bound(handle_ptr_->get_thrust_policy(), + matrix_offsets.begin(), + matrix_offsets.end(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(nnz), + coefficient_row_index.begin()); + thrust::transform( + handle_ptr_->get_thrust_policy(), + coefficient_row_index.begin(), + coefficient_row_index.end(), + coefficient_row_index.begin(), + [] __device__(i_t row_upper_bound_idx) -> i_t { return row_upper_bound_idx - 1; }); + + size_t temp_storage_bytes = dry_run_cub( + op_problem_scaled_, n_rows, row_inf_norm, row_min_nonzero, row_nonzero_count, stream_view_); + + rmm::device_uvector temp_storage(temp_storage_bytes, stream_view_); + compute_big_m_skip_rows(op_problem_scaled_, + temp_storage, + temp_storage_bytes, + row_inf_norm, + row_min_nonzero, + row_nonzero_count, + row_skip_scaling); + + i_t big_m_rows = thrust::count( + handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(1)); + + CUOPT_LOG_INFO("MIP row scaling start: rows=%d cols=%d max_iterations=%d 
soft_big_m_rows=%d", + n_rows, + n_cols, + row_scaling_max_iterations, + big_m_rows); + + double previous_row_log2_spread = std::numeric_limits::infinity(); + for (int iteration = 0; iteration < row_scaling_max_iterations; ++iteration) { + compute_row_inf_norm( + op_problem_scaled_, temp_storage, temp_storage_bytes, row_inf_norm, stream_view_); + + using row_stats_t = thrust::tuple; + auto row_log2_stats = thrust::transform_reduce( + handle_ptr_->get_thrust_policy(), + row_inf_norm.begin(), + row_inf_norm.end(), + [] __device__(f_t row_norm) -> row_stats_t { + if (row_norm == f_t(0)) { + return {0.0, + 0.0, + std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}; + } + const double row_log2 = log2(static_cast(row_norm)); + return {row_log2, 1.0, row_log2, row_log2}; + }, + row_stats_t{0.0, + 0.0, + std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}, + [] __device__(row_stats_t a, row_stats_t b) -> row_stats_t { + return {thrust::get<0>(a) + thrust::get<0>(b), + thrust::get<1>(a) + thrust::get<1>(b), + min_op_t{}(thrust::get<2>(a), thrust::get<2>(b)), + max_op_t{}(thrust::get<3>(a), thrust::get<3>(b))}; + }); + const double log2_sum = thrust::get<0>(row_log2_stats); + const i_t active_row_count = static_cast(thrust::get<1>(row_log2_stats)); + if (active_row_count == 0) { break; } + const double row_log2_spread = thrust::get<3>(row_log2_stats) - thrust::get<2>(row_log2_stats); + if (std::isfinite(previous_row_log2_spread)) { + const double spread_improvement = previous_row_log2_spread - row_log2_spread; + if (spread_improvement <= + row_scaling_spread_rel_tol * std::max(1.0, previous_row_log2_spread)) { + break; + } + } + previous_row_log2_spread = row_log2_spread; + + f_t target_norm = static_cast(exp2(log2_sum / static_cast(active_row_count))); + cuopt_assert(isfinite(target_norm), "target_norm must be finite"); + cuopt_assert(target_norm > f_t(0), "target_norm must be positive"); + + thrust::transform( + 
handle_ptr_->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.begin(), row_skip_scaling.begin())), + thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.end(), row_skip_scaling.end())), + iteration_scaling.begin(), + [target_norm] __device__(auto row_info) -> f_t { + const f_t row_norm = thrust::get<0>(row_info); + const i_t is_big_m = thrust::get<1>(row_info); + if (row_norm == f_t(0)) { return f_t(1); } + + const f_t desired_scaling = target_norm / row_norm; + if (!isfinite(desired_scaling) || desired_scaling <= f_t(0)) { return f_t(1); } + + int exponent = 0; + const f_t mantissa = + frexp(desired_scaling, &exponent); // desired_scaling = mantissa * 2^exponent + int k = + mantissa >= static_cast(nearest_pow2_mantissa_threshold) ? exponent : exponent - 1; + // Clamp to avoid overscaling, and softly limit scaling on big-M rows. + if (is_big_m) { + if (k < row_scaling_big_m_soft_k_min) { k = row_scaling_big_m_soft_k_min; } + if (k > row_scaling_big_m_soft_k_max) { k = row_scaling_big_m_soft_k_max; } + } else { + if (k < row_scaling_k_min) { k = row_scaling_k_min; } + if (k > row_scaling_k_max) { k = row_scaling_k_max; } + } + return ldexp(f_t(1), k); + }); + + i_t scaled_rows = + thrust::count_if(handle_ptr_->get_thrust_policy(), + iteration_scaling.begin(), + iteration_scaling.end(), + [] __device__(f_t row_scale) -> bool { return row_scale != f_t(1); }); + if (scaled_rows == 0) { break; } + + thrust::transform( + handle_ptr_->get_thrust_policy(), + matrix_values.begin(), + matrix_values.end(), + thrust::make_permutation_iterator(iteration_scaling.begin(), coefficient_row_index.begin()), + matrix_values.begin(), + thrust::multiplies{}); + + thrust::transform(handle_ptr_->get_thrust_policy(), + constraint_lower_bounds.begin(), + constraint_lower_bounds.end(), + iteration_scaling.begin(), + constraint_lower_bounds.begin(), + thrust::multiplies{}); + thrust::transform(handle_ptr_->get_thrust_policy(), + 
constraint_upper_bounds.begin(), + constraint_upper_bounds.end(), + iteration_scaling.begin(), + constraint_upper_bounds.begin(), + thrust::multiplies{}); + } + + CUOPT_LOG_INFO("MIP row scaling completed"); +} + +#define INSTANTIATE(F_TYPE) template class mip_scaling_strategy_t; + +#if MIP_INSTANTIATE_FLOAT +INSTANTIATE(float) +#endif + +#if MIP_INSTANTIATE_DOUBLE +INSTANTIATE(double) +#endif + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cuh b/cpp/src/mip_heuristics/mip_scaling_strategy.cuh new file mode 100644 index 0000000000..0fc83b8cb9 --- /dev/null +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cuh @@ -0,0 +1,32 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include + +#include + +#include + +namespace cuopt::linear_programming::detail { + +template +class mip_scaling_strategy_t { + public: + using optimization_problem_type_t = cuopt::linear_programming::optimization_problem_t; + explicit mip_scaling_strategy_t(optimization_problem_type_t& op_problem_scaled); + + void scale_problem(); + + private: + raft::handle_t const* handle_ptr_{nullptr}; + rmm::cuda_stream_view stream_view_; + optimization_problem_type_t& op_problem_scaled_; +}; + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp index 5a89393a6a..8aebacd724 100644 --- a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp +++ b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp @@ -662,6 +662,10 @@ std::optional> third_party_presolve_t( papilo_problem, op_problem.get_handle_ptr(), category, maximize_); + // metadata from original optimization problem that is not filled + opt_problem.set_problem_name(op_problem.get_problem_name()); 
+ opt_problem.set_objective_scaling_factor(op_problem.get_objective_scaling_factor()); + // when an objective offset outside (e.g. from mps file), handle accordingly auto col_flags = papilo_problem.getColFlags(); std::vector implied_integer_indices; for (size_t i = 0; i < col_flags.size(); i++) { diff --git a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu index e2bbc8feb1..84415f5372 100644 --- a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu +++ b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu @@ -8,7 +8,6 @@ #include "relaxed_lp.cuh" #include -#include #include #include #include diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index f5a2172f2e..384230c8bd 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -9,13 +9,13 @@ #include #include +#include #include #include #include #include #include -#include #include #include #include @@ -64,12 +64,7 @@ mip_solution_t run_mip(detail::problem_t& problem, timer_t& timer) { raft::common::nvtx::range fun_scope("run_mip"); - auto constexpr const running_mip = true; - // TODO ask Akif and Alice how was this passed down? 
- auto hyper_params = settings.hyper_params; - hyper_params.update_primal_weight_on_initial_solution = false; - hyper_params.update_step_size_on_initial_solution = true; if (settings.get_mip_callbacks().size() > 0) { auto callback_num_variables = problem.original_problem_ptr->get_n_variables(); if (problem.has_papilo_presolve_data()) { @@ -133,33 +128,12 @@ mip_solution_t run_mip(detail::problem_t& problem, "Size mismatch"); cuopt_assert(problem.original_problem_ptr->get_n_constraints() == scaled_problem.n_constraints, "Size mismatch"); - detail::pdlp_initial_scaling_strategy_t scaling( - scaled_problem.handle_ptr, - scaled_problem, - hyper_params.default_l_inf_ruiz_iterations, - (f_t)hyper_params.default_alpha_pock_chambolle_rescaling, - scaled_problem.reverse_coefficients, - scaled_problem.reverse_offsets, - scaled_problem.reverse_constraints, - nullptr, - hyper_params, - running_mip); - - cuopt_func_call(auto saved_problem = scaled_problem); - if (settings.mip_scaling) { - scaling.scale_problem(); - if (settings.initial_solutions.size() > 0) { - for (const auto& initial_solution : settings.initial_solutions) { - scaling.scale_primal(*initial_solution); - } - } - } + // only call preprocess on scaled problem, so we can compute feasibility on the original problem scaled_problem.preprocess_problem(); - // cuopt_func_call((check_scaled_problem(scaled_problem, saved_problem))); detail::trivial_presolve(scaled_problem); - detail::mip_solver_t solver(scaled_problem, settings, scaling, timer); + detail::mip_solver_t solver(scaled_problem, settings, timer); if (timer.check_time_limit()) { CUOPT_LOG_INFO("Time limit reached before main solve"); detail::solution_t sol(problem); @@ -170,17 +144,16 @@ mip_solution_t run_mip(detail::problem_t& problem, auto scaled_sol = solver.run_solver(); bool is_feasible_before_scaling = scaled_sol.get_feasible(); scaled_sol.problem_ptr = &problem; - if (settings.mip_scaling) { scaling.unscale_solutions(scaled_sol); } // at this point we 
need to compute the feasibility on the original problem not the presolved one bool is_feasible_after_unscaling = scaled_sol.compute_feasibility(); if (!scaled_problem.empty && is_feasible_before_scaling != is_feasible_after_unscaling) { CUOPT_LOG_WARN( - "The feasibility does not match on scaled and unscaled problems. To overcome this issue, " + "The feasibility does not match on scaled and original problems. To overcome this issue, " "please provide a more numerically stable problem."); } auto sol = scaled_sol.get_solution( - is_feasible_before_scaling || is_feasible_after_unscaling, solver.get_solver_stats(), false); + is_feasible_after_unscaling || is_feasible_before_scaling, solver.get_solver_stats(), false); int hidesol = std::getenv("CUOPT_MIP_HIDE_SOLUTION") ? atoi(std::getenv("CUOPT_MIP_HIDE_SOLUTION")) : 0; @@ -241,7 +214,8 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, } auto timer = timer_t(time_limit); - + detail::mip_scaling_strategy_t scaling(op_problem); + // scaling.scale_problem(); double presolve_time = 0.0; std::unique_ptr> presolver; std::optional> presolve_result; diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index 235d4500d2..1f263bd3a9 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -41,14 +41,10 @@ static void init_handler(const raft::handle_t* handle_ptr) template mip_solver_t::mip_solver_t(const problem_t& op_problem, const mip_solver_settings_t& solver_settings, - pdlp_initial_scaling_strategy_t& scaling, timer_t timer) : op_problem_(op_problem), solver_settings_(solver_settings), - context(op_problem.handle_ptr, - const_cast*>(&op_problem), - solver_settings, - scaling), + context(op_problem.handle_ptr, const_cast*>(&op_problem), solver_settings), timer_(timer) { init_handler(op_problem.handle_ptr); diff --git a/cpp/src/mip_heuristics/solver.cuh b/cpp/src/mip_heuristics/solver.cuh index 1b5fe17244..9b9024a1dc 100644 --- 
a/cpp/src/mip_heuristics/solver.cuh +++ b/cpp/src/mip_heuristics/solver.cuh @@ -20,7 +20,6 @@ class mip_solver_t { public: explicit mip_solver_t(const problem_t& op_problem, const mip_solver_settings_t& solver_settings, - pdlp_initial_scaling_strategy_t& scaling, timer_t timer); solution_t run_solver(); diff --git a/cpp/src/mip_heuristics/solver_context.cuh b/cpp/src/mip_heuristics/solver_context.cuh index baac1dd9d6..d68a19bb61 100644 --- a/cpp/src/mip_heuristics/solver_context.cuh +++ b/cpp/src/mip_heuristics/solver_context.cuh @@ -9,7 +9,6 @@ #include #include -#include #include #include @@ -34,9 +33,8 @@ template struct mip_solver_context_t { explicit mip_solver_context_t(raft::handle_t const* handle_ptr_, problem_t* problem_ptr_, - mip_solver_settings_t settings_, - pdlp_initial_scaling_strategy_t& scaling) - : handle_ptr(handle_ptr_), problem_ptr(problem_ptr_), settings(settings_), scaling(scaling) + mip_solver_settings_t settings_) + : handle_ptr(handle_ptr_), problem_ptr(problem_ptr_), settings(settings_) { cuopt_assert(problem_ptr != nullptr, "problem_ptr is nullptr"); stats.set_solution_bound(problem_ptr->maximize ? std::numeric_limits::infinity() @@ -53,7 +51,6 @@ struct mip_solver_context_t { diversity_manager_t* diversity_manager_ptr{nullptr}; std::atomic preempt_heuristic_solver_ = false; const mip_solver_settings_t settings; - pdlp_initial_scaling_strategy_t& scaling; solver_stats_t stats; // Work limit context for tracking work units in deterministic mode (shared across all timers in // GPU heuristic loop) diff --git a/cpp/src/mip_heuristics/utils.cuh b/cpp/src/mip_heuristics/utils.cuh index 33712635e9..ffadc1f510 100644 --- a/cpp/src/mip_heuristics/utils.cuh +++ b/cpp/src/mip_heuristics/utils.cuh @@ -339,8 +339,9 @@ static void inline run_device_lambda(const rmm::cuda_stream_view& stream, Func f template f_t compute_rel_mip_gap(f_t user_obj, f_t solution_bound) { - if (user_obj == 0.0) { - return solution_bound == 0.0 ? 
0.0 : std::numeric_limits::infinity(); + if (integer_equal(user_obj, 0.0, 1e-6)) { + return integer_equal(solution_bound, 0.0, 1e-6) ? 0.0 + : std::numeric_limits::infinity(); } return std::abs(user_obj - solution_bound) / std::abs(user_obj); } diff --git a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu index afa3ee5fb7..3231650905 100644 --- a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu +++ b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu @@ -15,7 +15,6 @@ #include #include -#include #include #include #include @@ -195,8 +194,6 @@ void pdlp_initial_scaling_strategy_t::ruiz_inf_scaling(i_t number_of_r op_problem_scaled_.view(), this->view()); RAFT_CUDA_TRY(cudaPeekAtLastError()); - if (running_mip_) { reset_integer_variables(); } - raft::linalg::binaryOp(cummulative_constraint_matrix_scaling_.data(), cummulative_constraint_matrix_scaling_.data(), iteration_constraint_matrix_scaling_.data(), @@ -219,17 +216,6 @@ void pdlp_initial_scaling_strategy_t::ruiz_inf_scaling(i_t number_of_r } } -template -void pdlp_initial_scaling_strategy_t::reset_integer_variables() -{ - thrust::scatter( - handle_ptr_->get_thrust_policy(), - thrust::make_constant_iterator(1), - thrust::make_constant_iterator(1) + op_problem_scaled_.integer_indices.size(), - op_problem_scaled_.integer_indices.begin(), - iteration_variable_scaling_.begin()); -} - template __global__ void pock_chambolle_scaling_kernel_row( const typename problem_t::view_t op_problem, @@ -345,8 +331,6 @@ void pdlp_initial_scaling_strategy_t::pock_chambolle_scaling(f_t alpha A_T_indices_.data()); RAFT_CUDA_TRY(cudaPeekAtLastError()); - if (running_mip_) { reset_integer_variables(); } - // divide the sqrt of the vectors of the sums from above to the respective scaling vectors // (only if sqrt(sum)>0) raft::linalg::binaryOp(cummulative_constraint_matrix_scaling_.data(), diff --git a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cuh 
b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cuh index 5a3dcfaca2..b8ca2fed2b 100644 --- a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cuh +++ b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cuh @@ -88,7 +88,6 @@ class pdlp_initial_scaling_strategy_t { void compute_scaling_vectors(i_t number_of_ruiz_iterations, f_t alpha); void ruiz_inf_scaling(i_t number_of_ruiz_iterations); void pock_chambolle_scaling(f_t alpha); - void reset_integer_variables(); raft::handle_t const* handle_ptr_{nullptr}; rmm::cuda_stream_view stream_view_; diff --git a/cpp/tests/mip/feasibility_jump_tests.cu b/cpp/tests/mip/feasibility_jump_tests.cu index baa3e9b803..4e8a518522 100644 --- a/cpp/tests/mip/feasibility_jump_tests.cu +++ b/cpp/tests/mip/feasibility_jump_tests.cu @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -77,16 +78,7 @@ static fj_state_t run_fj(std::string test_instance, // run the problem constructor of MIP, so that we do bounds standardization detail::problem_t problem(op_problem); problem.preprocess_problem(); - detail::pdhg_solver_t pdhg_solver(problem.handle_ptr, problem); - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 10, - 1.0, - pdhg_solver, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - true); + detail::mip_scaling_strategy_t scaling(problem); auto settings = mip_solver_settings_t{}; settings.time_limit = 30.; diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu index 5e2f08007d..1f825a26f7 100644 --- a/cpp/tests/mip/load_balancing_test.cu +++ b/cpp/tests/mip/load_balancing_test.cu @@ -9,11 +9,11 @@ #include "mip_utils.cuh" #include +#include #include #include #include #include -#include #include #include #include @@ -128,16 +128,7 @@ void test_multi_probe(std::string path) problem_checking_t::check_problem_representation(op_problem); detail::problem_t problem(op_problem); mip_solver_settings_t 
default_settings{}; - detail::pdhg_solver_t pdhg_solver(problem.handle_ptr, problem); - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 10, - 1.0, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - nullptr, - true); + detail::mip_scaling_strategy_t scaling(problem); - detail::mip_solver_t solver(problem, default_settings, scaling, cuopt::timer_t(0)); + detail::mip_solver_t solver(problem, default_settings, cuopt::timer_t(0)); detail::load_balanced_problem_t lb_problem(problem); detail::load_balanced_bounds_presolve_t lb_prs(lb_problem, solver.context); diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index 073c153486..003220de9b 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -9,10 +9,10 @@ #include "mip_utils.cuh" #include +#include #include #include #include -#include #include #include #include @@ -150,18 +150,7 @@ void test_multi_probe(std::string path) problem_checking_t::check_problem_representation(op_problem); detail::problem_t problem(op_problem); mip_solver_settings_t default_settings{}; - pdlp_hyper_params::pdlp_hyper_params_t hyper_params{}; - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 10, - 1.0, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - nullptr, - hyper_params, - true); - detail::mip_solver_t solver(problem, default_settings, scaling, cuopt::timer_t(0)); + detail::mip_solver_t solver(problem, default_settings, cuopt::timer_t(0)); detail::bound_presolve_t bnd_prb_0(solver.context); detail::bound_presolve_t bnd_prb_1(solver.context); detail::multi_probe_t multi_probe_prs(solver.context); diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py index 59ea62089d..8412c745b5 100644 --- a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py +++
b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py @@ -450,11 +450,6 @@ class SolverConfig(BaseModel): description="Set True to run heuristics only, False to run " "heuristics and branch and bound for MILP", ) - mip_batch_pdlp_strong_branching: Optional[int] = Field( - default=0, - description="Set 1 to enable batch PDLP strong branching " - "in the MIP solver, 0 to disable.", - ) num_cpu_threads: Optional[int] = Field( default=None, description="Set the number of CPU threads to use for branch and bound.", # noqa