diff --git a/src/pcms/field/evaluator/uniform_grid.h b/src/pcms/field/evaluator/uniform_grid.h index 47cfa15f..38299162 100644 --- a/src/pcms/field/evaluator/uniform_grid.h +++ b/src/pcms/field/evaluator/uniform_grid.h @@ -106,13 +106,16 @@ class UniformGridPointEvaluator : public PointEvaluator } // Order-1: multilinear interpolation + // Create local copy to avoid capturing reference in device lambdas in some + // compilers + auto grid_copy = grid_; Kokkos::View cell_dim_indices("cell_dim_indices", num_points, Dim); Kokkos::parallel_for( "compute_cell_dim_indices", Kokkos::RangePolicy(0, num_points), KOKKOS_CLASS_LAMBDA(const LO i) { - auto dim_idx = grid_.GetDimensionedIndex(cell_indices(i)); + auto dim_idx = grid_copy.GetDimensionedIndex(cell_indices(i)); for (unsigned d = 0; d < Dim; ++d) { cell_dim_indices(i, d) = dim_idx[d]; } @@ -132,7 +135,7 @@ class UniformGridPointEvaluator : public PointEvaluator "compute_parametric_coords", Kokkos::RangePolicy(0, num_points), KOKKOS_CLASS_LAMBDA(const LO i) { - auto cell_bbox = grid_.GetCellBBOX(cell_indices(i)); + auto cell_bbox = grid_copy.GetCellBBOX(cell_indices(i)); for (unsigned d = 0; d < Dim; ++d) { Real coord = coordinates(i, d); Real cell_min = cell_bbox.center[d] - cell_bbox.half_width[d]; @@ -256,6 +259,9 @@ class UniformGridEvaluatorFactory : public FieldEvaluatorFactory "is_out_of_bounds", num_points); // Parallel reduction to compute cell indices and count out-of-bounds points + // Create local copy to avoid capturing reference in device lambdas in some + // compilers + auto grid_copy = grid_; size_t num_out_of_bounds = 0; Kokkos::parallel_reduce( "localize_points_on_device", @@ -266,13 +272,13 @@ class UniformGridEvaluatorFactory : public FieldEvaluatorFactory point[d] = coordinates_device(i, d); } - bool out_of_bounds = !grid_.IsPointInBounds(point); + bool out_of_bounds = !grid_copy.IsPointInBounds(point); is_out_of_bounds_device(i) = out_of_bounds; if (out_of_bounds) { local_count += 1; } - cell_indices_device(i) = grid_.ClosestCellID(point); + cell_indices_device(i) = grid_copy.ClosestCellID(point); }, num_out_of_bounds); diff --git a/src/pcms/field/evaluator/uniform_grid_spline.h b/src/pcms/field/evaluator/uniform_grid_spline.h index a25a22a8..576f000a 100644 --- a/src/pcms/field/evaluator/uniform_grid_spline.h +++ b/src/pcms/field/evaluator/uniform_grid_spline.h @@ -16,6 +16,23 @@ namespace pcms { +namespace detail +{ + +struct InitCoordsFunctor +{ + Kokkos::View coords; + Real bot_left; + Real delta; + + KOKKOS_FUNCTION void operator()(LO i) const + { + coords(i) = bot_left + delta * i; + } +}; + +} // namespace detail + template > class UniformGridSplinePointEvaluator2D @@ -34,12 +51,20 @@ class UniformGridSplinePointEvaluator2D { Real dx = grid_.edge_length[0] / grid_.divisions[0]; Real dy = grid_.edge_length[1] / grid_.divisions[1]; - for (LO ix = 0; ix <= grid_.divisions[0]; ++ix) { - x_coords_(ix) = grid_.bot_left[0] + dx * ix; - } - for (LO iy = 0; iy <= grid_.divisions[1]; ++iy) { - y_coords_(iy) = grid_.bot_left[1] + dy * iy; - } + Real bot_left_x = grid_.bot_left[0]; + Real bot_left_y = grid_.bot_left[1]; + LO nx = grid_.divisions[0]; + LO ny = grid_.divisions[1]; + + Kokkos::parallel_for( + "init_x_coords", + Kokkos::RangePolicy(0, nx + 1), + detail::InitCoordsFunctor{x_coords_, bot_left_x, dx}); + + Kokkos::parallel_for( + "init_y_coords", + Kokkos::RangePolicy(0, ny + 1), + detail::InitCoordsFunctor{y_coords_, bot_left_y, dy}); } void Evaluate( @@ -149,8 +174,8 @@ class UniformGridSplinePointEvaluator2D const UniformGrid<2>& grid_; UniformGridFieldLocalizationHint<2> hint_; Real fill_value_; - Kokkos::View x_coords_; - Kokkos::View y_coords_; + Kokkos::View x_coords_; + Kokkos::View y_coords_; }; class UniformGridSplineEvaluatorFactory2D : public FieldEvaluatorFactory @@ -219,6 +244,8 @@ class UniformGridSplineEvaluatorFactory2D : public FieldEvaluatorFactory // Parallel reduction to compute cell indices and count out-of-bounds points size_t num_out_of_bounds = 0; + auto grid_copy = + grid_; // Create local copy to avoid capturing reference in some compilers Kokkos::parallel_reduce( "localize_points_on_device_spline", Kokkos::RangePolicy(0, num_points), @@ -228,13 +255,13 @@ class UniformGridSplineEvaluatorFactory2D : public FieldEvaluatorFactory point[d] = coordinates_d(i, d); } - bool out_of_bounds = !grid_.IsPointInBounds(point); + bool out_of_bounds = !grid_copy.IsPointInBounds(point); is_out_of_bounds_device(i) = out_of_bounds; if (out_of_bounds) { local_count += 1; } - cell_indices_device(i) = grid_.ClosestCellID(point); + cell_indices_device(i) = grid_copy.ClosestCellID(point); }, num_out_of_bounds);