From adce9d51552ed8aadabea411a24460242e2ee7f0 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Sat, 21 Feb 2026 17:57:04 -0500 Subject: [PATCH 1/7] Fix missing private clauses in 3D viscous GPU loops The 3D shear stress and bulk stress GPU parallel loops were missing rho_visc, gamma_visc, pi_inf_visc, and alpha_visc_sum from their private clauses. The corresponding 2D loops already had these variables listed. Without privatization, these variables could cause race conditions on GPU. Co-Authored-By: Claude Opus 4.6 --- src/simulation/m_viscous.fpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 6b1d9dbdf2..04f612221d 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -319,7 +319,7 @@ contains #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 if (shear_stress) then ! Shear stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -428,7 +428,7 @@ contains end if if (bulk_stress) then ! Bulk stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end From 35d405c927b5e4cfc390abffad9dad14c6b41a29 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Sat, 21 Feb 2026 18:51:42 -0500 Subject: [PATCH 2/7] Add loop indices to private clauses in 3D viscous GPU loops Add i,j,k,l to the private list for the 3D shear_stress and bulk_stress GPU parallel loops, matching the pattern already used by the analogous 2D loops at lines 105 and 215. Co-Authored-By: Claude Sonnet 4.6 --- src/simulation/m_viscous.fpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 04f612221d..19544506e1 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -319,7 +319,7 @@ contains #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 if (shear_stress) then ! Shear stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -428,7 +428,7 @@ contains end if if (bulk_stress) then ! Bulk stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end From 29380bdd1d96e0145d6ccbe9d53a92baa271f74e Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Mon, 23 Feb 2026 09:39:10 -0500 Subject: [PATCH 3/7] Add missing q to GPU_PARALLEL_LOOP private lists The sequential loop iterator q (used in `do q = 1, Re_size(i)`) was not privatized in any of the four GPU parallel regions. Without explicit privatization, q is shared across GPU threads on OpenACC and AMD OpenMP backends, causing a data race in Reynolds number computation. Co-Authored-By: Claude Opus 4.6 --- src/simulation/m_viscous.fpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 19544506e1..3160ae9fb3 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -102,7 +102,7 @@ contains #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 if (shear_stress) then ! Shear stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum ,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum ,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -212,7 +212,7 @@ contains #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 if (bulk_stress) then ! Bulk stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum ,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum ,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -319,7 +319,7 @@ contains #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 if (shear_stress) then ! Shear stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -428,7 +428,7 @@ contains end if if (bulk_stress) then ! Bulk stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end From 0206687619225911d5c5f7f9e1af474d30fdaac9 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Tue, 24 Feb 2026 18:25:23 -0500 Subject: [PATCH 4/7] Fix errant space before comma in GPU private clause list Co-Authored-By: Claude Sonnet 4.6 --- src/simulation/m_viscous.fpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 3160ae9fb3..a9d65c2b48 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -102,7 +102,7 @@ contains #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 if (shear_stress) then ! Shear stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum ,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -212,7 +212,7 @@ contains #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 if (bulk_stress) then ! Bulk stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum ,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end From 45b31d9e8a30ea65526c1d82837c9e3d8d8c8a5e Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Wed, 25 Feb 2026 23:25:42 -0500 Subject: [PATCH 5/7] Fix typos in m_viscous.fpp Co-Authored-By: Claude Opus 4.6 --- src/simulation/m_viscous.fpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index a9d65c2b48..9c6e4ad4d8 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -1564,12 +1564,12 @@ contains real(wp) :: divergence integer :: l, q ! iterators - ! zero the viscous stress, collection of velocity diriviatives, and spacial finite differences + ! zero the viscous stress, collection of velocity derivatives, and spatial finite differences viscous_stress_tensor = 0._wp velocity_gradient_tensor = 0._wp dx = 0._wp - ! get the change in x used in the finite difference equaiont + ! get the change in x used in the finite difference equation dx(1) = 0.5_wp*(x_cc(i + 1) - x_cc(i - 1)) dx(2) = 0.5_wp*(y_cc(j + 1) - y_cc(j - 1)) if (num_dims == 3) then From a2c4d1df8bbc2c1005dfbd57247519f3e5062887 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Sun, 1 Mar 2026 15:35:56 -0500 Subject: [PATCH 6/7] Work around CCE 19.0.0 optcg ICE in m_phase_change.fpp Replace matmul() with explicit 2x2 matrix-vector multiply to avoid an intermittent CCE 19.0.0 internal compiler error (signal 13 segfault in optcg) when matmul is used inside a subroutine marked with both !$acc routine seq and !DIR$ INLINEALWAYS. Co-Authored-By: Claude Sonnet 4.6 --- src/common/m_phase_change.fpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 83b4801d3b..fda94f1d5c 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -579,7 +579,8 @@ contains InvJac = InvJac/(Jac(1, 1)*Jac(2, 2) - Jac(1, 2)*Jac(2, 1)) ! calculating correction array for Newton's method - DeltamP = -1.0_wp*(matmul(InvJac, R2D)) + DeltamP(1) = -1.0_wp*(InvJac(1, 1)*R2D(1) + InvJac(1, 2)*R2D(2)) + DeltamP(2) = -1.0_wp*(InvJac(2, 1)*R2D(1) + InvJac(2, 2)*R2D(2)) ! updating two reacting 'masses'. Recall that inert 'masses' do not change during the phase change ! liquid From 1daf95b8873ce2ac1652a7ef8ecc857c75508ba5 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Sun, 1 Mar 2026 17:56:09 -0500 Subject: [PATCH 7/7] Fix use of uninitialized FT in s_TSat Newton loop The do while condition checked abs(FT) before FT was ever assigned. Fortran .or. is not short-circuit, so abs(FT) was always evaluated on the first iteration even though (ns == 0) guaranteed loop entry. Restructure to do...exit: compute FT first, then check convergence. Logically identical, no uninitialized reads, no intrinsics, safe on all GPU device compilers (exit maps to a plain conditional branch). Co-Authored-By: Claude Sonnet 4.6 --- src/common/m_phase_change.fpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index fda94f1d5c..df0e4cf656 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -717,7 +717,7 @@ contains ! underrelaxation factor Om = 1.0e-3_wp - do while ((abs(FT) > ptgalpha_eps) .or. (ns == 0)) + do ! increasing counter ns = ns + 1 @@ -738,6 +738,7 @@ contains ! updating saturation temperature TSat = TSat - Om*FT/dFdT + if (abs(FT) <= ptgalpha_eps) exit end do end if