From adce9d51552ed8aadabea411a24460242e2ee7f0 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Sat, 21 Feb 2026 17:57:04 -0500
Subject: [PATCH 1/7] Fix missing private clauses in 3D viscous GPU loops

The 3D shear stress and bulk stress GPU parallel loops were missing
rho_visc, gamma_visc, pi_inf_visc, and alpha_visc_sum from their
private clauses. The corresponding 2D loops already had these
variables listed. Without privatization, these variables could cause
race conditions on GPU.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/simulation/m_viscous.fpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp
index 6b1d9dbdf2..04f612221d 100644
--- a/src/simulation/m_viscous.fpp
+++ b/src/simulation/m_viscous.fpp
@@ -319,7 +319,7 @@ contains
         #:if not MFC_CASE_OPTIMIZATION or num_dims > 2
 
             if (shear_stress) then    ! Shear stresses
-                $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
+                $:GPU_PARALLEL_LOOP(collapse=3, private='[rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
                 do l = is3_viscous%beg, is3_viscous%end
                     do k = -1, 1
                         do j = is1_viscous%beg, is1_viscous%end
@@ -428,7 +428,7 @@ contains
             end if
 
             if (bulk_stress) then    ! Bulk stresses
-                $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
+                $:GPU_PARALLEL_LOOP(collapse=3, private='[rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
                 do l = is3_viscous%beg, is3_viscous%end
                     do k = -1, 1
                         do j = is1_viscous%beg, is1_viscous%end

From 35d405c927b5e4cfc390abffad9dad14c6b41a29 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Sat, 21 Feb 2026 18:51:42 -0500
Subject: [PATCH 2/7] Add loop indices to private clauses in 3D viscous GPU
 loops

Add i,j,k,l to the private list for the 3D shear_stress and
bulk_stress GPU parallel loops, matching the pattern already used
by the analogous 2D loops at lines 105 and 215.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/simulation/m_viscous.fpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp
index 04f612221d..19544506e1 100644
--- a/src/simulation/m_viscous.fpp
+++ b/src/simulation/m_viscous.fpp
@@ -319,7 +319,7 @@ contains
         #:if not MFC_CASE_OPTIMIZATION or num_dims > 2
 
             if (shear_stress) then    ! Shear stresses
-                $:GPU_PARALLEL_LOOP(collapse=3, private='[rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
+                $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
                 do l = is3_viscous%beg, is3_viscous%end
                     do k = -1, 1
                         do j = is1_viscous%beg, is1_viscous%end
@@ -428,7 +428,7 @@ contains
             end if
 
             if (bulk_stress) then    ! Bulk stresses
-                $:GPU_PARALLEL_LOOP(collapse=3, private='[rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
+                $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
                 do l = is3_viscous%beg, is3_viscous%end
                     do k = -1, 1
                         do j = is1_viscous%beg, is1_viscous%end

From 29380bdd1d96e0145d6ccbe9d53a92baa271f74e Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Mon, 23 Feb 2026 09:39:10 -0500
Subject: [PATCH 3/7] Add missing q to GPU_PARALLEL_LOOP private lists

The sequential loop iterator q (used in `do q = 1, Re_size(i)`) was not
privatized in any of the four GPU parallel regions. Without explicit
privatization, q is shared across GPU threads on OpenACC and AMD OpenMP
backends, causing a data race in Reynolds number computation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/simulation/m_viscous.fpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp
index 19544506e1..3160ae9fb3 100644
--- a/src/simulation/m_viscous.fpp
+++ b/src/simulation/m_viscous.fpp
@@ -102,7 +102,7 @@ contains
 
         #:if not MFC_CASE_OPTIMIZATION or num_dims > 1
             if (shear_stress) then    ! Shear stresses
-                $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum ,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
+                $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum ,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
                 do l = is3_viscous%beg, is3_viscous%end
                     do k = -1, 1
                         do j = is1_viscous%beg, is1_viscous%end
@@ -212,7 +212,7 @@ contains
 
         #:if not MFC_CASE_OPTIMIZATION or num_dims > 1
             if (bulk_stress) then    ! Bulk stresses
-                $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum ,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
+                $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum ,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
                 do l = is3_viscous%beg, is3_viscous%end
                     do k = -1, 1
                         do j = is1_viscous%beg, is1_viscous%end
@@ -319,7 +319,7 @@ contains
         #:if not MFC_CASE_OPTIMIZATION or num_dims > 2
 
             if (shear_stress) then    ! Shear stresses
-                $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
+                $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
                 do l = is3_viscous%beg, is3_viscous%end
                     do k = -1, 1
                         do j = is1_viscous%beg, is1_viscous%end
@@ -428,7 +428,7 @@ contains
             end if
 
             if (bulk_stress) then    ! Bulk stresses
-                $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
+                $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
                 do l = is3_viscous%beg, is3_viscous%end
                     do k = -1, 1
                         do j = is1_viscous%beg, is1_viscous%end

From 0206687619225911d5c5f7f9e1af474d30fdaac9 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Tue, 24 Feb 2026 18:25:23 -0500
Subject: [PATCH 4/7] Fix errant space before comma in GPU private clause list

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/simulation/m_viscous.fpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp
index 3160ae9fb3..a9d65c2b48 100644
--- a/src/simulation/m_viscous.fpp
+++ b/src/simulation/m_viscous.fpp
@@ -102,7 +102,7 @@ contains
 
         #:if not MFC_CASE_OPTIMIZATION or num_dims > 1
             if (shear_stress) then    ! Shear stresses
-                $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum ,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
+                $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
                 do l = is3_viscous%beg, is3_viscous%end
                     do k = -1, 1
                         do j = is1_viscous%beg, is1_viscous%end
@@ -212,7 +212,7 @@ contains
 
         #:if not MFC_CASE_OPTIMIZATION or num_dims > 1
             if (bulk_stress) then    ! Bulk stresses
-                $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum ,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
+                $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum, alpha_visc, alpha_rho_visc, Re_visc, tau_Re]')
                 do l = is3_viscous%beg, is3_viscous%end
                     do k = -1, 1
                         do j = is1_viscous%beg, is1_viscous%end

From 45b31d9e8a30ea65526c1d82837c9e3d8d8c8a5e Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 25 Feb 2026 23:25:42 -0500
Subject: [PATCH 5/7] Fix typos in m_viscous.fpp

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/simulation/m_viscous.fpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp
index a9d65c2b48..9c6e4ad4d8 100644
--- a/src/simulation/m_viscous.fpp
+++ b/src/simulation/m_viscous.fpp
@@ -1564,12 +1564,12 @@ contains
         real(wp) :: divergence
         integer :: l, q ! iterators
 
-        ! zero the viscous stress, collection of velocity diriviatives, and spacial finite differences
+        ! zero the viscous stress, collection of velocity derivatives, and spatial finite differences
         viscous_stress_tensor = 0._wp
         velocity_gradient_tensor = 0._wp
         dx = 0._wp
 
-        ! get the change in x used in the finite difference equaiont
+        ! get the change in x used in the finite difference equation
         dx(1) = 0.5_wp*(x_cc(i + 1) - x_cc(i - 1))
         dx(2) = 0.5_wp*(y_cc(j + 1) - y_cc(j - 1))
         if (num_dims == 3) then

From a2c4d1df8bbc2c1005dfbd57247519f3e5062887 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@login11.frontier.olcf.ornl.gov>
Date: Sun, 1 Mar 2026 15:35:56 -0500
Subject: [PATCH 6/7] Work around CCE 19.0.0 optcg ICE in m_phase_change.fpp

Replace matmul() with explicit 2x2 matrix-vector multiply to avoid
an intermittent CCE 19.0.0 internal compiler error (signal 13 segfault
in optcg) when matmul is used inside a subroutine marked with both
!$acc routine seq and !DIR$ INLINEALWAYS.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/common/m_phase_change.fpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp
index 83b4801d3b..fda94f1d5c 100644
--- a/src/common/m_phase_change.fpp
+++ b/src/common/m_phase_change.fpp
@@ -579,7 +579,8 @@ contains
             InvJac = InvJac/(Jac(1, 1)*Jac(2, 2) - Jac(1, 2)*Jac(2, 1))
 
             ! calculating correction array for Newton's method
-            DeltamP = -1.0_wp*(matmul(InvJac, R2D))
+            DeltamP(1) = -1.0_wp*(InvJac(1, 1)*R2D(1) + InvJac(1, 2)*R2D(2))
+            DeltamP(2) = -1.0_wp*(InvJac(2, 1)*R2D(1) + InvJac(2, 2)*R2D(2))
 
             ! updating two reacting 'masses'. Recall that inert 'masses' do not change during the phase change
             ! liquid

From 1daf95b8873ce2ac1652a7ef8ecc857c75508ba5 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@login10.frontier.olcf.ornl.gov>
Date: Sun, 1 Mar 2026 17:56:09 -0500
Subject: [PATCH 7/7] Fix use of uninitialized FT in s_TSat Newton loop

The do while condition checked abs(FT) before FT was ever assigned.
Fortran .or. is not short-circuit, so abs(FT) was always evaluated
on the first iteration even though (ns == 0) guaranteed loop entry.

Restructure to do...exit: compute FT first, then check convergence.
Logically identical, no uninitialized reads, no intrinsics, safe on
all GPU device compilers (exit maps to a plain conditional branch).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/common/m_phase_change.fpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp
index fda94f1d5c..df0e4cf656 100644
--- a/src/common/m_phase_change.fpp
+++ b/src/common/m_phase_change.fpp
@@ -717,7 +717,7 @@ contains
 
             ! underrelaxation factor
             Om = 1.0e-3_wp
-            do while ((abs(FT) > ptgalpha_eps) .or. (ns == 0))
+            do
                 ! increasing counter
                 ns = ns + 1
 
@@ -738,6 +738,7 @@ contains
                 ! updating saturation temperature
                 TSat = TSat - Om*FT/dFdT
 
+                if (abs(FT) <= ptgalpha_eps) exit
             end do
 
         end if