apache · apurtell · May 5, 2026
diff --git a/src/main/tla/ConsistentFailover/Admin.tla b/src/main/tla/ConsistentFailover/Admin.tla
@@ -0,0 +1,182 @@
+-------------------------- MODULE Admin ----------------------------------------
+(*
+ * Operator-initiated actions for the Phoenix Consistent Failover
+ * protocol: failover initiation, abort, and OFFLINE lifecycle.
+ *
+ * These actions model the human operator (Admin actor) who drives
+ * failover and abort via the PhoenixHAAdminTool CLI, which delegates
+ * to HAGroupStoreManager coprocessor endpoints.
+ *
+ * AdminGoOffline and AdminForceRecover are gated on the
+ * UseOfflinePeerDetection feature flag and model
+ * the proactive design for peer OFFLINE detection. These actions
+ * use PhoenixHAAdminTool update with --force, bypassing the
+ * normal isTransitionAllowed() check.
+ *
+ * Implementation traceability:
+ *
+ *   TLA+ action              | Java source
+ *   -------------------------+----------------------------------------------
+ *   AdminStartFailover(c)    | HAGroupStoreManager
+ *                            |   .initiateFailoverOnActiveCluster() L375-400
+ *   AdminAbortFailover(c)    | HAGroupStoreManager
+ *                            |   .setHAGroupStatusToAbortToStandby() L419-425
+ *                            |   Also clears failoverPending (models
+ *                            |   abortFailoverListener L173-185)
+ *   AdminGoOffline(c)        | PhoenixHAAdminTool update --state OFFLINE
+ *                            |   (gated on UseOfflinePeerDetection)
+ *   AdminForceRecover(c)     | PhoenixHAAdminTool update --force
+ *                            |   --state STANDBY (OFFLINE -> S)
+ *                            |   (gated on UseOfflinePeerDetection)
+ *)
+EXTENDS SpecState, Types
+
+---------------------------------------------------------------------------
+
+(*
+ * Admin initiates failover on the active cluster.
+ *
+ * Two paths depending on current state:
+ *
+ *   AIS path:  AIS -> ATS   (in-sync, ready to hand off immediately)
+ *   ANIS path: ANIS -> ANISTS (not-in-sync, forwarder must drain OUT
+ *              before ANISTS can advance to ATS)
+ *
+ * AIS path guards:
+ *   The OUT directory must be empty and all live RS must be in SYNC
+ *   mode. DEAD RSes are allowed -- an RS can crash while the cluster
+ *   is AIS without changing the HA group state. The implementation
+ *   checks clusterState = AIS, not per-RS modes; a DEAD RS is not
+ *   writing, so the remaining SYNC RSes and empty OUT dir ensure
+ *   safety.
+ *
+ * ANIS path guards:
+ *   The implementation only validates the current state (ANIS) and
+ *   peer state. No outDirEmpty or writer-mode guards -- the
+ *   forwarder will drain OUT after the transition. The ANISTS ->
+ *   ATS transition (ANISTSToATS in HAGroupStore.tla) guards on
+ *   outDirEmpty and the anti-flapping gate.
+ *
+ * Both paths:
+ *   Peer must be in a stable standby state (S or DS) to prevent
+ *   initiating a new failover during the non-atomic window of a
+ *   previous failover (where the peer may still be in ATS).
+ *   Without this guard, the admin could produce an irrecoverable
+ *   (ATS, ATS) or (ANISTS, ATS) deadlock.
+ *
+ * Post: Cluster c transitions to ATS or ANISTS, both of which
+ *       map to the ACTIVE_TO_STANDBY role, blocking mutations
+ *       (isMutationBlocked()=true).
+ *
+ * Source: initiateFailoverOnActiveCluster() L375-400 checks current
+ *         state and selects AIS -> ATS or ANIS -> ANISTS.
+ *         Peer-state guard: getHAGroupStoreRecordFromPeer()
+ *         (HAGroupStoreClient L421).
+ *)
+AdminStartFailover(c) ==
+    /\ clusterState[Peer(c)] \in {"S", "DS"}
+    /\ \/ /\ clusterState[c] = "AIS"
+          /\ outDirEmpty[c]
+          /\ \A rs \in RS : writerMode[c][rs] \in {"SYNC", "DEAD"}
+          /\ clusterState' = [clusterState EXCEPT ![c] = "ATS"]
+       \/ /\ clusterState[c] = "ANIS"
+          /\ clusterState' = [clusterState EXCEPT ![c] = "ANISTS"]
+    /\ UNCHANGED <<writerVars, replayVars, envVars,
+                   outDirEmpty, antiFlapTimer,
+                   failoverPending, inProgressDirEmpty>>
+
+---------------------------------------------------------------------------
+
+(*
+ * Admin aborts an in-progress failover from the standby side.
+ *
+ * Pre:  Cluster c is in STA (STANDBY_TO_ACTIVE).
+ * Post: Cluster c transitions to AbTS (ABORT_TO_STANDBY).
+ *       The peer (in ATS) will react via PeerReactToAbTS,
+ *       transitioning to AbTAIS. Both then auto-complete back
+ *       to their pre-failover states.
+ *
+ * Abort must originate from the STA side to prevent dual-active
+ * races -- this is the AbortSafety property.
+ *
+ * Also clears failoverPending[c], modeling the abortFailoverListener
+ * (ReplicationLogDiscoveryReplay.java L173-185) which fires on LOCAL
+ * ABORT_TO_STANDBY, calling failoverPending.set(false).
+ *
+ * Source: setHAGroupStatusToAbortToStandby() L419-425.
+ *)
+AdminAbortFailover(c) ==
+    /\ clusterState[c] = "STA"
+    /\ clusterState' = [clusterState EXCEPT ![c] = "AbTS"]
+    /\ failoverPending' = [failoverPending EXCEPT ![c] = FALSE]
+    /\ UNCHANGED <<writerVars, replayVars, envVars,
+                   outDirEmpty, antiFlapTimer, inProgressDirEmpty>>
+
+---------------------------------------------------------------------------
+
+(*
+ * Admin takes a standby cluster offline.
+ *
+ * Gated on UseOfflinePeerDetection (Iteration 18, proactive modeling).
+ *
+ * Pre:  Cluster c is in S or DS (a standby state).
+ * Post: Cluster c transitions to OFFLINE.
+ *
+ * In the implementation, entering OFFLINE requires
+ * PhoenixHAAdminTool update --force --state OFFLINE, which
+ * bypasses isTransitionAllowed(). The operator decides when
+ * to take a cluster offline for maintenance or decommissioning.
+ *
+ * No ZK connectivity guard: the --force path writes directly
+ * to ZK, bypassing the isHealthy check used by
+ * setHAGroupStatusIfNeeded().
+ *
+ * Source: PhoenixHAAdminTool update --state OFFLINE (--force)
+ *)
+AdminGoOffline(c) ==
+    /\ UseOfflinePeerDetection = TRUE
+    /\ clusterState[c] \in {"S", "DS"}
+    /\ clusterState' = [clusterState EXCEPT ![c] = "OFFLINE"]
+    /\ UNCHANGED <<writerVars, replayVars, envVars,
+                   outDirEmpty, antiFlapTimer,
+                   failoverPending, inProgressDirEmpty>>
+
+---------------------------------------------------------------------------
+
+(*
+ * Admin force-recovers a cluster from OFFLINE.
+ *
+ * Gated on UseOfflinePeerDetection.
+ *
+ * Pre:  Cluster c is in OFFLINE.
+ * Post: Cluster c transitions to S (STANDBY).
+ *
+ * Recovery from OFFLINE requires PhoenixHAAdminTool update --force
+ * --state STANDBY, which bypasses isTransitionAllowed() (OFFLINE
+ * has no allowed outbound transitions in the implementation).
+ *
+ * The S-entry side effects mirror the pattern used by
+ * PeerReactToAIS (ATS->S) and AutoComplete (AbTS->S):
+ *   - writerMode reset to INIT for all RS (replication subsystem
+ *     restart on standby entry)
+ *   - outDirEmpty set to TRUE (OUT directory cleared)
+ *   - replayState set to SYNCED_RECOVERY (recoveryListener fold)
+ *
+ * No ZK connectivity guard: the --force path writes directly
+ * to ZK.
+ *
+ * Source: PhoenixHAAdminTool update --force --state STANDBY
+ *)
+AdminForceRecover(c) ==
+    /\ UseOfflinePeerDetection = TRUE
+    /\ clusterState[c] = "OFFLINE"
+    /\ clusterState' = [clusterState EXCEPT ![c] = "S"]
+    /\ writerMode' = [writerMode EXCEPT ![c] =
+            [rs \in RS |-> "INIT"]]
+    /\ outDirEmpty' = [outDirEmpty EXCEPT ![c] = TRUE]
+    /\ replayState' = [replayState EXCEPT ![c] = "SYNCED_RECOVERY"]
+    /\ UNCHANGED <<envVars,
+                   antiFlapTimer, failoverPending, inProgressDirEmpty,
+                   lastRoundInSync, lastRoundProcessed>>
+
+============================================================================
diff --git a/src/main/tla/ConsistentFailover/Clock.tla b/src/main/tla/ConsistentFailover/Clock.tla
@@ -0,0 +1,45 @@
+---------------------------- MODULE Clock ----------------------------------------
+(*
+ * Countdown timer.
+ *
+ * Provides a single Tick action that advances all per-cluster
+ * anti-flapping countdown timers by one tick toward 0. This follows
+ * the explicit-time pattern from Lamport, "Real Time is Really
+ * Simple" (CHARME 2005, Section 2). Time is modeled as an ordinary
+ * variable, and lower-bound timing constraints (action cannot fire
+ * until enough time passes) are expressed as enabling conditions on
+ * the guarded action using a countdown timer that ticks to 0.
+ *
+ * The Tick action is guarded so it only fires when at least one
+ * timer is still counting down (AntiFlapGateClosed). This prevents
+ * useless stuttering ticks when all timers have already expired.
+ *
+ * Implementation traceability:
+ *
+ *   TLA+ action  | Java source
+ *   -------------+--------------------------------------------------
+ *   Tick         | Passage of wall-clock time; no direct Java
+ *                |   counterpart. Models the interval between
+ *                |   HAGroupStoreClient.validateTransitionAndGet-
+ *                |   WaitTime() checks (L1027-1046).
+ *)
+EXTENDS SpecState, Types
+
+---------------------------------------------------------------------------
+
+(*
+ * Advance all countdown timers by one tick toward 0.
+ *
+ * Each cluster's anti-flapping timer is decremented via
+ * DecrementTimer (floor at 0). The action is enabled only when
+ * at least one cluster has a timer still counting down
+ * (AntiFlapGateClosed), preventing infinite stuttering at zero.
+ *)
+Tick ==
+    /\ \E c \in Cluster : AntiFlapGateClosed(antiFlapTimer[c])
+    /\ antiFlapTimer' = [c \in Cluster |-> DecrementTimer(antiFlapTimer[c])]
+    /\ UNCHANGED <<writerVars, replayVars, envVars,
+                   clusterState, outDirEmpty,
+                   failoverPending, inProgressDirEmpty>>
+
+============================================================================
diff --git a/src/main/tla/ConsistentFailover/ConsistentFailover-sim-liveness-ac.cfg b/src/main/tla/ConsistentFailover/ConsistentFailover-sim-liveness-ac.cfg
@@ -0,0 +1,54 @@
+\* TLC model configuration for ConsistentFailover.tla
+\*
+\* Per-property simulation liveness: AbortCompletion
+\* Uses FairnessAC (3 temporal clauses) to keep the Buchi
+\* automaton tractable. Samples random behaviors.
+\*
+\* Run:
+\* java -XX:+UseParallelGC \
+\*   -Dtlc2.TLC.stopAfter=28800 \
+\*   -cp tla2tools.jar:CommunityModules-deps.jar \
+\*   tlc2.TLC ConsistentFailover.tla \
+\*   -config ConsistentFailover-sim-liveness-ac.cfg \
+\*   -simulate -depth 10000 -workers auto
+
+SPECIFICATION SpecAC
+
+\* Model values
+CONSTANTS
+    \* The finite set of cluster identifiers forming the HA pair.
+    Cluster = {c1, c2}
+    \* The finite set of region server identifiers per cluster.
+    RS = {rs1, rs2}
+    \* Anti-flapping wait threshold (logical time ticks).
+    WaitTimeForSync = 2
+    \* Feature gate for proactive AWOP/ANISWOP modeling.
+    \* Set to TRUE to verify OFFLINE peer detection lifecycle.
+    UseOfflinePeerDetection = FALSE
+
+\* No SYMMETRY -- provides no benefit for random trace sampling.
+
+\* Liveness property
+PROPERTY
+    AbortCompletion
+
+\* Invariants to check
+INVARIANT
+    TypeOK
+    MutualExclusion
+    AbortSafety
+    AISImpliesInSync
+    WriterClusterConsistency
+    ZKSessionConsistency
+
+\* Action properties: every state change follows allowed transitions
+ACTION_CONSTRAINT
+    TransitionValid
+    WriterTransitionValid
+    AIStoATSPrecondition
+    AntiFlapGate
+    ANISTStoATSPrecondition
+    ReplayTransitionValid
+    FailoverTriggerCorrectness
+    NoDataLoss
+    ReplayRewindCorrectness
diff --git a/src/main/tla/ConsistentFailover/ConsistentFailover-sim-liveness-dr.cfg b/src/main/tla/ConsistentFailover/ConsistentFailover-sim-liveness-dr.cfg
@@ -0,0 +1,54 @@
+\* TLC model configuration for ConsistentFailover.tla
+\*
+\* Per-property simulation liveness: DegradationRecovery
+\* Uses FairnessDR (17 temporal clauses with 2 RS) to keep
+\* the Buchi automaton tractable. Samples random behaviors.
+\*
+\* Run:
+\* java -XX:+UseParallelGC \
+\*   -Dtlc2.TLC.stopAfter=28800 \
+\*   -cp tla2tools.jar:CommunityModules-deps.jar \
+\*   tlc2.TLC ConsistentFailover.tla \
+\*   -config ConsistentFailover-sim-liveness-dr.cfg \
+\*   -simulate -depth 10000 -workers auto
+
+SPECIFICATION SpecDR
+
+\* Model values
+CONSTANTS
+    \* The finite set of cluster identifiers forming the HA pair.
+    Cluster = {c1, c2}
+    \* The finite set of region server identifiers per cluster.
+    RS = {rs1, rs2}
+    \* Anti-flapping wait threshold (logical time ticks).
+    WaitTimeForSync = 2
+    \* Feature gate for proactive AWOP/ANISWOP modeling.
+    \* Set to TRUE to verify OFFLINE peer detection lifecycle.
+    UseOfflinePeerDetection = FALSE
+
+\* No SYMMETRY -- provides no benefit for random trace sampling.
+
+\* Liveness property
+PROPERTY
+    DegradationRecovery
+
+\* Invariants to check
+INVARIANT
+    TypeOK
+    MutualExclusion
+    AbortSafety
+    AISImpliesInSync
+    WriterClusterConsistency
+    ZKSessionConsistency
+
+\* Action properties: every state change follows allowed transitions
+ACTION_CONSTRAINT
+    TransitionValid
+    WriterTransitionValid
+    AIStoATSPrecondition
+    AntiFlapGate
+    ANISTStoATSPrecondition
+    ReplayTransitionValid
+    FailoverTriggerCorrectness
+    NoDataLoss
+    ReplayRewindCorrectness
diff --git a/src/main/tla/ConsistentFailover/ConsistentFailover-sim-liveness-fc.cfg b/src/main/tla/ConsistentFailover/ConsistentFailover-sim-liveness-fc.cfg
@@ -0,0 +1,54 @@
+\* TLC model configuration for ConsistentFailover.tla
+\*
+\* Per-property simulation liveness: FailoverCompletion
+\* Uses FairnessFC (8 temporal clauses) to keep the Buchi
+\* automaton tractable. Samples random behaviors.
+\*
+\* Run:
+\* java -XX:+UseParallelGC \
+\*   -Dtlc2.TLC.stopAfter=28800 \
+\*   -cp tla2tools.jar:CommunityModules-deps.jar \
+\*   tlc2.TLC ConsistentFailover.tla \
+\*   -config ConsistentFailover-sim-liveness-fc.cfg \
+\*   -simulate -depth 10000 -workers auto
+
+SPECIFICATION SpecFC
+
+\* Model values
+CONSTANTS
+    \* The finite set of cluster identifiers forming the HA pair.
+    Cluster = {c1, c2}
+    \* The finite set of region server identifiers per cluster.
+    RS = {rs1, rs2}
+    \* Anti-flapping wait threshold (logical time ticks).
+    WaitTimeForSync = 2
+    \* Feature gate for proactive AWOP/ANISWOP modeling.
+    \* Set to TRUE to verify OFFLINE peer detection lifecycle.
+    UseOfflinePeerDetection = FALSE
+
+\* No SYMMETRY -- provides no benefit for random trace sampling.
+
+\* Liveness property
+PROPERTY
+    FailoverCompletion
+
+\* Invariants to check
+INVARIANT
+    TypeOK
+    MutualExclusion
+    AbortSafety
+    AISImpliesInSync
+    WriterClusterConsistency
+    ZKSessionConsistency
+
+\* Action properties: every state change follows allowed transitions
+ACTION_CONSTRAINT
+    TransitionValid
+    WriterTransitionValid
+    AIStoATSPrecondition
+    AntiFlapGate
+    ANISTStoATSPrecondition
+    ReplayTransitionValid
+    FailoverTriggerCorrectness
+    NoDataLoss
+    ReplayRewindCorrectness