diff --git a/tests/systemtests/audit_empty_active_set_bootstrap_test.go b/tests/systemtests/audit_empty_active_set_bootstrap_test.go index c5f44633..52411eeb 100644 --- a/tests/systemtests/audit_empty_active_set_bootstrap_test.go +++ b/tests/systemtests/audit_empty_active_set_bootstrap_test.go @@ -6,29 +6,24 @@ package system // // When ALL supernodes are POSTPONED at epoch start, the epoch anchor has an // empty active_supernode_accounts set. Without active probers, no peer -// observations are generated, and the audit module's recovery rule -// (compliant host report + peer all-ports-OPEN) can never be satisfied. +// observations are generated, and the audit module's peer-port recovery rule +// (compliant host report + peer all-ports-OPEN) cannot be satisfied because +// no probers exist. // -// The fix is to use legacy MsgReportSupernodeMetrics to recover SNs to -// ACTIVE mid-epoch. Combined with audit epoch reports, the SN survives -// the audit EndBlocker and appears in the next epoch's anchor, seeding -// the active set and bootstrapping the peer-observation cycle. +// To break this bootstrap chicken-and-egg, the audit module applies a +// bootstrap-recovery exception in shouldRecoverAtEpochEnd: when the epoch +// anchor's active set is empty, a compliant self host-report alone is +// sufficient for recovery. Self-compliance is still mandatory; a misbehaving +// SN cannot self-recover via this branch. // -// Scenario: -// 1. Two supernodes register and start ACTIVE. -// 2. Neither submits epoch reports for epoch 0 → both POSTPONED at epoch 0 end. -// 3. Epoch 1: empty active set. Both submit host-only audit reports. -// Verify: audit recovery alone cannot recover them (no peer observations). -// 4. Legacy MsgReportSupernodeMetrics recovers both mid-epoch 2. -// 5. Epoch 2 end: audit enforcement checks them as ACTIVE — they have reports, -// host minimums disabled, no peer-port streak → they stay ACTIVE. -// 6. 
Epoch 3: both are in the anchor active set → peer observations flow → self-sustaining. +// With this exception, the chain self-heals from the deadlock: each POSTPONED +// SN that submits a compliant host-only report recovers at epoch end — no +// operator intervention required. import ( "testing" "time" - sntypes "github.com/LumeraProtocol/lumera/x/supernode/v1/types" "github.com/stretchr/testify/require" ) @@ -43,7 +38,20 @@ func awaitAtLeastHeightWithSlack(t *testing.T, height int64) { sut.AwaitBlockHeight(t, height, 45*time.Second) } -func TestAuditEmptyActiveSetBootstrap_LegacyMetricsBreaksDeadlock(t *testing.T) { +// TestAuditEmptyActiveSetBootstrap_HostOnlyReportsRecover verifies that the +// bootstrap-recovery exception breaks the empty-active-set deadlock: when +// all SNs are POSTPONED and the active set is empty, submitting compliant +// host-only audit reports is sufficient to recover them at epoch end. +// +// This inverts the pre-fix contract (which asserted permanent deadlock). +// +// Scenario: +// 1. Two supernodes register and start ACTIVE. +// 2. Neither submits epoch reports for epoch 0 → both POSTPONED at epoch 0 end. +// 3. Epoch 1: empty active set. Both submit host-only audit reports. +// 4. Epoch 1 end: bootstrap-recovery exception fires → both recover to ACTIVE. +// 5. Epoch 2: both are in the anchor active set → peer observations flow → self-sustaining. +func TestAuditEmptyActiveSetBootstrap_HostOnlyReportsRecover(t *testing.T) { const ( epochLengthBlocks = uint64(10) originHeight = int64(1) @@ -82,76 +90,48 @@ func TestAuditEmptyActiveSetBootstrap_LegacyMetricsBreaksDeadlock(t *testing.T) require.Equal(t, "SUPERNODE_STATE_POSTPONED", querySupernodeLatestState(t, cli, n1.valAddr), "node1 should be POSTPONED after missing epoch 0 report") - // ── Epoch 1: Empty active set — the deadlock. ── + // ── Epoch 1: empty active set — bootstrap-recovery exception applies. 
── epochID1 := uint64((epoch1Start - originHeight) / int64(epochLengthBlocks)) - // Both submit host-only audit epoch reports (as POSTPONED reporters, no observations). + // Both submit compliant host-only audit epoch reports (as POSTPONED reporters, + // no observations). With the bootstrap exception, this alone is sufficient + // for recovery at epoch 1 end. hostOK := auditHostReportJSON([]string{"PORT_STATE_OPEN"}) tx0 := submitEpochReport(t, cli, n0.nodeName, epochID1, hostOK, nil) RequireTxSuccess(t, tx0) tx1 := submitEpochReport(t, cli, n1.nodeName, epochID1, hostOK, nil) RequireTxSuccess(t, tx1) - // Wait for epoch 1 to end WITHOUT legacy metrics recovery. - // Both should remain POSTPONED — audit recovery fails (no peer observations). + // Wait for epoch 1 to end. awaitAtLeastHeightWithSlack(t, epoch2Start) - require.Equal(t, "SUPERNODE_STATE_POSTPONED", querySupernodeLatestState(t, cli, n0.valAddr), - "node0 should still be POSTPONED — audit recovery alone cannot break the deadlock") - require.Equal(t, "SUPERNODE_STATE_POSTPONED", querySupernodeLatestState(t, cli, n1.valAddr), - "node1 should still be POSTPONED — audit recovery alone cannot break the deadlock") - - // ── Epoch 2: Break the deadlock with legacy MsgReportSupernodeMetrics. ── - epochID2 := epochID1 + 1 - epoch3Start := epoch2Start + int64(epochLengthBlocks) - - // Submit legacy metrics → instant recovery to ACTIVE. 
- compliantMetrics := sntypes.SupernodeMetrics{ - VersionMajor: 2, - VersionMinor: 4, - VersionPatch: 5, - OpenPorts: []sntypes.PortStatus{ - {Port: 4444, State: sntypes.PortState_PORT_STATE_OPEN}, - }, - } - - hash0 := reportSupernodeMetrics(t, cli, n0.nodeName, n0.valAddr, n0.accAddr, compliantMetrics) - txJSON0 := waitForTx(t, cli, hash0) - resp0 := decodeTxResponse(t, txJSON0) - require.Equal(t, uint32(0), resp0.Code, "legacy metrics tx for node0 should succeed: %s", resp0.RawLog) - - hash1 := reportSupernodeMetrics(t, cli, n1.nodeName, n1.valAddr, n1.accAddr, compliantMetrics) - txJSON1 := waitForTx(t, cli, hash1) - resp1 := decodeTxResponse(t, txJSON1) - require.Equal(t, uint32(0), resp1.Code, "legacy metrics tx for node1 should succeed: %s", resp1.RawLog) - - // Submit audit epoch reports so epoch enforcement has both legacy metrics and - // fresh audit data available before the next boundary. - tx0e2 := submitEpochReport(t, cli, n0.nodeName, epochID2, hostOK, nil) - RequireTxSuccess(t, tx0e2) - tx1e2 := submitEpochReport(t, cli, n1.nodeName, epochID2, hostOK, nil) - RequireTxSuccess(t, tx1e2) - - // Wait for epoch 2 to end. - awaitAtLeastHeightWithSlack(t, epoch3Start) - - // Keep assertion surface narrow: tx/report acceptance is the contract this - // bootstrap check validates; detailed recovery semantics are covered by - // dedicated enforcement tests. + // Bootstrap-recovery exception: empty active set + compliant self host-report + // → both SNs recover to ACTIVE. 
+ require.Equal(t, "SUPERNODE_STATE_ACTIVE", querySupernodeLatestState(t, cli, n0.valAddr), + "node0 should recover to ACTIVE via the empty-active-set bootstrap exception") + require.Equal(t, "SUPERNODE_STATE_ACTIVE", querySupernodeLatestState(t, cli, n1.valAddr), + "node1 should recover to ACTIVE via the empty-active-set bootstrap exception") } -// TestAuditEmptyActiveSetDeadlock_HostOnlyReportsCannotRecover verifies that -// when all supernodes are POSTPONED, submitting host-only epoch reports across -// multiple epochs is insufficient for recovery — proving the deadlock exists. -func TestAuditEmptyActiveSetDeadlock_HostOnlyReportsCannotRecover(t *testing.T) { +// TestAuditEmptyActiveSetBootstrap_NonCompliantHostStaysPostponed verifies +// the bootstrap-recovery exception still gates on self-compliance. A +// POSTPONED supernode that submits a host report violating a min-free +// threshold MUST remain POSTPONED even when the active set is empty. +// +// This guards against the exception turning into a "free pass" for +// misbehaving SNs and complements the unit-level tests in +// x/audit/v1/keeper/enforcement_empty_active_set_test.go. +func TestAuditEmptyActiveSetBootstrap_NonCompliantHostStaysPostponed(t *testing.T) { const ( epochLengthBlocks = uint64(10) originHeight = int64(1) ) + // Set a non-zero MinDiskFreePercent so non-compliant disk usage in the host + // report blocks self-compliance. 
sut.ModifyGenesisJSON(t, setSupernodeParamsForAuditTests(t), - setAuditParamsForFastEpochs(t, epochLengthBlocks, 1, 1, 1, []uint32{4444}), + setAuditParamsForFastEpochsWithMinDiskFree(t, epochLengthBlocks, 1, 1, 1, []uint32{4444}, 20), ) sut.StartChain(t) @@ -166,31 +146,25 @@ func TestAuditEmptyActiveSetDeadlock_HostOnlyReportsCannotRecover(t *testing.T) currentHeight := sut.AwaitNextBlock(t) _, epoch0Start := nextEpochAfterHeight(originHeight, epochLengthBlocks, currentHeight) epoch1Start := epoch0Start + int64(epochLengthBlocks) + epoch2Start := epoch1Start + int64(epochLengthBlocks) awaitAtLeastHeightWithSlack(t, epoch1Start) require.Equal(t, "SUPERNODE_STATE_POSTPONED", querySupernodeLatestState(t, cli, n0.valAddr)) require.Equal(t, "SUPERNODE_STATE_POSTPONED", querySupernodeLatestState(t, cli, n1.valAddr)) - // Submit host-only reports for 3 consecutive epochs. None should recover. - hostOK := auditHostReportJSON([]string{"PORT_STATE_OPEN"}) - for i := 0; i < 3; i++ { - epochStart := epoch1Start + int64(i)*int64(epochLengthBlocks) - nextEpochStart := epochStart + int64(epochLengthBlocks) - epochID := uint64((epochStart - originHeight) / int64(epochLengthBlocks)) - - awaitAtLeastHeightWithSlack(t, epochStart) - - tx0 := submitEpochReport(t, cli, n0.nodeName, epochID, hostOK, nil) - RequireTxSuccess(t, tx0) - tx1 := submitEpochReport(t, cli, n1.nodeName, epochID, hostOK, nil) - RequireTxSuccess(t, tx1) + // Epoch 1: empty active set. Both submit host reports with disk usage 95% + // (5% free, below the 20% MinDiskFreePercent). Self-compliance fails. 
+ epochID1 := uint64((epoch1Start - originHeight) / int64(epochLengthBlocks)) + hostNonCompliant := auditHostReportWithDiskUsageJSON([]string{"PORT_STATE_OPEN"}, 95.0) + RequireTxSuccess(t, submitEpochReport(t, cli, n0.nodeName, epochID1, hostNonCompliant, nil)) + RequireTxSuccess(t, submitEpochReport(t, cli, n1.nodeName, epochID1, hostNonCompliant, nil)) - awaitAtLeastHeightWithSlack(t, nextEpochStart) + awaitAtLeastHeightWithSlack(t, epoch2Start) - require.Equal(t, "SUPERNODE_STATE_POSTPONED", querySupernodeLatestState(t, cli, n0.valAddr), - "node0 should remain POSTPONED in epoch %d — no peer observations possible", epochID) - require.Equal(t, "SUPERNODE_STATE_POSTPONED", querySupernodeLatestState(t, cli, n1.valAddr), - "node1 should remain POSTPONED in epoch %d — no peer observations possible", epochID) - } + // Self-compliance gate blocked the bootstrap exception → still POSTPONED. + require.Equal(t, "SUPERNODE_STATE_POSTPONED", querySupernodeLatestState(t, cli, n0.valAddr), + "node0 should remain POSTPONED — self-compliance gate blocks the bootstrap exception") + require.Equal(t, "SUPERNODE_STATE_POSTPONED", querySupernodeLatestState(t, cli, n1.valAddr), + "node1 should remain POSTPONED — self-compliance gate blocks the bootstrap exception") } diff --git a/tests/systemtests/audit_test_helpers_test.go b/tests/systemtests/audit_test_helpers_test.go index c0561c0b..0b75e344 100644 --- a/tests/systemtests/audit_test_helpers_test.go +++ b/tests/systemtests/audit_test_helpers_test.go @@ -66,6 +66,23 @@ func setAuditParamsForFastEpochs(t *testing.T, epochLengthBlocks uint64, peerQuo } } +// setAuditParamsForFastEpochsWithMinDiskFree is setAuditParamsForFastEpochs +// plus an explicit MinDiskFreePercent override. Used by tests that need to +// exercise the self-compliance gate against the host report's disk-usage +// field (e.g. the empty-active-set bootstrap exception's self-compliance +// guard). 
+func setAuditParamsForFastEpochsWithMinDiskFree(t *testing.T, epochLengthBlocks uint64, peerQuorumReports, minTargets, maxTargets uint32, requiredOpenPorts []uint32, minDiskFreePercent uint32) GenesisMutator { + base := setAuditParamsForFastEpochs(t, epochLengthBlocks, peerQuorumReports, minTargets, maxTargets, requiredOpenPorts) + return func(genesis []byte) []byte { + t.Helper() + state := base(genesis) + var err error + state, err = sjson.SetRawBytes(state, "app_state.audit.params.min_disk_free_percent", []byte(strconv.FormatUint(uint64(minDiskFreePercent), 10))) + require.NoError(t, err) + return state + } +} + // setSupernodeParamsForAuditTests keeps supernode registration permissive for test environments. // // These tests register supernodes and then submit audit reports "on their behalf" using node keys. @@ -292,6 +309,20 @@ func auditHostReportJSON(inboundPortStates []string) string { return string(bz) } +// auditHostReportWithDiskUsageJSON is like auditHostReportJSON but lets the +// caller pin disk_usage_percent. Used by tests that exercise the +// self-compliance gate (e.g. min-free thresholds). +func auditHostReportWithDiskUsageJSON(inboundPortStates []string, diskUsagePercent float64) string { + bz, _ := json.Marshal(map[string]any{ + "cpu_usage_percent": 1.0, + "mem_usage_percent": 1.0, + "disk_usage_percent": diskUsagePercent, + "inbound_port_states": inboundPortStates, + "failed_actions_count": 0, + }) + return string(bz) +} + // storageChallengeObservationJSON builds the JSON payload for --storage-challenge-observations flag. 
func storageChallengeObservationJSON(targetSupernodeAccount string, portStates []string) string { bz, _ := json.Marshal(map[string]any{ diff --git a/x/audit/v1/keeper/enforcement.go b/x/audit/v1/keeper/enforcement.go index 38cce7d8..7458613b 100644 --- a/x/audit/v1/keeper/enforcement.go +++ b/x/audit/v1/keeper/enforcement.go @@ -258,6 +258,17 @@ func (k Keeper) shouldRecoverAtEpochEnd(ctx sdk.Context, supernodeAccount string return false, err } + // Bootstrap exception: when the epoch's anchored active set is empty, no + // probers exist by construction, so the peer-port recovery rule below is + // unsatisfiable and would deadlock the chain (all SNs POSTPONED → 0 + // probers → 0 peer reports → no SN can ever recover). The peer-port gate + // is meaningless when there is nobody to attest, so accept a compliant + // self host-report alone as sufficient. The self-compliance check above + // still gates this branch — a misbehaving SN cannot self-recover. + if anchor, found := k.GetEpochAnchor(ctx, epochID); found && len(anchor.ActiveSupernodeAccounts) == 0 { + return true, nil + } + // Need at least one compliant peer report that shows all required ports OPEN. requiredPortsLen := len(params.RequiredOpenPorts) if requiredPortsLen == 0 { diff --git a/x/audit/v1/keeper/enforcement_empty_active_set_test.go b/x/audit/v1/keeper/enforcement_empty_active_set_test.go index 53c06fca..d3bc96ef 100644 --- a/x/audit/v1/keeper/enforcement_empty_active_set_test.go +++ b/x/audit/v1/keeper/enforcement_empty_active_set_test.go @@ -10,11 +10,57 @@ import ( "go.uber.org/mock/gomock" ) -// TestEnforceEpochEnd_EmptyActiveSet_PostponedCannotRecover verifies that when -// the active set is empty (all supernodes POSTPONED), submitting compliant -// host-only epoch reports is insufficient for recovery because no peer -// observations exist. This is the "empty active set deadlock". 
-func TestEnforceEpochEnd_EmptyActiveSet_PostponedCannotRecover(t *testing.T) { +// These tests cover the audit bootstrap-recovery exception in +// shouldRecoverAtEpochEnd. When the epoch's anchored active set is empty +// (all supernodes POSTPONED), the peer-port recovery rule is unsatisfiable +// by construction (no probers exist), so the chain must accept a compliant +// self host-report alone as sufficient to recover. Without this exception, +// the chain cannot self-heal from the "empty active set deadlock" and +// requires every validator key holder to perform a manual deregister/ +// re-register cycle out-of-band — a distributed coordination problem on +// mainnet. +// +// Self-compliance is still mandatory: a misbehaving SN (e.g. disk usage +// over threshold) cannot self-recover via this branch. + +// helper: writes an empty-active-set EpochAnchor at the given epochID so +// shouldRecoverAtEpochEnd can read it. +func writeEmptyActiveSetAnchor(t *testing.T, f *fixture, epochID uint64) { + t.Helper() + if err := f.keeper.SetEpochAnchor(f.ctx, types.EpochAnchor{ + EpochId: epochID, + EpochStartHeight: f.ctx.BlockHeight(), + EpochEndHeight: f.ctx.BlockHeight() + 1, + EpochLengthBlocks: 1, + ActiveSupernodeAccounts: nil, // empty active set — the deadlock condition + }); err != nil { + t.Fatalf("failed to set epoch anchor: %v", err) + } +} + +// helper: writes a non-empty active-set EpochAnchor so the bootstrap branch +// does NOT fire and the legacy peer-port path is exercised. 
+func writeNonEmptyActiveSetAnchor(t *testing.T, f *fixture, epochID uint64, accounts []string) { + t.Helper() + if err := f.keeper.SetEpochAnchor(f.ctx, types.EpochAnchor{ + EpochId: epochID, + EpochStartHeight: f.ctx.BlockHeight(), + EpochEndHeight: f.ctx.BlockHeight() + 1, + EpochLengthBlocks: 1, + ActiveSupernodeAccounts: accounts, + }); err != nil { + t.Fatalf("failed to set epoch anchor: %v", err) + } +} + +// TestEnforceEpochEnd_EmptyActiveSet_PostponedRecoversViaBootstrapException +// verifies that when the active set is empty (all SNs POSTPONED) AND every +// POSTPONED SN submitted a compliant self host-report, the bootstrap +// exception allows each SN to recover at epoch end, breaking the deadlock. +// +// This inverts the pre-fix behavior documented in commit history: +// previously this scenario asserted Times(0) recovery (deadlock confirmed). +func TestEnforceEpochEnd_EmptyActiveSet_PostponedRecoversViaBootstrapException(t *testing.T) { f := initFixture(t) _, sn0Acc, sn0Val := cryptotestutils.SupernodeAddresses() @@ -35,24 +81,25 @@ func TestEnforceEpochEnd_EmptyActiveSet_PostponedCannotRecover(t *testing.T) { epochID := uint64(1) + // Anchor the empty-active-set condition for this epoch. + writeEmptyActiveSetAnchor(t, f, epochID) + // Both POSTPONED supernodes submit compliant host-only reports. for _, sn := range []sntypes.SuperNode{sn0, sn1} { - err := f.keeper.SetReport(f.ctx, types.EpochReport{ + if err := f.keeper.SetReport(f.ctx, types.EpochReport{ SupernodeAccount: sn.SupernodeAccount, EpochId: epochID, ReportHeight: f.ctx.BlockHeight(), - HostReport: types.HostReport{}, - }) - if err != nil { + HostReport: types.HostReport{}, // defaults are compliant (mins are 0) + }); err != nil { t.Fatalf("failed to set report for %s: %v", sn.SupernodeAccount, err) } } - // No StorageChallengeReportIndex entries — no one probed anyone - // (empty active set means no probers were assigned). + // No StorageChallengeReportIndex entries — no one probed anyone. 
+ // With the bootstrap exception, recovery still succeeds. // Mock: no ACTIVE supernodes, two POSTPONED. - // Per LEP-6 §17: audit EnforceEpochEnd only queries Active nodes (not StorageFull). f.supernodeKeeper.EXPECT(). GetAllSuperNodes(gomock.AssignableToTypeOf(f.ctx), sntypes.SuperNodeStateActive). Return([]sntypes.SuperNode{}, nil). @@ -62,10 +109,11 @@ func TestEnforceEpochEnd_EmptyActiveSet_PostponedCannotRecover(t *testing.T) { Return([]sntypes.SuperNode{sn0, sn1}, nil). Times(1) - // Recovery should NOT be called — no peer observations exist. + // Recovery MUST be called exactly once per SN — the bootstrap exception fired. f.supernodeKeeper.EXPECT(). RecoverSuperNodeFromPostponed(gomock.Any(), gomock.Any()). - Times(0) + Return(nil). + Times(2) err := f.keeper.EnforceEpochEnd(f.ctx, epochID, params) if err != nil { @@ -73,14 +121,118 @@ func TestEnforceEpochEnd_EmptyActiveSet_PostponedCannotRecover(t *testing.T) { } } -// TestEnforceEpochEnd_LegacyRecoveredSN_SurvivesWithReport verifies that a -// supernode which was recovered to ACTIVE mid-epoch (e.g., by legacy -// MsgReportSupernodeMetrics) and also submitted an audit epoch report -// is NOT re-postponed at epoch end, even when no peer observations exist. -// -// This confirms the fix: legacy metrics recovery + audit epoch report = -// the SN survives enforcement and can appear in the next epoch's anchor. -func TestEnforceEpochEnd_LegacyRecoveredSN_SurvivesWithReport(t *testing.T) { +// TestEnforceEpochEnd_EmptyActiveSet_NoSelfReport_NoRecover verifies the +// bootstrap exception does NOT bypass the self-compliance requirement. +// A POSTPONED SN that did not submit a report stays POSTPONED even when +// the active set is empty. 
+func TestEnforceEpochEnd_EmptyActiveSet_NoSelfReport_NoRecover(t *testing.T) { + f := initFixture(t) + + _, sn0Acc, sn0Val := cryptotestutils.SupernodeAddresses() + + sn0 := sntypes.SuperNode{ + SupernodeAccount: sn0Acc.String(), + ValidatorAddress: sdk.ValAddress(sn0Val).String(), + } + + params := types.DefaultParams() + params.RequiredOpenPorts = []uint32{4444} + params.ConsecutiveEpochsToPostpone = 1 + + epochID := uint64(1) + + writeEmptyActiveSetAnchor(t, f, epochID) + + // sn0 did NOT submit any report this epoch — selfHostCompliant returns false. + + f.supernodeKeeper.EXPECT(). + GetAllSuperNodes(gomock.AssignableToTypeOf(f.ctx), sntypes.SuperNodeStateActive). + Return([]sntypes.SuperNode{}, nil). + Times(1) + f.supernodeKeeper.EXPECT(). + GetAllSuperNodes(gomock.AssignableToTypeOf(f.ctx), sntypes.SuperNodeStatePostponed). + Return([]sntypes.SuperNode{sn0}, nil). + Times(1) + + // Recovery MUST NOT be called — self-compliance gate blocked the bootstrap branch. + f.supernodeKeeper.EXPECT(). + RecoverSuperNodeFromPostponed(gomock.Any(), gomock.Any()). + Times(0) + + if err := f.keeper.EnforceEpochEnd(f.ctx, epochID, params); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +// TestEnforceEpochEnd_EmptyActiveSet_NonCompliantSelf_NoRecover verifies the +// bootstrap exception does NOT bypass the self-compliance health checks. +// A POSTPONED SN that submits a report violating the disk-usage minimum +// stays POSTPONED even when the active set is empty. +func TestEnforceEpochEnd_EmptyActiveSet_NonCompliantSelf_NoRecover(t *testing.T) { + f := initFixture(t) + + _, sn0Acc, sn0Val := cryptotestutils.SupernodeAddresses() + + sn0 := sntypes.SuperNode{ + SupernodeAccount: sn0Acc.String(), + ValidatorAddress: sdk.ValAddress(sn0Val).String(), + } + + params := types.DefaultParams() + params.RequiredOpenPorts = []uint32{4444} + params.ConsecutiveEpochsToPostpone = 1 + // Require at least 20% disk free; sn0 reports 95% usage → 5% free → not compliant. 
+ params.MinDiskFreePercent = 20 + + epochID := uint64(1) + + writeEmptyActiveSetAnchor(t, f, epochID) + + // SetReport with non-zero DiskUsagePercent invokes the STORAGE_FULL + // transition source path, which queries supernodeKeeper. Stub these + // dependencies so the call lands cleanly without triggering a + // transition (we return "not found" → SetReport short-circuits). + f.supernodeKeeper.EXPECT(). + GetSuperNodeByAccount(gomock.AssignableToTypeOf(f.ctx), sn0.SupernodeAccount). + Return(sntypes.SuperNode{}, false, nil). + Times(1) + + if err := f.keeper.SetReport(f.ctx, types.EpochReport{ + SupernodeAccount: sn0.SupernodeAccount, + EpochId: epochID, + ReportHeight: f.ctx.BlockHeight(), + HostReport: types.HostReport{ + DiskUsagePercent: 95.0, // 5% free, below the 20% minimum + }, + }); err != nil { + t.Fatalf("failed to set report: %v", err) + } + + f.supernodeKeeper.EXPECT(). + GetAllSuperNodes(gomock.AssignableToTypeOf(f.ctx), sntypes.SuperNodeStateActive). + Return([]sntypes.SuperNode{}, nil). + Times(1) + f.supernodeKeeper.EXPECT(). + GetAllSuperNodes(gomock.AssignableToTypeOf(f.ctx), sntypes.SuperNodeStatePostponed). + Return([]sntypes.SuperNode{sn0}, nil). + Times(1) + + // Recovery MUST NOT be called — self-compliance gate blocked the bootstrap branch. + f.supernodeKeeper.EXPECT(). + RecoverSuperNodeFromPostponed(gomock.Any(), gomock.Any()). + Times(0) + + if err := f.keeper.EnforceEpochEnd(f.ctx, epochID, params); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +// TestEnforceEpochEnd_NonEmptyActiveSet_NoPeerObs_NoRecover verifies the +// legacy peer-port recovery rule is preserved when the active set is +// non-empty. A POSTPONED SN with a compliant self-report but NO peer +// observations stays POSTPONED — the bootstrap branch does not fire when +// probers exist (or could exist). 
+func TestEnforceEpochEnd_NonEmptyActiveSet_NoPeerObs_NoRecover(t *testing.T) { f := initFixture(t) _, sn0Acc, sn0Val := cryptotestutils.SupernodeAddresses() @@ -90,7 +242,7 @@ func TestEnforceEpochEnd_LegacyRecoveredSN_SurvivesWithReport(t *testing.T) { SupernodeAccount: sn0Acc.String(), ValidatorAddress: sdk.ValAddress(sn0Val).String(), } - sn1 := sntypes.SuperNode{ + sn1Active := sntypes.SuperNode{ SupernodeAccount: sn1Acc.String(), ValidatorAddress: sdk.ValAddress(sn1Val).String(), } @@ -101,42 +253,93 @@ func TestEnforceEpochEnd_LegacyRecoveredSN_SurvivesWithReport(t *testing.T) { epochID := uint64(1) - // Both supernodes submitted epoch reports (host-only, as they were - // POSTPONED when submitting — no storage challenge observations). - for _, sn := range []sntypes.SuperNode{sn0, sn1} { - err := f.keeper.SetReport(f.ctx, types.EpochReport{ - SupernodeAccount: sn.SupernodeAccount, - EpochId: epochID, - ReportHeight: f.ctx.BlockHeight(), - HostReport: types.HostReport{}, - }) - if err != nil { - t.Fatalf("failed to set report for %s: %v", sn.SupernodeAccount, err) - } + // Active set is non-empty (sn1 is active) — bootstrap branch must NOT fire. + writeNonEmptyActiveSetAnchor(t, f, epochID, []string{sn1Active.SupernodeAccount}) + + if err := f.keeper.SetReport(f.ctx, types.EpochReport{ + SupernodeAccount: sn0.SupernodeAccount, + EpochId: epochID, + ReportHeight: f.ctx.BlockHeight(), + HostReport: types.HostReport{}, // compliant + }); err != nil { + t.Fatalf("failed to set report: %v", err) } - // Simulate: both were recovered to ACTIVE mid-epoch via legacy metrics. - // At epoch end, the audit enforcement sees them as ACTIVE. - // Per LEP-6 §17: audit EnforceEpochEnd only queries Active nodes (not StorageFull). + // sn1 (active) submits no report → no peer observations about sn0. + f.supernodeKeeper.EXPECT(). GetAllSuperNodes(gomock.AssignableToTypeOf(f.ctx), sntypes.SuperNodeStateActive). - Return([]sntypes.SuperNode{sn0, sn1}, nil). 
+ Return([]sntypes.SuperNode{sn1Active}, nil). Times(1) f.supernodeKeeper.EXPECT(). GetAllSuperNodes(gomock.AssignableToTypeOf(f.ctx), sntypes.SuperNodeStatePostponed). - Return([]sntypes.SuperNode{}, nil). + Return([]sntypes.SuperNode{sn0}, nil). Times(1) - // They have reports → no missing-report postponement. - // Host minimums are all 0 → no violation. - // No peer observations → peersPortStateMeetsThreshold returns false → no streak → no postponement. - // Expect: SetSuperNodePostponed is NEVER called. + // Legacy behavior: no peer all-open observation → no recovery. + f.supernodeKeeper.EXPECT(). + RecoverSuperNodeFromPostponed(gomock.Any(), gomock.Any()). + Times(0) + // sn1 (active) has no report → may be postponed for missing report, + // but consecutive_epochs_to_postpone=1 at epochID=1 means the streak + // check sees not-enough-history; mock both directions defensively. f.supernodeKeeper.EXPECT(). SetSuperNodePostponed(gomock.Any(), gomock.Any(), gomock.Any()). + AnyTimes() + + if err := f.keeper.EnforceEpochEnd(f.ctx, epochID, params); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +// TestEnforceEpochEnd_NoEpochAnchor_FallsThroughToLegacyPath verifies that +// when the epoch anchor is missing (e.g. a node that started mid-epoch +// before the anchor was persisted, or a test fixture that doesn't write +// one), the bootstrap branch does NOT fire and the legacy peer-port path +// runs unchanged. This is the safe default. +func TestEnforceEpochEnd_NoEpochAnchor_FallsThroughToLegacyPath(t *testing.T) { + f := initFixture(t) + + _, sn0Acc, sn0Val := cryptotestutils.SupernodeAddresses() + + sn0 := sntypes.SuperNode{ + SupernodeAccount: sn0Acc.String(), + ValidatorAddress: sdk.ValAddress(sn0Val).String(), + } + + params := types.DefaultParams() + params.RequiredOpenPorts = []uint32{4444} + params.ConsecutiveEpochsToPostpone = 1 + + epochID := uint64(1) + + // NO anchor written for this epoch. 
+ + if err := f.keeper.SetReport(f.ctx, types.EpochReport{ + SupernodeAccount: sn0.SupernodeAccount, + EpochId: epochID, + ReportHeight: f.ctx.BlockHeight(), + HostReport: types.HostReport{}, + }); err != nil { + t.Fatalf("failed to set report: %v", err) + } + + f.supernodeKeeper.EXPECT(). + GetAllSuperNodes(gomock.AssignableToTypeOf(f.ctx), sntypes.SuperNodeStateActive). + Return([]sntypes.SuperNode{}, nil). + Times(1) + f.supernodeKeeper.EXPECT(). + GetAllSuperNodes(gomock.AssignableToTypeOf(f.ctx), sntypes.SuperNodeStatePostponed). + Return([]sntypes.SuperNode{sn0}, nil). + Times(1) + + // No anchor → bootstrap branch not taken → legacy peer-port path returns + // false (no peers, no observations) → recovery not invoked. + f.supernodeKeeper.EXPECT(). + RecoverSuperNodeFromPostponed(gomock.Any(), gomock.Any()). Times(0) - err := f.keeper.EnforceEpochEnd(f.ctx, epochID, params) - if err != nil { + if err := f.keeper.EnforceEpochEnd(f.ctx, epochID, params); err != nil { t.Fatalf("unexpected error: %v", err) } }