diff --git a/internal/planner/archive.go b/internal/planner/archive.go index a940c7c..04c30a6 100644 --- a/internal/planner/archive.go +++ b/internal/planner/archive.go @@ -5,8 +5,10 @@ import ( "strconv" seiconfig "github.com/sei-protocol/sei-config" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/task" ) type archiveNodePlanner struct { @@ -26,12 +28,35 @@ func (p *archiveNodePlanner) Validate(node *seiv1alpha1.SeiNode) error { func (p *archiveNodePlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) { if node.Status.Phase == seiv1alpha1.PhaseRunning { - return buildRunningPlan(node) + return p.buildRunningPlan(node) } - return buildBasePlan(node, node.Spec.Peers, nil, &seiconfig.ConfigIntent{ + intent := &seiconfig.ConfigIntent{ Mode: seiconfig.ModeArchive, Overrides: mergeOverrides(mergeOverrides(commonOverrides(node), p.controllerOverrides(node)), node.Spec.Overrides), - }) + } + return buildBasePlan(node, node.Spec.Peers, nil, intent) +} + +// buildRunningPlan returns the update plan for a Running archive node. +// Same shape as full nodes (no extra validation gates). +func (p *archiveNodePlanner) buildRunningPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) { + if imageDrifted(node) { + setNodeUpdateCondition(node, metav1.ConditionTrue, "UpdateStarted", imageDriftMessage(node)) + prog := []string{ + task.TaskTypeApplyStatefulSet, + task.TaskTypeApplyService, + TaskConfigPatch, + TaskConfigValidate, + task.TaskTypeReplacePod, + task.TaskTypeObserveImage, + TaskMarkReady, + } + return assembleUpdatePlan(node, prog, externalAddressPatch(node)) + } + if sidecarNeedsReapproval(node) { + return buildMarkReadyPlan(node) + } + return nil, nil } func (p *archiveNodePlanner) controllerOverrides(node *seiv1alpha1.SeiNode) map[string]string { diff --git a/internal/planner/full.go b/internal/planner/full.go index f04596e..ebb64f5 100644 --- a/internal/planner/full.go +++ b/internal/planner/full.go @@ -4,8 +4,10 @@ import ( "fmt" seiconfig "github.com/sei-protocol/sei-config" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/task" ) type fullNodePlanner struct { @@ -30,17 +32,40 @@ func (p *fullNodePlanner) Validate(node *seiv1alpha1.SeiNode) error { func (p *fullNodePlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) { if node.Status.Phase == seiv1alpha1.PhaseRunning { - return buildRunningPlan(node) + return p.buildRunningPlan(node) } fn := node.Spec.FullNode - params := &seiconfig.ConfigIntent{ + intent := &seiconfig.ConfigIntent{ Mode: seiconfig.ModeFull, Overrides: mergeOverrides(mergeOverrides(commonOverrides(node), p.controllerOverrides(node)), node.Spec.Overrides), } if NeedsBootstrap(node) { - return buildBootstrapPlan(node, node.Spec.Peers, fn.Snapshot, params) + return buildBootstrapPlan(node, node.Spec.Peers, fn.Snapshot, intent) } - return buildBasePlan(node, node.Spec.Peers, fn.Snapshot, params) + return buildBasePlan(node, node.Spec.Peers, fn.Snapshot, intent) +} + +// buildRunningPlan returns the update plan for a Running full node, or +// nil if no drift. pelletier/go-toml/v2 does not preserve comments on +// re-encode — the first config-patch erases operator-added comments. +func (p *fullNodePlanner) buildRunningPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) { + if imageDrifted(node) { + setNodeUpdateCondition(node, metav1.ConditionTrue, "UpdateStarted", imageDriftMessage(node)) + prog := []string{ + task.TaskTypeApplyStatefulSet, + task.TaskTypeApplyService, + TaskConfigPatch, + TaskConfigValidate, + task.TaskTypeReplacePod, + task.TaskTypeObserveImage, + TaskMarkReady, + } + return assembleUpdatePlan(node, prog, externalAddressPatch(node)) + } + if sidecarNeedsReapproval(node) { + return buildMarkReadyPlan(node) + } + return nil, nil } func (p *fullNodePlanner) controllerOverrides(node *seiv1alpha1.SeiNode) map[string]string { diff --git a/internal/planner/node_update_test.go b/internal/planner/node_update_test.go index 333bc66..12a6104 100644 --- a/internal/planner/node_update_test.go +++ b/internal/planner/node_update_test.go @@ -1,6 +1,7 @@ package planner import ( + "encoding/json" "fmt" "testing" @@ -12,7 +13,10 @@ import ( "github.com/sei-protocol/sei-k8s-controller/internal/task" ) -const testImageV2 = "sei:v2.0.0" +const ( + testImageV2 = "sei:v2.0.0" + testExternalAddrAtl = "syncer-0-0-p2p.atlantic-2.harbor.platform.sei.io:26656" +) // runningFullNode returns a SeiNode in the Running phase with currentImage matching spec.image. func runningFullNode() *seiv1alpha1.SeiNode { @@ -39,42 +43,60 @@ func planTaskTypes(plan *seiv1alpha1.TaskPlan) []string { return types } -// --- buildRunningPlan tests --- +// configPatchFromPlan finds the config-patch task in a plan and unmarshals +// its params back into a ConfigPatchTask for inspection. +func configPatchFromPlan(t *testing.T, plan *seiv1alpha1.TaskPlan) task.ConfigPatchTask { + t.Helper() + g := NewWithT(t) + for _, pt := range plan.Tasks { + if pt.Type != TaskConfigPatch { + continue + } + g.Expect(pt.Params).NotTo(BeNil()) + var p task.ConfigPatchTask + g.Expect(json.Unmarshal(pt.Params.Raw, &p)).To(Succeed()) + return p + } + t.Fatal("plan has no config-patch task") + return task.ConfigPatchTask{} +} + +// --- fullNodePlanner running-plan tests --- -func TestBuildRunningPlan_NoDrift_ReturnsNil(t *testing.T) { +func TestFullPlanner_NoDrift_ReturnsNil(t *testing.T) { g := NewWithT(t) node := runningFullNode() - // spec.image == status.currentImage — no drift - plan, err := buildRunningPlan(node) + plan, err := (&fullNodePlanner{}).BuildPlan(node) g.Expect(err).NotTo(HaveOccurred()) g.Expect(plan).To(BeNil(), "no plan should be built when there is no image drift") } -func TestBuildRunningPlan_ImageDrift_ReturnsNodeUpdatePlan(t *testing.T) { +func TestFullPlanner_ImageDrift_UpdateProgression(t *testing.T) { g := NewWithT(t) node := runningFullNode() - node.Spec.Image = testImageV2 // drift: spec != status.currentImage + node.Spec.Image = testImageV2 - plan, err := buildRunningPlan(node) + plan, err := (&fullNodePlanner{}).BuildPlan(node) g.Expect(err).NotTo(HaveOccurred()) g.Expect(plan).NotTo(BeNil(), "plan should be built for image drift") g.Expect(plan.Phase).To(Equal(seiv1alpha1.TaskPlanActive)) g.Expect(plan.TargetPhase).To(Equal(seiv1alpha1.PhaseRunning)) - // FailedPhase should be empty — failure retries rather than transitioning out of Running. + // FailedPhase deliberately empty — failure retries on next reconcile. g.Expect(string(plan.FailedPhase)).To(BeEmpty()) - got := planTaskTypes(plan) want := []string{ task.TaskTypeApplyStatefulSet, task.TaskTypeApplyService, + TaskConfigPatch, + TaskConfigValidate, task.TaskTypeReplacePod, task.TaskTypeObserveImage, TaskMarkReady, } - g.Expect(got).To(Equal(want), "NodeUpdate plan should have exactly these tasks in order") + g.Expect(planTaskTypes(plan)).To(Equal(want)) - // All tasks should start Pending with non-empty IDs and params. + // Tasks start Pending with non-empty IDs and params. for _, pt := range plan.Tasks { g.Expect(pt.Status).To(Equal(seiv1alpha1.TaskPending), "task %s should start Pending", pt.Type) g.Expect(pt.ID).NotTo(BeEmpty(), "task %s should have an ID", pt.Type) @@ -82,12 +104,270 @@ func TestBuildRunningPlan_ImageDrift_ReturnsNodeUpdatePlan(t *testing.T) { } } +// The update plan's config-patch must carry the external-address from +// Spec.ExternalAddress — the publishable-P2P propagation contract. +func TestFullPlanner_ConfigPatchCarriesExternalAddress(t *testing.T) { + g := NewWithT(t) + node := runningFullNode() + node.Spec.ExternalAddress = testExternalAddrAtl + node.Spec.Image = testImageV2 + + plan, err := (&fullNodePlanner{}).BuildPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(plan).NotTo(BeNil()) + + patch := configPatchFromPlan(t, plan) + g.Expect(patch.Files).To(HaveKey("config.toml")) + p2p, ok := patch.Files["config.toml"]["p2p"].(map[string]any) + g.Expect(ok).To(BeTrue(), "config.toml.p2p should be a nested map") + g.Expect(p2p).To(HaveKeyWithValue("external-address", testExternalAddrAtl)) +} + +// Two builds of the same spec must produce identical patch payloads +// (plan-construction is deterministic w.r.t. spec). +func TestFullPlanner_ConfigPatchIsDeterministic(t *testing.T) { + g := NewWithT(t) + node := runningFullNode() + node.Spec.ExternalAddress = testExternalAddrAtl + node.Spec.Image = testImageV2 + + plan1, err := (&fullNodePlanner{}).BuildPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + plan2, err := (&fullNodePlanner{}).BuildPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + + p1 := configPatchFromPlan(t, plan1) + p2 := configPatchFromPlan(t, plan2) + g.Expect(p1.Files).To(Equal(p2.Files), "same spec must produce same patch payload") +} + +// --- archive and replay planners share the full progression --- + +func TestArchivePlanner_ImageDrift_UpdateProgression(t *testing.T) { + g := NewWithT(t) + node := runningFullNode() + node.Spec.FullNode = nil + node.Spec.Archive = &seiv1alpha1.ArchiveSpec{} + node.Spec.Image = testImageV2 + + plan, err := (&archiveNodePlanner{}).BuildPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(plan).NotTo(BeNil()) + + g.Expect(planTaskTypes(plan)).To(Equal([]string{ + task.TaskTypeApplyStatefulSet, + task.TaskTypeApplyService, + TaskConfigPatch, + TaskConfigValidate, + task.TaskTypeReplacePod, + task.TaskTypeObserveImage, + TaskMarkReady, + })) +} + +// Replayer shares the full/archive update shape — symmetry test. +func TestReplayerPlanner_ImageDrift_UpdateProgression(t *testing.T) { + g := NewWithT(t) + node := runningFullNode() + node.Spec.FullNode = nil + node.Spec.Replayer = &seiv1alpha1.ReplayerSpec{} + node.Spec.Image = testImageV2 + + plan, err := (&replayerPlanner{}).BuildPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(plan).NotTo(BeNil()) + + g.Expect(planTaskTypes(plan)).To(Equal([]string{ + task.TaskTypeApplyStatefulSet, + task.TaskTypeApplyService, + TaskConfigPatch, + TaskConfigValidate, + task.TaskTypeReplacePod, + task.TaskTypeObserveImage, + TaskMarkReady, + })) +} + +// Convention: init writes the whole config via TaskConfigApply; a Running +// node's update plan patches via TaskConfigPatch. Never both in one plan. +// Stated as a doc rule on NodePlanner; this test enforces it. +func TestPlannerConvention_InitUsesApply_UpdateUsesPatch(t *testing.T) { + g := NewWithT(t) + + // Init: empty phase, empty currentImage. + initNode := runningFullNode() + initNode.Status.Phase = "" + initNode.Status.CurrentImage = "" + + initPlan, err := (&fullNodePlanner{}).BuildPlan(initNode) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(initPlan).NotTo(BeNil()) + initTypes := planTaskTypes(initPlan) + g.Expect(initTypes).To(ContainElement(TaskConfigApply), "init must include config-apply") + g.Expect(initTypes).NotTo(ContainElement(TaskConfigPatch), "init must NOT include config-patch") + + // Running update: spec.image diverges from status.currentImage. + updateNode := runningFullNode() + updateNode.Spec.Image = testImageV2 + + updatePlan, err := (&fullNodePlanner{}).BuildPlan(updateNode) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(updatePlan).NotTo(BeNil()) + updateTypes := planTaskTypes(updatePlan) + g.Expect(updateTypes).To(ContainElement(TaskConfigPatch), "update must include config-patch") + g.Expect(updateTypes).NotTo(ContainElement(TaskConfigApply), "update must NOT include config-apply") +} + +// Validator's update progression prepends the three key-validation gates +// so a missing/malformed secret aborts before any STS mutation. +func TestValidatorPlanner_ImageDrift_PrependsValidationGates(t *testing.T) { + g := NewWithT(t) + node := runningFullNode() + node.Spec.FullNode = nil + node.Spec.Validator = &seiv1alpha1.ValidatorSpec{} + node.Spec.Image = testImageV2 + + plan, err := (&validatorPlanner{}).BuildPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(plan).NotTo(BeNil()) + + g.Expect(planTaskTypes(plan)).To(Equal([]string{ + task.TaskTypeValidateSigningKey, + task.TaskTypeValidateNodeKey, + task.TaskTypeValidateOperatorKeyring, + task.TaskTypeApplyStatefulSet, + task.TaskTypeApplyService, + TaskConfigPatch, + TaskConfigValidate, + task.TaskTypeReplacePod, + task.TaskTypeObserveImage, + TaskMarkReady, + })) +} + +// --- sidecar mark-ready re-apply --- + +func setSidecarReady(node *seiv1alpha1.SeiNode, status metav1.ConditionStatus, reason string) { + meta.SetStatusCondition(&node.Status.Conditions, metav1.Condition{ + Type: seiv1alpha1.ConditionSidecarReady, + Status: status, + Reason: reason, + Message: "test", + }) +} + +func TestFullPlanner_SidecarReady_NoPlan(t *testing.T) { + g := NewWithT(t) + node := runningFullNode() + setSidecarReady(node, metav1.ConditionTrue, "Ready") + + plan, err := (&fullNodePlanner{}).BuildPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(plan).To(BeNil()) +} + +func TestFullPlanner_SidecarNotReady_ReturnsMarkReadyPlan(t *testing.T) { + g := NewWithT(t) + node := runningFullNode() + setSidecarReady(node, metav1.ConditionFalse, "NotReady") + + plan, err := (&fullNodePlanner{}).BuildPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(plan).NotTo(BeNil()) + g.Expect(plan.Phase).To(Equal(seiv1alpha1.TaskPlanActive)) + g.Expect(plan.TargetPhase).To(Equal(seiv1alpha1.PhaseRunning)) + g.Expect(string(plan.FailedPhase)).To(BeEmpty()) + g.Expect(planTaskTypes(plan)).To(Equal([]string{TaskMarkReady})) +} + +func TestFullPlanner_SidecarUnknown_NoPlan(t *testing.T) { + g := NewWithT(t) + node := runningFullNode() + setSidecarReady(node, metav1.ConditionUnknown, "Unreachable") + + plan, err := (&fullNodePlanner{}).BuildPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(plan).To(BeNil(), "Unknown should not trigger a plan — re-probe next tick") +} + +func TestFullPlanner_ImageDriftWinsOverSidecar(t *testing.T) { + g := NewWithT(t) + node := runningFullNode() + node.Spec.Image = testImageV2 + setSidecarReady(node, metav1.ConditionFalse, "NotReady") + + plan, err := (&fullNodePlanner{}).BuildPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(plan).NotTo(BeNil()) + g.Expect(plan.Tasks).To(HaveLen(7), "should be full update plan, not one-task mark-ready") + g.Expect(planTaskTypes(plan)).To(Equal([]string{ + task.TaskTypeApplyStatefulSet, + task.TaskTypeApplyService, + TaskConfigPatch, + TaskConfigValidate, + task.TaskTypeReplacePod, + task.TaskTypeObserveImage, + TaskMarkReady, + })) +} + +func TestBuildMarkReadyPlan_FreshIDEveryCall(t *testing.T) { + g := NewWithT(t) + node := runningFullNode() + + p1, err := buildMarkReadyPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + p2, err := buildMarkReadyPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + + g.Expect(p1.ID).NotTo(Equal(p2.ID)) + g.Expect(p1.Tasks[0].ID).NotTo(Equal(p2.Tasks[0].ID)) +} + +func TestSidecarNeedsReapproval(t *testing.T) { + g := NewWithT(t) + + node := runningFullNode() + g.Expect(sidecarNeedsReapproval(node)).To(BeFalse()) + + setSidecarReady(node, metav1.ConditionTrue, "Ready") + g.Expect(sidecarNeedsReapproval(node)).To(BeFalse()) + + setSidecarReady(node, metav1.ConditionUnknown, "Unreachable") + g.Expect(sidecarNeedsReapproval(node)).To(BeFalse()) + + setSidecarReady(node, metav1.ConditionFalse, "SomethingElse") + g.Expect(sidecarNeedsReapproval(node)).To(BeFalse()) + + setSidecarReady(node, metav1.ConditionFalse, "NotReady") + g.Expect(sidecarNeedsReapproval(node)).To(BeTrue()) +} + +func TestBuildRunningPlan_UniqueIDs(t *testing.T) { + g := NewWithT(t) + node := runningFullNode() + node.Spec.Image = testImageV2 + + plan1, err := (&fullNodePlanner{}).BuildPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + plan2, err := (&fullNodePlanner{}).BuildPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + + g.Expect(plan1.ID).NotTo(Equal(plan2.ID), "separate plan builds should have unique IDs") + + seen := map[string]bool{} + for _, tsk := range plan1.Tasks { + g.Expect(seen[tsk.ID]).To(BeFalse(), "duplicate task ID: %s", tsk.ID) + seen[tsk.ID] = true + } +} + // --- ResolvePlan condition tests --- func TestResolvePlan_NodeUpdate_SetsCondition(t *testing.T) { g := NewWithT(t) node := runningFullNode() - node.Spec.Image = testImageV2 // drift triggers NodeUpdate plan + node.Spec.Image = testImageV2 err := (&NodeResolver{}).ResolvePlan(t.Context(), node) g.Expect(err).NotTo(HaveOccurred()) @@ -105,22 +385,17 @@ func TestResolvePlan_CompletedPlan_ClearsCondition(t *testing.T) { node := runningFullNode() node.Spec.Image = testImageV2 - // Build a NodeUpdate plan and simulate completion. err := (&NodeResolver{}).ResolvePlan(t.Context(), node) g.Expect(err).NotTo(HaveOccurred()) g.Expect(node.Status.Plan).NotTo(BeNil()) - // Verify the condition was set. cond := meta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionNodeUpdateInProgress) g.Expect(cond).NotTo(BeNil()) g.Expect(cond.Status).To(Equal(metav1.ConditionTrue)) - // Mark the plan completed. node.Status.Plan.Phase = seiv1alpha1.TaskPlanComplete - // Also converge currentImage so a new plan is not built. node.Status.CurrentImage = testImageV2 - // ResolvePlan should clear the completed plan and the condition. err = (&NodeResolver{}).ResolvePlan(t.Context(), node) g.Expect(err).NotTo(HaveOccurred()) @@ -136,7 +411,6 @@ func TestResolvePlan_FailedPlan_ClearsCondition(t *testing.T) { node := runningFullNode() node.Spec.Image = testImageV2 - // Build a NodeUpdate plan and simulate failure. err := (&NodeResolver{}).ResolvePlan(t.Context(), node) g.Expect(err).NotTo(HaveOccurred()) g.Expect(node.Status.Plan).NotTo(BeNil()) @@ -150,13 +424,9 @@ func TestResolvePlan_FailedPlan_ClearsCondition(t *testing.T) { Error: "apply error", } - // ResolvePlan should clear the failed plan. Since drift still exists, - // it immediately builds a new NodeUpdate plan and sets the condition - // back to True. This is correct — automatic retry on failure. err = (&NodeResolver{}).ResolvePlan(t.Context(), node) g.Expect(err).NotTo(HaveOccurred()) - // A new plan was built because drift still exists. g.Expect(node.Status.Plan).NotTo(BeNil(), "new plan should be built because drift persists") g.Expect(node.Status.Plan.Phase).To(Equal(seiv1alpha1.TaskPlanActive)) @@ -168,7 +438,7 @@ func TestResolvePlan_FailedPlan_ClearsCondition(t *testing.T) { func TestResolvePlan_ResumesActivePlan(t *testing.T) { g := NewWithT(t) node := runningFullNode() - node.Spec.Image = testImageV2 // drift exists + node.Spec.Image = testImageV2 existingPlan := &seiv1alpha1.TaskPlan{ ID: "existing-plan-123", @@ -185,19 +455,15 @@ func TestResolvePlan_ResumesActivePlan(t *testing.T) { "active plan should be resumed, not replaced") } -// --- Additional edge cases --- - func TestResolvePlan_CompletedNonUpdatePlan_DoesNotClearCondition(t *testing.T) { g := NewWithT(t) node := runningFullNode() - // Manually set the condition (as if a previous NodeUpdate plan set it). meta.SetStatusCondition(&node.Status.Conditions, metav1.Condition{ Type: seiv1alpha1.ConditionNodeUpdateInProgress, Status: metav1.ConditionFalse, Reason: "UpdateComplete", }) - // Create a completed plan without observe-image (not a NodeUpdate plan). node.Status.Plan = &seiv1alpha1.TaskPlan{ ID: "non-update-plan", Phase: seiv1alpha1.TaskPlanComplete, @@ -209,7 +475,6 @@ func TestResolvePlan_CompletedNonUpdatePlan_DoesNotClearCondition(t *testing.T) err := (&NodeResolver{}).ResolvePlan(t.Context(), node) g.Expect(err).NotTo(HaveOccurred()) - // The condition should remain unchanged (already False from before). cond := meta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionNodeUpdateInProgress) g.Expect(cond).NotTo(BeNil()) g.Expect(cond.Status).To(Equal(metav1.ConditionFalse)) @@ -217,26 +482,7 @@ func TestResolvePlan_CompletedNonUpdatePlan_DoesNotClearCondition(t *testing.T) "reason should not be overwritten by a non-update plan completion") } -func TestBuildRunningPlan_UniqueIDs(t *testing.T) { - g := NewWithT(t) - node := runningFullNode() - node.Spec.Image = testImageV2 - - plan1, err := buildRunningPlan(node) - g.Expect(err).NotTo(HaveOccurred()) - - plan2, err := buildRunningPlan(node) - g.Expect(err).NotTo(HaveOccurred()) - - g.Expect(plan1.ID).NotTo(Equal(plan2.ID), "separate plan builds should have unique IDs") - - // Task IDs within a single plan should all be unique. - seen := map[string]bool{} - for _, tsk := range plan1.Tasks { - g.Expect(seen[tsk.ID]).To(BeFalse(), "duplicate task ID: %s", tsk.ID) - seen[tsk.ID] = true - } -} +// --- handleTerminalPlan tests (unchanged) --- func TestHandleTerminalPlan_CompletedWithUpdateCondition(t *testing.T) { g := NewWithT(t) @@ -291,7 +537,6 @@ func TestHandleTerminalPlan_NilPlan(t *testing.T) { g := NewWithT(t) node := runningFullNode() node.Status.Plan = nil - // Should be a no-op — no panic. handleTerminalPlan(t.Context(), node) g.Expect(node.Status.Plan).To(BeNil()) } @@ -325,105 +570,3 @@ func TestPlanFailureMessage_NoDetail(t *testing.T) { plan := &seiv1alpha1.TaskPlan{} g.Expect(planFailureMessage(plan)).To(Equal("unknown")) } - -// --- sidecar mark-ready re-apply tests --- - -func setSidecarReady(node *seiv1alpha1.SeiNode, status metav1.ConditionStatus, reason string) { - meta.SetStatusCondition(&node.Status.Conditions, metav1.Condition{ - Type: seiv1alpha1.ConditionSidecarReady, - Status: status, - Reason: reason, - Message: "test", - }) -} - -func TestBuildRunningPlan_SidecarReady_NoPlan(t *testing.T) { - g := NewWithT(t) - node := runningFullNode() - setSidecarReady(node, metav1.ConditionTrue, "Ready") - - plan, err := buildRunningPlan(node) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(plan).To(BeNil()) -} - -func TestBuildRunningPlan_SidecarNotReady_ReturnsMarkReadyPlan(t *testing.T) { - g := NewWithT(t) - node := runningFullNode() - setSidecarReady(node, metav1.ConditionFalse, "NotReady") - - plan, err := buildRunningPlan(node) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(plan).NotTo(BeNil()) - g.Expect(plan.Phase).To(Equal(seiv1alpha1.TaskPlanActive)) - g.Expect(plan.TargetPhase).To(Equal(seiv1alpha1.PhaseRunning)) - g.Expect(string(plan.FailedPhase)).To(BeEmpty()) - g.Expect(planTaskTypes(plan)).To(Equal([]string{TaskMarkReady})) -} - -func TestBuildRunningPlan_SidecarUnknown_NoPlan(t *testing.T) { - g := NewWithT(t) - node := runningFullNode() - setSidecarReady(node, metav1.ConditionUnknown, "Unreachable") - - plan, err := buildRunningPlan(node) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(plan).To(BeNil(), "Unknown should not trigger a plan — re-probe next tick") -} - -func TestBuildRunningPlan_ImageDriftWinsOverSidecar(t *testing.T) { - g := NewWithT(t) - node := runningFullNode() - node.Spec.Image = testImageV2 // image drift - setSidecarReady(node, metav1.ConditionFalse, "NotReady") - - plan, err := buildRunningPlan(node) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(plan).NotTo(BeNil()) - // Image update plan ends with MarkReady, which also resolves the sidecar. - g.Expect(plan.Tasks).To(HaveLen(5), "should be full node-update plan, not one-task mark-ready") - g.Expect(planTaskTypes(plan)).To(Equal([]string{ - task.TaskTypeApplyStatefulSet, - task.TaskTypeApplyService, - task.TaskTypeReplacePod, - task.TaskTypeObserveImage, - TaskMarkReady, - })) -} - -func TestBuildMarkReadyPlan_FreshIDEveryCall(t *testing.T) { - g := NewWithT(t) - node := runningFullNode() - - p1, err := buildMarkReadyPlan(node) - g.Expect(err).NotTo(HaveOccurred()) - p2, err := buildMarkReadyPlan(node) - g.Expect(err).NotTo(HaveOccurred()) - - g.Expect(p1.ID).NotTo(Equal(p2.ID)) - g.Expect(p1.Tasks[0].ID).NotTo(Equal(p2.Tasks[0].ID)) -} - -func TestSidecarNeedsReapproval(t *testing.T) { - g := NewWithT(t) - - // missing condition - node := runningFullNode() - g.Expect(sidecarNeedsReapproval(node)).To(BeFalse()) - - // True - setSidecarReady(node, metav1.ConditionTrue, "Ready") - g.Expect(sidecarNeedsReapproval(node)).To(BeFalse()) - - // Unknown - setSidecarReady(node, metav1.ConditionUnknown, "Unreachable") - g.Expect(sidecarNeedsReapproval(node)).To(BeFalse()) - - // False + wrong reason - setSidecarReady(node, metav1.ConditionFalse, "SomethingElse") - g.Expect(sidecarNeedsReapproval(node)).To(BeFalse()) - - // False + NotReady - setSidecarReady(node, metav1.ConditionFalse, "NotReady") - g.Expect(sidecarNeedsReapproval(node)).To(BeTrue()) -} diff --git a/internal/planner/planner.go b/internal/planner/planner.go index 1b62a92..ae5b28c 100644 --- a/internal/planner/planner.go +++ b/internal/planner/planner.go @@ -32,6 +32,7 @@ const ( TaskConfigureGenesis = sidecar.TaskTypeConfigureGenesis TaskConfigureStateSync = sidecar.TaskTypeConfigureStateSync TaskConfigApply = sidecar.TaskTypeConfigApply + TaskConfigPatch = sidecar.TaskTypeConfigPatch TaskConfigValidate = sidecar.TaskTypeConfigValidate TaskMarkReady = sidecar.TaskTypeMarkReady TaskSnapshotUpload = sidecar.TaskTypeSnapshotUpload @@ -57,8 +58,12 @@ var baseProgression = map[string][]string{ "genesis": {TaskConfigApply, TaskConfigValidate, TaskMarkReady}, } -// NodePlanner encapsulates mode-specific logic for validating a SeiNode -// and building its initialization task plan with fully embedded params. +// NodePlanner encapsulates mode-specific plan construction. BuildPlan +// dispatches internally between startup (init) and an existing +// running resource — callers don't see the distinction. +// +// Convention: init writes the whole config via TaskConfigApply; an +// existing resource patches only controller-owned keys via TaskConfigPatch. type NodePlanner interface { Validate(node *seiv1alpha1.SeiNode) error BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) @@ -702,24 +707,14 @@ func commonOverrides(node *seiv1alpha1.SeiNode) map[string]string { return out } -// buildRunningPlan returns a steady-state drift plan, or nil if no drift. -// Image drift is checked first — its plan ends with MarkReady, so it also -// resolves any stale sidecar. -func buildRunningPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) { - if node.Spec.Image != node.Status.CurrentImage { - return buildNodeUpdatePlan(node) - } - if sidecarNeedsReapproval(node) { - return buildMarkReadyPlan(node) - } - return nil, nil -} - +// sidecarNeedsReapproval reports whether the sidecar has been observed +// to have lost readiness. Mode-agnostic. func sidecarNeedsReapproval(node *seiv1alpha1.SeiNode) bool { cond := meta.FindStatusCondition(node.Status.Conditions, seiv1alpha1.ConditionSidecarReady) return cond != nil && cond.Status == metav1.ConditionFalse && cond.Reason == "NotReady" } +// buildMarkReadyPlan is the single-task plan that re-marks sidecar readiness. func buildMarkReadyPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) { planID := uuid.New().String() t, err := buildPlannedTask(planID, sidecar.TaskTypeMarkReady, 0, paramsForTaskType(node, sidecar.TaskTypeMarkReady, nil, nil)) @@ -734,28 +729,44 @@ func buildMarkReadyPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error }, nil } -// buildNodeUpdatePlan constructs a plan to roll out an image update on a -// Running node. The plan applies the new StatefulSet spec, waits for the -// rollout to complete, then re-initializes the sidecar. -// -// FailedPhase is deliberately empty: a failure retries on the next reconcile -// rather than transitioning the node out of Running. -func buildNodeUpdatePlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) { - setNodeUpdateCondition(node, metav1.ConditionTrue, "UpdateStarted", - fmt.Sprintf("image drift detected: spec=%s current=%s", node.Spec.Image, node.Status.CurrentImage)) +func imageDrifted(node *seiv1alpha1.SeiNode) bool { + return node.Spec.Image != node.Status.CurrentImage +} - prog := []string{ - task.TaskTypeApplyStatefulSet, - task.TaskTypeApplyService, - task.TaskTypeReplacePod, - task.TaskTypeObserveImage, - sidecar.TaskTypeMarkReady, +// imageDriftMessage formats the standard NodeUpdateInProgress message +// for image-drift-triggered plans, used by every mode planner's +// buildRunningPlan before calling assembleUpdatePlan. +func imageDriftMessage(node *seiv1alpha1.SeiNode) string { + return fmt.Sprintf("image drift detected: spec=%s current=%s", node.Spec.Image, node.Status.CurrentImage) +} + +// externalAddressPatch is the config.toml patch that stamps the +// publishable-P2P external address. An empty Spec.ExternalAddress +// stamps an empty value so opt-out reaches the pod symmetrically. +func externalAddressPatch(node *seiv1alpha1.SeiNode) map[string]map[string]any { + return map[string]map[string]any{ + "config.toml": { + "p2p": map[string]any{ + "external-address": node.Spec.ExternalAddress, + }, + }, } +} +// assembleUpdatePlan composes a task progression for a Running node into +// a TaskPlan. Pure boilerplate (planID, task ordering, param marshaling, +// phase fields) — the progression slice is the per-mode authorship. +// Callers own the NodeUpdateInProgress condition: stamp it before calling +// so the reason/message reflects the actual trigger. +// +// FailedPhase is deliberately empty: a failure retries on the next reconcile +// rather than transitioning the node out of Running. +func assembleUpdatePlan(node *seiv1alpha1.SeiNode, prog []string, patch map[string]map[string]any) (*seiv1alpha1.TaskPlan, error) { planID := uuid.New().String() tasks := make([]seiv1alpha1.PlannedTask, len(prog)) for i, taskType := range prog { - t, err := buildPlannedTask(planID, taskType, i, paramsForTaskType(node, taskType, nil, nil)) + params := paramsForUpdateTask(node, taskType, patch) + t, err := buildPlannedTask(planID, taskType, i, params) if err != nil { return nil, err } @@ -769,6 +780,16 @@ func buildNodeUpdatePlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, erro }, nil } +// paramsForUpdateTask returns the config-patch params for TaskConfigPatch +// and delegates everything else to paramsForTaskType with nil intent — +// update plans on a Running node never carry a ConfigIntent. +func paramsForUpdateTask(node *seiv1alpha1.SeiNode, taskType string, patch map[string]map[string]any) any { + if taskType == TaskConfigPatch { + return task.ConfigPatchTask{Files: patch} + } + return paramsForTaskType(node, taskType, nil, nil) +} + // mergeOverrides combines controller-generated overrides with user-specified // overrides. User overrides take precedence. func mergeOverrides(controllerOverrides, userOverrides map[string]string) map[string]string { diff --git a/internal/planner/replay.go b/internal/planner/replay.go index e6ec1dc..594fce0 100644 --- a/internal/planner/replay.go +++ b/internal/planner/replay.go @@ -4,8 +4,10 @@ import ( "fmt" seiconfig "github.com/sei-protocol/sei-config" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/task" ) type replayerPlanner struct { @@ -35,16 +37,38 @@ func (p *replayerPlanner) Validate(node *seiv1alpha1.SeiNode) error { func (p *replayerPlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) { if node.Status.Phase == seiv1alpha1.PhaseRunning { - return buildRunningPlan(node) + return p.buildRunningPlan(node) } - params := &seiconfig.ConfigIntent{ + intent := &seiconfig.ConfigIntent{ Mode: seiconfig.ModeFull, Overrides: mergeOverrides(mergeOverrides(commonOverrides(node), p.controllerOverrides()), node.Spec.Overrides), } if NeedsBootstrap(node) { - return buildBootstrapPlan(node, node.Spec.Peers, &node.Spec.Replayer.Snapshot, params) + return buildBootstrapPlan(node, node.Spec.Peers, &node.Spec.Replayer.Snapshot, intent) } - return buildBasePlan(node, node.Spec.Peers, &node.Spec.Replayer.Snapshot, params) + return buildBasePlan(node, node.Spec.Peers, &node.Spec.Replayer.Snapshot, intent) +} + +// buildRunningPlan returns the update plan for a Running replayer node. +// Same shape as full and archive. +func (p *replayerPlanner) buildRunningPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) { + if imageDrifted(node) { + setNodeUpdateCondition(node, metav1.ConditionTrue, "UpdateStarted", imageDriftMessage(node)) + prog := []string{ + task.TaskTypeApplyStatefulSet, + task.TaskTypeApplyService, + TaskConfigPatch, + TaskConfigValidate, + task.TaskTypeReplacePod, + task.TaskTypeObserveImage, + TaskMarkReady, + } + return assembleUpdatePlan(node, prog, externalAddressPatch(node)) + } + if sidecarNeedsReapproval(node) { + return buildMarkReadyPlan(node) + } + return nil, nil } func (p *replayerPlanner) controllerOverrides() map[string]string { diff --git a/internal/planner/validator.go b/internal/planner/validator.go index 88157b5..602239f 100644 --- a/internal/planner/validator.go +++ b/internal/planner/validator.go @@ -4,8 +4,10 @@ import ( "fmt" seiconfig "github.com/sei-protocol/sei-config" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/task" ) type validatorPlanner struct { @@ -94,18 +96,45 @@ func validateOperatorKeyringDistinctness(v *seiv1alpha1.ValidatorSpec) error { func (p *validatorPlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) { if node.Status.Phase == seiv1alpha1.PhaseRunning { - return buildRunningPlan(node) + return p.buildRunningPlan(node) } if isGenesisCeremonyNode(node) { return buildGenesisPlan(node) } v := node.Spec.Validator - params := &seiconfig.ConfigIntent{ + intent := &seiconfig.ConfigIntent{ Mode: seiconfig.ModeValidator, Overrides: mergeOverrides(commonOverrides(node), node.Spec.Overrides), } if NeedsBootstrap(node) { - return buildBootstrapPlan(node, node.Spec.Peers, v.Snapshot, params) + return buildBootstrapPlan(node, node.Spec.Peers, v.Snapshot, intent) } - return buildBasePlan(node, node.Spec.Peers, v.Snapshot, params) + return buildBasePlan(node, node.Spec.Peers, v.Snapshot, intent) +} + +// buildRunningPlan returns the update plan for a Running validator. The +// key-validation gates run before any STS mutation so a missing or +// malformed secret aborts with a clear controller-side error rather than +// a kubelet volume-mount failure on the recreated pod. +func (p *validatorPlanner) buildRunningPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) { + if imageDrifted(node) { + setNodeUpdateCondition(node, metav1.ConditionTrue, "UpdateStarted", imageDriftMessage(node)) + prog := []string{ + task.TaskTypeValidateSigningKey, + task.TaskTypeValidateNodeKey, + task.TaskTypeValidateOperatorKeyring, + task.TaskTypeApplyStatefulSet, + task.TaskTypeApplyService, + TaskConfigPatch, + TaskConfigValidate, + task.TaskTypeReplacePod, + task.TaskTypeObserveImage, + TaskMarkReady, + } + return assembleUpdatePlan(node, prog, externalAddressPatch(node)) + } + if sidecarNeedsReapproval(node) { + return buildMarkReadyPlan(node) + } + return nil, nil } diff --git a/internal/task/config_patch.go b/internal/task/config_patch.go new file mode 100644 index 0000000..b5696e6 --- /dev/null +++ b/internal/task/config_patch.go @@ -0,0 +1,23 @@ +package task + +import ( + sidecar "github.com/sei-protocol/seictl/sidecar/client" +) + +// ConfigPatchTask stamps controller-owned TOML keys into named seid +// config files via a generic merge-and-write on the sidecar — no +// sei-config involvement. JSON tag matches the wire format +// sidecar.ConfigPatchTask emits. +type ConfigPatchTask struct { + Files map[string]map[string]any `json:"files"` +} + +func (t ConfigPatchTask) TaskType() string { return sidecar.TaskTypeConfigPatch } + +func (t ConfigPatchTask) Validate() error { + return sidecar.ConfigPatchTask{Files: t.Files}.Validate() +} + +func (t ConfigPatchTask) ToTaskRequest() sidecar.TaskRequest { + return sidecar.ConfigPatchTask{Files: t.Files}.ToTaskRequest() +} diff --git a/internal/task/config_patch_test.go b/internal/task/config_patch_test.go new file mode 100644 index 0000000..9bd832b --- /dev/null +++ b/internal/task/config_patch_test.go @@ -0,0 +1,34 @@ +package task + +import ( + "encoding/json" + "testing" + + . "github.com/onsi/gomega" +) + +// The sidecar handler keys its deserialization on a literal "files" JSON +// field (sidecar/tasks/config.go ConfigPatchRequest). Dropping the json +// tag on ConfigPatchTask.Files would silently break every patch at +// runtime — Go's default would emit "Files" capitalized and the sidecar +// would see an empty params block. Pin the wire format here so a +// regression in the tag fails fast at unit-test time. +func TestConfigPatchTask_JSONWireFormatUsesFilesKey(t *testing.T) { + g := NewWithT(t) + in := ConfigPatchTask{Files: map[string]map[string]any{ + "config.toml": {"p2p": map[string]any{"external-address": "host:26656"}}, + }} + + raw, err := json.Marshal(in) + g.Expect(err).NotTo(HaveOccurred()) + + var asMap map[string]any + g.Expect(json.Unmarshal(raw, &asMap)).To(Succeed()) + g.Expect(asMap).To(HaveKey("files"), "sidecar deserializes by 'files' key — drop the json tag and patches silently no-op") + g.Expect(asMap).NotTo(HaveKey("Files"), "capitalised 'Files' would mean the json tag was dropped") + + // Roundtrip back into the controller-side type. + var out ConfigPatchTask + g.Expect(json.Unmarshal(raw, &out)).To(Succeed()) + g.Expect(out.Files).To(Equal(in.Files)) +} diff --git a/internal/task/task.go b/internal/task/task.go index 5705f7d..90f02e6 100644 --- a/internal/task/task.go +++ b/internal/task/task.go @@ -204,6 +204,7 @@ var registry = map[string]taskDeserializer{ sidecar.TaskTypeConfigureStateSync: sidecarTask[sidecar.ConfigureStateSyncTask](false), sidecar.TaskTypeAwaitCondition: sidecarTask[sidecar.AwaitConditionTask](false), sidecar.TaskTypeConfigApply: sidecarTask[configApplyTask](false), + sidecar.TaskTypeConfigPatch: sidecarTask[ConfigPatchTask](false), sidecar.TaskTypeConfigValidate: sidecarTask[sidecar.ConfigValidateTask](true), sidecar.TaskTypeConfigureGenesis: sidecarTask[sidecar.ConfigureGenesisTask](false), sidecar.TaskTypeDiscoverPeers: sidecarTask[sidecar.DiscoverPeersTask](false),