Skip to content
13 changes: 5 additions & 8 deletions internal/command/deploy/machines_launchinput.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package deploy

import (
"fmt"
"slices"
"strconv"
"strings"

Expand Down Expand Up @@ -297,18 +298,14 @@ func skipLaunch(origMachineRaw *fly.Machine, mConfig *fly.MachineConfig) bool {
}

switch {
case state == fly.MachineStateStarted:
case slices.Contains([]string{fly.MachineStateStarted, "starting", "failed"}, state):
return false
case len(mConfig.Standbys) > 0:
return true
case state == fly.MachineStateStopped, state == fly.MachineStateSuspended:
for _, s := range mConfig.Services {
if (s.Autostop != nil && *s.Autostop != fly.MachineAutostopOff) || (s.Autostart != nil && *s.Autostart) {
return true
}
}
case origMachineRaw == nil:
return false
}
return false
return true
}

// updateContainerImage sets container.Image = mConfig.Image in any container where image == "."
Expand Down
5 changes: 3 additions & 2 deletions internal/command/deploy/machines_launchinput_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import (
)

func makeTerminalLoggerQuiet(tb testing.TB) {
var originalLogger = terminal.DefaultLogger
originalLogger := terminal.DefaultLogger
terminal.DefaultLogger = logger.New(os.Stdout, logger.Error, true)

tb.Cleanup(func() {
Expand Down Expand Up @@ -85,6 +85,7 @@ func testLaunchInputForBasic(t *testing.T) {
Region: li.Region,
Config: helpers.Clone(li.Config),
HostStatus: fly.HostStatusOk,
State: fly.MachineStateStarted,
}
// also must preserve any user's added metadata except for known fly metadata keys
origMachineRaw.Config.Metadata["user-added-me"] = "keep it"
Expand All @@ -104,6 +105,7 @@ func testLaunchInputForBasic(t *testing.T) {
Region: li.Region,
Config: helpers.Clone(li.Config),
HostStatus: fly.HostStatusOk,
State: fly.MachineStateStarted,
}
want.Config.Image = "super/globe"
want.Config.Env["NOT_SET_ON_RESTART_ONLY"] = "true"
Expand Down Expand Up @@ -253,7 +255,6 @@ func testLaunchInputForOnMounts(t *testing.T) {
assert.Equal(t, "ab1234567890", li.ID)
assert.True(t, li.RequiresReplacement)
assert.Empty(t, li.Config.Mounts)

}

// test mounts with auto volume resize
Expand Down
3 changes: 3 additions & 0 deletions internal/command/deploy/machines_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ func Test_resolveUpdatedMachineConfig_Mounts(t *testing.T) {
}, li)

origMachine := &fly.Machine{
State: fly.MachineStateStarted,
HostStatus: fly.HostStatusOk,
Config: &fly.MachineConfig{
Mounts: []fly.MachineMount{{
Expand Down Expand Up @@ -329,6 +330,7 @@ func Test_resolveUpdatedMachineConfig_restartOnly(t *testing.T) {
md.img = "SHOULD-NOT-USE-THIS-TAG"

origMachine := &fly.Machine{
State: fly.MachineStateStarted,
HostStatus: fly.HostStatusOk,
ID: "OrigID",
Config: &fly.MachineConfig{
Expand Down Expand Up @@ -371,6 +373,7 @@ func Test_resolveUpdatedMachineConfig_restartOnlyProcessGroup(t *testing.T) {
md.img = "SHOULD-NOT-USE-THIS-TAG"

origMachine := &fly.Machine{
State: fly.MachineStateStarted,
HostStatus: fly.HostStatusOk,
ID: "OrigID",
Config: &fly.MachineConfig{
Expand Down
1 change: 1 addition & 0 deletions internal/command/machine/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Machines REST fly.`
newStart(),
newStop(),
newStatus(),
newWait(),
newProxy(),
newClone(),
newUpdate(),
Expand Down
15 changes: 1 addition & 14 deletions internal/command/machine/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import (
"github.com/superfly/flyctl/internal/flag"
"github.com/superfly/flyctl/internal/flyerr"
mach "github.com/superfly/flyctl/internal/machine"
"github.com/superfly/flyctl/internal/watch"
)

func newUpdate() *cobra.Command {
Expand Down Expand Up @@ -79,9 +78,7 @@ func newUpdate() *cobra.Command {

func runUpdate(ctx context.Context) (err error) {
var (
io = iostreams.FromContext(ctx)
colorize = io.ColorScheme()

io = iostreams.FromContext(ctx)
autoConfirm = flag.GetBool(ctx, "yes")
skipHealthChecks = flag.GetBool(ctx, "skip-health-checks")
skipStart = flag.GetBool(ctx, "skip-start")
Expand Down Expand Up @@ -169,20 +166,10 @@ func runUpdate(ctx context.Context) (err error) {
Descript: timeoutErr.Description(),
Suggest: "Try increasing the --wait-timeout",
}

}
return err
}

if !(input.SkipLaunch || flag.GetDetach(ctx)) {
fmt.Fprintln(io.Out, colorize.Green("==> "+"Monitoring health checks"))

if err := watch.MachinesChecks(ctx, appName, []*fly.Machine{machine}); err != nil {
return err
}
fmt.Fprintln(io.Out)
}

fmt.Fprintf(io.Out, "\nMonitor machine status here:\nhttps://fly.io/apps/%s/machines/%s\n", appName, machine.ID)

return nil
Expand Down
185 changes: 185 additions & 0 deletions internal/command/machine/wait.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
package machine

import (
"context"
"errors"
"fmt"
"net/http"
"strings"
"time"

"github.com/spf13/cobra"
"github.com/superfly/fly-go/flaps"
"github.com/superfly/flyctl/internal/appconfig"
"github.com/superfly/flyctl/internal/command"
"github.com/superfly/flyctl/internal/flag"
"github.com/superfly/flyctl/internal/flapsutil"
"github.com/superfly/flyctl/iostreams"
)

// newWait builds the `wait` subcommand, which blocks until a machine reaches
// a requested state. It accepts at most one positional argument (the machine
// ID); the handler is runMachineWait.
func newWait() *cobra.Command {
	const short = "Wait for a machine to reach a state"
	const long = short + "\n"
	const usage = "wait [id]"

	// --state: the state to wait for; defaults to "settled".
	stateFlag := flag.String{
		Name:        "state",
		Description: "Machine state to wait for",
		Default:     "settled",
	}

	// --wait-timeout / -w: overall time budget; defaults to five minutes.
	timeoutFlag := flag.Duration{
		Name:        "wait-timeout",
		Shorthand:   "w",
		Description: "Time duration to wait for the machine to reach the requested state.",
		Default:     5 * time.Minute,
	}

	waitCmd := command.New(usage, short, long, runMachineWait,
		command.RequireSession,
		command.LoadAppNameIfPresent,
	)
	waitCmd.Args = cobra.RangeArgs(0, 1)

	flag.Add(waitCmd, flag.App(), flag.AppConfig(), selectFlag, stateFlag, timeoutFlag)

	return waitCmd
}

// runMachineWait is the handler for the `wait` subcommand. It selects one
// machine (from the first positional argument, or via selectOneMachine when
// none is given) and waits for it to reach the state named by --state.
//
// The wait is attempted up to three times. An attempt is retried only when
// isRetryableWaitError classifies its error as transient; before each retry
// the machine is refetched and the loop sleeps for retryDelayForAttempt.
// When --wait-timeout is positive it bounds the whole operation: each attempt
// and each retry delay only gets whatever is left of the budget. A
// non-positive timeout is passed through to the client unchanged.
//
// When the requested state is "settled", the machine is fetched once more
// after the wait succeeds so that the state it actually ended up in is
// printed.
//
// It returns an error if --state is empty, machine selection fails, the wait
// fails or runs out of time, a refetch fails, or ctx is cancelled during a
// retry delay.
func runMachineWait(ctx context.Context) error {
	var (
		io           = iostreams.FromContext(ctx)
		desiredState = flag.GetString(ctx, "state")
		waitTimeout  = flag.GetDuration(ctx, "wait-timeout")
	)

	if desiredState == "" {
		return fmt.Errorf("state cannot be empty")
	}

	machineID := flag.FirstArg(ctx)
	haveMachineID := len(flag.Args(ctx)) > 0
	machine, ctx, err := selectOneMachine(ctx, "", machineID, haveMachineID)
	if err != nil {
		return err
	}

	// Keep the ID in its own variable. The machine pointer is replaced by
	// refetches below, and a failed fetch must not be dereferenced just to
	// build the error message.
	machineID = machine.ID

	appName := appconfig.NameFromContext(ctx)
	client := flapsutil.ClientFromContext(ctx)

	fmt.Fprintf(io.Out, "Waiting up to %s for machine %s to reach %q...\n", waitTimeout, machineID, desiredState)

	startedWaitAt := time.Now()
	const maxAttempts = 3

	for attempt := 1; attempt <= maxAttempts; attempt++ {
		remainingTimeout := waitTimeout
		if waitTimeout > 0 {
			remainingTimeout = waitTimeout - time.Since(startedWaitAt)
			if remainingTimeout <= 0 {
				return fmt.Errorf("machine %s did not reach %q within %s", machineID, desiredState, waitTimeout)
			}
		}

		err = client.Wait(ctx, appName, machine, desiredState, remainingTimeout)
		if err == nil {
			break
		}

		if attempt == maxAttempts || !isRetryableWaitError(err) {
			return fmt.Errorf("machine %s did not reach %q within %s: %w", machineID, desiredState, waitTimeout, err)
		}

		fmt.Fprintf(io.Out, "Retrying wait for machine %s due to transient error: %v\n", machineID, err)

		// Refetch into a temporary so a failed Get leaves `machine` intact
		// and the error path never touches a nil pointer.
		refreshed, getErr := client.Get(ctx, appName, machineID)
		if getErr != nil {
			return fmt.Errorf("machine %s could not be refetched before retrying wait: %w", machineID, getErr)
		}
		machine = refreshed

		retryDelay := retryDelayForAttempt(attempt)
		if waitTimeout > 0 {
			remainingTimeout = waitTimeout - time.Since(startedWaitAt)
			if remainingTimeout <= 0 {
				return fmt.Errorf("machine %s did not reach %q within %s", machineID, desiredState, waitTimeout)
			}
			// Never sleep past the end of the overall budget.
			if retryDelay > remainingTimeout {
				retryDelay = remainingTimeout
			}
		}

		select {
		case <-time.After(retryDelay):
		case <-ctx.Done():
			return ctx.Err()
		}
	}

	if desiredState == "settled" {
		// Report the concrete state the machine ended up in rather than the
		// literal "settled".
		settled, getErr := client.Get(ctx, appName, machineID)
		if getErr != nil {
			return fmt.Errorf("machine %s reached settled state but could not fetch final state: %w", machineID, getErr)
		}
		desiredState = settled.State
	}

	fmt.Fprintf(io.Out, "Machine %s reached state %q\n", machineID, desiredState)

	return nil
}

// isRetryableWaitError reports whether err looks like a transient failure
// that is worth retrying. It returns true when either:
//
//   - err wraps a *flaps.FlapsError whose response status is 429 or in the
//     5xx range, or
//   - the lower-cased error message contains one of a fixed set of markers
//     (machine "currently replaced", common network failures, timeouts, EOF).
//
// A nil error is never retryable.
func isRetryableWaitError(err error) bool {
	if err == nil {
		return false
	}

	// Structured check: rate limiting and server-side errors.
	var apiErr *flaps.FlapsError
	if errors.As(err, &apiErr) {
		switch code := apiErr.ResponseStatusCode; {
		case code == http.StatusTooManyRequests, code >= 500 && code < 600:
			return true
		}
	}

	// Fallback: substring match against the lower-cased message.
	lowered := strings.ToLower(err.Error())
	markers := []string{
		"currently replaced",
		"connection reset by peer",
		"connection refused",
		"network is unreachable",
		"temporary failure in name resolution",
		"i/o timeout",
		"timeout",
		"eof",
	}
	for _, marker := range markers {
		if strings.Contains(lowered, marker) {
			return true
		}
	}

	return false
}

// retryDelayForAttempt returns the delay to apply after the given 1-based
// attempt: 500ms for the first attempt, doubling with each subsequent one
// (500ms, 1s, 2s, ...). Attempts below 1 are treated like the first.
//
// The doubling saturates once another step would overflow time.Duration, so
// the result is always positive and never decreases as attempt grows, and the
// computation is constant-time regardless of how large attempt is.
func retryDelayForAttempt(attempt int) time.Duration {
	const (
		baseDelay = 500 * time.Millisecond
		// baseDelay<<34 is the largest doubling that still fits in the int64
		// behind time.Duration; one more shift would wrap around.
		maxDoublings = 34
	)

	if attempt <= 1 {
		return baseDelay
	}

	doublings := attempt - 1
	if doublings > maxDoublings {
		doublings = maxDoublings
	}

	return baseDelay << doublings
}
8 changes: 6 additions & 2 deletions internal/machine/leasable_machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ func (lm *leasableMachine) Destroy(ctx context.Context, kill bool) error {

func (lm *leasableMachine) Cordon(ctx context.Context) error {
if lm.IsDestroyed() {
return fmt.Errorf("cannon cordon machine %s that was already destroyed", lm.machine.ID)
return fmt.Errorf("cannot cordon machine %s that was already destroyed", lm.machine.ID)
}

return lm.flapsClient.Cordon(ctx, lm.appName, lm.machine.ID, lm.leaseNonce)
Expand Down Expand Up @@ -535,6 +535,10 @@ func (lm *leasableMachine) refreshLeaseUntilCanceled(ctx context.Context, durati

for {
time.Sleep(b.Duration())
if lm.IsDestroyed() {
return
}

switch err := lm.RefreshLease(ctx, duration); {
case err == nil:
// good times
Expand All @@ -556,7 +560,7 @@ func (lm *leasableMachine) ReleaseLease(ctx context.Context) error {

nonce := lm.leaseNonce
lm.resetLease()
if nonce == "" {
if nonce == "" || lm.IsDestroyed() {
return nil
}

Expand Down
22 changes: 10 additions & 12 deletions internal/machine/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,25 +85,23 @@ func Update(ctx context.Context, appName string, m *fly.Machine, input *fly.Laun
return fmt.Errorf("could not update machine %s: %w", m.ID, err)
}

waitForAction := "start"
if input.SkipLaunch || m.Config.Schedule != "" || m.State != fly.MachineStateStarted {
waitForAction = "stop"
}

waitTimeout := time.Second * 300
if input.Timeout != 0 {
waitTimeout = time.Duration(input.Timeout) * time.Second
}

if err := WaitForStartOrStop(ctx, appName, updatedMachine, waitForAction, waitTimeout); err != nil {
return err
state, err := WaitForState(ctx, appName, updatedMachine, "settled", waitTimeout)
if err != nil {
return fmt.Errorf("error while waiting for machine to update: %w", err)
}

if state == "failed" {
return fmt.Errorf("machine %s update failed: machine entered %q state", m.ID, state)
}

if !input.SkipLaunch {
if !input.SkipHealthChecks {
if err := watch.MachinesChecks(ctx, appName, []*fly.Machine{updatedMachine}); err != nil {
return fmt.Errorf("failed to wait for health checks to pass: %w", err)
}
if state == "started" && !input.SkipHealthChecks {
if err := watch.MachinesChecks(ctx, appName, []*fly.Machine{updatedMachine}); err != nil {
return fmt.Errorf("failed to wait for health checks to pass: %w", err)
}
}

Expand Down
Loading