From f7849b7141b8c3d5f0d5a9bf6db9a83cea7f1166 Mon Sep 17 00:00:00 2001 From: Jon Bastien Date: Thu, 12 Mar 2026 15:49:24 -0400 Subject: [PATCH 1/2] Make PRESENCE_CHECK_TIMEOUT configurable via environment variable Deploy tasks can be reaped as dead during long bundle install steps (e.g. native extension compilation) because the hardcoded 30-second presence check timeout is too short. Add SHIPIT_PRESENCE_CHECK_TIMEOUT env var to allow configuring this per-environment while preserving the 30-second default. Co-Authored-By: Claude Opus 4.6 --- app/models/shipit/task.rb | 5 ++--- docs/setup.md | 8 ++++++++ lib/shipit.rb | 4 ++++ test/unit/shipit_test.rb | 11 +++++++++++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/app/models/shipit/task.rb b/app/models/shipit/task.rb index 2a23d4000..99374634d 100644 --- a/app/models/shipit/task.rb +++ b/app/models/shipit/task.rb @@ -10,7 +10,6 @@ def message end end - PRESENCE_CHECK_TIMEOUT = 30 ACTIVE_STATUSES = %w[pending running aborting].freeze COMPLETED_STATUSES = %w[success flapping faulty validating].freeze UNSUCCESSFUL_STATUSES = %w[error failed aborted flapping timedout faulty].freeze @@ -327,7 +326,7 @@ def finished? end def ping - Shipit.redis.set(status_key, 'alive', ex: PRESENCE_CHECK_TIMEOUT) + Shipit.redis.set(status_key, 'alive', ex: Shipit.presence_check_timeout) end def alive? @@ -335,7 +334,7 @@ def alive? end def report_dead! - write("ERROR: Background job hasn't reported back in #{PRESENCE_CHECK_TIMEOUT} seconds.") + write("ERROR: Background job hasn't reported back in #{Shipit.presence_check_timeout} seconds.") error! end diff --git a/docs/setup.md b/docs/setup.md index 52ce5ea0a..bf0788487 100644 --- a/docs/setup.md +++ b/docs/setup.md @@ -171,6 +171,14 @@ production: git_progress_output: true ``` +### Environment Variables + +**`SHIPIT_PRESENCE_CHECK_TIMEOUT`** is the duration (in seconds) after which a deploy task is considered dead if the background job hasn't reported back. Defaults to `30`. Increase this if deploy tasks are being reaped during long-running steps like `bundle install` with native extension compilation. + +```bash +export SHIPIT_PRESENCE_CHECK_TIMEOUT=120 +``` + ### Using Multiple Github Applications A Github application can only authenticate to the Github organization it's installed in. If you want to deploy code from multiple Github organizations the `github` section of your `config/secrets.yml` will need to be formatted differently. The top-level keys should be the name of each Github organization, and the following sub-keys are the Github app details for that particular organization. diff --git a/lib/shipit.rb b/lib/shipit.rb index 499a04e2e..95005db2e 100644 --- a/lib/shipit.rb +++ b/lib/shipit.rb @@ -92,6 +92,10 @@ def enable_samesite_middleware? ENV['SHIPIT_ENABLE_SAMESITE_NONE'].present? end + def presence_check_timeout + ENV.fetch('SHIPIT_PRESENCE_CHECK_TIMEOUT', 30).to_i + end + def app_name @app_name ||= secrets.app_name || Rails.application.class.name.split(':').first || 'Shipit' end diff --git a/test/unit/shipit_test.rb b/test/unit/shipit_test.rb index a85c851fb..33962be9c 100644 --- a/test/unit/shipit_test.rb +++ b/test/unit/shipit_test.rb @@ -30,6 +30,17 @@ class ShipitTest < ActiveSupport::TestCase assert_equal(['shopify/developers'], Shipit.github_teams.map(&:handle)) end + test ".presence_check_timeout defaults to 30" do + assert_equal 30, Shipit.presence_check_timeout + end + + test ".presence_check_timeout returns the configured value from ENV" do + ENV['SHIPIT_PRESENCE_CHECK_TIMEOUT'] = '120' + assert_equal 120, Shipit.presence_check_timeout + ensure + ENV.delete('SHIPIT_PRESENCE_CHECK_TIMEOUT') + end + class RedisTest < self setup do @client = mock(:client) From cc318c15d7dbdf57597c43d12437760c6a6bf1bb Mon Sep 17 00:00:00 2001 From: Jon Bastien Date: Fri, 13 Mar 2026 16:19:13 -0400 Subject: [PATCH 2/2] fix: handle bad input case --- lib/shipit.rb | 6 +++++- test/unit/shipit_test.rb | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/lib/shipit.rb b/lib/shipit.rb index 95005db2e..4856db07f 100644 --- a/lib/shipit.rb +++ b/lib/shipit.rb @@ -93,7 +93,11 @@ def enable_samesite_middleware? end def presence_check_timeout - ENV.fetch('SHIPIT_PRESENCE_CHECK_TIMEOUT', 30).to_i + raw = ENV.fetch('SHIPIT_PRESENCE_CHECK_TIMEOUT', '30') + timeout = Integer(raw) + raise ArgumentError, "SHIPIT_PRESENCE_CHECK_TIMEOUT must be a positive integer, got #{raw.inspect}" if timeout <= 0 + + timeout end def app_name diff --git a/test/unit/shipit_test.rb b/test/unit/shipit_test.rb index 33962be9c..c5c25936c 100644 --- a/test/unit/shipit_test.rb +++ b/test/unit/shipit_test.rb @@ -41,6 +41,29 @@ class ShipitTest < ActiveSupport::TestCase ENV.delete('SHIPIT_PRESENCE_CHECK_TIMEOUT') end + test ".presence_check_timeout raises on non-numeric string" do + ENV['SHIPIT_PRESENCE_CHECK_TIMEOUT'] = 'abc' + assert_raises(ArgumentError) { Shipit.presence_check_timeout } + ensure + ENV.delete('SHIPIT_PRESENCE_CHECK_TIMEOUT') + end + + test ".presence_check_timeout raises on zero" do + ENV['SHIPIT_PRESENCE_CHECK_TIMEOUT'] = '0' + error = assert_raises(ArgumentError) { Shipit.presence_check_timeout } + assert_match(/must be a positive integer/, error.message) + ensure + ENV.delete('SHIPIT_PRESENCE_CHECK_TIMEOUT') + end + + test ".presence_check_timeout raises on negative value" do + ENV['SHIPIT_PRESENCE_CHECK_TIMEOUT'] = '-5' + error = assert_raises(ArgumentError) { Shipit.presence_check_timeout } + assert_match(/must be a positive integer/, error.message) + ensure + ENV.delete('SHIPIT_PRESENCE_CHECK_TIMEOUT') + end + class RedisTest < self setup do @client = mock(:client)