From 32ed12b82f8f08cd3800a0f927bfc1bd83a374f8 Mon Sep 17 00:00:00 2001 From: aayushshah15 Date: Thu, 7 May 2026 20:22:22 +0000 Subject: [PATCH 1/3] add background heartbeat process (30s interval) Co-authored-by: Codesmith Staging --- action.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/action.yml b/action.yml index d5a5793..f15f4ef 100644 --- a/action.yml +++ b/action.yml @@ -111,6 +111,39 @@ runs: echo "Testbox initialized: testbox_id=${TESTBOX_ID}" + - name: Start heartbeat + if: steps.metadata.outputs.available == 'true' && inputs.testbox_id + shell: bash + run: | + STATE=/tmp/.testbox + if [ ! -f "$STATE/testbox_id" ]; then + echo "Warning: state directory not initialized, skipping heartbeat" + exit 0 + fi + + TESTBOX_ID=$(cat "$STATE/testbox_id") + INSTALLATION_MODEL_ID=$(cat "$STATE/installation_model_id") + AUTH_TOKEN=$(cat "$STATE/auth_token") + API_URL=$(cat "$STATE/api_url") + + # Launch a background heartbeat loop that POSTs to the backend + # every 30 seconds. The backend uses this to detect dead VMs + # whose phone-home (completed) never arrived. + ( + while true; do + sleep 30 + curl -s -f --max-time 10 -X POST "${API_URL}/api/testbox/heartbeat" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${AUTH_TOKEN}" \ + -d "{ + \"testbox_id\": \"${TESTBOX_ID}\", + \"installation_model_id\": ${INSTALLATION_MODEL_ID} + }" >/dev/null 2>&1 || true + done + ) & + echo $! > "$STATE/heartbeat_pid" + echo "Heartbeat started (PID $!, every 30s)" + - name: Install SSH public key if: steps.metadata.outputs.available == 'true' && inputs.testbox_id shell: bash From 76a18efc6ca8e0354fa2c2db7b4770815b3797a3 Mon Sep 17 00:00:00 2001 From: aayushshah15 Date: Thu, 7 May 2026 20:38:03 +0000 Subject: [PATCH 2/3] disown heartbeat process so it survives across composite action steps The background subshell was a child of the step's bash process. 
GitHub Actions can wait on or SIGTERM child processes between composite action steps, killing the heartbeat before run-testbox starts. Using nohup + fd redirects + disown to fully detach. The script now reads state files directly instead of relying on shell variable scope from the parent. Co-authored-by: Codesmith Staging --- action.yml | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/action.yml b/action.yml index f15f4ef..729aa2d 100644 --- a/action.yml +++ b/action.yml @@ -121,28 +121,33 @@ runs: exit 0 fi - TESTBOX_ID=$(cat "$STATE/testbox_id") - INSTALLATION_MODEL_ID=$(cat "$STATE/installation_model_id") - AUTH_TOKEN=$(cat "$STATE/auth_token") - API_URL=$(cat "$STATE/api_url") - # Launch a background heartbeat loop that POSTs to the backend # every 30 seconds. The backend uses this to detect dead VMs # whose phone-home (completed) never arrived. - ( - while true; do - sleep 30 - curl -s -f --max-time 10 -X POST "${API_URL}/api/testbox/heartbeat" \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer ${AUTH_TOKEN}" \ - -d "{ - \"testbox_id\": \"${TESTBOX_ID}\", - \"installation_model_id\": ${INSTALLATION_MODEL_ID} - }" >/dev/null 2>&1 || true - done - ) & + # + # nohup + fd redirects + disown fully detaches the process + # from this step's shell. Without this, GitHub Actions may + # wait on or SIGTERM child processes between composite action + # steps, killing the heartbeat before run-testbox starts. + # + # The state files are read inside the detached script so it is + # self-contained and does not depend on shell variable scope.
+ nohup bash -c 'STATE=/tmp/.testbox + API_URL=$(cat "$STATE/api_url") + AUTH_TOKEN=$(cat "$STATE/auth_token") + TESTBOX_ID=$(cat "$STATE/testbox_id") + INSTALLATION_MODEL_ID=$(cat "$STATE/installation_model_id") + while true; do + sleep 30 + curl -s -f --max-time 10 -X POST "${API_URL}/api/testbox/heartbeat" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${AUTH_TOKEN}" \ + -d "{\"testbox_id\":\"${TESTBOX_ID}\",\"installation_model_id\":${INSTALLATION_MODEL_ID}}" \ + >/dev/null 2>&1 || true + done' </dev/null >/dev/null 2>&1 & + disown echo $! > "$STATE/heartbeat_pid" - echo "Heartbeat started (PID $!, every 30s)" + echo "Heartbeat started (PID $!, every 30s, disowned)" - name: Install SSH public key if: steps.metadata.outputs.available == 'true' && inputs.testbox_id From de50e6f33ca6a6c929165f5aa59e4635856bf716 Mon Sep 17 00:00:00 2001 From: Aayush Shah Date: Tue, 26 May 2026 12:49:47 -0400 Subject: [PATCH 3/3] Make heartbeat disown target explicit --- action.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/action.yml b/action.yml index 729aa2d..5f18615 100644 --- a/action.yml +++ b/action.yml @@ -145,9 +145,10 @@ runs: -d "{\"testbox_id\":\"${TESTBOX_ID}\",\"installation_model_id\":${INSTALLATION_MODEL_ID}}" \ >/dev/null 2>&1 || true done' </dev/null >/dev/null 2>&1 & - disown - echo $! > "$STATE/heartbeat_pid" - echo "Heartbeat started (PID $!, every 30s, disowned)" + HEARTBEAT_PID=$! + echo "$HEARTBEAT_PID" > "$STATE/heartbeat_pid" + disown "$HEARTBEAT_PID" + echo "Heartbeat started (PID $HEARTBEAT_PID, every 30s, disowned)" - name: Install SSH public key if: steps.metadata.outputs.available == 'true' && inputs.testbox_id