Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
73eba62
status check
seanrathier Apr 2, 2026
36e8e88
more logs
seanrathier Apr 2, 2026
8ced536
fix(cdr): use agent_policies endpoint for Fleet readiness check
seanrathier Apr 2, 2026
f56c468
fix(cdr): use POST /api/fleet/setup for Fleet readiness check
seanrathier Apr 2, 2026
e2e4fb4
fix(cdr): add Fleet Server readiness check after fleet setup
seanrathier Apr 2, 2026
fdcaa50
Merge branch 'main' into seanrathier/cdr-kibana-readiness-check
seanrathier Apr 2, 2026
16b2623
fix(cdr): require 3 consecutive Fleet OK responses before proceeding
seanrathier Apr 2, 2026
bb61c55
fix(cdr): increase Fleet stability window and API retry resilience
seanrathier Apr 2, 2026
4a0b6d3
fix(fleet-api): retry 429s and fail fast on package version fetch errors
seanrathier Apr 2, 2026
0c9cbc2
Merge branch 'main' into seanrathier/cdr-kibana-readiness-check
gurevichdmitry Apr 12, 2026
852c710
Fix entity store v2 api call
gurevichdmitry Apr 12, 2026
eb37892
Merge branch 'seanrathier/cdr-kibana-readiness-check' of https://gith…
gurevichdmitry Apr 12, 2026
901092a
Disable installation of the cloudtrail integration
gurevichdmitry Apr 12, 2026
7e75ca8
Merge branch 'main' into seanrathier/cdr-kibana-readiness-check
gurevichdmitry Apr 12, 2026
1509163
refactor script and cdr action
gurevichdmitry Apr 12, 2026
c74b79d
Merge branch 'seanrathier/cdr-kibana-readiness-check' of https://gith…
gurevichdmitry Apr 12, 2026
6fa82fe
fix linter issues
gurevichdmitry Apr 12, 2026
776d540
fix review comments
gurevichdmitry Apr 13, 2026
78d1330
Merge branch 'main' into seanrathier/cdr-kibana-readiness-check
gurevichdmitry Apr 13, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 146 additions & 0 deletions .ci/scripts/wait_for_kibana_and_fleet.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#!/usr/bin/env bash
#
# Poll Kibana until ready, then (unless serverless) wait for Fleet setup and EPM stability.
#
# Required environment variables:
# ES_USER, ES_PASSWORD, KIBANA_URL
# Optional:
# SERVERLESS_MODE — when "true", skip Fleet checks (Elastic Cloud manages Fleet).
#
# Curl resilience (timeouts, transport failures as retriable non-200) addresses Copilot review on
# https://github.com/elastic/cloudbeat/pull/4092 — discussions r3043340796, r3043340806, r3043340816.

# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Required inputs: abort immediately, with the message shown, if any of them is
# unset or empty.
: "${ES_USER:?ES_USER must be set}"
: "${ES_PASSWORD:?ES_PASSWORD must be set}"
: "${KIBANA_URL:?KIBANA_URL must be set}"

# Optional input: anything other than "true" means the Fleet checks are run.
SERVERLESS_MODE=${SERVERLESS_MODE:-false}

# Options shared by every curl call in this script.
#   -s                 silent. -f is deliberately absent, so HTTP 4xx/5xx still
#                      exit 0 and callers can inspect %{http_code}.
#   --connect-timeout  \ bound each request so a hung endpoint cannot stall the
#   --max-time         / polling loops; callers treat a curl failure as HTTP 000.
CURL_COMMON=(-s --connect-timeout 5 --max-time 20)
readonly CURL_COMMON

#######################################
# Poll Kibana's status API until it reports itself ready.
#
# Kibana counts as ready when the response is HTTP 200 and either
# .status.overall.level is "available" or .status.overall.state is "green".
#
# Globals:   KIBANA_URL, ES_USER, ES_PASSWORD, CURL_COMMON (all read)
# Arguments: none
# Outputs:   progress messages on stdout; the latest response body is written
#            to /tmp/kibana_status.json
# Returns:   0 once Kibana is ready, 1 after 30 unsuccessful attempts
#######################################
wait_kibana_ready() {
  local -r status_file=/tmp/kibana_status.json
  local -r max_attempts=30
  local i http_code level state

  echo "Waiting for Kibana to report 'available' at ${KIBANA_URL}/api/status"
  for ((i = 1; i <= max_attempts; i++)); do
    # Remove the previous attempt's body so a failed request is never judged
    # against stale data.
    rm -f -- "$status_file" || true

    # When the transfer fails, curl itself still prints "000" for
    # %{http_code}. The fallback therefore has to *replace* the captured value;
    # appending with `|| echo` inside the substitution would yield "000000".
    http_code=$(
      curl "${CURL_COMMON[@]}" -u "${ES_USER}:${ES_PASSWORD}" \
        -o "$status_file" \
        -w "%{http_code}" \
        "${KIBANA_URL}/api/status"
    ) || http_code="000"

    if [[ "$http_code" == "200" ]]; then
      # jq failures (missing or invalid JSON) are tolerated: the fields simply
      # stay empty and the attempt counts as "not ready yet".
      level=$(jq -r '.status.overall.level // empty' "$status_file" 2>/dev/null || true)
      state=$(jq -r '.status.overall.state // empty' "$status_file" 2>/dev/null || true)
      if [[ "$level" == "available" || "$state" == "green" ]]; then
        echo "Kibana is ready (level=${level:-n/a}, state=${state:-n/a})"
        return 0
      fi
      echo "attempt $i/${max_attempts}: Kibana not ready yet (level=${level:-n/a}, state=${state:-n/a}), sleeping 10s"
    else
      echo "attempt $i/${max_attempts}: Kibana returned HTTP ${http_code}, sleeping 10s"
    fi
    sleep 10
  done

  echo "Timed out waiting for Kibana to become ready after 300s"
  return 1
}

#######################################
# Trigger Fleet setup and poll until Kibana reports it as initialized.
#
# Sends POST /api/fleet/setup repeatedly; setup is complete when the response
# is HTTP 200 and its .isInitialized field is true.
#
# Globals:   KIBANA_URL, ES_USER, ES_PASSWORD, CURL_COMMON (all read)
# Arguments: none
# Outputs:   progress messages on stdout (including up to 300 bytes of the
#            response body on failure); the latest response body is written to
#            /tmp/fleet_status.json
# Returns:   0 once Fleet is initialized, 1 after 30 unsuccessful attempts
#######################################
wait_fleet_setup() {
  local -r response_file=/tmp/fleet_status.json
  local -r max_attempts=30
  local i http_code is_initialized body

  echo "Triggering Fleet setup at ${KIBANA_URL}/api/fleet/setup"
  for ((i = 1; i <= max_attempts; i++)); do
    # Remove the previous attempt's body: when curl cannot connect it writes
    # nothing, and the log below would otherwise show a stale response.
    rm -f -- "$response_file" || true

    # When the transfer fails, curl itself still prints "000" for
    # %{http_code}. The fallback therefore has to *replace* the captured value;
    # appending with `|| echo` inside the substitution would yield "000000".
    http_code=$(
      curl "${CURL_COMMON[@]}" \
        -X POST \
        -u "${ES_USER}:${ES_PASSWORD}" \
        -H "Content-Type: application/json" \
        -H "kbn-xsrf: true" \
        -o "$response_file" \
        -w "%{http_code}" \
        "${KIBANA_URL}/api/fleet/setup"
    ) || http_code="000"

    if [[ "$http_code" == "200" ]]; then
      # A jq failure leaves is_initialized empty, which counts as "not yet".
      is_initialized=$(jq -r '.isInitialized // false' "$response_file" 2>/dev/null || true)
      if [[ "$is_initialized" == "true" ]]; then
        echo "Fleet setup complete (isInitialized=true)"
        return 0
      fi
    fi

    # Truncate the body so an HTML error page cannot flood the log; a missing
    # file simply yields an empty body.
    body=$(head -c 300 "$response_file" 2>/dev/null || true)
    if [[ "$http_code" == "200" ]]; then
      echo "attempt $i/${max_attempts}: Fleet setup not complete yet, body: ${body}, sleeping 10s"
    else
      echo "attempt $i/${max_attempts}: Fleet setup returned HTTP ${http_code}, body: ${body}, sleeping 10s"
    fi
    sleep 10
  done

  echo "Timed out waiting for Fleet setup to complete after 300s"
  return 1
}

#######################################
# Wait until the Fleet package-manager API answers reliably.
#
# Polls GET /api/fleet/epm/packages and requires 5 consecutive HTTP 200
# responses; any other outcome resets the streak.
#
# Globals:   KIBANA_URL, ES_USER, ES_PASSWORD, CURL_COMMON (all read)
# Arguments: none
# Outputs:   progress messages on stdout (including up to 300 bytes of the
#            response body on failure); the latest response body is written to
#            /tmp/fleet_epm.json
# Returns:   0 once the streak is reached, 1 after 60 attempts without it
#######################################
wait_fleet_epm_stable() {
  local -r response_file=/tmp/fleet_epm.json
  local -r max_attempts=60
  local -r required_ok=5
  local consecutive_ok=0
  local i http_code body

  echo "Waiting for Fleet Server to be stable at ${KIBANA_URL}/api/fleet/epm/packages"
  for ((i = 1; i <= max_attempts; i++)); do
    # Remove the previous attempt's body: when curl cannot connect it writes
    # nothing, and the log below would otherwise show a stale response.
    rm -f -- "$response_file" || true

    # When the transfer fails, curl itself still prints "000" for
    # %{http_code}. The fallback therefore has to *replace* the captured value;
    # appending with `|| echo` inside the substitution would yield "000000".
    http_code=$(
      curl "${CURL_COMMON[@]}" \
        -u "${ES_USER}:${ES_PASSWORD}" \
        -H "Content-Type: application/json" \
        -H "kbn-xsrf: true" \
        -o "$response_file" \
        -w "%{http_code}" \
        "${KIBANA_URL}/api/fleet/epm/packages"
    ) || http_code="000"

    if [[ "$http_code" == "200" ]]; then
      consecutive_ok=$((consecutive_ok + 1))
      echo "attempt $i/${max_attempts}: Fleet Server OK (${consecutive_ok}/${required_ok} consecutive)"
      if ((consecutive_ok >= required_ok)); then
        echo "Fleet Server is stable"
        return 0
      fi
    else
      # One bad response invalidates the whole streak.
      consecutive_ok=0
      body=$(head -c 300 "$response_file" 2>/dev/null || true)
      echo "attempt $i/${max_attempts}: Fleet Server returned HTTP ${http_code}, body: ${body}, resetting consecutive counter, sleeping 10s"
    fi
    sleep 10
  done

  echo "Timed out waiting for Fleet Server to stabilise"
  return 1
}

# Kibana must be reachable in every deployment type. Under `set -e`, a non-zero
# return from any wait_* function aborts the script with that status.
wait_kibana_ready

# The Fleet checks only apply when SERVERLESS_MODE is not "true".
if [[ "${SERVERLESS_MODE}" != "true" ]]; then
  wait_fleet_setup
  wait_fleet_epm_stable
else
  echo "Serverless mode: skipping Fleet status check (Fleet is managed by Elastic Cloud)"
fi

exit 0
36 changes: 28 additions & 8 deletions .github/actions/cdr/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@ inputs:
default: true
required: false
type: boolean
serverless-mode:
description: "When true, skip the Fleet status check (serverless deployments manage Fleet internally)"
default: false
required: false
type: boolean

runs:
using: composite
Expand Down Expand Up @@ -159,9 +164,22 @@ runs:
working-directory: "deploy/test-environments/cdr"
run: ./export_terraform_outputs.sh

- name: Wait for Kibana and Fleet to be ready
id: wait-for-kibana
if: success()
shell: bash
env:
ES_USER: ${{ inputs.es-user }}
ES_PASSWORD: ${{ inputs.es-password }}
KIBANA_URL: ${{ inputs.kibana-url }}
SERVERLESS_MODE: ${{ inputs.serverless-mode }}
run: "${{ github.workspace }}/.ci/scripts/wait_for_kibana_and_fleet.sh"

# CloudTrail install temporarily disabled until the follow-up PR that fixes CloudTrail lands (https://github.com/elastic/cloudbeat/pull/4284).
# Restore: if: ${{ !cancelled() && steps.deploy-cdr-infra.outcome == 'success' && steps.wait-for-kibana.outcome == 'success' }}
- name: Install AWS Cloudtrail integration
id: cloudtrail-integration
if: ${{ !cancelled() && steps.deploy-cdr-infra.outcome == 'success' }}
if: ${{ false }}
working-directory: tests/integrations_setup
shell: bash
env:
Expand All @@ -172,8 +190,10 @@ runs:
run: |
poetry run python ./install_cloudtrail_integration.py

# CloudTrail agent deploy skipped with install above; re-enable with the same follow-up PR as CloudTrail install.
# Restore: if: ${{ !cancelled() && steps.deploy-cdr-infra.outcome == 'success' && steps.cloudtrail-integration.outcome == 'success' }}
- name: Deploy AWS Cloudtrail agent
if: ${{ !cancelled() && steps.deploy-cdr-infra.outcome == 'success' && steps.cloudtrail-integration.outcome == 'success' }}
if: ${{ false }}
working-directory: deploy/test-environments/cdr
shell: bash
env:
Expand All @@ -187,7 +207,7 @@ runs:

- name: Install Azure Activity Logs integration
id: az-activity-logs-integration
if: ${{ !cancelled() && steps.deploy-cdr-infra.outcome == 'success' }}
if: ${{ !cancelled() && steps.deploy-cdr-infra.outcome == 'success' && steps.wait-for-kibana.outcome == 'success' }}
working-directory: tests/integrations_setup
shell: bash
env:
Expand Down Expand Up @@ -216,7 +236,7 @@ runs:

- name: Install GCP Audit Logs integration
id: gcp-audit-logs-integration
if: ${{ !cancelled() && steps.deploy-cdr-infra.outcome == 'success' }}
if: ${{ !cancelled() && steps.deploy-cdr-infra.outcome == 'success' && steps.wait-for-kibana.outcome == 'success' }}
working-directory: tests/integrations_setup
shell: bash
env:
Expand All @@ -243,7 +263,7 @@ runs:

- name: Install WIZ integration
id: wiz-integration
if: ${{ !cancelled() && steps.deploy-cdr-infra.outcome == 'success' }}
if: ${{ !cancelled() && steps.deploy-cdr-infra.outcome == 'success' && steps.wait-for-kibana.outcome == 'success' }}
working-directory: tests/integrations_setup
shell: bash
env:
Expand Down Expand Up @@ -304,7 +324,7 @@ runs:

- name: Install Elastic Defend (Fleet)
id: elastic-defend-fleet
if: ${{ !cancelled() && steps.deploy-cdr-infra.outcome == 'success' }}
if: ${{ !cancelled() && steps.deploy-cdr-infra.outcome == 'success' && steps.wait-for-kibana.outcome == 'success' }}
working-directory: tests/integrations_setup
shell: bash
env:
Expand Down Expand Up @@ -416,7 +436,7 @@ runs:

- name: Check Asset Inventory supported version
id: asset-inventory-version-check
if: ${{ !cancelled() && steps.deploy-cdr-infra.outcome == 'success' }}
if: ${{ !cancelled() && steps.deploy-cdr-infra.outcome == 'success' && steps.wait-for-kibana.outcome == 'success' }}
shell: bash
env:
STACK_VERSION: ${{ inputs.elk-stack-version }}
Expand Down Expand Up @@ -466,7 +486,7 @@ runs:
- name: Deploy Azure Asset Inventory agent
id: azure-asset-inventory-agent
working-directory: deploy/azure
if: ${{ !cancelled() && steps.asset-inventory-version-check.outputs.asset_inventory_supported == 'true' }}
if: ${{ !cancelled() && steps.asset-inventory-version-check.outputs.asset_inventory_supported == 'true' && steps.azure-asset-inventory-integration.outcome == 'success' }}
shell: bash
env:
AZURE_TAGS: ${{ inputs.azure-tags }}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -519,10 +519,9 @@ jobs:
}}
run: |
response=$(curl -s -w "\n%{http_code}" -X POST \
"$KIBANA_URL/internal/security/entity_store/install?apiVersion=2" \
"$KIBANA_URL/api/security/entity_store/install?apiVersion=2023-10-31" \
-u "$ES_USER:$ES_PASSWORD" \
-H "kbn-xsrf: true" \
-H "x-elastic-internal-origin: kibana" \
-H "Content-Type: application/json" \
-d '{}')
http_code=$(echo "$response" | tail -1)
Expand Down Expand Up @@ -561,6 +560,7 @@ jobs:
kibana-url: ${{ env.KIBANA_URL }}
elk-stack-version: ${{ env.STACK_VERSION }}
enable-entity-store-v2: ${{ inputs.enable-entity-store-v2 }}
serverless-mode: ${{ inputs.serverless_mode }}
azure-tags: ${{ env.AZURE_DEFAULT_TAGS }}
tag-project: ${{ github.actor }}
tag-owner: ${{ github.actor }}
Expand Down
Loading
Loading