From c775e9db502c76ccd840185438c0f2ef5a34da12 Mon Sep 17 00:00:00 2001 From: Juanpe Araque Date: Tue, 12 May 2026 12:51:45 +0200 Subject: [PATCH] Wait for default FelixConfiguration before patching it in calico e2e setup The default FelixConfiguration is created asynchronously by calico-node after pods report Ready, so the immediate calicoctl patch sometimes ran before the resource existed. The patch failed silently (run_command defaults to check=False), Prometheus metrics were never enabled, and the downstream metrics endpoint check timed out 200s later with a generic connection-refused error. Wait for the resource to exist before patching, and use check=True on the patch so any future regression fails fast and visibly. --- calico/tests/conftest.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/calico/tests/conftest.py b/calico/tests/conftest.py index 118546f049686..7f7453240259c 100644 --- a/calico/tests/conftest.py +++ b/calico/tests/conftest.py @@ -5,7 +5,7 @@ import pytest -from datadog_checks.dev.conditions import CheckEndpoints +from datadog_checks.dev.conditions import CheckEndpoints, WaitFor from datadog_checks.dev.kind import kind_run from datadog_checks.dev.kube_port_forward import port_forward from datadog_checks.dev.subprocess import run_command @@ -16,6 +16,11 @@ HERE = path.dirname(path.abspath(__file__)) +def _felix_config_default_exists(): + result = run_command(["kubectl", "get", "felixconfiguration", "default"], capture='both') + return result.code == 0 + + def setup_calico(): # Deploy calico run_command(["kubectl", "apply", "-f", path.join(HERE, 'kind', 'calico.yaml')]) @@ -32,10 +37,27 @@ def setup_calico(): # Wait for pods run_command(["kubectl", "wait", "--for=condition=Ready", "pods", "--all", "--all-namespaces", "--timeout=300s"]) - # Activate Felix + # calico-node creates the default FelixConfiguration asynchronously after pods report Ready. + WaitFor(_felix_config_default_exists, attempts=60, wait=2)() + + # check=True so a missed patch fails loudly here instead of as a connection-refused timeout later. run_command( - """kubectl exec -i -n kube-system calicoctl -- /calicoctl patch felixConfiguration - default --patch '{"spec":{"prometheusMetricsEnabled": true}}'""" + [ + "kubectl", + "exec", + "-i", + "-n", + "kube-system", + "calicoctl", + "--", + "/calicoctl", + "patch", + "felixConfiguration", + "default", + "--patch", + '{"spec":{"prometheusMetricsEnabled": true}}', + ], + check=True, )