diff --git a/tests/e2e/cloud_test.go b/tests/e2e/cloud_test.go
index 5dab6cd0..77213479 100644
--- a/tests/e2e/cloud_test.go
+++ b/tests/e2e/cloud_test.go
@@ -12,6 +12,7 @@ import (
 	"time"
 
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/wait"
@@ -25,12 +26,10 @@ import (
 func TestNodeSetCorrectNodeLabelsAndIPAddresses(t *testing.T) {
 	t.Parallel()
 
-	ctx := context.Background()
-
-	node, err := testCluster.k8sClient.CoreV1().Nodes().Get(ctx, testCluster.ControlNodeName(), metav1.GetOptions{})
+	node, err := testCluster.k8sClient.CoreV1().Nodes().Get(t.Context(), testCluster.ControlNodeName(), metav1.GetOptions{})
 	assert.NoError(t, err)
 
-	server, _, err := testCluster.hcloud.Server.Get(ctx, testCluster.ControlNodeName())
+	server, _, err := testCluster.hcloud.Server.Get(t.Context(), testCluster.ControlNodeName())
 	if err != nil {
 		return
 	}
@@ -77,32 +76,41 @@ func TestServiceLoadBalancersMinimalSetup(t *testing.T) {
 		t:       t,
 		podName: "loadbalancer-minimal",
 	}
+	t.Cleanup(func() {
+		lbTest.TearDown()
+	})
 
-	pod := lbTest.DeployTestPod()
+	pod, err := lbTest.DeployTestPod()
+	require.NoError(t, err)
 
 	lbSvc := lbTest.ServiceDefinition(pod, map[string]string{
 		string(annotation.LBLocation): "nbg1",
 	})
 
-	lbSvc, err := lbTest.CreateService(lbSvc)
-	if assert.NoError(t, err, "deploying test svc") {
-		WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, false)
-	}
+	lbSvc, err = lbTest.CreateService(lbSvc)
+	require.NoError(t, err)
 
-	lbTest.TearDown()
+	err = lbTest.WaitForHTTPAvailable(lbSvc.Status.LoadBalancer.Ingress[0].IP, false)
+	require.NoError(t, err)
 }
 
 func TestServiceLoadBalancersHTTPS(t *testing.T) {
 	t.Parallel()
 
-	cert := testCluster.CreateTLSCertificate(t, "loadbalancer-https")
 	lbTest := lbTestHelper{
 		t:       t,
 		podName: "loadbalancer-https",
 		port:    443,
 	}
+	t.Cleanup(func() {
+		lbTest.TearDown()
+	})
+
+	cert, err := testCluster.CreateTLSCertificate(t, "loadbalancer-https")
+	require.NoError(t, err)
 
-	pod := lbTest.DeployTestPod()
+	pod, err := lbTest.DeployTestPod()
+	require.NoError(t, err)
 
 	lbSvc := lbTest.ServiceDefinition(pod, map[string]string{
 		string(annotation.LBLocation): "nbg1",
@@ -110,19 +118,16 @@ func TestServiceLoadBalancersHTTPS(t *testing.T) {
 		string(annotation.LBSvcProtocol): "https",
 	})
 
-	lbSvc, err := lbTest.CreateService(lbSvc)
-	if assert.NoError(t, err, "deploying test svc") {
-		WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, true)
-	}
+	lbSvc, err = lbTest.CreateService(lbSvc)
+	require.NoError(t, err)
 
-	lbTest.TearDown()
+	err = lbTest.WaitForHTTPAvailable(lbSvc.Status.LoadBalancer.Ingress[0].IP, true)
+	require.NoError(t, err)
 }
 
 func TestServiceLoadBalancersHTTPSWithManagedCertificate(t *testing.T) {
 	t.Parallel()
 
-	ctx := context.Background()
-
 	if testCluster.certDomain == "" {
 		t.Skip("Skipping because CERT_DOMAIN is not set")
 	}
@@ -133,8 +138,12 @@ func TestServiceLoadBalancersHTTPSWithManagedCertificate(t *testing.T) {
 		podName: "loadbalancer-https",
 		port:    443,
 	}
+	t.Cleanup(func() {
+		lbTest.TearDown()
+	})
 
-	pod := lbTest.DeployTestPod()
+	pod, err := lbTest.DeployTestPod()
+	require.NoError(t, err)
 
 	lbSvc := lbTest.ServiceDefinition(pod, map[string]string{
 		string(annotation.LBLocation): "nbg1",
@@ -144,62 +153,62 @@ func TestServiceLoadBalancersHTTPSWithManagedCertificate(t *testing.T) {
 		string(annotation.LBSvcHTTPManagedCertificateUseACMEStaging): "true",
 	})
 
-	lbSvc, err := lbTest.CreateService(lbSvc)
-	if assert.NoError(t, err, "deploying test svc") {
-		certs, err := testCluster.hcloud.Certificate.AllWithOpts(ctx, hcloud.CertificateListOpts{
-			ListOpts: hcloud.ListOpts{
-				LabelSelector: fmt.Sprintf("%s=%s", hcops.LabelServiceUID, lbSvc.ObjectMeta.UID),
-			},
-		})
-		assert.NoError(t, err)
-		if assert.Len(t, certs, 1) {
-			testCluster.certificates.Add(certs[0].ID)
-		}
-	}
+	lbSvc, err = lbTest.CreateService(lbSvc)
+	require.NoError(t, err)
 
-	lbTest.TearDown()
+	certs, err := testCluster.hcloud.Certificate.AllWithOpts(t.Context(), hcloud.CertificateListOpts{
+		ListOpts: hcloud.ListOpts{
+			LabelSelector: fmt.Sprintf("%s=%s", hcops.LabelServiceUID, lbSvc.ObjectMeta.UID),
+		},
+	})
+	assert.NoError(t, err)
+	if assert.Len(t, certs, 1) {
+		testCluster.certificates.Add(certs[0].ID)
+	}
 }
 
 func TestServiceLoadBalancersWithPrivateNetwork(t *testing.T) {
 	t.Parallel()
 
 	lbTest := lbTestHelper{t: t, podName: "loadbalancer-private-network"}
+	t.Cleanup(func() {
+		lbTest.TearDown()
+	})
 
-	pod := lbTest.DeployTestPod()
+	pod, err := lbTest.DeployTestPod()
+	require.NoError(t, err)
 
 	ipRange := &net.IPNet{
 		IP:   net.IPv4(10, 0, 0, 0),
 		Mask: net.CIDRMask(24, 32),
 	}
 
-	lbSvcDefinition := lbTest.ServiceDefinition(pod, map[string]string{
+	lbSvc := lbTest.ServiceDefinition(pod, map[string]string{
 		string(annotation.LBLocation):           "nbg1",
 		string(annotation.LBUsePrivateIP):       "true",
 		string(annotation.PrivateSubnetIPRange): ipRange.String(),
 	})
 
-	lbSvc, err := lbTest.CreateService(lbSvcDefinition)
-	if assert.NoError(t, err, "deploying test svc") {
-		WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, false)
+	lbSvc, err = lbTest.CreateService(lbSvc)
+	require.NoError(t, err)
 
-		anyInIPRange := slices.ContainsFunc(lbSvc.Status.LoadBalancer.Ingress, func(ingress corev1.LoadBalancerIngress) bool {
-			ip := net.ParseIP(ingress.IP)
-			if ip == nil {
-				return false
-			}
-			return ipRange.Contains(ip)
-		})
-
-		assert.True(t, anyInIPRange)
-	}
+	err = lbTest.WaitForHTTPAvailable(lbSvc.Status.LoadBalancer.Ingress[0].IP, false)
+	require.NoError(t, err)
 
-	lbTest.TearDown()
+	anyInIPRange := slices.ContainsFunc(lbSvc.Status.LoadBalancer.Ingress, func(ingress corev1.LoadBalancerIngress) bool {
+		ip := net.ParseIP(ingress.IP)
+		if ip == nil {
+			return false
+		}
+		return ipRange.Contains(ip)
+	})
+	assert.True(t, anyInIPRange)
 }
 
 func TestRouteNetworksPodIPsAreAccessible(t *testing.T) {
 	t.Parallel()
 
-	err := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) {
+	err := wait.PollUntilContextTimeout(t.Context(), 1*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) {
 		node, err := testCluster.k8sClient.CoreV1().Nodes().Get(ctx, testCluster.ControlNodeName(), metav1.GetOptions{})
 		if err != nil {
 			return false, err
 		}
@@ -222,6 +231,6 @@ func TestRouteNetworksPodIPsAreAccessible(t *testing.T) {
 		return false, nil
 	})
 	if err != nil {
-		t.Fatal(err)
+		t.Errorf("error waiting for pod IPs to become accessible: %v", err)
 	}
 }
diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go
index 2bce4a6f..2d76d9fa 100644
--- a/tests/e2e/e2e_test.go
+++ b/tests/e2e/e2e_test.go
@@ -3,7 +3,6 @@ package e2e
 import (
-	"context"
 	"fmt"
 	"os"
 	"strings"
 
@@ -34,7 +33,7 @@ func TestPodIsPresent(t *testing.T) {
 	t.Parallel()
 
 	t.Run("hcloud-cloud-controller-manager pod is present in kube-system", func(t *testing.T) {
-		pods, err := testCluster.k8sClient.CoreV1().Pods("kube-system").List(context.Background(), metav1.ListOptions{})
+		pods, err := testCluster.k8sClient.CoreV1().Pods("kube-system").List(t.Context(), metav1.ListOptions{})
 		assert.NoError(t, err)
 
 		found := false
@@ -51,7 +50,7 @@ func TestPodIsPresent(t *testing.T) {
 	t.Run("pod with app=hcloud-cloud-controller-manager is present in kube-system", func(t *testing.T) {
 		pods, err := testCluster.k8sClient.CoreV1().Pods("kube-system").
-			List(context.Background(), metav1.ListOptions{
+			List(t.Context(), metav1.ListOptions{
 				LabelSelector: "app.kubernetes.io/name=hcloud-cloud-controller-manager",
 			})
 		assert.NoError(t, err)
diff --git a/tests/e2e/helper_test.go b/tests/e2e/helper_test.go
index 5957cf39..2292ca48 100644
--- a/tests/e2e/helper_test.go
+++ b/tests/e2e/helper_test.go
@@ -126,7 +126,7 @@ func (tc *TestCluster) Stop() error {
 //
 // The baseName of the certificate gets a random number suffix attached.
 // baseName and suffix are separated by a single "-" character.
-func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) *hcloud.Certificate {
+func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) (*hcloud.Certificate, error) {
 	rndInt := rng.Int()
 	name := fmt.Sprintf("%s-%d", baseName, rndInt)
 
@@ -138,15 +138,15 @@ func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) *hclo
 	}
 	cert, _, err := tc.hcloud.Certificate.Create(context.Background(), opts)
 	if err != nil {
-		t.Fatalf("%s: %v", name, err)
+		return nil, fmt.Errorf("%s: %w", name, err)
 	}
 	if cert == nil {
-		t.Fatalf("no certificate created")
+		return nil, fmt.Errorf("no certificate created")
 	}
 
 	tc.certificates.Add(cert.ID)
 
-	return cert
+	return cert, nil
 }
 
 // NetworkName returns the network name.
@@ -174,7 +174,7 @@ type lbTestHelper struct {
 
 // DeployTestPod deploys a basic nginx pod within the k8s cluster
 // and waits until it is "ready".
-func (l *lbTestHelper) DeployTestPod() *corev1.Pod {
+func (l *lbTestHelper) DeployTestPod() (*corev1.Pod, error) {
 	l.t.Helper()
 
 	ctx := context.Background()
@@ -188,7 +188,7 @@ func (l *lbTestHelper) DeployTestPod() *corev1.Pod {
 		},
 	}, metav1.CreateOptions{})
 	if err != nil && !k8serrors.IsAlreadyExists(err) {
-		l.t.Fatal(err)
+		return nil, fmt.Errorf("error deploying test pod: %w", err)
 	}
 
 	podName := fmt.Sprintf("pod-%s", l.podName)
@@ -217,8 +217,9 @@ func (l *lbTestHelper) DeployTestPod() *corev1.Pod {
 
 	pod, err := testCluster.k8sClient.CoreV1().Pods(l.namespace).Create(ctx, &testPod, metav1.CreateOptions{})
 	if err != nil {
-		l.t.Fatalf("could not create test pod: %s", err)
+		return nil, fmt.Errorf("could not create test pod: %w", err)
 	}
+
 	err = wait.PollUntilContextTimeout(ctx, 1*time.Second, 1*time.Minute, false, func(ctx context.Context) (done bool, err error) {
 		p, err := testCluster.k8sClient.CoreV1().Pods(l.namespace).Get(ctx, podName, metav1.GetOptions{})
 		if err != nil {
@@ -233,9 +234,10 @@ func (l *lbTestHelper) DeployTestPod() *corev1.Pod {
 		return false, nil
 	})
 	if err != nil {
-		l.t.Fatalf("pod %s did not come up after 1 minute: %s", podName, err)
+		return nil, fmt.Errorf("pod %s did not come up after 1 minute: %w", podName, err)
 	}
-	return pod
+
+	return pod, nil
 }
 
 // ServiceDefinition returns a service definition for a Hetzner Cloud Load Balancer (k8s service).
@@ -274,37 +276,36 @@ func (l *lbTestHelper) ServiceDefinition(pod *corev1.Pod, annotations map[string
 func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, error) {
 	l.t.Helper()
 
-	ctx := context.Background()
-
-	// Default is 15s interval, 10s timeout, 3 retries => 45 seconds until up
-	// With these changes it should be 1 seconds until up
-	// lbSvc.Annotations[string(annotation.LBSvcHealthCheckInterval)] = "1s"
-	// lbSvc.Annotations[string(annotation.LBSvcHealthCheckTimeout)] = "2s"
-	// lbSvc.Annotations[string(annotation.LBSvcHealthCheckRetries)] = "1"
-	// lbSvc.Annotations[string(annotation.LBSvcHealthCheckProtocol)] = "tcp"
-
-	_, err := testCluster.k8sClient.CoreV1().Services(l.namespace).Create(ctx, lbSvc, metav1.CreateOptions{})
+	lbSvc, err := testCluster.k8sClient.CoreV1().Services(l.namespace).Create(l.t.Context(), lbSvc, metav1.CreateOptions{})
 	if err != nil {
 		return nil, fmt.Errorf("could not create service: %s", err)
 	}
 
-	err = wait.PollUntilContextTimeout(ctx, 1*time.Second, 5*time.Minute, false, func(ctx context.Context) (done bool, err error) {
+	ctx, cancel := context.WithTimeout(l.t.Context(), 4*time.Minute)
+	defer cancel()
+
+	backoffFunc := hcloud.ExponentialBackoff(2.0, time.Second)
+	retries := 0
+	for {
 		svc, err := testCluster.k8sClient.CoreV1().Services(l.namespace).Get(ctx, lbSvc.Name, metav1.GetOptions{})
 		if err != nil {
-			return false, err
+			return nil, fmt.Errorf("error fetching load balancer service: %w", err)
 		}
 
 		ingressIPs := svc.Status.LoadBalancer.Ingress
 		if len(ingressIPs) > 0 {
 			lbSvc = svc
-			return true, nil
+			return lbSvc, nil
+		}
+
+		select {
+		case <-ctx.Done():
+			return nil, fmt.Errorf("timed out waiting for load balancer service to receive ingress IPs")
+		case <-time.After(backoffFunc(retries)):
+			retries++
+			continue
 		}
-		return false, nil
-	})
-	if err != nil {
-		return nil, fmt.Errorf("test service (load balancer) did not come up after 5 minute: %s", err)
 	}
-	return lbSvc, nil
 }
 
 // TearDown deletes the created pod and service.
@@ -323,12 +324,14 @@ func (l *lbTestHelper) TearDown() {
 	}
 }
 
-// WaitForHTTPAvailable tries to connect to the given IP via http
-// It tries it for 2 minutes, if after two minutes the connection
-// wasn't successful and it wasn't a HTTP 200 response it will fail.
-func WaitForHTTPAvailable(t *testing.T, ingressIP string, useHTTPS bool) {
+// WaitForHTTPAvailable tries to connect to the given IP via HTTP.
+// It retries with exponential backoff, starting at 1s and doubling after
+// each attempt, waiting up to 4 minutes for a successful HTTP 200 response.
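+// With these parameters the waits between probes are 1s, 2s, 4s, 8s, ...,
+// so roughly eight probes are made before the 4 minute deadline expires.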
+func (l *lbTestHelper) WaitForHTTPAvailable(ingressIP string, useHTTPS bool) error {
+	l.t.Helper()
+
 	client := &http.Client{
-		Timeout: 1 * time.Second,
+		Timeout: 5 * time.Second,
 		Transport: &http.Transport{
 			TLSClientConfig: &tls.Config{
 				InsecureSkipVerify: true, // nolint
@@ -340,24 +343,33 @@ func WaitForHTTPAvailable(t *testing.T, ingressIP string, useHTTPS bool) {
 		proto = "https"
 	}
 
-	err := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 4*time.Minute, false, func(_ context.Context) (bool, error) {
+	ctx, cancel := context.WithTimeout(l.t.Context(), 4*time.Minute)
+	defer cancel()
+
+	backoffFunc := hcloud.ExponentialBackoff(2.0, time.Second)
+	retries := 0
+	for {
 		resp, err := client.Get(fmt.Sprintf("%s://%s", proto, ingressIP))
 		if err != nil {
-			return false, nil
+			l.t.Logf("connection not yet possible, keep waiting: %v", err)
+		} else {
+			resp.Body.Close()
+			switch resp.StatusCode {
+			case http.StatusOK:
+				return nil
+			case http.StatusServiceUnavailable:
+				l.t.Log("service still unavailable, keep waiting")
+			default:
+				return fmt.Errorf("got unexpected HTTP status %d", resp.StatusCode)
+			}
 		}
-		defer resp.Body.Close()
-		switch resp.StatusCode {
-		case http.StatusOK:
-			// Success
-			return true, nil
-		case http.StatusServiceUnavailable:
-			// Health checks are still evaluating
-			return false, nil
-		default:
-			return false, fmt.Errorf("got HTTP Code %d instead of 200", resp.StatusCode)
+
+		select {
+		case <-ctx.Done():
+			return fmt.Errorf("timed out after 4m waiting for %s to become available", ingressIP)
+		case <-time.After(backoffFunc(retries)):
+			retries++
+			continue
 		}
-	})
-	if err != nil {
-		t.Errorf("%s not available: %s", ingressIP, err)
 	}
 }
diff --git a/tests/e2e/robot_test.go b/tests/e2e/robot_test.go
index 66d11581..97d5b088 100644
--- a/tests/e2e/robot_test.go
+++ b/tests/e2e/robot_test.go
@@ -3,10 +3,10 @@ package e2e
 import (
-	"context"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
@@ -20,10 +20,9 @@ func TestRobotClientIsAvailable(t *testing.T) {
 
 func TestNodeSetCorrectNodeLabelsAndIPAddressesRobot(t *testing.T) {
 	t.Parallel()
-	ctx := context.Background()
 
 	// Get a random Robot server from all Nodes in the cluster
-	nodes, err := testCluster.k8sClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{
+	nodes, err := testCluster.k8sClient.CoreV1().Nodes().List(t.Context(), metav1.ListOptions{
 		LabelSelector: "instance.hetzner.cloud/is-root-server=true",
 	})
 	assert.NoError(t, err)
@@ -75,8 +74,12 @@ func TestServiceLoadBalancersRobot(t *testing.T) {
 		t:       t,
 		podName: "loadbalancer-robot-only",
 	}
+	t.Cleanup(func() {
+		lbTest.TearDown()
+	})
 
-	pod := lbTest.DeployTestPod()
+	pod, err := lbTest.DeployTestPod()
+	require.NoError(t, err)
 
 	lbSvc := lbTest.ServiceDefinition(pod, map[string]string{
 		string(annotation.LBLocation): "nbg1",
@@ -84,10 +87,9 @@ func TestServiceLoadBalancersRobot(t *testing.T) {
 		string(annotation.LBNodeSelector): "instance.hetzner.cloud/is-root-server=true",
 	})
 
-	lbSvc, err := lbTest.CreateService(lbSvc)
-	if assert.NoError(t, err, "deploying test svc") {
-		WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, false)
-	}
+	lbSvc, err = lbTest.CreateService(lbSvc)
+	require.NoError(t, err)
 
-	lbTest.TearDown()
+	err = lbTest.WaitForHTTPAvailable(lbSvc.Status.LoadBalancer.Ingress[0].IP, false)
+	require.NoError(t, err)
 }
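Both CreateService and WaitForHTTPAvailable now share the same wait pattern: a context.WithTimeout deadline combined with hcloud.ExponentialBackoff and a select between the expired context and the next backoff interval, replacing wait.PollUntilContextTimeout. A minimal, self-contained sketch of that pattern follows; waitWithBackoff and checkOnce are hypothetical names for illustration, and unlike the helpers above the sketch retries on every error instead of distinguishing fatal ones.

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/hetznercloud/hcloud-go/v2/hcloud"
)

// waitWithBackoff calls checkOnce until it succeeds or ctx expires. Between
// attempts it sleeps for backoffFunc(0), backoffFunc(1), ... which, with the
// parameters below, is 1s, 2s, 4s, 8s, ...
func waitWithBackoff(ctx context.Context, checkOnce func(context.Context) error) error {
	backoffFunc := hcloud.ExponentialBackoff(2.0, time.Second)

	for retries := 0; ; retries++ {
		if err := checkOnce(ctx); err == nil {
			return nil
		}

		select {
		case <-ctx.Done():
			return fmt.Errorf("timed out waiting for condition: %w", ctx.Err())
		case <-time.After(backoffFunc(retries)):
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	start := time.Now()
	// Simulated probe that only becomes "ready" after 5 seconds.
	err := waitWithBackoff(ctx, func(context.Context) error {
		if time.Since(start) < 5*time.Second {
			return errors.New("not ready yet")
		}
		return nil
	})
	fmt.Println(err) // <nil>: probes at ~0s, 1s, 3s and 7s; the last one succeeds
}
```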