diff --git a/Makefile b/Makefile index 95ff2e0..9e08dd7 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help install-precommit setup-dev lint lint-actions lint-devspace test test-install test-e2e smoke clean verify +.PHONY: help install-precommit setup-dev lint lint-actions lint-devspace lint-yaml lint-helm test test-install test-e2e smoke smoke-gke clean format verify check help: ## Display this help message @echo "Available targets:" @@ -69,6 +69,9 @@ test-e2e: ## Run full ephemeral-cluster DevSpace e2e validation smoke: test-e2e ## Run expensive smoke validation +smoke-gke: ## Run expensive GKE smoke validation + CLUSTER_PROVIDER=gke go run ./tests/e2e/cmd/smoke + clean: ## Clean up generated files @echo "Cleaning up..." rm -rf charts/*/charts/ diff --git a/README.md b/README.md index e924945..ee34a30 100644 --- a/README.md +++ b/README.md @@ -25,347 +25,166 @@ This starter pack provides a complete local Kubernetes development infrastructur ## Prerequisites -### Required Tools +Install these tools before using the local path: -- **DevSpace** (>= v6.0): [Install Guide](https://devspace.sh/docs/getting-started/installation) -- **kubectl**: Kubernetes CLI -- **yq** (>= v4): YAML processor -- **Helm** (>= v3): Package manager for Kubernetes +- **DevSpace** >= v6.0 +- **kubectl** +- **Helm** >= v3 +- **yq** >= v4 -### Supported Kubernetes Platforms +Supported local Kubernetes contexts are Docker Desktop, Minikube, Rancher Desktop, MicroK8s, and +`kind*`. macOS host integration also requires Homebrew and admin privileges for DNS and certificate +trust setup. -- Docker Desktop -- Minikube (edit `DOCKER_CIDR_PREFIX`) - -### macOS-Specific Requirements - -- **Homebrew**: For installing `docker-mac-net-connect` -- **Admin privileges**: Required for DNS configuration and certificate import +GKE setup additionally requires `terraform`, `gcloud`, and `gke-gcloud-auth-plugin`. 
Managed GKE +deploys that install Config Connector also need `tar` for the official operator bundle. GKE +`ensure-cluster` bootstraps Google login, derives unambiguous billing/org inputs from the logged-in +account, and tells you which `devspace set var ...` command to run when it cannot choose. Run +`devspace run check-tools` for a fast local preflight; it checks tools only and does not +authenticate or install anything. See [GKE setup](docs/gke.md). ## Getting Started -### 1. Clone and Navigate - ```bash git clone <repository-url> cd devspace-starter-pack -``` - -### 2. Deploy Infrastructure -Deploy all infrastructure components: - -```bash devspace deploy ``` -Deploy specific profiles: +DevSpace auto-selects local or GKE profiles from the active kube context. Select the local workflow +explicitly when you want to persist the current local context and DNS defaults: ```bash -# Add databases -devspace deploy --profile local-psql,local-redis - -# Add Grafana -devspace deploy --profile o11y-grafana - -# Add logs and Grafana trace backend addons -devspace deploy --profile o11y-grafana,o11y-addons +devspace run ensure-cluster ``` -### 3. Verify Installation - -Check that all components are running: +Verify the install: ```bash +devspace run test-install kubectl get pods --all-namespaces +devspace run print-cluster-env ``` -Test DNS resolution: +On macOS, use system resolver tools for DNS checks: ```bash dns-sd -q ns.dns.kube ``` -**NOTE**: on macOS, do not rely on `dig` for testing DNS resolution. - -### Ephemeral Smoke Validation - -Run the full advertised deploy path against a throwaway local cluster: - -```bash -make smoke -``` - -This creates an ephemeral `kind` cluster with an isolated kubeconfig, runs `devspace deploy`, runs the -live install diagnostics, and deletes the cluster. The direct e2e target is also available: +`dig` does not exercise the same macOS resolver path. 
-```bash -make test-e2e -``` +## Common Workflows -Useful local overrides: +Add common optional services: ```bash -E2E_CLUSTER_NAME=my-smoke E2E_KEEP_CLUSTER=1 make smoke -E2E_DEVSPACE_ARGS="--profile o11y-grafana" make smoke -E2E_TIMEOUT=30m E2E_READY_TIMEOUT=10m make smoke -``` - -`E2E_CLUSTER_PROVIDER=kind` is the current default and only implemented provider. `vind` is reserved -as a future provider name. Timeout knobs use Go duration syntax and include `E2E_TIMEOUT`, -`E2E_CLUSTER_CREATE_WAIT`, `E2E_CLEANUP_TIMEOUT`, `E2E_READY_TIMEOUT`, -`E2E_READY_REPORT_INTERVAL`, `E2E_DIAGNOSTIC_TIMEOUT`, and `E2E_TEST_TIMEOUT`. - -## Available Profiles - -| Profile | Description | Components | -|---------|-------------|------------| -| `local-network` | Core networking infrastructure | MetalLB, Istio, Gateway API | -| `local-dns` | DNS integration for development | External DNS, CoreDNS, etcd | -| `local-certs` | Certificate management | cert-manager, trust-manager, reflector | -| `local-aux` | Auxiliary services | Reloader | -| `local-test` | Test applications | httpbin with routes | -| `with-o11y` | Core observability | Prometheus, metrics-server, OpenTelemetry Collector, Jaeger | -| `o11y-grafana` | Grafana UI | Grafana, Grafana HTTPRoute, datasource/dashboard sidecars | -| `o11y-addons` | Extended observability | Alloy, Loki, Tempo, Grafana datasource ConfigMaps | -| `local-psql` | PostgreSQL database | PostgreSQL with persistence | -| `local-redis` | Redis cache | Redis with persistence | -| `local-es` | ElasticSearch | Single-node ElasticSearch | - -## Available Commands +# Databases +devspace deploy --profile local-psql,local-redis -Find all available commands: +# Grafana +devspace deploy --profile o11y-grafana -```bash -devspace list commands +# Grafana, logs, and Tempo +devspace deploy --profile o11y-grafana,o11y-addons ``` -### Network Commands +Manage host DNS and trust integration: ```bash -# Configure host DNS to use cluster DNS for .kube domains devspace run 
update-cluster-dns - -# Reset DNS configuration devspace run reset-cluster-dns - -# Import cluster root CA certificate to macOS keychain devspace run import-root-ca ``` -### Observability Commands - -The tracing services are `ClusterIP` services by default. Service workloads should use the in-cluster -collector DNS name directly. The Jaeger UI is exposed through the shared local HTTPS gateway at -`https://jaeger.int.kube`. Grafana is available at `https://grafana.int.kube` when the -`o11y-grafana` profile is deployed. +Use GKE instead of a local cluster: ```bash -# Forward OTLP/gRPC and OTLP/HTTP for host-side trace smoke tests -devspace run port-forward-otel -``` - -## Key Features - -### Automatic macOS Integration - -- **Network Connectivity**: Automatically installs and configures `docker-mac-net-connect` for seamless networking -- **DNS Integration**: Configures macOS to resolve `.kube` domains through the cluster DNS -- **Certificate Trust**: Imports cluster CA certificates to macOS keychain for trusted HTTPS - -### HTTP(S) Gateway with Istio - -- `*.int.kube` autowired for Gateway API -- `*.istio.kube` autowired for Istio Ingress -- Gateway API and Istio Ingress support for traffic management -- Automatic TLS termination with custom certificates -- Traffic routing for microservices - -The Istio mesh config defines an optional Gateway API external authorization provider named -`gateway-ext-authz-grpc`. This is only a generic extension point: infra does not install an ext-authz -backend, does not create a `gateway-ext-authz` Service, and does not create an AuthorizationPolicy. -If no app installs an AuthorizationPolicy that uses the provider, the provider is inert. 
- -Apps that want gateway-level external authorization must install their own ext-authz backend, a -Service alias named `gateway-ext-authz` in the `istio-ingress` namespace on port `3001`, and an -AuthorizationPolicy targeting the Gateway API generated gateway workload: -`gateway.networking.k8s.io/gateway-name=gateway`. - -Example app-side AuthorizationPolicy: - -```yaml -apiVersion: security.istio.io/v1 -kind: AuthorizationPolicy -metadata: - name: example-gateway-ext-authz - namespace: istio-ingress -spec: - selector: - matchLabels: - gateway.networking.k8s.io/gateway-name: gateway - action: CUSTOM - provider: - name: gateway-ext-authz-grpc - rules: - - {} -``` +devspace --var CLUSTER_PROVIDER=gke run ensure-cluster -Example app-side Service alias: - -```yaml -apiVersion: v1 -kind: Service -metadata: - name: gateway-ext-authz - namespace: istio-ingress -spec: - type: ExternalName - externalName: my-ext-authz.my-app-namespace.svc.cluster.local - ports: - - name: grpc - port: 3001 - targetPort: 3001 +devspace deploy --var HOST_INTEGRATION=false ``` -### Certificate Management - -- Complete CA chain (Cluster Root CA → Intermediate CA → Leaf certificates) -- Automatic certificate renewal -- Trust bundle distribution across namespaces -- Custom certificate chain in `charts/cert-chain/` - -### Local DNS Resolution - -- `.kube` domain resolution for all services of type `Loadbalancer` -- External DNS automatically creates DNS records - -### Observability Stack +If your account can see multiple billing accounts or organizations, `ensure-cluster` stops before +Terraform and prints the exact `devspace set var ...` command to disambiguate. 
-- **OpenTelemetry Collector**: Local OTLP/gRPC and OTLP/HTTP trace receiver for service repositories -- **Jaeger**: Lightweight trace UI with transient in-memory storage -- **Prometheus**: Default local metrics collection and alerting -- **Grafana**: Optional visualization with default local cluster dashboards and dashboard provisioning -- **Loki**: Optional log aggregation -- **Tempo**: Optional distributed tracing backend -- **Alloy**: Optional OpenTelemetry collection - -The default local deployment includes Prometheus and lightweight tracing. Deploy Grafana when a -host-browser UI is needed: +Select an already-prepared GKE cluster by switching to its `gke_*` kube context, setting the +non-derivable deploy inputs, and running the same command: ```bash -devspace deploy --profile o11y-grafana +kubectl config use-context gke_PROJECT_REGION_CLUSTER +devspace set var GKE_DNS_NAMESERVERS=ns-cloud-example1.googledomains.com.,ns-cloud-example2.googledomains.com. +devspace run ensure-cluster ``` -Service repositories can export traces and metrics to the collector with: +Full GKE setup, DNS, IAP, Config Connector, registry, and smoke details are in +[docs/gke.md](docs/gke.md). -```bash -OTEL_SERVICE_NAME= -OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector.observability.svc.cluster.local:4317 -OTEL_EXPORTER_OTLP_PROTOCOL=grpc -``` +## Validation -For OTLP/HTTP exporters, use: +Run the advertised local deploy path against a throwaway cluster: ```bash -OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector.observability.svc.cluster.local:4318 -OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf +make smoke ``` -Open the trace UI at `https://jaeger.int.kube`. In-cluster workloads export to -`otel-collector.observability.svc.cluster.local` without port-forwarding. Grafana reads metrics from -Prometheus, including OTLP metrics remote-written by the collector. 
The collector preserves resource -attributes as metric labels for local querying, while keeping a single remote-write sender path for -the in-cluster Prometheus receiver. - -Istio gateway and control-plane metrics are scraped directly by Prometheus so the official Istio RED -dashboards keep their upstream metric and label expectations. Istio proxy tracing is sent to the same -OpenTelemetry Collector and Jaeger path with local-only 100% sampling. - -Open Grafana at `https://grafana.int.kube` and log in with `admin` / `admin` -(the local-only credentials configured in `helm-values/grafana.yaml`). - -Deploy Loki, Alloy, and Tempo when logs or Grafana-backed trace exploration are needed: +Run the GKE smoke path: ```bash -devspace deploy --profile o11y-grafana,o11y-addons +make smoke-gke ``` -Grafana discovers additional dashboards and datasources from Kubernetes objects: - -- Dashboards: create a ConfigMap or Secret with label `grafana_dashboard: "1"` and dashboard JSON data. -- Datasources: create a ConfigMap or Secret with label `grafana_datasource: "1"` and Grafana provisioning YAML. -- Optional dashboard folders: set annotation `grafana_folder` on the dashboard ConfigMap or Secret. -- These objects can live in service repository namespaces; the Grafana sidecars watch all namespaces. - -The `o11y-grafana` profile installs starter dashboards in the `Kubernetes` folder for API server, -compute resource, and kubelet/runtime health. - -It also installs upstream Grafana.com dashboards in the `Candidates` folder for comparison: -`Kubernetes Overview` and `OpenTelemetry Collector`. `Kubernetes Overview` is configured as the -Grafana home dashboard for the local instance. - -The `Istio` folder contains official Istio `1.26.2` dashboards for mesh, service, workload, and -control-plane RED drilldowns. 
- -### Helm Values - -Customize component configurations in `helm-values/`: - -### Certificate Configuration - -Customize the certificate chain in `charts/cert-chain/values.yaml` or create custom values files. - -## Troubleshooting - -### DNS Issues +Run only the install diagnostics for the current context: ```bash -# Check DNS configuration -devspace run reset-cluster-dns -devspace run update-cluster-dns - -# Verify CoreDNS is running -kubectl get pods -n external-dns +devspace run test-install ``` -### Certificate Issues +More test knobs and smoke harness details are in +[docs/devspace-reference.md](docs/devspace-reference.md). -```bash -# Check certificate status -kubectl get certificates --all-namespaces -kubectl describe certificate cluster-root-ca -n cert-manager +## Profiles And Commands -# Re-import root CA -devspace run import-root-ca -``` +DevSpace auto-activates the base local or GKE infrastructure profiles from the kube context. Add +optional profiles only for workloads you want on top: -### Network Connectivity +| Need | Profiles | +|------|----------| +| Local databases | `local-psql`, `local-redis`, `local-es` | +| Metrics, tracing, Jaeger | `with-o11y` plus context-activated `local-o11y` or `gke-o11y` | +| Grafana | `o11y-grafana` (`GKE` auto-activates it; local clusters opt in) | +| Logs and Tempo | `o11y-addons` | -```bash -# Check docker-mac-net-connect status -brew services list | grep docker-mac-net-connect +Useful commands: -# Restart network connectivity -sudo brew services restart chipmk/tap/docker-mac-net-connect +```bash +devspace list commands +devspace run ensure-cluster +devspace --var CLUSTER_PROVIDER=gke run ensure-cluster +devspace run print-cluster-env +devspace run gke-gateway-resources +devspace run gke-dev-registry-info +devspace run port-forward-otel ``` -### LoadBalancer Issues +Starter-pack publishes a non-secret cluster environment contract at +`devspace-system/devspace-starter-pack-env`. 
App repos can read provider, domain, gateway, and +registry settings directly from the active cluster without a local starter-pack checkout. -```bash -# Check MetalLB status -kubectl get pods -n metallb-system -kubectl get ipaddresspools -n metallb-system -``` +The full profile, command, variable, and smoke-reference tables live in +[docs/devspace-reference.md](docs/devspace-reference.md). -## Development Workflow +## Feature Guides -1. **Deploy Infrastructure**: `devspace deploy --profile local-network,local-certs` -2. **Add DNS** (optional): `devspace deploy --profile local-dns` -3. **Use Metrics/Tracing**: included by default through `with-o11y` on local clusters -4. **Add Grafana** (optional): `devspace deploy --profile o11y-grafana` -5. **Add Logs/Tempo** (optional): `devspace deploy --profile o11y-grafana,o11y-addons` -6. **Deploy Your Applications**: Use the configured Gateway and DNS -7. **Access Services**: Via `*.kube` domains with automatic HTTPS +- [GKE setup and validation](docs/gke.md) +- [Gateway and authz attachment conventions](docs/gateway-authz.md) +- [Observability](docs/observability.md) +- [DevSpace reference](docs/devspace-reference.md) +- [Troubleshooting](docs/troubleshooting.md) + +Customize Helm values in `helm-values/`. Customize the certificate chain in +`charts/cert-chain/values.yaml`. ## Cleanup @@ -381,6 +200,8 @@ Reset macOS DNS configuration: devspace run reset-cluster-dns ``` +For GKE Terraform cleanup, see [docs/gke.md](docs/gke.md). + ## License Licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE) for the full license text. 
diff --git a/charts/cert-chain/Chart.yaml b/charts/cert-chain/Chart.yaml index 35fe762..3caf9eb 100644 --- a/charts/cert-chain/Chart.yaml +++ b/charts/cert-chain/Chart.yaml @@ -5,4 +5,4 @@ type: application version: 0.1.0 appVersion: "1.0" dependencies: [] -kubeVersion: ">=1.19.0" +kubeVersion: ">=1.19.0-0" diff --git a/charts/gke-iap-protection/Chart.yaml b/charts/gke-iap-protection/Chart.yaml new file mode 100644 index 0000000..adff269 --- /dev/null +++ b/charts/gke-iap-protection/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: gke-iap-protection +description: GKE BackendPolicy resources for IAP-protected development routes +type: application +version: 0.1.0 +appVersion: "0.1.0" diff --git a/charts/gke-iap-protection/templates/gcpbackendpolicies.yaml b/charts/gke-iap-protection/templates/gcpbackendpolicies.yaml new file mode 100644 index 0000000..c70d2da --- /dev/null +++ b/charts/gke-iap-protection/templates/gcpbackendpolicies.yaml @@ -0,0 +1,40 @@ +{{- if eq .Values.protection "iap" -}} +{{- if or (and .Values.oauthClientID (not .Values.oauthClientSecret)) (and .Values.oauthClientSecret (not .Values.oauthClientID)) -}} +{{- fail "custom GKE IAP OAuth requires both oauthClientID and oauthClientSecret; leave both empty to use Google-managed OAuth" -}} +{{- end -}} +{{- $customOAuth := and .Values.oauthClientID .Values.oauthClientSecret -}} +{{- if $customOAuth }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Values.secretName | quote }} +type: Opaque +stringData: + key: {{ .Values.oauthClientSecret | quote }} +--- +{{- end }} +{{- range .Values.targets }} +apiVersion: networking.gke.io/v1 +kind: GCPBackendPolicy +metadata: + name: {{ .policyName | quote }} +spec: + default: + iap: + enabled: true + {{- if $customOAuth }} + oauth2ClientSecret: + name: {{ $.Values.secretName | quote }} + clientID: {{ $.Values.oauthClientID | quote }} + {{- end }} + targetRef: + group: "" + kind: Service + name: {{ .serviceName | quote }} +--- +{{- end }} +{{- else if eq 
.Values.protection "vpn" -}} +{{- fail "GKE_PROTECTION=vpn is reserved for future private-Gateway/VPN support and is not implemented yet" -}} +{{- else -}} +{{- fail (printf "unsupported GKE_PROTECTION=%s; expected iap or vpn" .Values.protection) -}} +{{- end -}} diff --git a/charts/gke-iap-protection/values.yaml b/charts/gke-iap-protection/values.yaml new file mode 100644 index 0000000..dd77129 --- /dev/null +++ b/charts/gke-iap-protection/values.yaml @@ -0,0 +1,5 @@ +protection: iap +oauthClientID: "" +oauthClientSecret: "" +secretName: iap-oauth-client +targets: [] diff --git a/devspace.yaml b/devspace.yaml index 311c2ee..0759f0a 100644 --- a/devspace.yaml +++ b/devspace.yaml @@ -10,12 +10,64 @@ require: version: ">= 4" vars: + CLUSTER_PROVIDER: + default: "" ISTIO_CHART_VERSION: 1.26.2 GATEWAY_EXT_AUTHZ_PROVIDER_NAME: gateway-ext-authz-grpc GATEWAY_EXT_AUTHZ_SERVICE_NAME: gateway-ext-authz GATEWAY_EXT_AUTHZ_SERVICE_NAMESPACE: istio-ingress GATEWAY_EXT_AUTHZ_SERVICE_PORT: default: 3001 + GATEWAY_NAMESPACE: + default: istio-ingress + DNS_MODE: + default: local + DNS_SERVICE_ID: + default: kube + HOST_INTEGRATION: + default: "true" + GKE_DNS_DOMAIN: + default: gcp.kube + GKE_DNS_NAMESERVERS: + default: "" + GKE_SELECTED_CONTEXT: + default: "" + GKE_PROJECT_ID: + default: "" + GKE_REGION: + default: us-central1 + GKE_GCLOUD_CONFIGURATION: + default: devspace-starter-pack + GKE_GCLOUD_ACCOUNT: + default: "" + GKE_PROTECTION: + default: iap + GKE_IAP_OAUTH_CLIENT_ID: + default: "" + GKE_IAP_OAUTH_CLIENT_SECRET: + default: "" + GKE_IAP_SECRET_NAME: + default: iap-oauth-client + GCP_BILLING_ACCOUNT_ID: + default: "" + GCP_ORG_ID: + default: "" + GCP_FOLDER_ID: + default: "" + DEV_REGISTRY_HOST: + default: "" + DEV_REGISTRY: + default: "" + DEV_REGISTRY_IMAGE_PREFIX: + default: "" + DEV_REGISTRY_REPOSITORY: + default: devspace-dev + DEV_REGISTRY_WRITER_MEMBERS: + default: "" + GKE_IAP_ACCESSOR_MEMBERS: + default: "" + GRAFANA_ROUTE_MANIFEST: + default: 
./manifests/grafana-route.yaml METALLB_CHART_VERSION: v0.15.2 POSTGRES_VERSION: 16 POSTGRES_CHART_VERSION: 16.7.20 @@ -233,6 +285,9 @@ profiles: name: cert-manager repo: https://charts.jetstack.io values: + global: + leaderElection: + namespace: cert-manager installCRDs: true config: apiVersion: controller.config.cert-manager.io/v1alpha1 @@ -308,11 +363,220 @@ profiles: - ./manifests/httpbin-istio-vs.yaml - ./manifests/httpbin-service-mesh.yaml + - name: gke-network + description: GKE Gateway API networking without Istio + activation: + - vars: + DEVSPACE_CONTEXT: "^gke_.+" + merge: + vars: + GATEWAY_NAMESPACE: gke-gateway + DNS_MODE: cloud-dns + DNS_SERVICE_ID: gcp-kube + deployments: + gke-gateway: + namespace: gke-gateway + kubectl: + inlineManifest: |- + --- + apiVersion: v1 + kind: Namespace + metadata: + name: gke-gateway + --- + apiVersion: gateway.networking.k8s.io/v1 + kind: Gateway + metadata: + name: gateway + namespace: gke-gateway + spec: + gatewayClassName: gke-l7-regional-external-managed + listeners: + - name: http + hostname: "*.${GKE_DNS_DOMAIN}" + port: 80 + protocol: HTTP + allowedRoutes: + namespaces: + from: All + - name: https + hostname: "*.${GKE_DNS_DOMAIN}" + port: 443 + protocol: HTTPS + tls: + mode: Terminate + certificateRefs: + - name: gateway-tls-secret + allowedRoutes: + namespaces: + from: All + + - name: gke-dns + description: external-dns backed by Google Cloud DNS for GKE + activation: + - vars: + DEVSPACE_CONTEXT: "^gke_.+" + merge: + vars: + DNS_MODE: cloud-dns + DNS_SERVICE_ID: gcp-kube + deployments: + external-dns: + namespace: external-dns + helm: + chart: + name: external-dns + version: 1.21.1 + repo: https://kubernetes-sigs.github.io/external-dns/ + values: + provider: + name: google + policy: sync + registry: txt + txtOwnerId: ${GKE_PROJECT_ID} + domainFilters: + - ${GKE_DNS_DOMAIN} + sources: + - gateway-httproute + serviceAccount: + create: true + name: external-dns + annotations: + iam.gke.io/gcp-service-account: 
external-dns@${GKE_PROJECT_ID}.iam.gserviceaccount.com + rbac: + create: true + logLevel: debug + extraArgs: + google-project: ${GKE_PROJECT_ID} + gateway-namespace: gke-gateway + gateway-name: gateway + + - name: gke-certs + description: Internal CA and trusted TLS for GKE Gateway + activation: + - vars: + DEVSPACE_CONTEXT: "^gke_.+" + merge: + vars: + GATEWAY_NAMESPACE: gke-gateway + DNS_MODE: cloud-dns + DNS_SERVICE_ID: gcp-kube + deployments: + cert-manager: + namespace: cert-manager + helm: + chart: + name: cert-manager + repo: https://charts.jetstack.io + values: + installCRDs: true + global: + leaderElection: + namespace: cert-manager + config: + apiVersion: controller.config.cert-manager.io/v1alpha1 + kind: ControllerConfiguration + enableGatewayAPI: true + trust-manager: + namespace: cert-manager + helm: + chart: + name: trust-manager + repo: https://charts.jetstack.io + reflector: + namespace: reflector + helm: + chart: + name: oci://ghcr.io/emberstack/helm-charts/reflector + cert-chain: + namespace: cert-manager + helm: + chart: + name: cert-chain + path: ./charts/cert-chain + values: + issuers: + intermediate: + namespace: gke-gateway + gatewayCert: + enabled: true + dnsNames: + - "*.${GKE_DNS_DOMAIN}" + + - name: gke-test + description: GKE raw httpbin authz/plugin test route + merge: + vars: + GATEWAY_NAMESPACE: gke-gateway + DNS_MODE: cloud-dns + DNS_SERVICE_ID: gcp-kube + deployments: + gke-httpbin-namespace: + namespace: httpbin + kubectl: + inlineManifest: |- + --- + apiVersion: v1 + kind: Namespace + metadata: + name: httpbin + gke-httpbin-app: + namespace: httpbin + kubectl: + manifests: + - https://raw.githubusercontent.com/istio/istio/release-1.26/samples/httpbin/httpbin.yaml + gke-httpbin-route: + namespace: httpbin + kubectl: + inlineManifest: |- + --- + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + name: https + namespace: httpbin + spec: + parentRefs: + - name: gateway + namespace: gke-gateway + sectionName: 
https + hostnames: + - "httpbin.${GKE_DNS_DOMAIN}" + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: httpbin + port: 8000 + --- + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + name: http-redirect + namespace: httpbin + spec: + parentRefs: + - name: gateway + namespace: gke-gateway + sectionName: http + hostnames: + - "httpbin.${GKE_DNS_DOMAIN}" + rules: + - filters: + - type: RequestRedirect + requestRedirect: + scheme: https + statusCode: 301 + - name: with-o11y - description: Cluster observability with Prometheus metrics and lightweight tracing + description: Shared observability base with Prometheus metrics and lightweight tracing activation: - vars: DEVSPACE_CONTEXT: "^(kind(?:-.+)?|docker-desktop|minikube|rancher-desktop|microk8s)$" + - vars: + DEVSPACE_CONTEXT: "^gke_.+" merge: deployments: prometheus: @@ -323,15 +587,6 @@ profiles: repo: https://prometheus-community.github.io/helm-charts valuesFiles: - ./helm-values/prometheus.yaml - metric-server: - namespace: kube-system - helm: - chart: - name: metrics-server - repo: https://kubernetes-sigs.github.io/metrics-server - values: - args: - - --kubelet-insecure-tls jaeger: namespace: observability helm: @@ -352,6 +607,23 @@ profiles: valuesFiles: - ./helm-values/otel-collector.yaml + - name: local-o11y + description: Local observability overlay for metrics-server, Istio monitors, and local routes + activation: + - vars: + DEVSPACE_CONTEXT: "^(kind(?:-.+)?|docker-desktop|minikube|rancher-desktop|microk8s)$" + merge: + deployments: + metric-server: + namespace: kube-system + helm: + chart: + name: metrics-server + repo: https://kubernetes-sigs.github.io/metrics-server + values: + args: + - --kubelet-insecure-tls + jaeger-route: namespace: observability kubectl: @@ -364,8 +636,88 @@ profiles: manifests: - ./manifests/istio-metrics-monitors.yaml + - name: gke-o11y + description: GKE observability overlay for managed metrics, routes, and IAP + 
activation: + - vars: + DEVSPACE_CONTEXT: "^gke_.+" + merge: + vars: + GRAFANA_ROUTE_MANIFEST: ./manifests/grafana-route-gke.yaml + deployments: + prometheus: + helm: + values: + defaultRules: + rules: + kubeControllerManager: false + kubelet: false + kubeProxy: false + kubeSchedulerAlerting: false + kubeSchedulerRecording: false + kubelet: + enabled: false + kubeControllerManager: + enabled: false + kubeScheduler: + enabled: false + kubeEtcd: + enabled: false + kubeProxy: + enabled: false + coreDns: + enabled: false + kubeDns: + enabled: false + prometheusOperator: + kubeletService: + enabled: false + jaeger-route: + namespace: observability + kubectl: + manifests: + - ./manifests/jaeger-route-gke.yaml + gke-o11y-protection: + namespace: observability + helm: + chart: + name: gke-iap-protection + path: ./charts/gke-iap-protection + values: + protection: ${GKE_PROTECTION} + oauthClientID: ${GKE_IAP_OAUTH_CLIENT_ID} + oauthClientSecret: ${GKE_IAP_OAUTH_CLIENT_SECRET} + secretName: ${GKE_IAP_SECRET_NAME} + targets: + - policyName: jaeger-iap + serviceName: jaeger + - policyName: grafana-iap + serviceName: grafana + grafana-health-check: + namespace: observability + kubectl: + inlineManifest: |- + apiVersion: networking.gke.io/v1 + kind: HealthCheckPolicy + metadata: + name: grafana + spec: + default: + config: + type: HTTP + httpHealthCheck: + portSpecification: USE_SERVING_PORT + requestPath: /api/health + targetRef: + group: "" + kind: Service + name: grafana + - name: o11y-grafana - description: Grafana UI for local observability + description: Grafana UI for observability + activation: + - vars: + DEVSPACE_CONTEXT: "^gke_.+" merge: deployments: grafana: @@ -381,7 +733,7 @@ profiles: namespace: observability kubectl: manifests: - - ./manifests/grafana-route.yaml + - ${GRAFANA_ROUTE_MANIFEST} grafana-datasource-prometheus: namespace: observability @@ -509,17 +861,31 @@ hooks: - name: update-cluster-dns-hook-darwin os: darwin - command: devspace run 
update-cluster-dns + command: | + if [ "${HOST_INTEGRATION}" != "true" ]; then + echo >&2 "I: Skipping host DNS integration because HOST_INTEGRATION=${HOST_INTEGRATION}" + exit 0 + fi + devspace run update-cluster-dns events: ["after:deploy:cert-chain"] - name: update-cluster-dns-hook-linux os: linux - command: devspace run update-cluster-dns-linux + command: | + if [ "${HOST_INTEGRATION}" != "true" ]; then + echo >&2 "I: Skipping host DNS integration because HOST_INTEGRATION=${HOST_INTEGRATION}" + exit 0 + fi + devspace run update-cluster-dns-linux events: ["after:deploy:cert-chain"] - name: cert-chain-hook-darwin os: darwin command: | + if [ "${HOST_INTEGRATION}" != "true" ]; then + echo >&2 "I: Skipping host CA import because HOST_INTEGRATION=${HOST_INTEGRATION}" + exit 0 + fi while ! devspace run import-root-ca; do echo >&2 "I: Waiting for root CA to be available..." sleep 5 @@ -530,6 +896,10 @@ hooks: - name: cert-chain-hook-linux os: linux command: | + if [ "${HOST_INTEGRATION}" != "true" ]; then + echo >&2 "I: Skipping host CA import because HOST_INTEGRATION=${HOST_INTEGRATION}" + exit 0 + fi while ! devspace run import-root-ca-linux; do echo >&2 "I: Waiting for root CA to be available..." sleep 5 @@ -555,8 +925,15 @@ hooks: pipelines: deploy: run: |- + devspace run check-tools + ./scripts/devspace-cluster-backend.sh validate-provider + ./scripts/devspace-cluster-backend.sh publish-cluster-env + devspace run install-config-connector + PHASES=( - "etcd gateway-api-crds" + # Phase entries are filtered against enabled deployments, so profile-only + # names like gke-gateway are no-ops unless their profile is active. 
+ "etcd gateway-api-crds gke-gateway" "cert-manager metallb reflector prometheus" "metallb-resources" "cert-chain trust-manager" @@ -590,6 +967,31 @@ pipelines: fi commands: + check-tools: + description: Check required local tools for the active cluster provider + section: network + command: HOST_INTEGRATION="${HOST_INTEGRATION}" DNS_MODE="${DNS_MODE}" DOCKER_CIDR_PREFIX="${DOCKER_CIDR_PREFIX}" ./scripts/check-tools.sh + + ensure-cluster: + description: Ensure/select a development cluster backend + section: network + command: ./scripts/devspace-cluster-backend.sh ensure + + test-install: + description: Run install diagnostics for the current cluster provider + section: network + command: ./scripts/devspace-cluster-backend.sh test-install + + install-config-connector: + description: Install Config Connector when enabled by the cluster contract + section: network + command: ./scripts/config-connector.sh install + + print-cluster-env: + description: Print the cluster environment contract + section: network + command: ./scripts/devspace-cluster-backend.sh print-cluster-env + docker-mac-net-connect: description: Connect Docker for Mac to the cluster section: network @@ -610,56 +1012,61 @@ commands: update-cluster-dns: description: Make external cluster DNS available on the host section: network - command: | - echo >&2 "I: Updating DNS settings..." - sudo scutil <&2 "I: Updating systemd-resolved DNS settings..." 
- LINK="$(ip route get "${DNS_IP}" | awk '{for (i = 1; i <= NF; i++) if ($i == "dev") {print $(i + 1); exit}}')" - if [ -z "${LINK}" ]; then - echo >&2 "E: Could not determine Linux route interface for ${DNS_IP}" - exit 1 - fi - sudo resolvectl dns "${LINK}" "${DNS_IP}" - sudo resolvectl domain "${LINK}" "~kube" - sudo resolvectl flush-caches + command: ./scripts/cluster-dns.sh update-linux "${DNS_MODE}" "${DNS_SERVICE_ID}" "${GKE_DNS_DOMAIN}" "${GKE_DNS_NAMESERVERS}" "${DOCKER_CIDR_PREFIX}" reset-cluster-dns: description: Reset DNS service section: network - command: | - echo >&2 "I: Resetting DNS..." - sudo scutil <&2 "I: Resetting systemd-resolved DNS settings..." - LINK="$(ip route get "${DNS_IP}" | awk '{for (i = 1; i <= NF; i++) if ($i == "dev") {print $(i + 1); exit}}')" - if [ -z "${LINK}" ]; then - echo >&2 "E: Could not determine Linux route interface for ${DNS_IP}" - exit 1 + GATEWAY_NAME="${GATEWAY_NAME:-gateway}" + GATEWAY_NS="${GATEWAY_NAMESPACE:-gke-gateway}" + echo "gateway=${GATEWAY_NS}/${GATEWAY_NAME}" + echo "region=${GKE_REGION}" + echo "address=$(kubectl -n "${GATEWAY_NS}" get gateway "${GATEWAY_NAME}" -o go-template='{{ range .status.addresses }}{{ .value }}{{ "\n" }}{{ end }}' | head -n1)" + echo "forwarding_rules=$(kubectl -n "${GATEWAY_NS}" get gateway "${GATEWAY_NAME}" -o go-template='{{ index .metadata.annotations "networking.gke.io/forwarding-rules" }}')" + echo "backend_services=$(kubectl -n "${GATEWAY_NS}" get gateway "${GATEWAY_NAME}" -o go-template='{{ index .metadata.annotations "networking.gke.io/backend-services" }}')" + echo "url_maps=$(kubectl -n "${GATEWAY_NS}" get gateway "${GATEWAY_NAME}" -o go-template='{{ index .metadata.annotations "networking.gke.io/url-maps" }}')" + + gke-dev-registry-info: + description: Print GKE dev image registry configuration + section: network + command: | + HOST="${DEV_REGISTRY_HOST}" + REGISTRY="${DEV_REGISTRY}" + PREFIX="${DEV_REGISTRY_IMAGE_PREFIX}" + if [ -z "${HOST}" ]; then + HOST="${GKE_REGION}-docker.pkg.dev" + fi + 
if [ -z "${REGISTRY}" ]; then + if [ -z "${GKE_PROJECT_ID}" ]; then + echo >&2 "E: GKE registry configuration is missing." + echo >&2 "I: Set the required GKE vars, then run 'devspace --var CLUSTER_PROVIDER=gke run ensure-cluster'." + exit 1 + fi + REGISTRY="${HOST}/${GKE_PROJECT_ID}/${DEV_REGISTRY_REPOSITORY}" + fi + if [ -z "${PREFIX}" ]; then + PREFIX="${REGISTRY}" fi - sudo resolvectl revert "${LINK}" - sudo resolvectl flush-caches + echo "export DEV_REGISTRY_HOST=${HOST}" + echo "export DEV_REGISTRY=${REGISTRY}" + echo "export DEV_REGISTRY_IMAGE_PREFIX=${PREFIX}" + echo "gcloud auth configure-docker ${HOST}" import-root-ca: description: Import the root CA certificate of the cluster @@ -670,8 +1077,7 @@ commands: trap 'rm -f "${CERTFILE}"' EXIT echo >&2 "I: Extracting Root CA certificate..." - kubectl get secret -n istio-ingress cluster-root-ca-secret \ - -o jsonpath='{.data.tls\.crt}' | base64 -d > "${CERTFILE}" + kubectl get secret -n "${GATEWAY_NAMESPACE}" cluster-root-ca-secret -o jsonpath='{.data.tls\.crt}' | base64 -d > "${CERTFILE}" if [ ! -s "${CERTFILE}" ]; then echo >&2 "E: Failed to extract certificate or certificate is empty" exit 1 @@ -693,8 +1099,7 @@ commands: trap 'rm -f "${CERTFILE}"' EXIT echo >&2 "I: Extracting Root CA certificate..." - kubectl get secret -n istio-ingress cluster-root-ca-secret \ - -o jsonpath='{.data.tls\.crt}' | base64 -d > "${CERTFILE}" + kubectl get secret -n "${GATEWAY_NAMESPACE}" cluster-root-ca-secret -o jsonpath='{.data.tls\.crt}' | base64 -d > "${CERTFILE}" if [ ! 
-s "${CERTFILE}" ]; then echo >&2 "E: Failed to extract certificate or certificate is empty" exit 1 diff --git a/docs/devspace-reference.md b/docs/devspace-reference.md new file mode 100644 index 0000000..186c30c --- /dev/null +++ b/docs/devspace-reference.md @@ -0,0 +1,148 @@ +# DevSpace Reference + +## Profiles + +| Profile | Description | Components | +|---------|-------------|------------| +| `local-network` | Core local networking | MetalLB, Istio, Gateway API | +| `local-dns` | Local DNS integration | External DNS, CoreDNS, etcd | +| `local-certs` | Local certificate management | cert-manager, trust-manager, reflector | +| `local-aux` | Local auxiliary services | Reloader | +| `local-test` | Local test applications | httpbin with routes | +| `gke-network` | GKE networking | GKE Gateway API without Istio or MetalLB | +| `gke-dns` | GKE DNS integration | Cloud DNS-backed external-dns with Workload Identity | +| `gke-certs` | GKE certificate management | cert-manager, trust-manager, reflector, starter-pack CA | +| `gke-test` | GKE test applications | raw httpbin authz/plugin route on `*.gcp.kube` | +| `with-o11y` | Shared observability base | Prometheus, OpenTelemetry Collector, Jaeger | +| `local-o11y` | Local observability overlay | metrics-server, local Jaeger route, Istio metrics monitors | +| `gke-o11y` | GKE observability overlay | GKE Prometheus overrides, GKE Jaeger route, IAP policies | +| `o11y-grafana` | Grafana UI | Grafana, route, datasources, dashboards | +| `o11y-addons` | Extended observability | Alloy, Loki, Tempo, Grafana datasource ConfigMaps | +| `local-psql` | PostgreSQL database | PostgreSQL with persistence | +| `local-redis` | Redis cache | Redis with persistence | +| `local-es` | ElasticSearch | Single-node ElasticSearch | + +Base local and GKE infrastructure profiles activate from the active kube context. Observability uses +the same pattern: `with-o11y` is the shared base, then `local-o11y` or `gke-o11y` activates from the +context. 
Do not persist `CLUSTER_PROVIDER`; use it only as an explicit setup/test selector. + +## Commands + +List all commands: + +```bash +devspace list commands +``` + +Common commands: + +```bash +devspace run check-tools +devspace run ensure-cluster +devspace --var CLUSTER_PROVIDER=gke run ensure-cluster +devspace run test-install +devspace run print-cluster-env +devspace run install-config-connector +devspace run update-cluster-dns +devspace run reset-cluster-dns +devspace run import-root-ca +devspace run gke-gateway-resources +devspace run gke-dev-registry-info +devspace run port-forward-otel +``` + +## Tool Inventory + +`devspace run check-tools` is a fast local preflight. It checks commands on `PATH` for the active +kube context and does not authenticate, install tools, contact cloud APIs, run `terraform init`, or +check registry login state. + +| Scenario | Tools | +|----------|-------| +| Core deploy | `devspace`, `kubectl`, `helm`, `yq` | +| Local deploy with inferred Docker CIDR | `docker` unless `DOCKER_CIDR_PREFIX` is set | +| Local host integration on macOS | `brew`, `scutil`, `security`, `openssl`, `base64` | +| Local host integration on Linux | `resolvectl`, `ip`, `openssl`, `base64` | +| Managed or prepared GKE deploy | `gcloud`, `terraform`, `gke-gcloud-auth-plugin` | +| GKE host DNS integration | `dig` when `HOST_INTEGRATION=true` and `DNS_MODE=cloud-dns` | +| Managed GKE Config Connector install | `tar` for the official operator bundle | +| Smoke tests | `go`; local smoke also needs `kind` | +| Chart tests | Helm `unittest` plugin | +| Optional GKE registry smoke | `docker` plus authenticated Artifact Registry access | + +## Variables + +Frequently used variables: + +| Variable | Purpose | +|----------|---------| +| `CLUSTER_PROVIDER` | Setup/test selector. Supported values: `local`, `gke`. | +| `HOST_INTEGRATION` | Set `false` to skip host DNS and CA hooks. | +| `DNS_MODE` | Persisted selection output: `local` or `cloud-dns`. 
| +| `DNS_SERVICE_ID` | Host DNS service ID for resolver configuration. | +| `GKE_PROJECT_ID` | Selected GKE project. | +| `GKE_REGION` | Selected GKE region. | +| `GKE_GCLOUD_CONFIGURATION` | gcloud configuration used by GKE `ensure-cluster`; default `devspace-starter-pack`. | +| `GKE_GCLOUD_ACCOUNT` | Optional gcloud account for GKE auth bootstrap. | +| `GCP_BILLING_ACCOUNT_ID` | Managed GKE billing account; derived when exactly one open account is visible. | +| `GCP_ORG_ID` | Managed GKE organization parent; derived when exactly one org is visible and no folder is set. | +| `GCP_FOLDER_ID` | Optional managed GKE folder parent; takes precedence over `GCP_ORG_ID`. | +| `GKE_DNS_DOMAIN` | GKE DNS suffix, default `gcp.kube`. | +| `GKE_DNS_NAMESERVERS` | Cloud DNS authoritative nameservers. | +| `GKE_SELECTED_CONTEXT` | Persisted selected GKE kube context; deploy fails if it differs from the active context. | +| `GKE_PROTECTION` | GKE protected-route mode. Default `iap`; `vpn` is future work. | +| `CONFIG_CONNECTOR_ENABLED` | Published `true` when managed GKE selected Config Connector metadata is available. | +| `CONFIG_CONNECTOR_SERVICE_ACCOUNT` | Config Connector cluster-mode Google service account for managed GKE. | +| `DEV_REGISTRY` | GKE Artifact Registry Docker repository path. | +| `DEV_REGISTRY_IMAGE_PREFIX` | Image prefix consumed by app repos. 
| + +## Cluster Environment Contract + +Starter-pack publishes non-secret topology values into the active cluster: + +```bash +kubectl get configmap -n devspace-system devspace-starter-pack-env -o yaml +devspace run print-cluster-env +``` + +Downstream repos can read the contract without invoking starter-pack through `-p with-infra`: + +```bash +kubectl get configmap -n devspace-system devspace-starter-pack-env \ + -o jsonpath='{.data.DEPLOYMENT_DOMAIN}' +``` + +The v1 contract is owned by starter-pack and uses ConfigMap keys such as `CLUSTER_PROVIDER`, +`DEPLOYMENT_DOMAIN`, `DNS_DOMAIN`, `DNS_MODE`, `DNS_SERVICE_ID`, `GATEWAY_NAMESPACE`, +`DEV_REGISTRY_IMAGE_PREFIX`, `GKE_PROJECT_ID`, `GKE_REGION`, `GKE_PROTECTION`, +`CONFIG_CONNECTOR_ENABLED`, `CONFIG_CONNECTOR_MODE`, `CONFIG_CONNECTOR_PROJECT_ID`, and +`CONFIG_CONNECTOR_SERVICE_ACCOUNT`. It contains topology only, never credentials, OAuth secrets, +service account keys, or image pull secrets. Missing or incomplete GKE contract data is actionable: +run `devspace --var CLUSTER_PROVIDER=gke run ensure-cluster` from starter-pack. + +## Smoke Tests + +Local smoke: + +```bash +make smoke +make test-e2e +``` + +Useful local overrides: + +```bash +E2E_CLUSTER_NAME=my-smoke E2E_KEEP_CLUSTER=1 make smoke +E2E_DEVSPACE_ARGS="--profile o11y-grafana" make smoke +E2E_TIMEOUT=30m E2E_READY_TIMEOUT=10m make smoke +``` + +GKE smoke: + +```bash +make smoke-gke +``` + +Timeout knobs use Go duration syntax and include `E2E_TIMEOUT`, `E2E_CLUSTER_CREATE_WAIT`, +`E2E_CLEANUP_TIMEOUT`, `E2E_READY_TIMEOUT`, `E2E_READY_REPORT_INTERVAL`, +`E2E_DIAGNOSTIC_TIMEOUT`, and `E2E_TEST_TIMEOUT`. diff --git a/docs/gateway-authz.md b/docs/gateway-authz.md new file mode 100644 index 0000000..9791f22 --- /dev/null +++ b/docs/gateway-authz.md @@ -0,0 +1,107 @@ +# Gateway And Authz Attachment + +Starter-pack provides route and gateway surfaces, but plugin or application repos own their authz +backend, policy attachment, tests, and cleanup. 
+ +## Local Istio Gateway + +Local clusters expose: + +- `*.int.kube` through Gateway API +- `*.istio.kube` through Istio Ingress +- HTTPS termination with starter-pack certificates + +The Istio mesh config defines an optional Gateway API external authorization provider named +`gateway-ext-authz-grpc`. It is inert until an app installs an AuthorizationPolicy that uses it. +Starter-pack does not install an ext-authz backend, create a `gateway-ext-authz` Service, or create a +default reject-all AuthorizationPolicy. + +Apps that want gateway-level external authorization should install: + +- an ext-authz backend +- a Service alias named `gateway-ext-authz` in namespace `istio-ingress` +- port `3001`, using Envoy `ext_authz` gRPC +- an AuthorizationPolicy targeting the Gateway-generated gateway workload label + `gateway.networking.k8s.io/gateway-name=gateway` + +Example AuthorizationPolicy: + +```yaml +apiVersion: security.istio.io/v1 +kind: AuthorizationPolicy +metadata: + name: example-gateway-ext-authz + namespace: istio-ingress +spec: + selector: + matchLabels: + gateway.networking.k8s.io/gateway-name: gateway + action: CUSTOM + provider: + name: gateway-ext-authz-grpc + rules: + - {} +``` + +Example Service alias: + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: gateway-ext-authz + namespace: istio-ingress +spec: + type: ExternalName + externalName: my-ext-authz.my-app-namespace.svc.cluster.local + ports: + - name: grpc + port: 3001 + targetPort: 3001 +``` + +## GKE Gateway + +The GKE target exposes `*.gcp.kube` through a regional external managed GKE Gateway and Cloud DNS. +It prepares the project for Google Cloud Service Extensions and Network Security authorization +policies, but it does not attach an authorization callout by default. 
+ +The stable GKE authz backend convention mirrors the local attachment surface: + +- backend namespace: owned by the plugin repo +- backend Service name: `gateway-ext-authz` +- backend Service port: `3001` +- protocol: Envoy `ext_authz` gRPC, imported as `wireFormat: EXT_AUTHZ_GRPC` +- health expectation: the Service has ready endpoints before a plugin imports an authorization + extension pointing at it +- default attachment state: no `AuthzExtension` and no Network Security authz policy + +`httpbin.gcp.kube` is the raw authz/plugin development surface and is intentionally not IAP-protected. + +Plugin repos should discover the active provider, deployment domain, Gateway namespace, and GKE +registry prefix from the starter-pack-owned ConfigMap when it exists: + +```bash +kubectl get configmap -n devspace-system devspace-starter-pack-env \ + -o jsonpath='{.data.DEPLOYMENT_DOMAIN}' +``` + +On GKE, a missing contract should be treated as an actionable setup failure: run +`devspace --var CLUSTER_PROVIDER=gke run ensure-cluster` from starter-pack. + +Plugin repos that want GKE gateway-level authorization should deploy their backend, create or reuse +the Google Cloud backend service for that Service, import an `AuthzExtension`, and import a scoped +Network Security authz policy for a chosen host such as `httpbin.gcp.kube`. + +Discover Gateway-generated load balancer resources: + +```bash +devspace run gke-gateway-resources +``` + +Reusable opt-in templates live in `gcp-templates/`: + +- `gke-authz-extension.yaml` +- `gke-authz-policy.yaml` + +Starter-pack does not import those templates in any default profile. diff --git a/docs/gke.md b/docs/gke.md new file mode 100644 index 0000000..c946873 --- /dev/null +++ b/docs/gke.md @@ -0,0 +1,254 @@ +# GKE Setup And Validation + +The GKE target creates or selects a development cluster that uses GKE Gateway API, Cloud DNS, +Artifact Registry, Workload Identity, and the starter-pack certificate chain. 
It does not deploy +Istio, MetalLB, CoreDNS, or etcd. + +## Prerequisites + +- `terraform` +- `gcloud` +- `gke-gcloud-auth-plugin` +- Google user account with permission to create projects, link billing, and create IAM/service resources +- Access to one open billing account +- Access to one GCP organization, or an explicit folder ID + +`ensure-cluster` validates and bootstraps Google auth before Terraform runs. It uses a gcloud +configuration named `devspace-starter-pack` by default, validates both gcloud CLI credentials and +Application Default Credentials, and runs an interactive browser login when either credential store +is missing or expired. + +Manual fallback: + +```bash +gcloud config configurations activate devspace-starter-pack +gcloud config set account ACCOUNT +gcloud auth login ACCOUNT --update-adc +``` + +Terraform uses Application Default Credentials. `gcloud` commands and the GKE kubectl auth plugin +use gcloud CLI credentials, so both stores must be valid. Override the config or account with +`GKE_GCLOUD_CONFIGURATION` and `GKE_GCLOUD_ACCOUNT` when needed. + +After login, `ensure-cluster` derives `GCP_BILLING_ACCOUNT_ID` from the single visible open billing +account and `GCP_ORG_ID` from the single visible organization when those values are unambiguous. If +there are none or several, it stops before Terraform and prints the exact `devspace set var ...` +command to run. `GCP_FOLDER_ID` is never guessed; set it explicitly when the project should live in +a folder. + +## Managed GKE Cluster + +The managed path converges the Terraform in `infra/gcp-ephemeral`, selects the resulting GKE kube +context, and persists the DevSpace variables that normal deploys need. 
+ +```bash +devspace --var CLUSTER_PROVIDER=gke run ensure-cluster +``` + +`GKE_REGION` defaults to `us-central1`; set it only when you want a different region: + +```bash +devspace set var GKE_REGION=us-east1 +``` + +Use `GCP_FOLDER_ID` when the project parent is a folder: + +```bash +devspace set var GCP_FOLDER_ID=FOLDER_ID +``` + +If both `GCP_FOLDER_ID` and `GCP_ORG_ID` are set, Terraform uses the folder. The command also reads +existing values from `infra/gcp-ephemeral/terraform.tfvars` when present. + +Terraform creates: + +- a dedicated GCP project +- an Autopilot GKE cluster +- a public Cloud DNS zone for `gcp.kube.` +- Workload Identity wiring for `external-dns` +- an Artifact Registry Docker repository for development images +- IAP API and project-scope accessor IAM for protected human-facing routes +- a Config Connector controller service account and Workload Identity binding + +Useful Terraform inputs: + +```hcl +dev_registry_writer_members = [ + "user:developer@example.com", + "group:platform@example.com", + "serviceAccount:ci@example-project.iam.gserviceaccount.com", +] + +iap_accessor_members = [ + "user:developer@example.com", + "group:platform@example.com", +] +``` + +When `GKE_PROTECTION=iap`, `ensure-cluster` grants browser access to the active `gcloud` account if +neither `GKE_IAP_ACCESSOR_MEMBERS` nor `iap_accessor_members` is set. Set either value explicitly to +grant a different user, group, or service account. + +## Prepared Or External GKE Cluster + +Use the same `ensure-cluster` command when Terraform has already prepared the project, registry, DNS +zone, and Gateway-compatible cluster, or when you are selecting an external compatible GKE cluster. +Switch to the target `gke_*` kube context first. If the context matches the Terraform outputs, +DevSpace reconverges Terraform and refreshes outputs. If it does not match, DevSpace treats the +cluster as external and does not run Terraform. 
+ +```bash +kubectl config use-context gke_PROJECT_REGION_CLUSTER +devspace set var GKE_DNS_NAMESERVERS=ns-cloud-example1.googledomains.com.,ns-cloud-example2.googledomains.com. +devspace run ensure-cluster +``` + +`ensure-cluster` persists `GKE_SELECTED_CONTEXT` with the selected kube context. Later +`devspace deploy`, `devspace build`, and `devspace run` commands reuse the selected GKE context, +registry, DNS, and gateway settings. If the current GKE context differs from +`GKE_SELECTED_CONTEXT`, deploy fails early and asks you to rerun +`devspace --var CLUSTER_PROVIDER=gke run ensure-cluster`. + +`ensure-cluster` also publishes the non-secret cluster environment contract in +`devspace-system/devspace-starter-pack-env`. Plugin repos can discover `DEPLOYMENT_DOMAIN`, +`GKE_PROJECT_ID`, `GKE_REGION`, `GKE_PROTECTION`, `GATEWAY_NAMESPACE`, and +`DEV_REGISTRY_IMAGE_PREFIX` from the cluster alone: + +```bash +kubectl get configmap -n devspace-system devspace-starter-pack-env \ + -o jsonpath='{.data.DEV_REGISTRY_IMAGE_PREFIX}' +``` + +## Routes And Protection + +GKE app routes use HTTPS backend routes plus HTTP-to-HTTPS redirects. The default protection mode is +`GKE_PROTECTION=iap` for human/shared routes such as `jaeger.gcp.kube` and `grafana.gcp.kube`. +Starter-pack applies per-Service `GCPBackendPolicy` resources; IAP is not a global Gateway switch. + +By default, IAP uses Google-managed OAuth, so no OAuth client ID or secret is required: + +```bash +devspace deploy +``` + +On a GKE kube context, DevSpace activates `with-o11y`, `gke-o11y`, and `o11y-grafana` +automatically. `with-o11y` provides the common Prometheus, Jaeger, and OpenTelemetry Collector +deployments; `gke-o11y` adds the GKE route, managed-cluster Prometheus overrides, and IAP policies. + +Custom OAuth clients are only for advanced branding or external-user requirements. If used, provide +both `GKE_IAP_OAUTH_CLIENT_ID` and `GKE_IAP_OAUTH_CLIENT_SECRET`; setting only one fails during Helm +rendering. 
`GKE_PROTECTION=vpn` is reserved for future private Gateway access and fails clearly. + +IAP browser access is IAM-controlled separately from GCP project access. Managed GKE setup defaults +the accessor list to the active `gcloud` account; add more accessors with +`GKE_IAP_ACCESSOR_MEMBERS` or `iap_accessor_members`. + +`httpbin.gcp.kube` is intentionally not IAP-protected. It is the raw authz/plugin test surface, so +headers such as `Authorization: Bearer ...` reach the Gateway/authz/backend path. + +## Config Connector + +Managed GKE installs Config Connector in cluster mode during `devspace deploy`. The operator is +installed from Google's official bundle, then starter-pack applies one cluster-wide +`ConfigConnector` resource using the Terraform-created Google service account. + +The default IAM role set is intentionally broad for this ephemeral developer project, so downstream +repos can apply Google Cloud resources as Kubernetes manifests without adding Terraform. Override +`config_connector_iam_roles` in `infra/gcp-ephemeral/terraform.tfvars` only when you want to narrow +or expand that controller identity. + +No service account keys, Kubernetes Secrets, OAuth secrets, or `imagePullSecrets` are created for +Config Connector. Authentication uses Workload Identity Federation for GKE: + +```text +cnrm-system/cnrm-controller-manager -> config-connector@PROJECT_ID.iam.gserviceaccount.com +``` + +Downstream repos can discover whether Config Connector is available from the cluster contract: + +```bash +kubectl get configmap -n devspace-system devspace-starter-pack-env \ + -o jsonpath='{.data.CONFIG_CONNECTOR_ENABLED}' +``` + +Config Connector resources must still identify the target project. Use either a namespace annotation +or an annotation on each resource: + +```yaml +metadata: + annotations: + cnrm.cloud.google.com/project-id: PROJECT_ID +``` + +This is not enabled automatically for external GKE clusters. 
External clusters can still publish +compatible metadata if their operator and IAM setup are managed separately. + +## Dev Image Registry + +GKE app repos should push local developer/test images to the ephemeral project's Artifact Registry +Docker repository. GHCR remains for CI-published and release artifacts. + +Starter-pack exports: + +- `DEV_REGISTRY_HOST`, for example `us-central1-docker.pkg.dev` +- `DEV_REGISTRY`, for example `us-central1-docker.pkg.dev/devspace-gke-example/devspace-dev` +- `DEV_REGISTRY_IMAGE_PREFIX`, normally equal to `DEV_REGISTRY` + +Tag app images as: + +```bash +${DEV_REGISTRY_IMAGE_PREFIX}/my-app:${TAG} +``` + +Do not add Kubernetes `imagePullSecrets` for this registry. GKE nodes pull with their Google service +account, which Terraform grants `roles/artifactregistry.reader` on the repository. Developer and CI +push identities must be listed in `dev_registry_writer_members`. + +Authenticate Docker pushes from a developer machine: + +```bash +gcloud auth configure-docker us-central1-docker.pkg.dev +``` + +Run the opt-in registry smoke after authenticating: + +```bash +GKE_REGISTRY_SMOKE=1 CLUSTER_PROVIDER=gke go test -count=1 -v -timeout 10m ./tests/install +``` + +## Smoke Validation + +Run the GKE smoke: + +```bash +make smoke-gke +``` + +Useful overrides: + +```bash +GKE_TF_VAR_FILE=terraform.tfvars make smoke-gke +E2E_KEEP_CLUSTER=1 GKE_TF_VAR_FILE=terraform.tfvars make smoke-gke +E2E_DEVSPACE_ARGS="--profile gke-test" make smoke-gke +E2E_DEVSPACE_ARGS="--profile gke-test,with-o11y,gke-o11y,o11y-grafana" make smoke-gke +``` + +The smoke harness exports Terraform outputs into DevSpace as `GKE_PROJECT_ID`, +`GKE_DNS_NAMESERVERS`, `DEV_REGISTRY_HOST`, `DEV_REGISTRY`, `DEV_REGISTRY_IMAGE_PREFIX`, and related +variables. On macOS, the DNS hook installs a supplemental resolver for `gcp.kube` that points at the +Cloud DNS authoritative nameservers. 
+ +## Cleanup + +Purge Kubernetes resources with DevSpace: + +```bash +devspace purge +devspace run reset-cluster-dns +``` + +Destroy managed GKE infrastructure with Terraform when the ephemeral project is no longer needed: + +```bash +terraform -chdir=infra/gcp-ephemeral destroy +``` diff --git a/docs/observability.md b/docs/observability.md new file mode 100644 index 0000000..4685b14 --- /dev/null +++ b/docs/observability.md @@ -0,0 +1,80 @@ +# Observability + +The observability stack is split into a shared base and provider overlays. `with-o11y` contains the +common Prometheus, Jaeger, and OpenTelemetry Collector pieces. `local-o11y` and `gke-o11y` activate +from the kube context for provider-specific routes and metrics settings. + +Local defaults include Prometheus metrics and lightweight tracing. Grafana, Loki, Tempo, and Alloy +are optional locally. GKE auto-activates the GKE overlay and Grafana because shared clusters have +more room for the full human-facing observability surface. + +## Profiles + +```bash +# Shared Jaeger, Prometheus, and OpenTelemetry Collector. +# The local or GKE overlay activates from the kube context. +devspace deploy --profile with-o11y + +# Add Grafana locally. GKE activates this profile automatically. +devspace deploy --profile o11y-grafana + +# Add Loki, Alloy, and Tempo +devspace deploy --profile o11y-grafana,o11y-addons + +# GKE explicit equivalent, normally selected automatically by context +devspace deploy --profile with-o11y,gke-o11y,o11y-grafana +``` + +## Routes + +- Local Jaeger: `https://jaeger.int.kube` +- Local Grafana: `https://grafana.int.kube` +- GKE Jaeger: `https://jaeger.gcp.kube` +- GKE Grafana: `https://grafana.gcp.kube` + +Grafana local credentials are `admin` / `admin`, as configured in `helm-values/grafana.yaml`. + +On GKE, Jaeger and Grafana are protected by `GKE_PROTECTION=iap` by default. `httpbin.gcp.kube` +remains raw and non-IAP for authz/plugin testing. 
+ +## Service Export Configuration + +Service repositories can export traces and metrics to the in-cluster collector: + +```bash +OTEL_SERVICE_NAME=your-service-name +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector.observability.svc.cluster.local:4317 +OTEL_EXPORTER_OTLP_PROTOCOL=grpc +``` + +For OTLP/HTTP exporters: + +```bash +OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector.observability.svc.cluster.local:4318 +OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf +``` + +For host-side trace smoke tests, forward OTLP/gRPC and OTLP/HTTP: + +```bash +devspace run port-forward-otel +``` + +## Metrics And Dashboards + +Grafana reads metrics from Prometheus, including OTLP metrics remote-written by the collector. The +collector preserves resource attributes as metric labels while using a single remote-write sender +path for the in-cluster Prometheus receiver. + +Istio gateway and control-plane metrics are scraped directly by Prometheus so upstream Istio RED +dashboards keep their expected metric and label shapes. Istio proxy tracing is sent to the same +OpenTelemetry Collector and Jaeger path with local-only 100% sampling. + +Grafana discovers additional dashboards and datasources from Kubernetes objects: + +- dashboards: ConfigMaps or Secrets with label `grafana_dashboard: "1"` +- datasources: ConfigMaps or Secrets with label `grafana_datasource: "1"` +- dashboard folders: optional annotation `grafana_folder` + +The `o11y-grafana` profile installs starter dashboards in the `Kubernetes`, `Candidates`, and +`Istio` folders. 
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md new file mode 100644 index 0000000..44842e1 --- /dev/null +++ b/docs/troubleshooting.md @@ -0,0 +1,75 @@ +# Troubleshooting + +## DNS + +Reset and reapply host DNS integration: + +```bash +devspace run reset-cluster-dns +devspace run update-cluster-dns +``` + +Check local DNS components: + +```bash +kubectl get pods -n external-dns +kubectl get pods -n coredns +``` + +On macOS, prefer `dns-sd` or normal application resolution over `dig`; `dig` bypasses parts of the +system resolver path that DevSpace configures. + +For GKE, confirm the selected Cloud DNS nameservers are persisted: + +```bash +devspace list vars | grep GKE_DNS_NAMESERVERS +``` + +## Certificates + +Check certificate status: + +```bash +kubectl get certificates --all-namespaces +kubectl describe certificate cluster-root-ca -n cert-manager +``` + +Re-import the root CA: + +```bash +devspace run import-root-ca +``` + +## Network Connectivity + +On macOS local clusters, check Docker network bridging: + +```bash +brew services list | grep docker-mac-net-connect +sudo brew services restart chipmk/tap/docker-mac-net-connect +``` + +Check MetalLB status: + +```bash +kubectl get pods -n metallb-system +kubectl get ipaddresspools -n metallb-system +``` + +## GKE Gateway + +Inspect Gateway-generated forwarding rules, backend services, and URL maps: + +```bash +devspace run gke-gateway-resources +``` + +For raw authz/plugin requests, use `https://httpbin.gcp.kube`. HTTP should redirect to HTTPS. + +For protected observability routes, unauthenticated browser or curl access should be intercepted by +IAP when `GKE_PROTECTION=iap`. + +If IAP says the signed-in Google user does not have access, check that the user has +`roles/iap.httpsResourceAccessor`. 
Managed GKE setup defaults this to the active `gcloud` account +when no explicit accessor list is provided, but existing clusters may need a fresh +`devspace --var CLUSTER_PROVIDER=gke run ensure-cluster` after changing accessors. diff --git a/gcp-templates/gke-authz-extension.yaml b/gcp-templates/gke-authz-extension.yaml new file mode 100644 index 0000000..6de8e18 --- /dev/null +++ b/gcp-templates/gke-authz-extension.yaml @@ -0,0 +1,20 @@ +# Optional template for plugin repos. Starter-pack does not import this by default. +# Replace PROJECT_ID, REGION, AUTHZ_EXTENSION_NAME, AUTHZ_BACKEND_SERVICE_NAME, +# and AUTHORITY_HOST before running: +# gcloud beta service-extensions authz-extensions import AUTHZ_EXTENSION_NAME \ +# --source=gke-authz-extension.yaml \ +# --location=REGION \ +# --project=PROJECT_ID +name: AUTHZ_EXTENSION_NAME +authority: AUTHORITY_HOST +loadBalancingScheme: EXTERNAL_MANAGED +service: https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/backendServices/AUTHZ_BACKEND_SERVICE_NAME +forwardHeaders: + - Authorization + - Cookie + - X-Request-Id + - Traceparent + - Tracestate +failOpen: false +timeout: "1s" +wireFormat: EXT_AUTHZ_GRPC diff --git a/gcp-templates/gke-authz-policy.yaml b/gcp-templates/gke-authz-policy.yaml new file mode 100644 index 0000000..8a6728a --- /dev/null +++ b/gcp-templates/gke-authz-policy.yaml @@ -0,0 +1,22 @@ +# Optional template for plugin repos. Starter-pack does not import this by default. 
+# Replace PROJECT_ID, REGION, AUTHZ_POLICY_NAME, FORWARDING_RULE_NAME, +# AUTHZ_EXTENSION_NAME, and HOSTNAME before running: +# gcloud beta network-security authz-policies import AUTHZ_POLICY_NAME \ +# --source=gke-authz-policy.yaml \ +# --location=REGION \ +# --project=PROJECT_ID +name: AUTHZ_POLICY_NAME +target: + resources: + - https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/forwardingRules/FORWARDING_RULE_NAME +policyProfile: REQUEST_AUTHZ +httpRules: + - to: + operations: + - hosts: + - exact: HOSTNAME +action: CUSTOM +customProvider: + authzExtension: + resources: + - projects/PROJECT_ID/locations/REGION/authzExtensions/AUTHZ_EXTENSION_NAME diff --git a/infra/gcp-ephemeral/.gitignore b/infra/gcp-ephemeral/.gitignore new file mode 100644 index 0000000..629ed22 --- /dev/null +++ b/infra/gcp-ephemeral/.gitignore @@ -0,0 +1,4 @@ +.terraform/ +*.tfstate +*.tfstate.* +terraform.tfvars diff --git a/infra/gcp-ephemeral/.terraform-version b/infra/gcp-ephemeral/.terraform-version new file mode 100644 index 0000000..d324349 --- /dev/null +++ b/infra/gcp-ephemeral/.terraform-version @@ -0,0 +1 @@ +1.15.5 diff --git a/infra/gcp-ephemeral/main.tf b/infra/gcp-ephemeral/main.tf new file mode 100644 index 0000000..b9ed4b5 --- /dev/null +++ b/infra/gcp-ephemeral/main.tf @@ -0,0 +1,240 @@ +resource "random_id" "project_suffix" { + byte_length = 3 +} + +check "project_parent" { + assert { + condition = var.org_id != "" || var.folder_id != "" + error_message = "Set org_id or folder_id." + } +} + +resource "google_project" "ephemeral" { + name = var.project_name + project_id = local.project_id + billing_account = var.billing_account_id + org_id = local.parent_is_folder ? null : var.org_id + folder_id = local.parent_is_folder ? 
var.folder_id : null + auto_create_network = false + deletion_policy = "DELETE" + labels = var.labels +} + +locals { + project_services = toset([ + "cloudbilling.googleapis.com", + "cloudresourcemanager.googleapis.com", + "compute.googleapis.com", + "container.googleapis.com", + "dns.googleapis.com", + "artifactregistry.googleapis.com", + "iam.googleapis.com", + "iamcredentials.googleapis.com", + "iap.googleapis.com", + "networksecurity.googleapis.com", + "networkservices.googleapis.com", + "serviceusage.googleapis.com", + ]) +} + +resource "google_project_service" "enabled" { + for_each = local.project_services + + project = google_project.ephemeral.project_id + service = each.value + disable_on_destroy = false +} + +resource "google_service_account" "terraform" { + project = google_project.ephemeral.project_id + account_id = "terraform-runner" + display_name = "Terraform runner for ephemeral DevSpace GKE" + + depends_on = [google_project_service.enabled] +} + +resource "google_project_iam_member" "terraform_project_admin" { + for_each = local.terraform_project_admin_roles + + project = google_project.ephemeral.project_id + role = each.value + member = google_service_account.terraform.member +} + +resource "google_service_account" "config_connector" { + project = google_project.ephemeral.project_id + account_id = "config-connector" + display_name = "Config Connector controller for ephemeral DevSpace GKE" + + depends_on = [google_project_service.enabled] +} + +resource "google_project_iam_member" "config_connector_project_admin" { + for_each = toset(var.config_connector_iam_roles) + + project = google_project.ephemeral.project_id + role = each.value + member = google_service_account.config_connector.member +} + +resource "google_artifact_registry_repository" "dev" { + project = google_project.ephemeral.project_id + location = var.region + repository_id = var.dev_registry_repository_id + description = "Ephemeral DevSpace Starter Pack developer/test Docker images." 
+ format = "DOCKER" + labels = var.labels + + depends_on = [google_project_service.enabled] +} + +resource "google_artifact_registry_repository_iam_member" "dev_registry_writer" { + for_each = toset(var.dev_registry_writer_members) + + project = google_project.ephemeral.project_id + location = google_artifact_registry_repository.dev.location + repository = google_artifact_registry_repository.dev.name + role = "roles/artifactregistry.writer" + member = each.value +} + +resource "google_artifact_registry_repository_iam_member" "dev_registry_node_reader" { + project = google_project.ephemeral.project_id + location = google_artifact_registry_repository.dev.location + repository = google_artifact_registry_repository.dev.name + role = "roles/artifactregistry.reader" + member = "serviceAccount:${local.gke_node_service_account_email}" +} + +resource "google_iap_web_iam_member" "iap_accessor" { + for_each = toset(var.iap_accessor_members) + + project = google_project.ephemeral.project_id + role = "roles/iap.httpsResourceAccessor" + member = each.value + + depends_on = [google_project_service.enabled] +} + +resource "google_compute_network" "main" { + project = google_project.ephemeral.project_id + name = "devspace-gke" + auto_create_subnetworks = false + + depends_on = [google_project_service.enabled] +} + +resource "google_compute_subnetwork" "main" { + project = google_project.ephemeral.project_id + name = "devspace-gke" + region = var.region + network = google_compute_network.main.id + ip_cidr_range = "10.40.0.0/20" + + secondary_ip_range { + range_name = "pods" + ip_cidr_range = "10.44.0.0/14" + } + + secondary_ip_range { + range_name = "services" + ip_cidr_range = "10.48.0.0/20" + } +} + +resource "google_compute_subnetwork" "proxy_only" { + project = google_project.ephemeral.project_id + name = "devspace-gke-proxy-only" + region = var.region + network = google_compute_network.main.id + ip_cidr_range = "10.49.0.0/23" + purpose = "REGIONAL_MANAGED_PROXY" + role = 
"ACTIVE" +} + +resource "google_container_cluster" "main" { + project = google_project.ephemeral.project_id + name = var.cluster_name + location = var.region + enable_autopilot = true + deletion_protection = false + network = google_compute_network.main.id + subnetwork = google_compute_subnetwork.main.id + networking_mode = "VPC_NATIVE" + + ip_allocation_policy { + cluster_secondary_range_name = "pods" + services_secondary_range_name = "services" + } + + workload_identity_config { + workload_pool = "${google_project.ephemeral.project_id}.svc.id.goog" + } + + gateway_api_config { + channel = "CHANNEL_STANDARD" + } + + resource_labels = var.labels + + depends_on = [google_project_service.enabled] +} + +resource "google_dns_managed_zone" "gcp_kube" { + project = google_project.ephemeral.project_id + name = var.dns_zone_name + dns_name = var.dns_domain + description = "Ephemeral DevSpace Starter Pack split-DNS zone." + labels = var.labels + + depends_on = [google_project_service.enabled] +} + +resource "google_service_account" "external_dns" { + project = google_project.ephemeral.project_id + account_id = "external-dns" + display_name = "external-dns for ephemeral DevSpace GKE" + + depends_on = [google_project_service.enabled] +} + +resource "google_project_iam_custom_role" "external_dns_zone_writer" { + project = google_project.ephemeral.project_id + role_id = "externalDnsZoneWriter" + title = "External DNS Zone Writer" + description = "Minimal Cloud DNS permissions for external-dns in the ephemeral project." 
+ permissions = [ + "dns.changes.create", + "dns.changes.get", + "dns.changes.list", + "dns.managedZones.get", + "dns.managedZones.list", + "dns.projects.get", + "dns.resourceRecordSets.create", + "dns.resourceRecordSets.delete", + "dns.resourceRecordSets.list", + "dns.resourceRecordSets.update", + ] +} + +resource "google_project_iam_member" "external_dns_zone_writer" { + project = google_project.ephemeral.project_id + role = google_project_iam_custom_role.external_dns_zone_writer.name + member = google_service_account.external_dns.member +} + +resource "google_service_account_iam_member" "external_dns_workload_identity" { + service_account_id = google_service_account.external_dns.name + role = "roles/iam.workloadIdentityUser" + member = "serviceAccount:${google_project.ephemeral.project_id}.svc.id.goog[external-dns/external-dns]" + + depends_on = [google_container_cluster.main] +} + +resource "google_service_account_iam_member" "config_connector_workload_identity" { + service_account_id = google_service_account.config_connector.name + role = "roles/iam.workloadIdentityUser" + member = "serviceAccount:${google_project.ephemeral.project_id}.svc.id.goog[cnrm-system/cnrm-controller-manager]" + + depends_on = [google_container_cluster.main] +} diff --git a/infra/gcp-ephemeral/outputs.tf b/infra/gcp-ephemeral/outputs.tf new file mode 100644 index 0000000..b9871fd --- /dev/null +++ b/infra/gcp-ephemeral/outputs.tf @@ -0,0 +1,75 @@ +output "project_id" { + description = "Ephemeral project ID." + value = google_project.ephemeral.project_id +} + +output "region" { + description = "GKE cluster region." + value = var.region +} + +output "cluster_name" { + description = "GKE cluster name." + value = google_container_cluster.main.name +} + +output "dns_domain" { + description = "Cloud DNS domain." + value = google_dns_managed_zone.gcp_kube.dns_name +} + +output "dns_zone_name" { + description = "Cloud DNS managed zone name." 
+ value = google_dns_managed_zone.gcp_kube.name +} + +output "dns_name_servers" { + description = "Authoritative Cloud DNS nameservers for split DNS." + value = google_dns_managed_zone.gcp_kube.name_servers +} + +output "external_dns_service_account_email" { + description = "Google service account used by external-dns through Workload Identity." + value = google_service_account.external_dns.email +} + +output "dev_registry_host" { + description = "Artifact Registry Docker hostname for developer/test images." + value = local.dev_registry_host +} + +output "dev_registry" { + description = "Artifact Registry Docker repository path for developer/test images." + value = local.dev_registry +} + +output "dev_registry_image_prefix" { + description = "Image prefix app repos should use for GKE developer/test images." + value = local.dev_registry +} + +output "gke_node_service_account_email" { + description = "Google service account used by GKE nodes for image pulls." + value = local.gke_node_service_account_email +} + +output "terraform_service_account_email" { + description = "Google service account created for follow-up Terraform impersonation." + value = google_service_account.terraform.email +} + +output "config_connector_service_account_email" { + description = "Google service account used by Config Connector through Workload Identity." + value = google_service_account.config_connector.email +} + +output "get_credentials_args" { + description = "Arguments for gcloud container clusters get-credentials." 
+ value = [ + google_container_cluster.main.name, + "--region", + var.region, + "--project", + google_project.ephemeral.project_id, + ] +} diff --git a/infra/gcp-ephemeral/terraform.tfvars.example b/infra/gcp-ephemeral/terraform.tfvars.example new file mode 100644 index 0000000..94dbc62 --- /dev/null +++ b/infra/gcp-ephemeral/terraform.tfvars.example @@ -0,0 +1,32 @@ +billing_account_id = "BILLING_ACCOUNT_ID" +org_id = "ORGANIZATION_ID" +# folder_id = "FOLDER_ID" # Takes precedence over org_id when set. + +# Optional overrides: +# project_id = "devspace-gke-example" +# project_name = "devspace-starter-pack-gke" +# region = "us-central1" +# dns_domain = "gcp.kube." +# dev_registry_repository_id = "devspace-dev" +# dev_registry_writer_members = [ +# "user:developer@example.com", +# "group:platform@example.com", +# "serviceAccount:ci@example-project.iam.gserviceaccount.com", +# ] +# iap_accessor_members = [ +# "user:developer@example.com", +# "group:platform@example.com", +# ] + +# Optional: override the broad project-scoped roles granted to the cluster-mode +# Config Connector controller. The Terraform default is intended for ephemeral +# development projects, not multi-tenant production clusters. +# config_connector_iam_roles = [ +# "roles/artifactregistry.admin", +# "roles/compute.networkAdmin", +# "roles/dns.admin", +# "roles/iam.serviceAccountAdmin", +# "roles/iam.serviceAccountUser", +# "roles/resourcemanager.projectIamAdmin", +# "roles/serviceusage.serviceUsageAdmin", +# ] diff --git a/infra/gcp-ephemeral/variables.tf b/infra/gcp-ephemeral/variables.tf new file mode 100644 index 0000000..8029671 --- /dev/null +++ b/infra/gcp-ephemeral/variables.tf @@ -0,0 +1,119 @@ +variable "billing_account_id" { + description = "Billing account ID to link to the ephemeral project." + type = string +} + +variable "org_id" { + description = "Organization ID for the ephemeral project. Ignored when folder_id is set." 
+ type = string + default = "" +} + +variable "folder_id" { + description = "Folder ID for the ephemeral project. Takes precedence over org_id when set." + type = string + default = "" +} + +variable "project_id" { + description = "Optional explicit project ID. Defaults to devspace-gke- followed by a random hex suffix." + type = string + default = "" +} + +variable "project_name" { + description = "Display name for the ephemeral project." + type = string + default = "devspace-starter-pack-gke" +} + +variable "region" { + description = "GCP region for regional resources." + type = string + default = "us-central1" +} + +variable "cluster_name" { + description = "GKE cluster name." + type = string + default = "devspace-starter-pack" +} + +variable "dns_domain" { + description = "Cloud DNS zone DNS name. Keep the trailing dot." + type = string + default = "gcp.kube." +} + +variable "dns_zone_name" { + description = "Cloud DNS managed zone name." + type = string + default = "gcp-kube" +} + +variable "dev_registry_repository_id" { + description = "Artifact Registry Docker repository ID for local developer/test images." + type = string + default = "devspace-dev" +} + +variable "dev_registry_writer_members" { + description = "IAM members granted roles/artifactregistry.writer on the dev image repository." + type = list(string) + default = [] +} + +variable "iap_accessor_members" { + description = "IAM members granted roles/iap.httpsResourceAccessor for IAP-protected web backends in the ephemeral project." + type = list(string) + default = [] +} + +variable "config_connector_iam_roles" { + description = "Project IAM roles granted to the cluster-mode Config Connector controller in the ephemeral project." 
+ type = list(string) + default = [ + "roles/artifactregistry.admin", + "roles/compute.networkAdmin", + "roles/compute.loadBalancerAdmin", + "roles/container.admin", + "roles/dns.admin", + "roles/iam.serviceAccountAdmin", + "roles/iam.serviceAccountUser", + "roles/iap.admin", + "roles/networkservices.serviceExtensionsAdmin", + "roles/resourcemanager.projectIamAdmin", + "roles/serviceusage.serviceUsageAdmin", + ] +} + +variable "labels" { + description = "Labels applied to supported GCP resources." + type = map(string) + default = { + app = "devspace-starter-pack" + lifecycle = "ephemeral" + } +} + +locals { + generated_project_id = "devspace-gke-${random_id.project_suffix.hex}" + project_id = var.project_id != "" ? var.project_id : local.generated_project_id + parent_is_folder = var.folder_id != "" + dev_registry_host = "${var.region}-docker.pkg.dev" + dev_registry = "${local.dev_registry_host}/${google_project.ephemeral.project_id}/${var.dev_registry_repository_id}" + gke_node_service_account_email = "${google_project.ephemeral.number}-compute@developer.gserviceaccount.com" + terraform_project_admin_roles = toset([ + "roles/artifactregistry.admin", + "roles/compute.networkAdmin", + "roles/compute.loadBalancerAdmin", + "roles/container.admin", + "roles/dns.admin", + "roles/iam.serviceAccountAdmin", + "roles/iam.serviceAccountUser", + "roles/iap.admin", + "roles/networkservices.serviceExtensionsAdmin", + "roles/resourcemanager.projectIamAdmin", + "roles/serviceusage.serviceUsageAdmin", + ]) +} diff --git a/infra/gcp-ephemeral/versions.tf b/infra/gcp-ephemeral/versions.tf new file mode 100644 index 0000000..58d9412 --- /dev/null +++ b/infra/gcp-ephemeral/versions.tf @@ -0,0 +1,18 @@ +terraform { + required_version = ">= 1.6.0" + + required_providers { + google = { + source = "hashicorp/google" + version = ">= 6.0.0" + } + random = { + source = "hashicorp/random" + version = ">= 3.6.0" + } + } +} + +provider "google" { + region = var.region +} diff --git 
a/manifests/grafana-route-gke.yaml b/manifests/grafana-route-gke.yaml new file mode 100644 index 0000000..e32218b --- /dev/null +++ b/manifests/grafana-route-gke.yaml @@ -0,0 +1,38 @@ +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: grafana +spec: + parentRefs: + - name: gateway + namespace: gke-gateway + sectionName: https + hostnames: + - "grafana.gcp.kube" + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: grafana + port: 3000 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: grafana-http-redirect +spec: + parentRefs: + - name: gateway + namespace: gke-gateway + sectionName: http + hostnames: + - "grafana.gcp.kube" + rules: + - filters: + - type: RequestRedirect + requestRedirect: + scheme: https + statusCode: 301 diff --git a/manifests/jaeger-route-gke.yaml b/manifests/jaeger-route-gke.yaml new file mode 100644 index 0000000..4a2212b --- /dev/null +++ b/manifests/jaeger-route-gke.yaml @@ -0,0 +1,38 @@ +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: jaeger +spec: + parentRefs: + - name: gateway + namespace: gke-gateway + sectionName: https + hostnames: + - "jaeger.gcp.kube" + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: jaeger + port: 16686 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: jaeger-http-redirect +spec: + parentRefs: + - name: gateway + namespace: gke-gateway + sectionName: http + hostnames: + - "jaeger.gcp.kube" + rules: + - filters: + - type: RequestRedirect + requestRedirect: + scheme: https + statusCode: 301 diff --git a/scripts/check-tools.sh b/scripts/check-tools.sh new file mode 100755 index 0000000..9397b61 --- /dev/null +++ b/scripts/check-tools.sh @@ -0,0 +1,113 @@ +#!/usr/bin/env bash +set -euo pipefail + +LOCAL_CONTEXT_REGEX='^(kind(-.*)?|docker-desktop|minikube|rancher-desktop|microk8s)$' + +require_tool() { + if ! 
command -v "$1" >/dev/null 2>&1; then + echo >&2 "E: Required command not found: $1" + exit 1 + fi +} + +require_gke_gcloud_auth_plugin() { + if command -v gke-gcloud-auth-plugin >/dev/null 2>&1; then + return + fi + + local sdk_root + sdk_root="$(gcloud info --format='value(installation.sdk_root)' 2>/dev/null || true)" + if [[ -n "${sdk_root}" && -x "${sdk_root}/bin/gke-gcloud-auth-plugin" ]]; then + return + fi + + echo >&2 "E: Required command not found: gke-gcloud-auth-plugin" + echo >&2 "I: Install the gke-gcloud-auth-plugin component or add the Google Cloud SDK bin directory to PATH." + exit 1 +} + +current_context() { + kubectl config current-context 2>/dev/null || true +} + +provider_from_context() { + local context="$1" + if [[ "${context}" == gke_* ]]; then + printf 'gke' + return + fi + if [[ "${context}" =~ ${LOCAL_CONTEXT_REGEX} ]]; then + printf 'local' + return + fi + printf '' +} + +require_supported_context() { + local context provider + context="$(current_context)" + if [[ -z "${context}" ]]; then + echo >&2 "E: No current Kubernetes context is selected." + exit 1 + fi + + provider="$(provider_from_context "${context}")" + if [[ -z "${provider}" ]]; then + echo >&2 "E: Unsupported Kubernetes context ${context}; expected a GKE context or a local context." + exit 1 + fi + + printf '%s\n' "${provider}" +} + +require_host_integration_tools() { + case "$(uname -s)" in + Darwin) + require_tool brew + require_tool scutil + require_tool security + require_tool openssl + require_tool base64 + ;; + Linux) + require_tool resolvectl + require_tool ip + require_tool openssl + require_tool base64 + ;; + *) + echo >&2 "E: HOST_INTEGRATION=true is only supported on macOS and Linux." 
+ exit 1 + ;; + esac +} + +require_tool kubectl +require_tool helm +require_tool yq + +provider="$(require_supported_context)" +host_integration="${HOST_INTEGRATION:-true}" +dns_mode="${DNS_MODE:-local}" +docker_cidr_prefix="${DOCKER_CIDR_PREFIX:-}" + +case "${provider}" in + local) + if [[ -z "${docker_cidr_prefix}" ]]; then + require_tool docker + fi + if [[ "${host_integration}" == "true" ]]; then + require_host_integration_tools + fi + ;; + gke) + require_tool gcloud + require_tool terraform + require_gke_gcloud_auth_plugin + if [[ "${host_integration}" == "true" && "${dns_mode}" == "cloud-dns" ]]; then + require_tool dig + fi + ;; +esac + +echo >&2 "I: Tool preflight passed for ${provider} context." diff --git a/scripts/cluster-dns.sh b/scripts/cluster-dns.sh new file mode 100755 index 0000000..46bf085 --- /dev/null +++ b/scripts/cluster-dns.sh @@ -0,0 +1,189 @@ +#!/usr/bin/env bash +set -euo pipefail + +command_name="${1:-}" +if [[ -z "${command_name}" ]]; then + echo >&2 "E: Missing DNS command" + exit 1 +fi +shift + +DNS_MODE_VALUE="${1:-local}" +DNS_SERVICE_ID_VALUE="${2:-kube}" +GKE_DNS_DOMAIN_VALUE="${3:-gcp.kube}" +GKE_DNS_NAMESERVERS_VALUE="${4:-}" +DOCKER_CIDR_PREFIX_VALUE="${5:-172.18.255}" + +require_tool() { + if ! command -v "$1" >/dev/null 2>&1; then + echo >&2 "E: Required command not found: $1" + exit 1 + fi +} + +cloud_dns_domain() { + printf '%s\n' "${GKE_DNS_DOMAIN_VALUE%.}" +} + +cloud_dns_nameserver_ips() { + local resolver="$1" + local ns + if [[ -z "${GKE_DNS_NAMESERVERS_VALUE}" ]]; then + echo >&2 "E: GKE_DNS_NAMESERVERS is required when DNS_MODE=cloud-dns" + exit 1 + fi + + printf '%s' "${GKE_DNS_NAMESERVERS_VALUE}" | tr ',' '\n' | while IFS= read -r ns; do + ns="${ns#"${ns%%[![:space:]]*}"}" + ns="${ns%"${ns##*[![:space:]]}"}" + [[ -n "${ns}" ]] || continue + case "${resolver}" in + darwin) + dig +short A "${ns%.}." + ;; + linux) + getent ahostsv4 "${ns%.}." 
| awk '{print $1}' + ;; + *) + echo >&2 "E: Unknown DNS resolver ${resolver}" + exit 1 + ;; + esac + done | awk 'NF && !seen[$0]++' +} + +read_cloud_dns_ips() { + local resolver="$1" + DNS_IPS=() + local ip + while IFS= read -r ip; do + [[ -n "${ip}" ]] || continue + DNS_IPS+=("${ip}") + done < <(cloud_dns_nameserver_ips "${resolver}") + + if [[ "${#DNS_IPS[@]}" -eq 0 ]]; then + echo >&2 "E: Could not resolve Cloud DNS nameserver addresses from ${GKE_DNS_NAMESERVERS_VALUE}" + exit 1 + fi +} + +linux_route_link_for_ip() { + local ip="$1" + ip route get "${ip}" | awk '{for (i = 1; i <= NF; i++) if ($i == "dev") {print $(i + 1); exit}}' +} + +linux_link_for_domain() { + local domain="$1" + resolvectl status | awk -v domain="~${domain}" ' + /^Link / {link=$2} + index($0, domain) {print link; exit} + ' +} + +update_darwin() { + require_tool sudo + require_tool scutil + echo >&2 "I: Updating DNS settings..." + + if [[ "${DNS_MODE_VALUE}" == "cloud-dns" ]]; then + require_tool dig + read_cloud_dns_ips darwin + sudo scutil <&2 "E: Could not determine Linux route interface for Cloud DNS" + exit 1 + fi + sudo resolvectl dns "${link}" "${DNS_IPS[@]}" + sudo resolvectl domain "${link}" "~$(cloud_dns_domain)" + sudo resolvectl flush-caches + return + fi + + local dns_ip="${DOCKER_CIDR_PREFIX_VALUE}.254" + echo >&2 "I: Updating systemd-resolved DNS settings..." + link="$(linux_route_link_for_ip "${dns_ip}")" + if [[ -z "${link}" ]]; then + echo >&2 "E: Could not determine Linux route interface for ${dns_ip}" + exit 1 + fi + sudo resolvectl dns "${link}" "${dns_ip}" + sudo resolvectl domain "${link}" "~kube" + sudo resolvectl flush-caches +} + +reset_darwin() { + require_tool sudo + require_tool scutil + echo >&2 "I: Resetting DNS..." 
+ sudo scutil <&2 "I: No systemd-resolved link found for ~$(cloud_dns_domain)" + return + fi + sudo resolvectl revert "${link}" + sudo resolvectl flush-caches + return + fi + + require_tool ip + local dns_ip="${DOCKER_CIDR_PREFIX_VALUE}.254" + echo >&2 "I: Resetting systemd-resolved DNS settings..." + link="$(linux_route_link_for_ip "${dns_ip}")" + if [[ -z "${link}" ]]; then + echo >&2 "E: Could not determine Linux route interface for ${dns_ip}" + exit 1 + fi + sudo resolvectl revert "${link}" + sudo resolvectl flush-caches +} + +function_name="${command_name//-/_}" +if [[ ! "${function_name}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || ! declare -F "${function_name}" >/dev/null; then + echo >&2 "E: Unknown DNS command ${command_name}" + exit 1 +fi + +"${function_name}" diff --git a/scripts/config-connector.sh b/scripts/config-connector.sh new file mode 100755 index 0000000..ffc9381 --- /dev/null +++ b/scripts/config-connector.sh @@ -0,0 +1,129 @@ +#!/usr/bin/env bash +set -euo pipefail + +command_name="${1:-}" +if [[ -z "${command_name}" ]]; then + echo >&2 "E: Missing command name" + exit 1 +fi +shift + +require_tool() { + if ! command -v "$1" >/dev/null 2>&1; then + echo >&2 "E: Required command not found: $1" + exit 1 + fi +} + +contract_value() { + local json="$1" + local key="$2" + printf '%s' "${json}" | yq -r ".data[\"${key}\"] // \"\"" +} + +install() { + require_tool kubectl + require_tool yq + + local json enabled mode service_account + if ! json="$(kubectl -n devspace-system get configmap devspace-starter-pack-env -o json 2>/dev/null)"; then + echo >&2 "I: Skipping Config Connector because the starter-pack cluster contract is not published." + return + fi + + enabled="$(contract_value "${json}" CONFIG_CONNECTOR_ENABLED)" + if [[ "${enabled}" != "true" ]]; then + echo >&2 "I: Skipping Config Connector because CONFIG_CONNECTOR_ENABLED=${enabled:-}." 
+ return + fi + + mode="$(contract_value "${json}" CONFIG_CONNECTOR_MODE)" + service_account="$(contract_value "${json}" CONFIG_CONNECTOR_SERVICE_ACCOUNT)" + if [[ "${mode}" != "cluster" ]]; then + echo >&2 "E: Unsupported CONFIG_CONNECTOR_MODE=${mode}; expected cluster." + exit 1 + fi + if [[ -z "${service_account}" ]]; then + echo >&2 "E: CONFIG_CONNECTOR_SERVICE_ACCOUNT is required when CONFIG_CONNECTOR_ENABLED=true." + exit 1 + fi + + install_operator + apply_config_connector "${service_account}" + wait_ready +} + +install_operator() { + require_tool gcloud + require_tool kubectl + require_tool tar + + local tmpdir + tmpdir="$(mktemp -d)" + trap "rm -rf '${tmpdir}'" EXIT + + echo >&2 "I: Downloading Config Connector operator bundle..." + gcloud storage cp gs://configconnector-operator/latest/release-bundle.tar.gz "${tmpdir}/release-bundle.tar.gz" + tar -xzf "${tmpdir}/release-bundle.tar.gz" -C "${tmpdir}" + + echo >&2 "I: Applying Config Connector Autopilot operator..." + kubectl apply -f "${tmpdir}/operator-system/autopilot-configconnector-operator.yaml" + wait_for_pods configconnector-operator-system 180s +} + +apply_config_connector() { + local service_account="$1" + + echo >&2 "I: Applying Config Connector cluster-mode configuration..." + kubectl apply -f - <&2 "I: Waiting for pods in namespace ${namespace}..." + local i + for i in {1..60}; do + if kubectl -n "${namespace}" get pods -o name 2>/dev/null | grep -q '^pod/'; then + kubectl wait -n "${namespace}" --for=condition=Ready pod --all --timeout="${timeout}" + return + fi + sleep 5 + done + + echo >&2 "E: No pods appeared in namespace ${namespace}." 
+ exit 1 +} + +case "${command_name}" in + install) + install + ;; + install-operator) + install_operator + ;; + wait-ready) + wait_ready + ;; + *) + echo >&2 "E: Unknown command ${command_name}" + exit 1 + ;; +esac diff --git a/scripts/devspace-cluster-backend.sh b/scripts/devspace-cluster-backend.sh new file mode 100755 index 0000000..1d148bc --- /dev/null +++ b/scripts/devspace-cluster-backend.sh @@ -0,0 +1,968 @@ +#!/usr/bin/env bash +set -euo pipefail + +DEFAULT_GKE_REGION="us-central1" +DEFAULT_GKE_DNS_DOMAIN="gcp.kube" +DEFAULT_GKE_GATEWAY_NAMESPACE="gke-gateway" +DEFAULT_GKE_PROTECTION="iap" +DEFAULT_GKE_GCLOUD_CONFIGURATION="devspace-starter-pack" +DEFAULT_DEV_REGISTRY_REPOSITORY="devspace-dev" +LOCAL_CONTEXT_REGEX='^(kind(-.*)?|docker-desktop|minikube|rancher-desktop|microk8s)$' +GKE_TERRAFORM_DIR="infra/gcp-ephemeral" +CLUSTER_ENV_NAMESPACE="devspace-system" +CLUSTER_ENV_CONFIGMAP="devspace-starter-pack-env" + +load_devspace_vars() { + require_tool devspace + + local key value + while IFS='=' read -r key value; do + [[ "${key}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue + printf -v "${key}" '%s' "${value}" + export "${key}" + done < <(devspace list vars -o keyvalue) +} + +require_tool() { + if ! command -v "$1" >/dev/null 2>&1; then + echo >&2 "E: Required command not found: $1" + exit 1 + fi +} + +require_value() { + local name="$1" + local value="$2" + local hint="$3" + if [[ -z "${value}" ]]; then + echo >&2 "E: ${name} is required. 
${hint}" + exit 1 + fi +} + +tfvar_string() { + local key="$1" + local file="$2" + awk -v key="${key}" ' + $1 == key && $2 == "=" { + value = $0 + sub("^[^=]*= *", "", value) + sub(" *#.*$", "", value) + gsub(/^"|"$/, "", value) + print value + exit + } + ' "${file}" +} + +tfvar_string_list_csv() { + local key="$1" + local file="$2" + awk -v key="${key}" ' + $1 == key && $2 == "=" && $3 == "[" {in_list = 1; next} + in_list && $0 ~ /\]/ {exit} + in_list { + value = $0 + sub(" *#.*$", "", value) + gsub(/[",]/, "", value) + gsub(/^ *| *$/, "", value) + if (value != "") { + if (seen) { + printf "," + } + printf "%s", value + seen = 1 + } + } + ' "${file}" +} + +current_context() { + kubectl config current-context 2>/dev/null || true +} + +current_namespace() { + kubectl config view --minify --output 'jsonpath={..namespace}' 2>/dev/null || true +} + +select_context_preserving_namespace() { + local context="$1" + local previous_namespace="${2:-}" + local active_context + + active_context="$(current_context)" + if [[ "${active_context}" != "${context}" ]]; then + kubectl config use-context "${context}" >/dev/null + devspace use context "${context}" + fi + + if [[ -n "${previous_namespace}" ]]; then + kubectl config set-context "${context}" --namespace="${previous_namespace}" >/dev/null + fi +} + +provider_from_context() { + local context="$1" + if [[ "${context}" == gke_* ]]; then + printf 'gke' + return + fi + if [[ "${context}" =~ ${LOCAL_CONTEXT_REGEX} ]]; then + printf 'local' + return + fi + printf '' +} + +context_field() { + local context="$1" + local field="$2" + printf '%s' "${context}" | cut -d_ -f"${field}" +} + +context_project() { + context_field "$1" 2 +} + +context_region() { + context_field "$1" 3 +} + +context_cluster() { + context_field "$1" 4 +} + +require_supported_context() { + local context provider + context="$(current_context)" + if [[ -z "${context}" ]]; then + echo >&2 "E: No current Kubernetes context is selected." 
+ exit 1 + fi + + provider="$(provider_from_context "${context}")" + if [[ -z "${provider}" ]]; then + echo >&2 "E: Unsupported Kubernetes context ${context}; expected a GKE context or a local context." + exit 1 + fi + + printf '%s\n' "${provider}" +} + +validate_provider_override() { + local derived_provider="$1" + local requested_provider="${CLUSTER_PROVIDER:-}" + if [[ -n "${requested_provider}" && "${requested_provider}" != "${derived_provider}" ]]; then + echo >&2 "E: CLUSTER_PROVIDER=${requested_provider} does not match current Kubernetes context provider ${derived_provider}." + exit 1 + fi +} + +terraform_output_json() { + terraform -chdir="${GKE_TERRAFORM_DIR}" output -json 2>/dev/null || true +} + +tf_output_value() { + local tf_output="$1" + local output_name="$2" + printf '%s' "${tf_output}" | yq -r ".${output_name}.value // \"\"" +} + +terraform_outputs_match_context() { + local tf_output="$1" + local context="$2" + [[ -n "${tf_output}" ]] || return 1 + [[ "${context}" == gke_*_*_* ]] || return 1 + + local project region cluster + project="$(tf_output_value "${tf_output}" project_id)" + region="$(tf_output_value "${tf_output}" region)" + cluster="$(tf_output_value "${tf_output}" cluster_name)" + + [[ -n "${project}" && -n "${region}" && -n "${cluster}" ]] || return 1 + [[ "${project}" == "$(context_project "${context}")" && + "${region}" == "$(context_region "${context}")" && + "${cluster}" == "$(context_cluster "${context}")" ]] +} + +default_iap_accessor_member() { + local account + account="$(gcloud auth list --filter=status:ACTIVE --format='value(account)' 2>/dev/null | head -n1 || true)" + if [[ -z "${account}" ]]; then + return 1 + fi + + if [[ "${account}" == *gserviceaccount.com ]]; then + printf 'serviceAccount:%s\n' "${account}" + return + fi + printf 'user:%s\n' "${account}" +} + +gcloud_config_value() { + local value + value="$(gcloud config get-value "$1" 2>/dev/null || true)" + if [[ "${value}" == "(unset)" ]]; then + value="" + fi + 
printf '%s' "${value}" +} + +gcloud_configuration_exists() { + gcloud config configurations describe "$1" >/dev/null 2>&1 +} + +gcloud_cli_auth_valid() { + local account="$1" + [[ -n "${account}" ]] || return 1 + gcloud auth print-access-token --account "${account}" >/dev/null 2>&1 +} + +gcloud_adc_valid() { + gcloud auth application-default print-access-token >/dev/null 2>&1 +} + +gcloud_login_command() { + local account="$1" + if [[ -n "${account}" ]]; then + printf 'gcloud auth login %s --update-adc' "${account}" + else + printf 'gcloud auth login --update-adc' + fi +} + +ensure_gcloud_auth() { + local project_id="${1:-}" + local config_name="${GKE_GCLOUD_CONFIGURATION:-${DEFAULT_GKE_GCLOUD_CONFIGURATION}}" + local account="${GKE_GCLOUD_ACCOUNT:-}" + + if [[ -z "${account}" ]]; then + account="$(gcloud_config_value account)" + fi + + if ! gcloud_configuration_exists "${config_name}"; then + echo >&2 "I: Creating gcloud configuration ${config_name}..." + gcloud config configurations create "${config_name}" --quiet >/dev/null + fi + gcloud config configurations activate "${config_name}" --quiet >/dev/null + + if [[ -n "${account}" ]]; then + gcloud config set account "${account}" --quiet >/dev/null + fi + + if ! gcloud_cli_auth_valid "${account}" || ! gcloud_adc_valid; then + if [[ ! -t 0 ]]; then + echo >&2 "E: GKE setup requires valid gcloud CLI auth and Application Default Credentials." + echo >&2 "I: Run: $(gcloud_login_command "${account}")" + echo >&2 "I: Then retry: devspace --var CLUSTER_PROVIDER=gke run ensure-cluster" + exit 1 + fi + + echo >&2 "I: Starting Google Cloud login for DevSpace GKE setup..." + echo >&2 "I: This refreshes both gcloud CLI credentials and Application Default Credentials." + if [[ -n "${account}" ]]; then + gcloud auth login "${account}" --update-adc + else + gcloud auth login --update-adc + fi + + account="$(gcloud_config_value account)" + require_value "gcloud account" "${account}" "Google Cloud login did not select an account." 
+ gcloud config set account "${account}" --quiet >/dev/null + fi + + if ! gcloud_cli_auth_valid "${account}"; then + echo >&2 "E: gcloud CLI credentials for ${account} are still invalid." + echo >&2 "I: Run: $(gcloud_login_command "${account}")" + exit 1 + fi + if ! gcloud_adc_valid; then + echo >&2 "E: Google Application Default Credentials are missing or expired." + echo >&2 "I: Run: $(gcloud_login_command "${account}")" + exit 1 + fi + + if [[ -n "${project_id}" ]]; then + gcloud config set project "${project_id}" --quiet >/dev/null + fi +} + +resource_id() { + local value="$1" + printf '%s' "${value##*/}" +} + +print_candidates() { + local title="$1" + shift + + [[ "$#" -gt 0 ]] || return + echo >&2 "I: ${title}:" + local candidate + for candidate in "$@"; do + echo >&2 "I: ${candidate}" + done +} + +discover_billing_account_id() { + local output + if ! output="$(gcloud billing accounts list --filter='open=true' --format='value(name,displayName)' 2>&1)"; then + echo >&2 "E: Could not list visible open billing accounts." + printf '%s\n' "${output}" >&2 + echo >&2 "I: Run: devspace set var GCP_BILLING_ACCOUNT_ID=BILLING_ACCOUNT_ID" + exit 1 + fi + + local -a ids=() + local -a candidates=() + local name display id + while IFS=$'\t' read -r name display; do + [[ -n "${name}" ]] || continue + id="$(resource_id "${name}")" + ids+=("${id}") + if [[ -n "${display}" ]]; then + candidates+=("${id} (${display})") + else + candidates+=("${id}") + fi + done <<< "${output}" + + case "${#ids[@]}" in + 1) + printf '%s' "${ids[0]}" + ;; + 0) + echo >&2 "E: No visible open GCP billing accounts were found for the active gcloud account." + echo >&2 "I: Run: devspace set var GCP_BILLING_ACCOUNT_ID=BILLING_ACCOUNT_ID" + exit 1 + ;; + *) + echo >&2 "E: Multiple visible open GCP billing accounts were found; choose one explicitly." 
+ print_candidates "Visible open billing accounts" "${candidates[@]}" + echo >&2 "I: Run: devspace set var GCP_BILLING_ACCOUNT_ID=BILLING_ACCOUNT_ID" + exit 1 + ;; + esac +} + +discover_org_id() { + local output + if ! output="$(gcloud organizations list --format='value(name,displayName)' 2>&1)"; then + echo >&2 "E: Could not list visible GCP organizations." + printf '%s\n' "${output}" >&2 + echo >&2 "I: Run: devspace set var GCP_ORG_ID=ORGANIZATION_ID" + echo >&2 "I: Or run: devspace set var GCP_FOLDER_ID=FOLDER_ID" + exit 1 + fi + + local -a ids=() + local -a candidates=() + local name display id + while IFS=$'\t' read -r name display; do + [[ -n "${name}" ]] || continue + id="$(resource_id "${name}")" + ids+=("${id}") + if [[ -n "${display}" ]]; then + candidates+=("${id} (${display})") + else + candidates+=("${id}") + fi + done <<< "${output}" + + case "${#ids[@]}" in + 1) + printf '%s' "${ids[0]}" + ;; + 0) + echo >&2 "E: No visible GCP organizations were found for the active gcloud account." + echo >&2 "I: Run: devspace set var GCP_ORG_ID=ORGANIZATION_ID" + echo >&2 "I: Or run: devspace set var GCP_FOLDER_ID=FOLDER_ID" + exit 1 + ;; + *) + echo >&2 "E: Multiple visible GCP organizations were found; choose an org or folder explicitly." + print_candidates "Visible organizations" "${candidates[@]}" + echo >&2 "I: Run: devspace set var GCP_ORG_ID=ORGANIZATION_ID" + echo >&2 "I: Or run: devspace set var GCP_FOLDER_ID=FOLDER_ID" + exit 1 + ;; + esac +} + +persist_discovered_var() { + local name="$1" + local value="$2" + [[ -n "${value}" ]] || return + devspace set var "${name}=${value}" >/dev/null + echo >&2 "I: Persisted ${name}=${value}." 
+} + +write_managed_tfvars() { + local tfvars="$1" + local billing_account_id="$2" + local org_id="$3" + local folder_id="$4" + local region_value="$5" + local project_id="$6" + local dns_domain_value="$7" + local writer_members="$8" + local iap_accessor_members="$9" + local config_connector_iam_roles="${10:-}" + + { + printf 'billing_account_id = "%s"\n' "${billing_account_id}" + printf 'region = "%s"\n' "${region_value}" + printf 'dns_domain = "%s."\n' "${dns_domain_value%.}" + if [[ -n "${folder_id}" ]]; then + printf 'folder_id = "%s"\n' "${folder_id}" + elif [[ -n "${org_id}" ]]; then + printf 'org_id = "%s"\n' "${org_id}" + fi + if [[ -n "${project_id}" ]]; then + printf 'project_id = "%s"\n' "${project_id}" + fi + + printf 'dev_registry_writer_members = [\n' + write_hcl_string_list "${writer_members}" + printf ']\n' + + printf 'iap_accessor_members = [\n' + write_hcl_string_list "${iap_accessor_members}" + printf ']\n' + + if [[ -n "${config_connector_iam_roles}" ]]; then + printf 'config_connector_iam_roles = [\n' + write_hcl_string_list "${config_connector_iam_roles}" + printf ']\n' + fi + } > "${tfvars}" +} + +write_hcl_string_list() { + local csv="$1" + local old_ifs="${IFS}" + IFS="," + local member + for member in ${csv}; do + member="$(printf '%s' "${member}" | sed 's/^ *//;s/ *$//')" + if [[ -n "${member}" ]]; then + printf ' "%s",\n' "${member}" + fi + done + IFS="${old_ifs}" +} + +ensure() { + load_devspace_vars + require_tool kubectl + + local requested_provider="${CLUSTER_PROVIDER:-}" + local context context_provider provider + context="$(current_context)" + context_provider="$(provider_from_context "${context}")" + + if [[ -n "${requested_provider}" ]]; then + provider="${requested_provider}" + elif [[ -n "${context_provider}" ]]; then + provider="${context_provider}" + else + if [[ -z "${context}" ]]; then + echo >&2 "E: No current Kubernetes context is selected." 
+ echo >&2 "I: Run 'devspace --var CLUSTER_PROVIDER=gke run ensure-cluster' to create/select managed GKE." + else + echo >&2 "E: Unsupported Kubernetes context ${context}; expected a GKE context or a local context." + fi + exit 1 + fi + + case "${provider}" in + local) + ensure_local + ;; + gke) + ensure_gke + ;; + *) + echo >&2 "E: Unsupported CLUSTER_PROVIDER=${provider}; expected local or gke." + exit 1 + ;; + esac +} + +ensure_local() { + require_tool kubectl + require_tool devspace + + local context provider + context="$(current_context)" + if [[ -z "${context}" ]]; then + echo >&2 "E: No current Kubernetes context is selected." + exit 1 + fi + provider="$(provider_from_context "${context}")" + if [[ "${provider}" != "local" ]]; then + echo >&2 "E: Current Kubernetes context ${context} is not a supported local context." + exit 1 + fi + kubectl cluster-info >/dev/null + + echo >&2 "I: Selecting local cluster context ${context}..." + devspace set var \ + DNS_MODE=local \ + DNS_SERVICE_ID=kube \ + GATEWAY_NAMESPACE=istio-ingress \ + DEV_REGISTRY_HOST="" \ + DEV_REGISTRY="" \ + DEV_REGISTRY_IMAGE_PREFIX="" \ + GKE_PROJECT_ID="" \ + GKE_DNS_NAMESERVERS="" \ + GKE_SELECTED_CONTEXT="" + select_context_preserving_namespace "${context}" "$(current_namespace)" + publish_cluster_env + echo >&2 "I: Local cluster selected." +} + +ensure_gke() { + load_devspace_vars + require_tool gcloud + require_tool kubectl + require_tool devspace + require_tool terraform + require_tool yq + + local context context_provider tf_output + context="$(current_context)" + context_provider="$(provider_from_context "${context}")" + tf_output="$(terraform_output_json)" + + if [[ "${context_provider}" == "gke" ]]; then + if terraform_outputs_match_context "${tf_output}" "${context}"; then + echo >&2 "I: GKE context ${context} matches Terraform state; converging managed GKE." 
+ ensure_gke_managed + else + echo >&2 "I: GKE context ${context} does not match Terraform state; selecting as external GKE." + ensure_gke_external "${context}" + fi + return + fi + + echo >&2 "I: No active GKE context selected; converging managed GKE." + ensure_gke_managed +} + +ensure_gke_managed() { + local tfvars="${GKE_TERRAFORM_DIR}/terraform.tfvars" + local billing_account_id="${GCP_BILLING_ACCOUNT_ID:-}" + local org_id="${GCP_ORG_ID:-}" + local folder_id="${GCP_FOLDER_ID:-}" + local region_value="${GKE_REGION:-${DEFAULT_GKE_REGION}}" + local project_id="${GKE_PROJECT_ID:-}" + local dns_domain_value="${GKE_DNS_DOMAIN:-${DEFAULT_GKE_DNS_DOMAIN}}" + local writer_members="${DEV_REGISTRY_WRITER_MEMBERS:-}" + local iap_accessor_members="${GKE_IAP_ACCESSOR_MEMBERS:-}" + local config_connector_iam_roles="" + local gke_protection="${GKE_PROTECTION:-${DEFAULT_GKE_PROTECTION}}" + + if [[ -f "${tfvars}" ]]; then + [[ -n "${billing_account_id}" ]] || billing_account_id="$(tfvar_string billing_account_id "${tfvars}")" + [[ -n "${project_id}" ]] || project_id="$(tfvar_string project_id "${tfvars}")" + [[ -n "${writer_members}" ]] || writer_members="$(tfvar_string_list_csv dev_registry_writer_members "${tfvars}")" + [[ -n "${iap_accessor_members}" ]] || iap_accessor_members="$(tfvar_string_list_csv iap_accessor_members "${tfvars}")" + [[ -n "${config_connector_iam_roles}" ]] || config_connector_iam_roles="$(tfvar_string_list_csv config_connector_iam_roles "${tfvars}")" + [[ -n "${org_id}" ]] || org_id="$(tfvar_string org_id "${tfvars}")" + [[ -n "${folder_id}" ]] || folder_id="$(tfvar_string folder_id "${tfvars}")" + fi + + require_value "GKE_REGION" "${region_value}" "Default should be ${DEFAULT_GKE_REGION}." 
+ + ensure_gcloud_auth "" + + if [[ -z "${billing_account_id}" ]]; then + billing_account_id="$(discover_billing_account_id)" + persist_discovered_var GCP_BILLING_ACCOUNT_ID "${billing_account_id}" + fi + if [[ -z "${folder_id}" && -z "${org_id}" ]]; then + org_id="$(discover_org_id)" + persist_discovered_var GCP_ORG_ID "${org_id}" + fi + + require_value "GCP_BILLING_ACCOUNT_ID" "${billing_account_id}" "Set it with 'devspace set var GCP_BILLING_ACCOUNT_ID=...'." + if [[ -z "${folder_id}" && -z "${org_id}" ]]; then + echo >&2 "E: Set GCP_ORG_ID or GCP_FOLDER_ID before running managed GKE setup." + exit 1 + fi + if [[ -n "${folder_id}" && -n "${org_id}" ]]; then + echo >&2 "I: GCP_FOLDER_ID is set; Terraform will use it instead of GCP_ORG_ID." + fi + + if [[ "${gke_protection}" == "iap" && -z "${iap_accessor_members}" ]]; then + local default_iap_member + if default_iap_member="$(default_iap_accessor_member)"; then + iap_accessor_members="${default_iap_member}" + echo >&2 "I: Defaulting IAP accessor to active gcloud account ${default_iap_member}." + echo >&2 "I: Set GKE_IAP_ACCESSOR_MEMBERS to override or add more accessors." + else + echo >&2 "E: GKE_PROTECTION=iap requires an IAP accessor member." + echo >&2 "I: Set GKE_IAP_ACCESSOR_MEMBERS=user:you@example.com or run 'gcloud auth login --update-adc'." + exit 1 + fi + fi + + write_managed_tfvars \ + "${tfvars}" \ + "${billing_account_id}" \ + "${org_id}" \ + "${folder_id}" \ + "${region_value}" \ + "${project_id}" \ + "${dns_domain_value}" \ + "${writer_members}" \ + "${iap_accessor_members}" \ + "${config_connector_iam_roles}" + + echo >&2 "I: Converging GKE Terraform in ${GKE_TERRAFORM_DIR}..." 
+ terraform -chdir="${GKE_TERRAFORM_DIR}" init -input=false + terraform -chdir="${GKE_TERRAFORM_DIR}" apply -input=false -auto-approve + + local tf_output cluster_name dns_nameservers_value dev_registry_host_value dev_registry_value + tf_output="$(terraform -chdir="${GKE_TERRAFORM_DIR}" output -json)" + project_id="$(tf_output_value "${tf_output}" project_id)" + region_value="$(tf_output_value "${tf_output}" region)" + cluster_name="$(tf_output_value "${tf_output}" cluster_name)" + dns_domain_value="$(tf_output_value "${tf_output}" dns_domain | sed 's/\.$//')" + dns_nameservers_value="$(printf '%s' "${tf_output}" | yq -r '.dns_name_servers.value | join(",")')" + dev_registry_host_value="$(tf_output_value "${tf_output}" dev_registry_host)" + dev_registry_value="$(tf_output_value "${tf_output}" dev_registry)" + + ensure_gcloud_auth "${project_id}" + + persist_gke_selection \ + "${project_id}" \ + "${region_value}" \ + "${cluster_name}" \ + "" \ + "${dns_domain_value}" \ + "${dns_nameservers_value}" \ + "${dev_registry_host_value}" \ + "${dev_registry_value}" \ + "${GATEWAY_NAMESPACE:-${DEFAULT_GKE_GATEWAY_NAMESPACE}}" +} + +ensure_gke_external() { + local context="$1" + local project_id region cluster_name dns_domain dns_nameservers registry_host registry gateway_namespace + + project_id="${GKE_PROJECT_ID:-$(context_project "${context}")}" + region="${GKE_REGION:-$(context_region "${context}")}" + cluster_name="$(context_cluster "${context}")" + dns_domain="${GKE_DNS_DOMAIN:-${DEFAULT_GKE_DNS_DOMAIN}}" + dns_nameservers="${GKE_DNS_NAMESERVERS:-}" + registry_host="${DEV_REGISTRY_HOST:-}" + registry="${DEV_REGISTRY:-}" + gateway_namespace="${GATEWAY_NAMESPACE:-${DEFAULT_GKE_GATEWAY_NAMESPACE}}" + + if [[ -z "${registry_host}" ]]; then + registry_host="${region}-docker.pkg.dev" + fi + if [[ -z "${registry}" && -n "${project_id}" ]]; then + registry="${registry_host}/${project_id}/${DEV_REGISTRY_REPOSITORY:-${DEFAULT_DEV_REGISTRY_REPOSITORY}}" + fi + + require_value 
"GKE_PROJECT_ID" "${project_id}" "Use a standard GKE context or set GKE_PROJECT_ID." + require_value "GKE_REGION" "${region}" "Use a standard GKE context or set GKE_REGION." + require_value "GKE_DNS_NAMESERVERS" "${dns_nameservers}" "Set Cloud DNS nameservers with 'devspace set var GKE_DNS_NAMESERVERS=...'." + require_value "DEV_REGISTRY" "${registry}" "Set DEV_REGISTRY or provide enough registry vars to derive it." + + ensure_gcloud_auth "${project_id}" + + persist_gke_selection \ + "${project_id}" \ + "${region}" \ + "${cluster_name}" \ + "${context}" \ + "${dns_domain}" \ + "${dns_nameservers}" \ + "${registry_host}" \ + "${registry}" \ + "${gateway_namespace}" +} + +persist_gke_selection() { + local project_id="$1" + local region="$2" + local cluster_name="$3" + local context="$4" + local dns_domain="$5" + local dns_nameservers="$6" + local registry_host="$7" + local registry="$8" + local gateway_namespace="$9" + local previous_namespace + previous_namespace="$(current_namespace)" + + if [[ -z "${context}" ]]; then + require_value "GKE_PROJECT_ID" "${project_id}" "Managed GKE Terraform output is missing project_id." + require_value "GKE_REGION" "${region}" "Managed GKE Terraform output is missing region." + require_value "GKE_CLUSTER_NAME" "${cluster_name}" "Managed GKE Terraform output is missing cluster_name." + gcloud container clusters get-credentials "${cluster_name}" --region "${region}" --project "${project_id}" + context="$(current_context)" + else + select_context_preserving_namespace "${context}" "${previous_namespace}" + fi + select_context_preserving_namespace "${context}" "${previous_namespace}" + + local derived_provider + derived_provider="$(provider_from_context "${context}")" + if [[ "${derived_provider}" != "gke" ]]; then + echo >&2 "E: Selected context ${context} is not a GKE context." 
+ exit 1 + fi + kubectl cluster-info >/dev/null + + devspace set var \ + GKE_PROJECT_ID="${project_id}" \ + GKE_REGION="${region}" \ + GKE_DNS_DOMAIN="${dns_domain%.}" \ + GKE_DNS_NAMESERVERS="${dns_nameservers}" \ + GKE_SELECTED_CONTEXT="${context}" \ + DEV_REGISTRY_HOST="${registry_host}" \ + DEV_REGISTRY="${registry}" \ + DEV_REGISTRY_IMAGE_PREFIX="${registry}" \ + GATEWAY_NAMESPACE="${gateway_namespace}" \ + DNS_MODE=cloud-dns \ + DNS_SERVICE_ID=gcp-kube + gcloud auth configure-docker "${registry_host}" + + publish_cluster_env + echo >&2 "I: GKE cluster selected: ${context}" + print_gke_registry_info "${registry_host}" "${registry}" +} + +print_gke_registry_info() { + local registry_host="$1" + local registry="$2" + + echo "export DEV_REGISTRY_HOST=${registry_host}" + echo "export DEV_REGISTRY=${registry}" + echo "export DEV_REGISTRY_IMAGE_PREFIX=${registry}" + echo "gcloud auth configure-docker ${registry_host}" +} + +validate_gke_selection() { + local context="$1" + local selected_context="${GKE_SELECTED_CONTEXT:-}" + + if [[ -z "${selected_context}" ]]; then + echo >&2 "E: Current context ${context} is GKE, but no GKE selection has been persisted." + echo >&2 "I: Run 'devspace --var CLUSTER_PROVIDER=gke run ensure-cluster'." + exit 1 + fi + if [[ "${selected_context}" != "${context}" ]]; then + echo >&2 "E: Cached GKE vars were selected for ${selected_context}, but current context is ${context}." + echo >&2 "I: Run 'devspace --var CLUSTER_PROVIDER=gke run ensure-cluster' to refresh GKE selection." + exit 1 + fi + + require_value "GKE_PROJECT_ID" "${GKE_PROJECT_ID:-}" "Run 'devspace --var CLUSTER_PROVIDER=gke run ensure-cluster'." + require_value "GKE_REGION" "${GKE_REGION:-}" "Run 'devspace --var CLUSTER_PROVIDER=gke run ensure-cluster'." + require_value "GKE_DNS_NAMESERVERS" "${GKE_DNS_NAMESERVERS:-}" "Run 'devspace --var CLUSTER_PROVIDER=gke run ensure-cluster'." 
+ require_value "DEV_REGISTRY" "${DEV_REGISTRY:-}" "Run 'devspace --var CLUSTER_PROVIDER=gke run ensure-cluster'." + require_value "DEV_REGISTRY_IMAGE_PREFIX" "${DEV_REGISTRY_IMAGE_PREFIX:-}" "Run 'devspace --var CLUSTER_PROVIDER=gke run ensure-cluster'." +} + +validate_provider() { + load_devspace_vars + local context derived_provider + context="$(current_context)" + derived_provider="$(require_supported_context)" + validate_provider_override "${derived_provider}" + if [[ "${derived_provider}" == "gke" ]]; then + validate_gke_selection "${context}" + fi + echo >&2 "I: Current Kubernetes context provider is ${derived_provider}." +} + +publish_cluster_env() { + load_devspace_vars + require_tool kubectl + + local context provider + context="$(current_context)" + provider="$(require_supported_context)" + validate_provider_override "${provider}" + if [[ "${provider}" == "gke" ]]; then + validate_gke_selection "${context}" + fi + + local deployment_domain dns_domain dns_mode dns_service_id gateway_namespace + local gke_dns_domain gke_project_id gke_region gke_protection dev_registry_host dev_registry dev_registry_image_prefix + local config_connector_enabled config_connector_mode config_connector_project_id config_connector_service_account + + case "${provider}" in + local) + deployment_domain="int.kube" + dns_domain="kube" + dns_mode="${DNS_MODE:-local}" + dns_service_id="${DNS_SERVICE_ID:-kube}" + gateway_namespace="${GATEWAY_NAMESPACE:-istio-ingress}" + gke_dns_domain="" + gke_project_id="" + gke_region="" + gke_protection="" + dev_registry_host="" + dev_registry="" + dev_registry_image_prefix="${DEV_REGISTRY_IMAGE_PREFIX:-}" + config_connector_enabled="" + config_connector_mode="" + config_connector_project_id="" + config_connector_service_account="" + ;; + gke) + gke_dns_domain="${GKE_DNS_DOMAIN:-${DEFAULT_GKE_DNS_DOMAIN}}" + deployment_domain="${gke_dns_domain%.}" + dns_domain="${deployment_domain}" + dns_mode="${DNS_MODE:-cloud-dns}" + 
dns_service_id="${DNS_SERVICE_ID:-gcp-kube}" + gateway_namespace="${GATEWAY_NAMESPACE:-${DEFAULT_GKE_GATEWAY_NAMESPACE}}" + gke_project_id="${GKE_PROJECT_ID:-}" + gke_region="${GKE_REGION:-${DEFAULT_GKE_REGION}}" + gke_protection="${GKE_PROTECTION:-${DEFAULT_GKE_PROTECTION}}" + dev_registry_host="${DEV_REGISTRY_HOST:-}" + dev_registry="${DEV_REGISTRY:-}" + dev_registry_image_prefix="${DEV_REGISTRY_IMAGE_PREFIX:-${dev_registry}}" + config_connector_service_account="$(managed_config_connector_service_account "${context}")" + if [[ -n "${config_connector_service_account}" ]]; then + config_connector_enabled="true" + config_connector_mode="cluster" + config_connector_project_id="${gke_project_id}" + else + config_connector_enabled="" + config_connector_mode="" + config_connector_project_id="" + fi + require_value "GKE_PROJECT_ID" "${gke_project_id}" "Run 'devspace --var CLUSTER_PROVIDER=gke run ensure-cluster'." + require_value "GKE_REGION" "${gke_region}" "Run 'devspace --var CLUSTER_PROVIDER=gke run ensure-cluster'." + require_value "DEV_REGISTRY_IMAGE_PREFIX" "${dev_registry_image_prefix}" "Run 'devspace --var CLUSTER_PROVIDER=gke run ensure-cluster'." + ;; + *) + echo >&2 "E: Unsupported cluster provider ${provider}." 
+ exit 1 + ;; + esac + + kubectl create namespace "${CLUSTER_ENV_NAMESPACE}" --dry-run=client -o yaml | kubectl apply -f - >/dev/null + kubectl -n "${CLUSTER_ENV_NAMESPACE}" create configmap "${CLUSTER_ENV_CONFIGMAP}" \ + --from-literal=STARTER_PACK_ENV_VERSION=v1 \ + --from-literal=CLUSTER_PROVIDER="${provider}" \ + --from-literal=DEPLOYMENT_DOMAIN="${deployment_domain}" \ + --from-literal=DNS_DOMAIN="${dns_domain}" \ + --from-literal=DNS_MODE="${dns_mode}" \ + --from-literal=DNS_SERVICE_ID="${dns_service_id}" \ + --from-literal=GATEWAY_NAMESPACE="${gateway_namespace}" \ + --from-literal=DEV_REGISTRY_IMAGE_PREFIX="${dev_registry_image_prefix}" \ + --from-literal=GKE_DNS_DOMAIN="${gke_dns_domain}" \ + --from-literal=GKE_PROJECT_ID="${gke_project_id}" \ + --from-literal=GKE_REGION="${gke_region}" \ + --from-literal=GKE_PROTECTION="${gke_protection}" \ + --from-literal=DEV_REGISTRY_HOST="${dev_registry_host}" \ + --from-literal=DEV_REGISTRY="${dev_registry}" \ + --from-literal=CONFIG_CONNECTOR_ENABLED="${config_connector_enabled}" \ + --from-literal=CONFIG_CONNECTOR_MODE="${config_connector_mode}" \ + --from-literal=CONFIG_CONNECTOR_PROJECT_ID="${config_connector_project_id}" \ + --from-literal=CONFIG_CONNECTOR_SERVICE_ACCOUNT="${config_connector_service_account}" \ + --dry-run=client -o yaml | kubectl apply -f - >/dev/null + kubectl -n "${CLUSTER_ENV_NAMESPACE}" label configmap "${CLUSTER_ENV_CONFIGMAP}" \ + devspace.magneticflux.net/contract=starter-pack-env --overwrite >/dev/null + + echo >&2 "I: Published cluster environment contract ${CLUSTER_ENV_NAMESPACE}/${CLUSTER_ENV_CONFIGMAP}." +} + +managed_config_connector_service_account() { + local context="$1" + local tf_output + tf_output="$(terraform_output_json)" + if [[ -z "${tf_output}" ]]; then + return + fi + if ! 
terraform_outputs_match_context "${tf_output}" "${context}"; then + return + fi + tf_output_value "${tf_output}" config_connector_service_account_email +} + +contract_value() { + local json="$1" + local key="$2" + printf '%s' "${json}" | yq -r ".data[\"${key}\"] // \"\"" +} + +require_contract_value() { + local json="$1" + local key="$2" + local value + value="$(contract_value "${json}" "${key}")" + if [[ -z "${value}" ]]; then + echo >&2 "E: Cluster environment contract is missing ${key}." + echo >&2 "I: Run 'devspace run ensure-cluster' or deploy starter-pack infrastructure." + exit 1 + fi +} + +print_cluster_env() { + require_tool kubectl + require_tool yq + + local json provider + if ! json="$(kubectl -n "${CLUSTER_ENV_NAMESPACE}" get configmap "${CLUSTER_ENV_CONFIGMAP}" -o json 2>/dev/null)"; then + echo >&2 "E: Cluster environment contract ${CLUSTER_ENV_NAMESPACE}/${CLUSTER_ENV_CONFIGMAP} was not found." + echo >&2 "I: Run 'devspace run ensure-cluster' or deploy starter-pack infrastructure." 
+ exit 1 + fi + + require_contract_value "${json}" STARTER_PACK_ENV_VERSION + require_contract_value "${json}" CLUSTER_PROVIDER + require_contract_value "${json}" DEPLOYMENT_DOMAIN + require_contract_value "${json}" GATEWAY_NAMESPACE + + provider="$(contract_value "${json}" CLUSTER_PROVIDER)" + if [[ "${provider}" == "gke" ]]; then + require_contract_value "${json}" GKE_PROJECT_ID + require_contract_value "${json}" GKE_REGION + require_contract_value "${json}" GKE_PROTECTION + require_contract_value "${json}" DEV_REGISTRY_IMAGE_PREFIX + if [[ "$(contract_value "${json}" CONFIG_CONNECTOR_ENABLED)" == "true" ]]; then + require_contract_value "${json}" CONFIG_CONNECTOR_MODE + require_contract_value "${json}" CONFIG_CONNECTOR_PROJECT_ID + require_contract_value "${json}" CONFIG_CONNECTOR_SERVICE_ACCOUNT + fi + fi + + printf '%s' "${json}" | yq -r '.data | to_entries | sort_by(.key) | .[] | .key + "=" + .value' +} + +test_install() { + load_devspace_vars + local derived_provider + derived_provider="$(require_supported_context)" + validate_provider_override "${derived_provider}" + CLUSTER_PROVIDER="${derived_provider}" CGO_ENABLED=1 go test -count=1 -v -timeout 5m ./tests/install +} + +command_name="${1:-}" +if [[ -z "${command_name}" ]]; then + echo >&2 "E: Missing command name" + exit 1 +fi +shift + +case "${command_name}" in + ensure | validate-provider | publish-cluster-env | print-cluster-env | test-install) + ;; + *) + echo >&2 "E: Unknown command ${command_name}" + exit 1 + ;; +esac + +function_name="${command_name//-/_}" +"${function_name}" "$@" diff --git a/tests/e2e/cmd/smoke/main.go b/tests/e2e/cmd/smoke/main.go index e834cf0..8847b47 100644 --- a/tests/e2e/cmd/smoke/main.go +++ b/tests/e2e/cmd/smoke/main.go @@ -13,7 +13,7 @@ import ( ) const ( - defaultProvider = "kind" + defaultProvider = "local" defaultTimeout = 20 * time.Minute defaultClusterCreateWait = 5 * time.Minute defaultCleanupTimeout = 5 * time.Minute @@ -95,7 +95,20 @@ func run() error { }() env 
:= append(os.Environ(), "KUBECONFIG="+kubeconfig) - if err := assertContext(ctx, env, provider.contextName(cfg.clusterName)); err != nil { + if envProvider, ok := provider.(interface { + environment(context.Context, string) ([]string, error) + }); ok { + extraEnv, err := envProvider.environment(ctx, kubeconfig) + if err != nil { + return err + } + env = append(env, extraEnv...) + } + expectedContext, err := provider.contextName(ctx, cfg.clusterName) + if err != nil { + return err + } + if err := assertContext(ctx, env, expectedContext); err != nil { return err } @@ -109,7 +122,7 @@ func run() error { return err } - testEnv := append(env, "CGO_ENABLED=1") + testEnv := append(env, "CGO_ENABLED=1", "CLUSTER_PROVIDER="+cfg.providerName) goArgs := append([]string{"test", "-count=1", "-timeout", cfg.testTimeout.String()}, cfg.testArgs...) goArgs = append(goArgs, "./tests/install") if output, err := runStepCapture(ctx, testEnv, "go", goArgs...); err != nil { @@ -213,17 +226,22 @@ func printPendingPodDiagnostics(env []string, timeout time.Duration) { } func loadConfig() config { - providerName := getenvDefault("E2E_CLUSTER_PROVIDER", defaultProvider) + providerName := getenvDefault("CLUSTER_PROVIDER", defaultProvider) clusterName := os.Getenv("E2E_CLUSTER_NAME") if clusterName == "" { clusterName = fmt.Sprintf("devspace-smoke-%d", time.Now().Unix()) } + devspaceArgs := splitArgs(os.Getenv("E2E_DEVSPACE_ARGS")) + if providerName == "gke" && len(devspaceArgs) == 0 { + devspaceArgs = []string{"--profile", "gke-test"} + } + return config{ providerName: providerName, clusterName: clusterName, keepCluster: os.Getenv("E2E_KEEP_CLUSTER") == "1", - devspaceArgs: splitArgs(os.Getenv("E2E_DEVSPACE_ARGS")), + devspaceArgs: devspaceArgs, testArgs: splitArgs(os.Getenv("E2E_TEST_ARGS")), timeout: durationFromEnv("E2E_TIMEOUT", defaultTimeout), diff --git a/tests/e2e/cmd/smoke/provider.go b/tests/e2e/cmd/smoke/provider.go index 49e1fdd..0259a59 100644 --- a/tests/e2e/cmd/smoke/provider.go 
+++ b/tests/e2e/cmd/smoke/provider.go @@ -10,16 +10,18 @@ type provider interface { preflight(context.Context) error create(context.Context, string, string, time.Duration) error delete(context.Context, string, string) error - contextName(string) string + contextName(context.Context, string) (string, error) } func newProvider(name string) (provider, error) { switch name { - case "kind": + case "local": return kindProvider{}, nil + case "gke": + return gkeProvider{}, nil case "vind": return vindProvider{}, nil default: - return nil, fmt.Errorf("unsupported E2E_CLUSTER_PROVIDER %q", name) + return nil, fmt.Errorf("unsupported CLUSTER_PROVIDER %q", name) } } diff --git a/tests/e2e/cmd/smoke/provider_gke.go b/tests/e2e/cmd/smoke/provider_gke.go new file mode 100644 index 0000000..d62db22 --- /dev/null +++ b/tests/e2e/cmd/smoke/provider_gke.go @@ -0,0 +1,206 @@ +package main + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "time" +) + +type gkeProvider struct{} + +type terraformOutputValue struct { + Value json.RawMessage `json:"value"` +} + +type gkeTerraformOutputs struct { + ProjectID string + Region string + ClusterName string + DNSDomain string + DNSNameServers []string + DevRegistryHost string + DevRegistry string + DevRegistryImagePrefix string + ExternalDNSServiceAccountEmail string +} + +func (gkeProvider) preflight(ctx context.Context) error { + for _, tool := range []string{"terraform", "gcloud", "kubectl", "devspace"} { + if err := requireTool(ctx, tool); err != nil { + return err + } + } + return nil +} + +func (gkeProvider) create(ctx context.Context, clusterName, kubeconfig string, _ time.Duration) error { + tfDir := gkeTerraformDir() + if err := runStepInDir(ctx, tfDir, os.Environ(), "terraform", "init", "-input=false"); err != nil { + return err + } + + applyArgs := append([]string{"apply", "-input=false", "-auto-approve"}, gkeTerraformVarArgs(clusterName)...) 
+ if err := runStepInDir(ctx, tfDir, os.Environ(), "terraform", applyArgs...); err != nil { + return err + } + + outputs, err := readGKEOutputs(ctx) + if err != nil { + return err + } + + env := append(os.Environ(), "KUBECONFIG="+kubeconfig) + if err := runStep(ctx, env, "gcloud", "container", "clusters", "get-credentials", outputs.ClusterName, "--region", outputs.Region, "--project", outputs.ProjectID); err != nil { + return err + } + + return nil +} + +func (gkeProvider) delete(ctx context.Context, clusterName, kubeconfig string) error { + tfDir := gkeTerraformDir() + env := append(os.Environ(), "KUBECONFIG="+kubeconfig) + if extraEnv, err := (gkeProvider{}).environment(ctx, kubeconfig); err == nil { + env = append(env, extraEnv...) + } + + if err := runStep(ctx, env, "devspace", "run", "reset-cluster-dns"); err != nil { + fmt.Fprintf(os.Stderr, "smoke: warning: failed to reset GKE split DNS before destroy: %v\n", err) + } + + destroyArgs := append([]string{"destroy", "-input=false", "-auto-approve"}, gkeTerraformVarArgs(clusterName)...) + return runStepInDir(ctx, tfDir, os.Environ(), "terraform", destroyArgs...) 
+} + +func (gkeProvider) contextName(ctx context.Context, _ string) (string, error) { + outputs, err := readGKEOutputs(ctx) + if err != nil { + return "", err + } + return fmt.Sprintf("gke_%s_%s_%s", outputs.ProjectID, outputs.Region, outputs.ClusterName), nil +} + +func (gkeProvider) environment(ctx context.Context, _ string) ([]string, error) { + outputs, err := readGKEOutputs(ctx) + if err != nil { + return nil, err + } + + domain := strings.TrimSuffix(outputs.DNSDomain, ".") + return []string{ + "GKE_PROJECT_ID=" + outputs.ProjectID, + "GKE_REGION=" + outputs.Region, + "GKE_DNS_DOMAIN=" + domain, + "GKE_DNS_NAMESERVERS=" + strings.Join(outputs.DNSNameServers, ","), + "DEV_REGISTRY_HOST=" + outputs.DevRegistryHost, + "DEV_REGISTRY=" + outputs.DevRegistry, + "DEV_REGISTRY_IMAGE_PREFIX=" + outputs.DevRegistryImagePrefix, + "GATEWAY_NAMESPACE=gke-gateway", + "DNS_MODE=cloud-dns", + "DNS_SERVICE_ID=gcp-kube", + "CLUSTER_PROVIDER=gke", + }, nil +} + +func gkeTerraformDir() string { + if value := os.Getenv("GKE_TF_DIR"); value != "" { + return value + } + return filepath.Join("infra", "gcp-ephemeral") +} + +func gkeTerraformVarArgs(clusterName string) []string { + args := []string{"-var", "cluster_name=" + clusterName} + if value := os.Getenv("GKE_TF_VAR_FILE"); value != "" { + args = append(args, "-var-file", value) + } + return args +} + +func readGKEOutputs(ctx context.Context) (gkeTerraformOutputs, error) { + output, err := commandOutputInDir(ctx, gkeTerraformDir(), os.Environ(), "terraform", "output", "-json") + if err != nil { + return gkeTerraformOutputs{}, err + } + + var raw map[string]terraformOutputValue + if err := json.Unmarshal([]byte(output), &raw); err != nil { + return gkeTerraformOutputs{}, fmt.Errorf("parse terraform output -json: %w", err) + } + + var outputs gkeTerraformOutputs + if err := decodeTerraformOutput(raw, "project_id", &outputs.ProjectID); err != nil { + return outputs, err + } + if err := decodeTerraformOutput(raw, "region", 
&outputs.Region); err != nil { + return outputs, err + } + if err := decodeTerraformOutput(raw, "cluster_name", &outputs.ClusterName); err != nil { + return outputs, err + } + if err := decodeTerraformOutput(raw, "dns_domain", &outputs.DNSDomain); err != nil { + return outputs, err + } + if err := decodeTerraformOutput(raw, "dns_name_servers", &outputs.DNSNameServers); err != nil { + return outputs, err + } + if err := decodeTerraformOutput(raw, "dev_registry_host", &outputs.DevRegistryHost); err != nil { + return outputs, err + } + if err := decodeTerraformOutput(raw, "dev_registry", &outputs.DevRegistry); err != nil { + return outputs, err + } + if err := decodeTerraformOutput(raw, "dev_registry_image_prefix", &outputs.DevRegistryImagePrefix); err != nil { + return outputs, err + } + if err := decodeTerraformOutput(raw, "external_dns_service_account_email", &outputs.ExternalDNSServiceAccountEmail); err != nil { + return outputs, err + } + return outputs, nil +} + +func decodeTerraformOutput[T any](raw map[string]terraformOutputValue, name string, target *T) error { + value, ok := raw[name] + if !ok { + return fmt.Errorf("terraform output %q is missing", name) + } + if err := json.Unmarshal(value.Value, target); err != nil { + return fmt.Errorf("parse terraform output %q: %w", name, err) + } + return nil +} + +func runStepInDir(ctx context.Context, dir string, env []string, name string, args ...string) error { + fmt.Printf("smoke: running %s %s in %s\n", name, strings.Join(args, " "), dir) + cmd := exec.CommandContext(ctx, name, args...) 
+ cmd.Dir = dir + cmd.Env = append(env, "TF_IN_AUTOMATION=1") + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + if ctx.Err() != nil { + return fmt.Errorf("%s %s timed out: %w", name, strings.Join(args, " "), ctx.Err()) + } + return fmt.Errorf("%s %s failed: %w", name, strings.Join(args, " "), err) + } + return nil +} + +func commandOutputInDir(ctx context.Context, dir string, env []string, name string, args ...string) (string, error) { + cmd := exec.CommandContext(ctx, name, args...) + cmd.Dir = dir + cmd.Env = append(env, "TF_IN_AUTOMATION=1") + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + return stdout.String(), fmt.Errorf("%s %s failed: %w\nstdout:\n%s\nstderr:\n%s", name, strings.Join(args, " "), err, stdout.String(), stderr.String()) + } + return stdout.String(), nil +} diff --git a/tests/e2e/cmd/smoke/provider_kind.go b/tests/e2e/cmd/smoke/provider_kind.go index 056a063..b5b335a 100644 --- a/tests/e2e/cmd/smoke/provider_kind.go +++ b/tests/e2e/cmd/smoke/provider_kind.go @@ -23,6 +23,6 @@ func (kindProvider) delete(ctx context.Context, clusterName, kubeconfig string) return runStep(ctx, os.Environ(), "kind", "delete", "cluster", "--name", clusterName, "--kubeconfig", kubeconfig) } -func (kindProvider) contextName(clusterName string) string { - return "kind-" + clusterName +func (kindProvider) contextName(_ context.Context, clusterName string) (string, error) { + return "kind-" + clusterName, nil } diff --git a/tests/e2e/cmd/smoke/provider_vind.go b/tests/e2e/cmd/smoke/provider_vind.go index 76055e0..5a90a6d 100644 --- a/tests/e2e/cmd/smoke/provider_vind.go +++ b/tests/e2e/cmd/smoke/provider_vind.go @@ -9,17 +9,17 @@ import ( type vindProvider struct{} func (vindProvider) preflight(context.Context) error { - return errors.New("E2E_CLUSTER_PROVIDER=vind is reserved for future support and is not implemented yet") + return errors.New("CLUSTER_PROVIDER=vind 
is reserved for future support and is not implemented yet") } func (vindProvider) create(context.Context, string, string, time.Duration) error { - return errors.New("E2E_CLUSTER_PROVIDER=vind is reserved for future support and is not implemented yet") + return errors.New("CLUSTER_PROVIDER=vind is reserved for future support and is not implemented yet") } func (vindProvider) delete(context.Context, string, string) error { return nil } -func (vindProvider) contextName(clusterName string) string { - return clusterName +func (vindProvider) contextName(_ context.Context, clusterName string) (string, error) { + return clusterName, nil } diff --git a/tests/install/dns.go b/tests/install/dns.go index c35f896..dfdad1c 100644 --- a/tests/install/dns.go +++ b/tests/install/dns.go @@ -5,6 +5,7 @@ import ( "fmt" "net" "sort" + "strings" "testing" "time" ) @@ -36,6 +37,30 @@ func assertDefaultResolverResolves(t *testing.T, name, expectedIP string) { } } +func assertCloudDNSResolves(t *testing.T, nameservers, name, expectedIP string) { + t.Helper() + + nameserverIP := firstNameserverIP(t, nameservers) + assertDirectDNSResolves(t, nameserverIP, name, expectedIP) +} + +func firstNameserverIP(t *testing.T, nameservers string) string { + t.Helper() + + for _, ns := range strings.Fields(strings.ReplaceAll(nameservers, ",", " ")) { + ns = strings.TrimSuffix(ns, ".") + "." 
+ ips := lookupHost(t, net.DefaultResolver, ns, "default resolver for Cloud DNS nameserver") + for _, ip := range ips { + if strings.Contains(ip, ".") { + return ip + } + } + } + + t.Fatalf("GKE_DNS_NAMESERVERS did not contain a nameserver with an IPv4 address: %q", nameservers) + return "" +} + func lookupHost(t *testing.T, resolver *net.Resolver, name, description string) []string { t.Helper() diff --git a/tests/install/gke_registry.go b/tests/install/gke_registry.go new file mode 100644 index 0000000..5ecc700 --- /dev/null +++ b/tests/install/gke_registry.go @@ -0,0 +1,81 @@ +package install_test + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "testing" + "time" +) + +func assertOptionalGKERegistrySmoke(t *testing.T) { + t.Helper() + + if os.Getenv("GKE_REGISTRY_SMOKE") != "1" { + t.Skip("GKE_REGISTRY_SMOKE=1 is not set") + } + requireTools(t, "docker", "gcloud") + + prefix := gkeDevRegistryImagePrefix(t) + tag := fmt.Sprintf("registry-smoke-%d", time.Now().UnixNano()) + imageName := strings.TrimSuffix(prefix, "/") + "/registry-smoke" + image := imageName + ":" + tag + + dockerfile := filepath.Join(t.TempDir(), "Dockerfile") + if err := os.WriteFile(dockerfile, []byte("FROM registry.k8s.io/pause:3.10\n"), 0o644); err != nil { + t.Fatalf("write Dockerfile: %v", err) + } + + runCommandWithTimeout(t, 2*time.Minute, "docker", "build", "--platform", "linux/amd64", "-t", image, filepath.Dir(dockerfile)) + defer func() { + _, _ = runCommandE(30*time.Second, "docker", "rmi", image) + }() + + runCommandWithTimeout(t, 5*time.Minute, "docker", "push", image) + defer func() { + _, _ = runCommandE(2*time.Minute, "gcloud", "artifacts", "docker", "images", "delete", imageName, "--quiet", "--delete-tags") + }() + + namespace := "registry-smoke-" + tag + runCommand(t, "kubectl", "create", "namespace", namespace) + defer func() { + _, _ = runCommandE(2*time.Minute, "kubectl", "delete", "namespace", namespace, "--wait=false") + }() + + assertNoImagePullSecrets(t, 
namespace, "serviceaccount", "default") + + runCommand(t, "kubectl", "run", "registry-smoke", "-n", namespace, "--image", image, "--restart", "Never") + runCommandWithTimeout(t, 3*time.Minute, "kubectl", "wait", "-n", namespace, "--for=condition=Ready", "pod/registry-smoke", "--timeout=180s") + assertNoImagePullSecrets(t, namespace, "pod", "registry-smoke") +} + +func gkeDevRegistryImagePrefix(t *testing.T) string { + t.Helper() + + if value := os.Getenv("DEV_REGISTRY_IMAGE_PREFIX"); value != "" { + return value + } + if value := os.Getenv("DEV_REGISTRY"); value != "" { + return value + } + + host := os.Getenv("DEV_REGISTRY_HOST") + if host == "" { + host = getenvDefaultForTests("GKE_REGION", "us-central1") + "-docker.pkg.dev" + } + projectID := os.Getenv("GKE_PROJECT_ID") + if projectID == "" { + t.Fatal("GKE_PROJECT_ID is required when DEV_REGISTRY_IMAGE_PREFIX and DEV_REGISTRY are not set") + } + return host + "/" + projectID + "/devspace-dev" +} + +func assertNoImagePullSecrets(t *testing.T, namespace, kind, name string) { + t.Helper() + + output := runCommand(t, "kubectl", "get", kind, name, "-n", namespace, "-o", "jsonpath={.imagePullSecrets}{.spec.imagePullSecrets}") + if strings.TrimSpace(output) != "" { + t.Fatalf("%s/%s/%s has imagePullSecrets configured: %s", namespace, kind, name, output) + } +} diff --git a/tests/install/host_linux.go b/tests/install/host_linux.go index 35af0c6..a6774ad 100644 --- a/tests/install/host_linux.go +++ b/tests/install/host_linux.go @@ -38,7 +38,11 @@ func assertOptionalHTTPSRoute(t *testing.T) { if !httpbinRouteInstalled(t) { t.Skip("optional httpbin route is not installed") } - assertHTTPSGet(t, "HTTPS route", "https://httpbin.int.kube/get") + assertHTTPSGet(t, "HTTPS route", "https://"+routeHost("httpbin")+"/get") + if clusterProvider() == "gke" { + assertHTTPSHeaderPreserved(t, "httpbin Authorization header", "https://"+routeHost("httpbin")+"/headers", "Authorization", "Bearer devspace-starter-pack-test") + 
assertHTTPRedirectsToHTTPS(t, "httpbin HTTP route", "http://"+routeHost("httpbin")+"/get") + } } func assertOptionalTracingRoute(t *testing.T) { @@ -47,7 +51,13 @@ func assertOptionalTracingRoute(t *testing.T) { if !httpRouteInstalled(t, "observability", "jaeger") { t.Fatal("observability/jaeger HTTPRoute is not installed") } - assertHTTPSGet(t, "Jaeger HTTPS route", "https://jaeger.int.kube/") + if clusterProvider() == "gke" && gkeProtection() == "iap" { + assertGCPBackendPolicyInstalled(t, "observability", "jaeger-iap") + assertHTTPSRequiresIAP(t, "Jaeger HTTPS route", "https://"+routeHost("jaeger")+"/") + assertHTTPRedirectsToHTTPS(t, "Jaeger HTTP route", "http://"+routeHost("jaeger")+"/") + return + } + assertHTTPSGet(t, "Jaeger HTTPS route", "https://"+routeHost("jaeger")+"/") } func assertOptionalGrafanaRoute(t *testing.T) { @@ -56,6 +66,12 @@ func assertOptionalGrafanaRoute(t *testing.T) { if !httpRouteInstalled(t, "observability", "grafana") { t.Fatal("observability/grafana HTTPRoute is not installed") } + if clusterProvider() == "gke" && gkeProtection() == "iap" { + assertGCPBackendPolicyInstalled(t, "observability", "grafana-iap") + assertHTTPSRequiresIAP(t, "Grafana HTTPS route", "https://"+routeHost("grafana")+"/login") + assertHTTPRedirectsToHTTPS(t, "Grafana HTTP route", "http://"+routeHost("grafana")+"/login") + return + } assertHTTPSGet(t, "Grafana HTTPS route", "https://grafana.int.kube/login") } diff --git a/tests/install/host_macos.go b/tests/install/host_macos.go index dbbdb53..5164f3e 100644 --- a/tests/install/host_macos.go +++ b/tests/install/host_macos.go @@ -42,7 +42,11 @@ func assertOptionalHTTPSRoute(t *testing.T) { t.Fatal("HTTPS route validation on macOS requires CGO_ENABLED=1 so Go uses the system resolver for .kube names") } - assertHTTPSGet(t, "HTTPS route", "https://httpbin.int.kube/get") + assertHTTPSGet(t, "HTTPS route", "https://"+routeHost("httpbin")+"/get") + if clusterProvider() == "gke" { + assertHTTPSHeaderPreserved(t, 
"httpbin Authorization header", "https://"+routeHost("httpbin")+"/headers", "Authorization", "Bearer devspace-starter-pack-test") + assertHTTPRedirectsToHTTPS(t, "httpbin HTTP route", "http://"+routeHost("httpbin")+"/get") + } } func assertOptionalTracingRoute(t *testing.T) { @@ -55,7 +59,13 @@ func assertOptionalTracingRoute(t *testing.T) { t.Fatal("Jaeger route validation on macOS requires CGO_ENABLED=1 so Go uses the system resolver for .kube names") } - assertHTTPSGet(t, "Jaeger HTTPS route", "https://jaeger.int.kube/") + if clusterProvider() == "gke" && gkeProtection() == "iap" { + assertGCPBackendPolicyInstalled(t, "observability", "jaeger-iap") + assertHTTPSRequiresIAP(t, "Jaeger HTTPS route", "https://"+routeHost("jaeger")+"/") + assertHTTPRedirectsToHTTPS(t, "Jaeger HTTP route", "http://"+routeHost("jaeger")+"/") + return + } + assertHTTPSGet(t, "Jaeger HTTPS route", "https://"+routeHost("jaeger")+"/") } func assertOptionalGrafanaRoute(t *testing.T) { @@ -68,6 +78,12 @@ func assertOptionalGrafanaRoute(t *testing.T) { t.Fatal("Grafana route validation on macOS requires CGO_ENABLED=1 so Go uses the system resolver for .kube names") } + if clusterProvider() == "gke" && gkeProtection() == "iap" { + assertGCPBackendPolicyInstalled(t, "observability", "grafana-iap") + assertHTTPSRequiresIAP(t, "Grafana HTTPS route", "https://"+routeHost("grafana")+"/login") + assertHTTPRedirectsToHTTPS(t, "Grafana HTTP route", "http://"+routeHost("grafana")+"/login") + return + } assertHTTPSGet(t, "Grafana HTTPS route", "https://grafana.int.kube/login") } diff --git a/tests/install/host_routes.go b/tests/install/host_routes.go index 167f9a9..8871c63 100644 --- a/tests/install/host_routes.go +++ b/tests/install/host_routes.go @@ -15,18 +15,7 @@ import ( func assertHTTPSGet(t *testing.T, description, url string) { t.Helper() - pool, err := x509.SystemCertPool() - if err != nil { - t.Fatalf("failed to load system cert pool: %v", err) - } - - client := &http.Client{ - Timeout: 5 * 
time.Second, - Transport: &http.Transport{ - TLSClientConfig: &tls.Config{RootCAs: pool, MinVersion: tls.VersionTLS12}, - }, - } - + client := trustedHTTPClient(t, true) deadline := time.Now().Add(45 * time.Second) for { resp, err := client.Get(url) @@ -47,6 +36,122 @@ func assertHTTPSGet(t *testing.T, description, url string) { } } +func assertHTTPSHeaderPreserved(t *testing.T, description, url, name, value string) { + t.Helper() + + client := trustedHTTPClient(t, true) + req, err := http.NewRequest(http.MethodGet, url, nil) + if err != nil { + t.Fatalf("failed to build %s request: %v", description, err) + } + req.Header.Set(name, value) + + resp := doRequestWithRetry(t, client, req, description+" through gateway") + defer resp.Body.Close() + + body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + t.Fatalf("%s returned %s: %s", description, resp.Status, strings.TrimSpace(string(body))) + } + if !strings.Contains(string(body), value) { + t.Fatalf("%s did not preserve %s header value %q: %s", description, name, value, strings.TrimSpace(string(body))) + } +} + +func assertHTTPRedirectsToHTTPS(t *testing.T, description, url string) { + t.Helper() + + client := trustedHTTPClient(t, false) + req, err := http.NewRequest(http.MethodGet, url, nil) + if err != nil { + t.Fatalf("failed to build %s redirect request: %v", description, err) + } + resp := doRequestWithRetry(t, client, req, description+" redirect check") + defer resp.Body.Close() + + if resp.StatusCode != http.StatusMovedPermanently { + body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + t.Fatalf("%s returned %s, expected 301 Moved Permanently: %s", description, resp.Status, strings.TrimSpace(string(body))) + } + location := resp.Header.Get("Location") + if !strings.HasPrefix(location, "https://") { + t.Fatalf("%s redirect Location is %q, expected https://...", description, location) + } +} + +func assertHTTPSRequiresIAP(t *testing.T, description, url 
string) { + t.Helper() + + client := trustedHTTPClient(t, false) + req, err := http.NewRequest(http.MethodGet, url, nil) + if err != nil { + t.Fatalf("failed to build %s IAP request: %v", description, err) + } + resp := doRequestWithRetry(t, client, req, description+" IAP check") + defer resp.Body.Close() + + if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden { + return + } + if resp.StatusCode >= 300 && resp.StatusCode < 400 { + location := resp.Header.Get("Location") + if strings.Contains(location, "accounts.google.com") || strings.Contains(location, "iap.googleapis.com") { + return + } + t.Fatalf("%s redirected to %q, expected Google IAP login", description, location) + } + + body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + t.Fatalf("%s returned %s, expected IAP challenge or redirect: %s", description, resp.Status, strings.TrimSpace(string(body))) +} + +func doRequestWithRetry(t *testing.T, client *http.Client, req *http.Request, description string) *http.Response { + t.Helper() + + deadline := time.Now().Add(45 * time.Second) + for { + resp, err := client.Do(req.Clone(req.Context())) + if err == nil { + return resp + } + + if time.Now().After(deadline) || !isRetryableHTTPGetError(err) { + t.Fatalf("%s failed: %v", description, err) + } + + time.Sleep(2 * time.Second) + } +} + +func trustedHTTPClient(t *testing.T, followRedirects bool) *http.Client { + t.Helper() + + pool, err := x509.SystemCertPool() + if err != nil { + t.Fatalf("failed to load system cert pool: %v", err) + } + + client := &http.Client{ + Timeout: 5 * time.Second, + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{RootCAs: pool, MinVersion: tls.VersionTLS12}, + }, + } + if !followRedirects { + client.CheckRedirect = func(_ *http.Request, _ []*http.Request) error { + return http.ErrUseLastResponse + } + } + return client +} + +func routeHost(name string) string { + if clusterProvider() == "gke" { + return name + "." 
+ strings.TrimSuffix(getenvDefaultForTests("GKE_DNS_DOMAIN", "gcp.kube"), ".") + } + return name + ".int.kube" +} + func isRetryableHTTPGetError(err error) bool { var dnsErr *net.DNSError if errors.As(err, &dnsErr) { @@ -64,6 +169,9 @@ func isRetryableHTTPGetError(err error) bool { func httpbinRouteInstalled(t *testing.T) bool { t.Helper() + if clusterProvider() == "gke" { + return httpRouteInstalled(t, "httpbin", "https") + } return httpRouteInstalled(t, "httpbin", "http") } diff --git a/tests/install/install_test.go b/tests/install/install_test.go index e537c93..59a59c6 100644 --- a/tests/install/install_test.go +++ b/tests/install/install_test.go @@ -1,6 +1,7 @@ package install_test import ( + "os" "strings" "testing" ) @@ -25,6 +26,8 @@ var checkedNamespaces = []string{ } func TestDevspaceInstallDiagnostics(t *testing.T) { + target := clusterProvider() + t.Run("tooling", func(t *testing.T) { requireTools(t, requiredTools()...) requireKubernetesContext(t) @@ -35,24 +38,41 @@ func TestDevspaceInstallDiagnostics(t *testing.T) { }) t.Run("workload readiness", func(t *testing.T) { - assertWorkloadsReady(t, checkedNamespaces) - assertPodsReady(t, checkedNamespaces) + assertWorkloadsReady(t, checkedNamespacesForTarget(target)) + assertPodsReady(t, checkedNamespacesForTarget(target)) }) t.Run("service endpoints", func(t *testing.T) { - assertServicesHaveReadyEndpoints(t, checkedNamespaces) + assertServicesHaveReadyEndpoints(t, checkedNamespacesForTarget(target)) }) t.Run("load balancers", func(t *testing.T) { + if target == "gke" { + t.Skip("GKE target uses Gateway API load balancers instead of Service type LoadBalancer") + } assertLoadBalancersAssigned(t, checkedNamespaces) }) t.Run("external dns path", func(t *testing.T) { + if target == "gke" { + domain := getenvDefaultForTests("GKE_DNS_DOMAIN", "gcp.kube") + host := "httpbin." 
+ strings.TrimSuffix(domain, ".") + gatewayAddress := requireGatewayAddress(t, "gke-gateway", "gateway") + assertCloudDNSResolves(t, getenvDefaultForTests("GKE_DNS_NAMESERVERS", ""), host, gatewayAddress) + return + } dnsIP := requireServiceLoadBalancerIP(t, dnsNamespace, dnsService) assertDirectDNSResolves(t, dnsIP, dnsName, dnsIP) }) t.Run("host dns", func(t *testing.T) { + if target == "gke" { + domain := getenvDefaultForTests("GKE_DNS_DOMAIN", "gcp.kube") + host := "httpbin." + strings.TrimSuffix(domain, ".") + gatewayAddress := requireGatewayAddress(t, "gke-gateway", "gateway") + assertDefaultResolverResolves(t, host, gatewayAddress) + return + } dnsIP := requireServiceLoadBalancerIP(t, dnsNamespace, dnsService) assertHostDNS(t, dnsName, dnsIP) }) @@ -63,9 +83,22 @@ func TestDevspaceInstallDiagnostics(t *testing.T) { }) t.Run("gateway ext-authz hook", func(t *testing.T) { + if target == "gke" { + assertKubernetesObjectAbsent(t, "namespace", "istio-system", "") + assertKubernetesObjectAbsent(t, "namespace", "istio-ingress", "") + assertKubernetesObjectAbsent(t, "namespace", "hello", "") + return + } assertIstioGatewayExtAuthzHookInstalled(t) }) + t.Run("optional gke dev registry smoke", func(t *testing.T) { + if target != "gke" { + t.Skip("GKE dev registry smoke only runs for the GKE target") + } + assertOptionalGKERegistrySmoke(t) + }) + t.Run("optional https route", func(t *testing.T) { assertOptionalHTTPSRoute(t) }) @@ -74,6 +107,9 @@ func TestDevspaceInstallDiagnostics(t *testing.T) { if !helmReleaseInstalled(t, "observability", "otel-collector") && !helmReleaseInstalled(t, "observability", "jaeger") { t.Skip("with-o11y profile is not installed") } + if !helmReleaseInstalled(t, "observability", "prometheus") { + t.Fatal("observability/prometheus Helm release is not deployed") + } if !helmReleaseInstalled(t, "observability", "otel-collector") { t.Fatal("observability/otel-collector Helm release is not deployed") } @@ -92,9 +128,16 @@ func 
TestDevspaceInstallDiagnostics(t *testing.T) { "http-query": 16686, }) assertServiceMonitorInstalled(t, "observability", "otel-collector") - assertServiceMonitorInstalled(t, "observability", "istiod") - assertPodMonitorInstalled(t, "observability", "istio-gateway-api-gateway") - assertPodMonitorInstalled(t, "observability", "istio-ingress-gateway") + if target == "gke" { + assertKubernetesObjectAbsent(t, "deployment", "metrics-server", "kube-system") + assertKubernetesObjectAbsent(t, "servicemonitor", "istiod", "observability") + assertKubernetesObjectAbsent(t, "podmonitor", "istio-gateway-api-gateway", "observability") + assertKubernetesObjectAbsent(t, "podmonitor", "istio-ingress-gateway", "observability") + } else { + assertServiceMonitorInstalled(t, "observability", "istiod") + assertPodMonitorInstalled(t, "observability", "istio-gateway-api-gateway") + assertPodMonitorInstalled(t, "observability", "istio-ingress-gateway") + } assertPrometheusRemoteWriteReceiverEnabled(t, "observability", "prometheus-kube-prometheus-prometheus") assertOptionalTracingRoute(t) }) @@ -115,6 +158,9 @@ func TestDevspaceInstallDiagnostics(t *testing.T) { }) t.Run("optional observability addons", func(t *testing.T) { + if target == "gke" { + t.Skip("GKE target does not deploy observability addons initially") + } if !helmReleaseInstalled(t, "observability", "loki") && !helmReleaseInstalled(t, "observability", "tempo") && !helmReleaseInstalled(t, "observability", "alloy") { @@ -130,8 +176,41 @@ func TestDevspaceInstallDiagnostics(t *testing.T) { }) } +func clusterProvider() string { + return getenvDefaultForTests("CLUSTER_PROVIDER", "local") +} + +func gkeProtection() string { + return getenvDefaultForTests("GKE_PROTECTION", "iap") +} + +func checkedNamespacesForTarget(target string) []string { + if target == "gke" { + return []string{ + "cert-manager", + "external-dns", + "gke-gateway", + "httpbin", + "observability", + "reflector", + } + } + return checkedNamespaces +} + +func 
getenvDefaultForTests(name, fallback string) string { + value := os.Getenv(name) + if value == "" { + return fallback + } + return value +} + func requiredTools() []string { tools := []string{"kubectl", "helm"} + if clusterProvider() == "gke" { + tools = append(tools, "gcloud") + } tools = append(tools, hostRequiredTools()...) return tools } diff --git a/tests/install/kubernetes.go b/tests/install/kubernetes.go index fcfefb5..d676650 100644 --- a/tests/install/kubernetes.go +++ b/tests/install/kubernetes.go @@ -150,6 +150,15 @@ type condition struct { Message string `json:"message"` } +type gateway struct { + Status struct { + Addresses []struct { + Value string `json:"value"` + } `json:"addresses"` + Conditions []condition `json:"conditions"` + } `json:"status"` +} + type certificateList struct { Items []certResource `json:"items"` } @@ -435,6 +444,19 @@ func assertKubernetesObjectAbsent(t *testing.T, kind, name, namespace string) { } } +func assertGCPBackendPolicyInstalled(t *testing.T, namespace, name string) { + t.Helper() + + output, err := runCommandE(defaultCommandTimeout, "kubectl", "get", "gcpbackendpolicy", name, "-n", namespace, "-o", "name") + if err != nil { + t.Fatalf("GCPBackendPolicy %s/%s is not installed: %v", namespace, name, err) + } + got := strings.TrimSpace(output) + if got != "gcpbackendpolicy.networking.gke.io/"+name && got != "gcpbackendpolicy/"+name { + t.Fatalf("unexpected GCPBackendPolicy name for %s/%s: %s", namespace, name, got) + } +} + func assertServiceMonitorInstalled(t *testing.T, namespace, name string) { t.Helper() @@ -529,6 +551,28 @@ func requireServiceLoadBalancerIP(t *testing.T, namespace, name string) string { return address } +func requireGatewayAddress(t *testing.T, namespace, name string) string { + t.Helper() + + gw := kubectlJSON[gateway](t, "get", "gateway", name, "-n", namespace) + if !conditionTrue(gw.Status.Conditions, "Accepted") { + t.Fatalf("%s is not Accepted", describeObject(namespace, "gateway", name)) + } + 
if len(gw.Status.Addresses) == 0 || gw.Status.Addresses[0].Value == "" { + t.Fatalf("%s has no status address", describeObject(namespace, "gateway", name)) + } + return gw.Status.Addresses[0].Value +} + +func conditionTrue(conditions []condition, conditionType string) bool { + for _, condition := range conditions { + if condition.Type == conditionType && condition.Status == "True" { + return true + } + } + return false +} + func assertCertManagerResourcesReady(t *testing.T) { t.Helper()