diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/e2e-interception-proxy.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/e2e-interception-proxy.yaml new file mode 100644 index 000000000..1d6627b93 --- /dev/null +++ b/tests/e2e-prow/rhoai/manifests/lightspeed/e2e-interception-proxy.yaml @@ -0,0 +1,77 @@ +# In-cluster TLS-intercepting proxy for proxy.feature (Konflux / Prow). +# Llama Stack run.yaml points at http://e2e-interception-proxy.<namespace>.svc.cluster.local:8889 +apiVersion: v1 +kind: Pod +metadata: + name: e2e-interception-proxy + labels: + app: e2e-interception-proxy +spec: + securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + containers: + - name: e2e-interception-proxy + image: python:3.12-slim + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + workingDir: /app + env: + - name: HOME + value: /tmp + - name: PYTHONPATH + value: /app:/tmp/pydeps + command: + - /bin/sh + - -c + - | + set -e + pip install --quiet --no-cache-dir --target /tmp/pydeps 'trustme>=1.2.1' + exec python /app/interception_proxy.py + ports: + - containerPort: 8889 + name: proxy + - containerPort: 8886 + name: stats + volumeMounts: + - name: proxy-scripts + mountPath: /app + readOnly: true + readinessProbe: + httpGet: + path: /stats + port: stats + initialDelaySeconds: 5 + periodSeconds: 5 + livenessProbe: + httpGet: + path: /stats + port: stats + initialDelaySeconds: 10 + periodSeconds: 15 + volumes: + - name: proxy-scripts + configMap: + name: e2e-interception-proxy-script +--- +apiVersion: v1 +kind: Service +metadata: + name: e2e-interception-proxy +spec: + selector: + app: e2e-interception-proxy + ports: + - name: proxy + port: 8889 + targetPort: proxy + - name: stats + port: 8886 + targetPort: stats diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/e2e-mock-tls-inference.yaml 
b/tests/e2e-prow/rhoai/manifests/lightspeed/e2e-mock-tls-inference.yaml new file mode 100644 index 000000000..6797de24a --- /dev/null +++ b/tests/e2e-prow/rhoai/manifests/lightspeed/e2e-mock-tls-inference.yaml @@ -0,0 +1,104 @@ +# Mock HTTPS OpenAI API for tls.feature (Konflux / Prow; no Docker Compose). +# Llama Stack run.yaml uses https://e2e-mock-tls-inference.<namespace>.svc.cluster.local:8443|8444|8445/v1 +apiVersion: v1 +kind: Pod +metadata: + name: e2e-mock-tls-inference + labels: + app: e2e-mock-tls-inference +spec: + securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + containers: + - name: e2e-mock-tls-inference + image: python:3.12-slim + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: PYTHONPATH + value: /app:/tmp/pydeps + command: + - /bin/sh + - -c + - | + set -e + pip install --quiet --no-cache-dir --target /tmp/pydeps 'trustme>=1.2.1' 'cryptography>=42.0.0' + NS="${POD_NAMESPACE:-default}" + export TLS_CERT_DNS_NAMES="mock-tls-inference,localhost,127.0.0.1,e2e-mock-tls-inference,e2e-mock-tls-inference.${NS}.svc.cluster.local" + exec python /app/server.py + ports: + - containerPort: 8443 + name: tls + - containerPort: 8444 + name: mtls + - containerPort: 8445 + name: mismatch + volumeMounts: + - name: server-script + mountPath: /app/server.py + subPath: server.py + readOnly: true + - name: certs-work + mountPath: /certs + readinessProbe: + exec: + command: + - python3 + - -c + - | + import ssl, urllib.request + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + urllib.request.urlopen("https://localhost:8443/health", context=ctx) + initialDelaySeconds: 8 + periodSeconds: 5 + livenessProbe: + exec: + command: + - python3 + - -c + - | + import ssl, urllib.request + ctx = 
ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + urllib.request.urlopen("https://localhost:8443/health", context=ctx) + initialDelaySeconds: 15 + periodSeconds: 20 + volumes: + - name: server-script + configMap: + name: e2e-mock-tls-inference-script + - name: certs-work + emptyDir: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: e2e-mock-tls-inference +spec: + selector: + app: e2e-mock-tls-inference + ports: + - name: tls + port: 8443 + targetPort: tls + - name: mtls + port: 8444 + targetPort: mtls + - name: mismatch + port: 8445 + targetPort: mismatch diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/e2e-tunnel-proxy.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/e2e-tunnel-proxy.yaml new file mode 100644 index 000000000..e436fd18c --- /dev/null +++ b/tests/e2e-prow/rhoai/manifests/lightspeed/e2e-tunnel-proxy.yaml @@ -0,0 +1,69 @@ +# In-cluster HTTP CONNECT tunnel proxy for proxy.feature (Konflux / Prow). +# Llama Stack run.yaml points at http://e2e-tunnel-proxy.<namespace>.svc.cluster.local:8888 +apiVersion: v1 +kind: Pod +metadata: + name: e2e-tunnel-proxy + labels: + app: e2e-tunnel-proxy +spec: + securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + containers: + - name: e2e-tunnel-proxy + image: python:3.12-slim + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + workingDir: /app + env: + - name: PYTHONPATH + value: /app + command: ["python", "/app/tunnel_proxy.py"] + ports: + - containerPort: 8888 + name: proxy + - containerPort: 8887 + name: stats + volumeMounts: + - name: proxy-scripts + mountPath: /app + readOnly: true + readinessProbe: + httpGet: + path: /stats + port: stats + initialDelaySeconds: 2 + periodSeconds: 5 + livenessProbe: + httpGet: + path: /stats + port: stats + initialDelaySeconds: 5 + periodSeconds: 15 + volumes: + - name: proxy-scripts + configMap: 
+ name: e2e-tunnel-proxy-script +--- +apiVersion: v1 +kind: Service +metadata: + name: e2e-tunnel-proxy +spec: + selector: + app: e2e-tunnel-proxy + ports: + - name: proxy + port: 8888 + targetPort: proxy + - name: stats + port: 8887 + targetPort: stats diff --git a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-openai.yaml b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-openai.yaml index 3efea3fc1..b182a2463 100644 --- a/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-openai.yaml +++ b/tests/e2e-prow/rhoai/manifests/lightspeed/llama-stack-openai.yaml @@ -201,6 +201,15 @@ spec: - name: rag-data mountPath: /opt/app-root/rag-data-cm readOnly: true + # proxy.feature (interception): PEM from Secret e2e-interception-proxy-ca (optional). + - name: interception-proxy-ca + mountPath: /tmp/interception-proxy-ca.pem + subPath: ca.pem + readOnly: true + # tls.feature: client/CA PEMs from Secret e2e-mock-tls-certs (optional). + - name: mock-tls-certs + mountPath: /certs + readOnly: true volumes: - name: app-root emptyDir: {} @@ -213,3 +222,11 @@ spec: - name: rag-data configMap: name: rag-data + - name: interception-proxy-ca + secret: + secretName: e2e-interception-proxy-ca + optional: true + - name: mock-tls-certs + secret: + secretName: e2e-mock-tls-certs + optional: true diff --git a/tests/e2e-prow/rhoai/pipeline-konflux.sh b/tests/e2e-prow/rhoai/pipeline-konflux.sh index 931c1b4a7..dbd88fc43 100755 --- a/tests/e2e-prow/rhoai/pipeline-konflux.sh +++ b/tests/e2e-prow/rhoai/pipeline-konflux.sh @@ -135,6 +135,9 @@ oc wait pod/mock-jwks pod/mock-mcp \ } log "✅ Mock servers deployed" +# e2e-tunnel-proxy and e2e-interception-proxy are deployed from proxy.feature steps +# (see tests/e2e/features/steps/proxy.py + e2e-ops deploy-e2e-*-proxy). + #======================================== # 5. 
DEPLOY LIGHTSPEED STACK AND LLAMA STACK #======================================== diff --git a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh index 684a833c1..332a429c2 100755 --- a/tests/e2e-prow/rhoai/scripts/e2e-ops.sh +++ b/tests/e2e-prow/rhoai/scripts/e2e-ops.sh @@ -23,6 +23,10 @@ # update-configmap - Update ConfigMap from file # get-configmap-content - Get ConfigMap content (outputs to stdout) # disrupt-llama-stack - Delete llama-stack pod to disrupt connection +# deploy-e2e-tunnel-proxy - Deploy in-cluster tunnel proxy (proxy.feature step) +# deploy-e2e-interception-proxy - Deploy in-cluster interception proxy (proxy.feature step) +# deploy-e2e-mock-tls-inference - Deploy mock HTTPS inference server (tls.feature step) +# sync-mock-tls-certs-secret - Publish /certs PEMs to Secret for llama-stack mount set -e @@ -329,7 +333,55 @@ cmd_restart_lightspeed() { echo "✓ Lightspeed restart complete" } +cmd_reload_llama_stack_config() { + local llama_pod_name="llama-stack-service" + local tmp + + echo "===== Reloading llama-stack run.yaml (container restart, no pod recreate) =====" + tmp=$(mktemp) + if ! oc get configmap llama-stack-config -n "$NAMESPACE" \ + -o jsonpath='{.data.run\.yaml}' >"$tmp"; then + rm -f "$tmp" + echo "ERROR: failed to read llama-stack-config run.yaml" >&2 + return 1 + fi + if [[ ! -s "$tmp" ]]; then + rm -f "$tmp" + echo "ERROR: llama-stack-config run.yaml is empty" >&2 + return 1 + fi + if ! oc cp "$tmp" "$NAMESPACE/$llama_pod_name:/opt/app-root/run.yaml" \ + -c llama-stack-container; then + rm -f "$tmp" + echo "ERROR: failed to copy run.yaml into llama-stack pod" >&2 + return 1 + fi + rm -f "$tmp" + echo "Restarting llama-stack-container to pick up run.yaml..." + oc exec -n "$NAMESPACE" "$llama_pod_name" -c llama-stack-container -- bash -c 'kill 1' \ + 2>/dev/null || true + wait_for_pod "$llama_pod_name" 45 + if ! 
wait_for_llama_stack_http_health 35; then + echo "===== Llama-stack reload FAILED (HTTP not healthy) =====" + return 1 + fi + if ! cmd_restart_llama_port_forward; then + echo "ERROR: Llama pod is up but localhost:${LOCAL_LLAMA_PORT:-8321} port-forward failed" + return 1 + fi + echo "===== Llama-stack config reload complete =====" +} + cmd_restart_llama_stack() { + if [[ "${E2E_KONFLUX_E2E:-0}" == "1" && "${E2E_LLAMA_RELOAD_CONFIG_ONLY:-0}" == "1" ]]; then + if oc get pod llama-stack-service -n "$NAMESPACE" &>/dev/null; then + if cmd_reload_llama_stack_config; then + return 0 + fi + echo "WARN: llama config reload failed; falling back to full pod restart" >&2 + fi + fi + echo "===== Restoring llama-stack service =====" # Pod.spec is largely immutable; delete so apply creates a pod with current volumes/env. echo "Deleting llama-stack pod (if any) before apply..." @@ -340,16 +392,48 @@ cmd_restart_llama_stack() { echo "Applying pod manifest..." if [[ "${E2E_KONFLUX_E2E:-0}" == "1" ]]; then + # Interception-proxy e2e: refresh Secret before pod recreate so the volume mount is populated. + if [[ "${E2E_COPY_INTERCEPTION_CA_TO_LLAMA:-0}" == "1" ]]; then + echo "[e2e-ops] Syncing e2e-interception-proxy-ca secret before llama-stack apply..." + if ! cmd_sync_interception_proxy_ca_secret; then + echo "===== Llama-stack restore FAILED (interception CA secret sync) =====" + exit 1 + fi + fi + if [[ "${E2E_COPY_MOCK_TLS_CERTS_TO_LLAMA:-0}" == "1" \ + && "${E2E_LLAMA_RELOAD_CONFIG_ONLY:-0}" != "1" ]]; then + echo "[e2e-ops] Syncing e2e-mock-tls-certs secret before llama-stack apply..." + if ! 
cmd_sync_mock_tls_certs_secret; then + echo "===== Llama-stack restore FAILED (mock TLS certs secret sync) =====" + exit 1 + fi + fi _LLAMA_SVC_FQDN="llama-stack-service-svc.${NAMESPACE}.svc.cluster.local" oc create secret generic llama-stack-ip-secret \ --from-literal=key="$_LLAMA_SVC_FQDN" \ -n "$NAMESPACE" \ --dry-run=client -o yaml | oc apply -f - oc apply -n "$NAMESPACE" -f "$MANIFEST_DIR/llama-stack-openai.yaml" - wait_for_pod "llama-stack-service" 60 + wait_for_pod "llama-stack-service" 90 echo "Labeling pod for service..." oc label pod llama-stack-service pod=llama-stack-service -n "$NAMESPACE" --overwrite - if ! wait_for_llama_stack_http_health 35; then + if [[ "${E2E_COPY_INTERCEPTION_CA_TO_LLAMA:-0}" == "1" ]]; then + if ! _verify_interception_ca_mounted_in_llama; then + echo "===== Llama-stack restore FAILED (interception CA not mounted) =====" + exit 1 + fi + fi + if [[ "${E2E_COPY_MOCK_TLS_CERTS_TO_LLAMA:-0}" == "1" ]]; then + if ! _verify_mock_tls_certs_mounted_in_llama; then + echo "===== Llama-stack restore FAILED (mock TLS certs not mounted) =====" + exit 1 + fi + fi + local llama_health_attempts=50 + if [[ "${E2E_COPY_MOCK_TLS_CERTS_TO_LLAMA:-0}" == "1" ]]; then + llama_health_attempts=75 + fi + if ! 
wait_for_llama_stack_http_health "$llama_health_attempts"; then echo "===== Llama-stack restore FAILED (HTTP not healthy) =====" exit 1 fi @@ -612,6 +696,208 @@ cmd_get_configmap_content() { -o "go-template={{index .data \"$configmap_key\"}}" } +cmd_tunnel_proxy_stats() { + local pod_name + pod_name=$(oc get pod -n "$NAMESPACE" -l app=e2e-tunnel-proxy \ + -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) || pod_name="" + + if [[ -z "$pod_name" ]]; then + echo "ERROR: no e2e-tunnel-proxy pod in namespace $NAMESPACE" >&2 + return 1 + fi + + oc exec -n "$NAMESPACE" "$pod_name" -- \ + python3 -c "import urllib.request; print(urllib.request.urlopen('http://127.0.0.1:8887/stats', timeout=5).read().decode())" +} + +cmd_interception_proxy_stats() { + local pod_name + pod_name=$(oc get pod -n "$NAMESPACE" -l app=e2e-interception-proxy \ + -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) || pod_name="" + + if [[ -z "$pod_name" ]]; then + echo "ERROR: no e2e-interception-proxy pod in namespace $NAMESPACE" >&2 + return 1 + fi + + oc exec -n "$NAMESPACE" "$pod_name" -- \ + python3 -c "import urllib.request; print(urllib.request.urlopen('http://127.0.0.1:8886/stats', timeout=5).read().decode())" +} + +cmd_sync_interception_proxy_ca_secret() { + local proxy_pod_name tmp + proxy_pod_name=$(oc get pod -n "$NAMESPACE" -l app=e2e-interception-proxy \ + -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) || proxy_pod_name="" + + if [[ -z "$proxy_pod_name" ]]; then + echo "ERROR: no e2e-interception-proxy pod in namespace $NAMESPACE" >&2 + return 1 + fi + + tmp=$(mktemp) + if ! oc exec -n "$NAMESPACE" "$proxy_pod_name" -- \ + cat /tmp/interception-proxy-ca.pem >"$tmp"; then + rm -f "$tmp" + echo "ERROR: failed to read CA from e2e-interception-proxy pod" >&2 + return 1 + fi + if [[ ! -s "$tmp" ]]; then + rm -f "$tmp" + echo "ERROR: interception-proxy CA PEM is empty" >&2 + return 1 + fi + + if ! 
oc create secret generic e2e-interception-proxy-ca \ + --from-file=ca.pem="$tmp" \ + -n "$NAMESPACE" \ + --dry-run=client -o yaml | oc apply -n "$NAMESPACE" -f -; then + rm -f "$tmp" + echo "ERROR: failed to apply e2e-interception-proxy-ca secret" >&2 + return 1 + fi + rm -f "$tmp" + echo "✓ Secret e2e-interception-proxy-ca updated (ca.pem)" +} + +_verify_interception_ca_mounted_in_llama() { + local llama_pod_name="llama-stack-service" + if oc exec -n "$NAMESPACE" "$llama_pod_name" -c llama-stack-container -- \ + test -s /tmp/interception-proxy-ca.pem; then + echo "✓ interception-proxy CA present at /tmp/interception-proxy-ca.pem in llama-stack" + return 0 + fi + echo "ERROR: /tmp/interception-proxy-ca.pem missing or empty in llama-stack pod" >&2 + oc exec -n "$NAMESPACE" "$llama_pod_name" -c llama-stack-container -- \ + ls -la /tmp/interception-proxy-ca.pem 2>&1 || true + return 1 +} + +cmd_copy_interception_proxy_ca_to_llama() { + # Legacy name: publish CA via Secret (mounted by llama-stack-openai.yaml). + cmd_sync_interception_proxy_ca_secret +} + +_MOCK_TLS_CERT_FILES=( + ca.crt + client.crt + client.key + untrusted-ca.crt + expired-ca.crt + untrusted-client.crt + untrusted-client.key + expired-client.crt +) + +cmd_sync_mock_tls_certs_secret() { + local mock_pod_name tmpdir f + mock_pod_name=$(oc get pod -n "$NAMESPACE" -l app=e2e-mock-tls-inference \ + -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) || mock_pod_name="" + + if [[ -z "$mock_pod_name" ]]; then + echo "ERROR: no e2e-mock-tls-inference pod in namespace $NAMESPACE" >&2 + echo " Run: e2e-ops.sh deploy-e2e-mock-tls-inference" >&2 + return 1 + fi + + tmpdir=$(mktemp -d) + for f in "${_MOCK_TLS_CERT_FILES[@]}"; do + if ! oc exec -n "$NAMESPACE" "$mock_pod_name" -c e2e-mock-tls-inference -- \ + cat "/certs/$f" >"$tmpdir/$f"; then + echo "ERROR: failed to read /certs/$f from e2e-mock-tls-inference pod" >&2 + rm -rf "$tmpdir" + return 1 + fi + if [[ ! 
-s "$tmpdir/$f" ]]; then + echo "ERROR: /certs/$f is empty in e2e-mock-tls-inference pod" >&2 + rm -rf "$tmpdir" + return 1 + fi + done + + if ! oc create secret generic e2e-mock-tls-certs \ + --from-file="$tmpdir" \ + -n "$NAMESPACE" \ + --dry-run=client -o yaml | oc apply -f -; then + echo "ERROR: failed to apply e2e-mock-tls-certs secret" >&2 + rm -rf "$tmpdir" + return 1 + fi + rm -rf "$tmpdir" + echo "✓ Secret e2e-mock-tls-certs updated (${#_MOCK_TLS_CERT_FILES[@]} files)" +} + +_verify_mock_tls_certs_mounted_in_llama() { + local llama_pod_name="llama-stack-service" + if oc exec -n "$NAMESPACE" "$llama_pod_name" -c llama-stack-container -- \ + sh -c 'test -s /certs/ca.crt && test -s /certs/client.crt && test -s /certs/client.key'; then + echo "✓ mock TLS certs present under /certs in llama-stack" + return 0 + fi + echo "ERROR: /certs missing or incomplete in llama-stack pod" >&2 + oc get secret e2e-mock-tls-certs -n "$NAMESPACE" 2>&1 || true + oc exec -n "$NAMESPACE" "$llama_pod_name" -c llama-stack-container -- \ + ls -la /certs 2>&1 || true + return 1 +} + +_e2e_repo_root() { + cd "$SCRIPT_DIR/../../../.." && pwd +} + +cmd_deploy_e2e_tunnel_proxy() { + local repo_root + repo_root="$(_e2e_repo_root)" + echo "Deploying e2e-tunnel-proxy in namespace $NAMESPACE..." + oc create configmap e2e-tunnel-proxy-script -n "$NAMESPACE" \ + --from-file=tunnel_proxy.py="$repo_root/tests/e2e/proxy/tunnel_proxy.py" \ + --dry-run=client -o yaml | oc apply -f - + oc apply -n "$NAMESPACE" -f "$MANIFEST_DIR/e2e-tunnel-proxy.yaml" + if ! 
oc wait pod/e2e-tunnel-proxy -n "$NAMESPACE" --for=condition=Ready --timeout=120s; then + echo "ERROR: e2e-tunnel-proxy failed to become ready" >&2 + oc describe pod e2e-tunnel-proxy -n "$NAMESPACE" 2>/dev/null | tail -25 || true + return 1 + fi + echo "✓ e2e-tunnel-proxy ready at http://e2e-tunnel-proxy.${NAMESPACE}.svc.cluster.local:8888" +} + +cmd_deploy_e2e_interception_proxy() { + local repo_root + repo_root="$(_e2e_repo_root)" + echo "Deploying e2e-interception-proxy in namespace $NAMESPACE..." + oc create configmap e2e-interception-proxy-script -n "$NAMESPACE" \ + --from-file=interception_proxy.py="$repo_root/tests/e2e/proxy/interception_proxy.py" \ + --dry-run=client -o yaml | oc apply -f - + oc apply -n "$NAMESPACE" -f "$MANIFEST_DIR/e2e-interception-proxy.yaml" + if ! oc wait pod/e2e-interception-proxy -n "$NAMESPACE" --for=condition=Ready --timeout=180s; then + echo "ERROR: e2e-interception-proxy failed to become ready" >&2 + oc describe pod e2e-interception-proxy -n "$NAMESPACE" 2>/dev/null | tail -25 || true + return 1 + fi + echo "✓ e2e-interception-proxy ready at http://e2e-interception-proxy.${NAMESPACE}.svc.cluster.local:8889" +} + +cmd_deploy_e2e_mock_tls_inference() { + local repo_root + repo_root="$(_e2e_repo_root)" + echo "Deploying e2e-mock-tls-inference in namespace $NAMESPACE..." + oc create configmap e2e-mock-tls-inference-script -n "$NAMESPACE" \ + --from-file=server.py="$repo_root/tests/e2e/mock_tls_inference_server/server.py" \ + --dry-run=client -o yaml | oc apply -f - + oc delete pod e2e-mock-tls-inference -n "$NAMESPACE" --ignore-not-found=true --wait=true 2>/dev/null || true + oc apply -n "$NAMESPACE" -f "$MANIFEST_DIR/e2e-mock-tls-inference.yaml" + if ! 
oc wait pod/e2e-mock-tls-inference -n "$NAMESPACE" --for=condition=Ready --timeout=240s; then + echo "ERROR: e2e-mock-tls-inference failed to become ready" >&2 + oc describe pod/e2e-mock-tls-inference -n "$NAMESPACE" 2>/dev/null | tail -30 || true + oc logs e2e-mock-tls-inference -n "$NAMESPACE" --tail=40 2>&1 || true + return 1 + fi + echo "✓ e2e-mock-tls-inference ready at https://e2e-mock-tls-inference.${NAMESPACE}.svc.cluster.local:8443" + if ! cmd_sync_mock_tls_certs_secret; then + echo "WARNING: mock TLS server is up but e2e-mock-tls-certs secret sync failed" >&2 + return 1 + fi +} + cmd_disrupt_llama_stack() { local pod_name="llama-stack-service" @@ -664,6 +950,30 @@ case "$COMMAND" in disrupt-llama-stack) cmd_disrupt_llama_stack ;; + tunnel-proxy-stats) + cmd_tunnel_proxy_stats + ;; + interception-proxy-stats) + cmd_interception_proxy_stats + ;; + copy-interception-proxy-ca-to-llama) + cmd_copy_interception_proxy_ca_to_llama + ;; + sync-interception-proxy-ca-secret) + cmd_sync_interception_proxy_ca_secret + ;; + deploy-e2e-tunnel-proxy) + cmd_deploy_e2e_tunnel_proxy + ;; + deploy-e2e-interception-proxy) + cmd_deploy_e2e_interception_proxy + ;; + deploy-e2e-mock-tls-inference) + cmd_deploy_e2e_mock_tls_inference + ;; + sync-mock-tls-certs-secret) + cmd_sync_mock_tls_certs_secret + ;; *) echo "Usage: $0 [args...]" echo "" @@ -676,6 +986,14 @@ case "$COMMAND" in echo " update-configmap - Update ConfigMap from file" echo " get-configmap-content - Get ConfigMap content (outputs to stdout)" echo " disrupt-llama-stack - Delete llama-stack pod to disrupt connection" + echo " tunnel-proxy-stats - JSON stats from in-cluster e2e-tunnel-proxy" + echo " interception-proxy-stats - JSON stats from in-cluster e2e-interception-proxy" + echo " copy-interception-proxy-ca-to-llama - Alias for sync-interception-proxy-ca-secret" + echo " sync-interception-proxy-ca-secret - Publish trustme CA to Secret for llama mount" + echo " deploy-e2e-tunnel-proxy - Deploy in-cluster tunnel 
proxy pod" + echo " deploy-e2e-interception-proxy - Deploy in-cluster interception proxy pod" + echo " deploy-e2e-mock-tls-inference - Deploy mock HTTPS inference server (tls.feature)" + echo " sync-mock-tls-certs-secret - Publish mock TLS /certs PEMs to Secret for llama" exit 1 ;; esac diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-tls.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-tls.yaml index babdc2b99..fd45ea744 100644 --- a/tests/e2e/configuration/server-mode/lightspeed-stack-tls.yaml +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-tls.yaml @@ -8,7 +8,7 @@ service: access_log: true llama_stack: use_as_library_client: false - url: http://llama-stack:8321 + url: http://${env.E2E_LLAMA_HOSTNAME}:8321 api_key: xyzzy user_data_collection: feedback_enabled: true diff --git a/tests/e2e/features/environment.py b/tests/e2e/features/environment.py index fdca1247c..e97f993a5 100644 --- a/tests/e2e/features/environment.py +++ b/tests/e2e/features/environment.py @@ -26,6 +26,7 @@ reset_llama_stack_disrupt_once_tracking, reset_llama_stack_was_running, ) +from tests.e2e.features.steps.tls import reset_tls_prow_restart_optimization_state from tests.e2e.utils.llama_stack_utils import register_shield from tests.e2e.utils.prow_utils import ( restart_pod, @@ -451,6 +452,8 @@ def before_feature(context: Context, feature: Feature) -> None: context.active_lightspeed_stack_config_basename = None # One real Llama disruption per feature (module-level flag; survives context resets) reset_llama_stack_disrupt_once_tracking() + if feature.filename and "tls.feature" in feature.filename: + reset_tls_prow_restart_optimization_state() try: max_flaky = int(os.getenv("E2E_FLAKY_MAX_ATTEMPTS", _E2E_FLAKY_MAX_ATTEMPTS)) diff --git a/tests/e2e/features/proxy.feature b/tests/e2e/features/proxy.feature index 1b0d4c6ac..00fde258a 100644 --- a/tests/e2e/features/proxy.feature +++ b/tests/e2e/features/proxy.feature @@ -1,4 +1,4 @@ -@e2e_group_3 
@skip-in-library-mode +@e2e_group_3 @skip-in-library-mode @skip-in-prow Feature: Proxy and TLS networking tests for Llama Stack providers Verify that the Lightspeed Stack works correctly when Llama Stack's @@ -21,7 +21,7 @@ Feature: Proxy and TLS networking tests for Llama Stack providers # --- AC1: Tunnel proxy routing --- - @TunnelProxy @skip-in-prow + @TunnelProxy Scenario: LLM traffic is routed through a configured tunnel proxy Given A tunnel proxy is running on port 8888 And Llama Stack is configured to route inference through the tunnel proxy @@ -53,7 +53,7 @@ Feature: Proxy and TLS networking tests for Llama Stack providers # --- AC2: Interception proxy with CA certificate --- - @InterceptionProxy @skip-in-prow + @InterceptionProxy @flaky Scenario: LLM traffic works through interception proxy with correct CA Given An interception proxy with trustme CA is running on port 8889 And Llama Stack is configured to route inference through the interception proxy with CA cert @@ -66,7 +66,7 @@ Feature: Proxy and TLS networking tests for Llama Stack providers Then The status code of the response is 200 And The interception proxy intercepted at least 1 connection - @InterceptionProxy @skip-in-prow + @InterceptionProxy Scenario: LLM query fails when interception proxy CA is not provided Given An interception proxy with trustme CA is running on port 8890 And Llama Stack is configured to route inference through the interception proxy without CA cert @@ -76,12 +76,13 @@ Feature: Proxy and TLS networking tests for Llama Stack providers """ {"query": "What is 2+2?", "model": "{MODEL}", "provider": "{PROVIDER}", "shield_ids": []} """ - Then The status code of the response is 500 + #will be fixed in https://redhat.atlassian.net/browse/LCORE-2255 + Then The status code of the response is one of 404 or 500 # --- AC3: TLS version and cipher configuration --- - @TLSVersion + @TLSVersion @flaky Scenario: TLS minimum version TLSv1.2 is respected Given Llama Stack is configured with 
minimum TLS version "TLSv1.2" And Llama Stack is restarted @@ -92,7 +93,7 @@ Feature: Proxy and TLS networking tests for Llama Stack providers """ Then The status code of the response is 200 - @TLSVersion + @TLSVersion @flaky Scenario: TLS minimum version TLSv1.3 is respected Given Llama Stack is configured with minimum TLS version "TLSv1.3" And Llama Stack is restarted @@ -103,7 +104,7 @@ Feature: Proxy and TLS networking tests for Llama Stack providers """ Then The status code of the response is 200 - @TLSCipher + @TLSCipher @flaky Scenario: Custom cipher suite configuration is respected Given Llama Stack is configured with ciphers "ECDHE+AESGCM:DHE+AESGCM" And Llama Stack is restarted diff --git a/tests/e2e/features/steps/proxy.py b/tests/e2e/features/steps/proxy.py index 6b2910d17..597204d92 100644 --- a/tests/e2e/features/steps/proxy.py +++ b/tests/e2e/features/steps/proxy.py @@ -13,6 +13,8 @@ """ import asyncio +import json +import os import subprocess import tempfile import threading @@ -30,6 +32,7 @@ restore_llama_config_if_modified, write_llama_config, ) +from tests.e2e.utils.prow_utils import get_namespace, run_e2e_ops from tests.e2e.utils.utils import ( is_prow_environment, restart_container, @@ -93,13 +96,110 @@ def _host_special_dns_from_container(hostname: str) -> Optional[str]: return ip or None +def _cluster_tunnel_proxy_host() -> str: + """DNS name of the in-cluster tunnel proxy (Konflux / Prow).""" + explicit = os.getenv("E2E_PROXY_HOST", "").strip() + if explicit: + return explicit + return f"e2e-tunnel-proxy.{get_namespace()}.svc.cluster.local" + + +def _fetch_cluster_tunnel_proxy_stats() -> dict[str, Any]: + """Read CONNECT counters from the e2e-tunnel-proxy stats HTTP server.""" + result = run_e2e_ops("tunnel-proxy-stats", timeout=60) + if result.returncode != 0: + raise AssertionError( + "Failed to read e2e-tunnel-proxy stats: " + f"{result.stderr or result.stdout}" + ) + stats = json.loads(result.stdout.strip()) + assert isinstance(stats, 
dict), "tunnel-proxy-stats did not return a JSON object" + return stats + + +def _cluster_interception_proxy_host() -> str: + """DNS name of the in-cluster interception proxy (Konflux / Prow).""" + explicit = os.getenv("E2E_INTERCEPTION_PROXY_HOST", "").strip() + if explicit: + return explicit + return f"e2e-interception-proxy.{get_namespace()}.svc.cluster.local" + + +def _cluster_interception_proxy_port(requested_port: int) -> int: + """Map feature-file port to the in-cluster interception proxy listener.""" + if requested_port in (8889, 8890): + return 8889 + raise AssertionError( + "In-cluster e2e-interception-proxy listens on 8889 only; " + f"scenario requested port {requested_port}" + ) + + +def _deploy_cluster_tunnel_proxy() -> None: + """Deploy the in-cluster tunnel proxy pod (Konflux / Prow).""" + result = run_e2e_ops("deploy-e2e-tunnel-proxy", timeout=180) + print(result.stdout, end="") + if result.returncode != 0: + raise AssertionError( + "Failed to deploy e2e-tunnel-proxy: " f"{result.stderr or result.stdout}" + ) + os.environ.setdefault( + "E2E_PROXY_HOST", + f"e2e-tunnel-proxy.{get_namespace()}.svc.cluster.local", + ) + + +def _deploy_cluster_interception_proxy() -> None: + """Deploy the in-cluster interception proxy pod (Konflux / Prow).""" + result = run_e2e_ops("deploy-e2e-interception-proxy", timeout=200) + print(result.stdout, end="") + if result.returncode != 0: + raise AssertionError( + "Failed to deploy e2e-interception-proxy: " + f"{result.stderr or result.stdout}" + ) + os.environ.setdefault( + "E2E_INTERCEPTION_PROXY_HOST", + f"e2e-interception-proxy.{get_namespace()}.svc.cluster.local", + ) + + +def _fetch_cluster_interception_proxy_stats() -> dict[str, Any]: + """Read interception counters from the e2e-interception-proxy stats HTTP server.""" + result = run_e2e_ops("interception-proxy-stats", timeout=60) + if result.returncode != 0: + raise AssertionError( + "Failed to read e2e-interception-proxy stats: " + f"{result.stderr or 
result.stdout}" + ) + stats = json.loads(result.stdout.strip()) + assert isinstance(stats, dict), "interception-proxy-stats did not return JSON" + return stats + + +_INTERCEPTION_CA_LLAMA_PATH = "/tmp/interception-proxy-ca.pem" + + +def _sync_interception_proxy_ca_secret() -> None: + """Publish trustme CA to Secret ``e2e-interception-proxy-ca`` (mounted by llama pod).""" + result = run_e2e_ops("sync-interception-proxy-ca-secret", timeout=90) + print(result.stdout, end="") + if result.returncode != 0: + raise AssertionError( + "Failed to sync interception proxy CA secret: " + f"{result.stderr or result.stdout}" + ) + + def _get_proxy_host(is_docker: bool) -> str: - """Get the host address that containers can use to reach the proxy on the host. + """Get the host address that Llama Stack should use to reach the tunnel proxy. Parameters: ---------- is_docker: Whether services are running in Docker (local e2e). """ + if is_prow_environment(): + return _cluster_tunnel_proxy_host() if not is_docker: return "127.0.0.1" for hostname in ("host.docker.internal", "host.containers.internal"): @@ -194,6 +294,11 @@ def restore_if_modified(context: Context) -> None: # Stop any leftover proxy servers from previous scenario _stop_proxy(context, "tunnel_proxy", "proxy_loop") _stop_proxy(context, "interception_proxy", "interception_proxy_loop") + os.environ.pop("E2E_COPY_INTERCEPTION_CA_TO_LLAMA", None) + os.environ.pop("E2E_COPY_MOCK_TLS_CERTS_TO_LLAMA", None) + os.environ.pop("E2E_LLAMA_RELOAD_CONFIG_ONLY", None) + if hasattr(context, "needs_interception_ca_on_llama"): + delattr(context, "needs_interception_ca_on_llama") if restore_llama_config_if_modified(): print("Restoring original Llama Stack config from backup...") @@ -220,7 +325,22 @@ def restart_lightspeed_stack(context: Context) -> None: @given("A tunnel proxy is running on port {port:d}") def start_tunnel_proxy(context: Context, port: int) -> None: - """Start a tunnel proxy in a background thread.""" + """Start a tunnel 
proxy locally, or verify the in-cluster proxy (Konflux/Prow).""" + if is_prow_environment(): + if port != 8888: + raise AssertionError( + "In-cluster e2e-tunnel-proxy is fixed on port 8888; " + f"scenario requested port {port}" + ) + context.tunnel_proxy = None + context.cluster_tunnel_proxy_port = port + _deploy_cluster_tunnel_proxy() + print( + f"Using in-cluster tunnel proxy at " + f"http://{_cluster_tunnel_proxy_host()}:{port}" + ) + return + from tests.e2e.proxy.tunnel_proxy import TunnelProxy # Bind to 0.0.0.0 so Docker containers can reach the proxy @@ -243,7 +363,11 @@ def run_proxy() -> None: def configure_llama_tunnel_proxy(context: Context) -> None: """Modify run.yaml with proxy config pointing to the tunnel proxy.""" backup_llama_config() - proxy = context.tunnel_proxy + if is_prow_environment(): + proxy_port = getattr(context, "cluster_tunnel_proxy_port", 8888) + else: + proxy = context.tunnel_proxy + proxy_port = proxy.port proxy_host = _get_proxy_host(context.is_docker_mode) config = load_llama_config() provider = _find_inference_provider(context, config) @@ -252,7 +376,7 @@ def configure_llama_tunnel_proxy(context: Context) -> None: provider["config"] = {} provider["config"]["network"] = { "proxy": { - "url": f"http://{proxy_host}:{proxy.port}", + "url": f"http://{proxy_host}:{proxy_port}", } } @@ -283,6 +407,18 @@ def configure_llama_unreachable_proxy(context: Context, proxy_url: str) -> None: @given("An interception proxy with trustme CA is running on port {port:d}") def start_interception_proxy(context: Context, port: int) -> None: """Start an interception proxy with trustme CA.""" + if is_prow_environment(): + cluster_port = _cluster_interception_proxy_port(port) + context.interception_proxy = None + context.cluster_interception_proxy_port = cluster_port + context.ca_cert_path_for_config = _INTERCEPTION_CA_LLAMA_PATH + _deploy_cluster_interception_proxy() + print( + f"Using in-cluster interception proxy at " + 
f"http://{_cluster_interception_proxy_host()}:{cluster_port}" + ) + return + from tests.e2e.proxy.interception_proxy import InterceptionProxy ca = trustme.CA() @@ -325,8 +461,16 @@ def run_proxy() -> None: def configure_llama_interception_with_ca(context: Context) -> None: """Modify run.yaml with interception proxy and CA cert config.""" backup_llama_config() - proxy = context.interception_proxy - proxy_host = _get_proxy_host(context.is_docker_mode) + context.needs_interception_ca_on_llama = True + if is_prow_environment(): + os.environ["E2E_COPY_INTERCEPTION_CA_TO_LLAMA"] = "1" + if is_prow_environment(): + proxy_port = getattr(context, "cluster_interception_proxy_port", 8889) + proxy_host = _cluster_interception_proxy_host() + else: + proxy = context.interception_proxy + proxy_port = proxy.port + proxy_host = _get_proxy_host(context.is_docker_mode) config = load_llama_config() provider = _find_inference_provider(context, config) @@ -334,7 +478,7 @@ def configure_llama_interception_with_ca(context: Context) -> None: provider["config"] = {} provider["config"]["network"] = { "proxy": { - "url": f"http://{proxy_host}:{proxy.port}", + "url": f"http://{proxy_host}:{proxy_port}", "cacert": context.ca_cert_path_for_config, }, "tls": { @@ -343,6 +487,8 @@ def configure_llama_interception_with_ca(context: Context) -> None: } write_llama_config(config) + if is_prow_environment(): + _sync_interception_proxy_ca_secret() @given( @@ -352,8 +498,15 @@ def configure_llama_interception_with_ca(context: Context) -> None: def configure_llama_interception_no_ca(context: Context) -> None: """Modify run.yaml with interception proxy but NO CA cert.""" backup_llama_config() - proxy = context.interception_proxy - proxy_host = _get_proxy_host(context.is_docker_mode) + context.needs_interception_ca_on_llama = False + os.environ.pop("E2E_COPY_INTERCEPTION_CA_TO_LLAMA", None) + if is_prow_environment(): + proxy_port = getattr(context, "cluster_interception_proxy_port", 8889) + proxy_host = 
_cluster_interception_proxy_host() + else: + proxy = context.interception_proxy + proxy_port = proxy.port + proxy_host = _get_proxy_host(context.is_docker_mode) config = load_llama_config() provider = _find_inference_provider(context, config) @@ -361,7 +514,7 @@ def configure_llama_interception_no_ca(context: Context) -> None: provider["config"] = {} provider["config"]["network"] = { "proxy": { - "url": f"http://{proxy_host}:{proxy.port}", + "url": f"http://{proxy_host}:{proxy_port}", }, } @@ -415,6 +568,16 @@ def configure_llama_ciphers(context: Context, ciphers: str) -> None: ) def verify_tunnel_proxy_used(context: Context, count: int) -> None: """Verify the tunnel proxy received CONNECT requests.""" + if is_prow_environment(): + stats = _fetch_cluster_tunnel_proxy_stats() + connect_count = int(stats.get("connect_count", 0)) + last_target = stats.get("last_connect_target") + assert ( + connect_count >= count + ), f"Expected at least {count} CONNECT requests, got {connect_count}" + assert last_target is not None, "No CONNECT target recorded" + return + proxy = context.tunnel_proxy assert proxy.connect_count >= count, ( f"Expected at least {count} CONNECT requests, " f"got {proxy.connect_count}" @@ -425,6 +588,16 @@ def verify_tunnel_proxy_used(context: Context, count: int) -> None: @then("The interception proxy intercepted at least {count:d} connection") def verify_interception_proxy_used(context: Context, count: int) -> None: """Verify the interception proxy intercepted connections.""" + if is_prow_environment(): + stats = _fetch_cluster_interception_proxy_stats() + connect_count = int(stats.get("connect_count", 0)) + assert ( + connect_count >= count + ), f"Expected at least {count} intercepted connections, got {connect_count}" + intercepted = stats.get("intercepted_hosts") or [] + assert intercepted, "No intercepted hosts recorded" + return + proxy = context.interception_proxy assert proxy.connect_count >= count, ( f"Expected at least {count} intercepted 
connections, " diff --git a/tests/e2e/features/steps/tls.py b/tests/e2e/features/steps/tls.py index 66d56adcc..0bf7b6905 100644 --- a/tests/e2e/features/steps/tls.py +++ b/tests/e2e/features/steps/tls.py @@ -9,6 +9,7 @@ """ import copy +import os from typing import Any, Optional from behave import given # pyright: ignore[reportAttributeAccessIssue] @@ -19,16 +20,12 @@ load_llama_config, write_llama_config, ) +from tests.e2e.utils.prow_utils import get_namespace, run_e2e_ops +from tests.e2e.utils.utils import is_prow_environment -_TLS_PROVIDER_BASE: dict[str, Any] = { - "provider_id": "tls-openai", - "provider_type": "remote::openai", - "config": { - "api_key": "test-key", - "base_url": "https://mock-tls-inference:8443/v1", - "allowed_models": ["mock-tls-model"], - }, -} +_MOCK_TLS_PORT_TLS = 8443 +_MOCK_TLS_PORT_MTLS = 8444 +_MOCK_TLS_PORT_HOSTNAME_MISMATCH = 8445 _TLS_MODEL_RESOURCE: dict[str, str] = { "model_id": "mock-tls-model", @@ -36,6 +33,74 @@ "provider_model_id": "mock-tls-model", } +_mock_tls_cluster_deploy_state: dict[str, bool] = {"done": False} +_tls_llama_warm_in_prow: dict[str, bool] = {"done": False} + + +def reset_tls_prow_restart_optimization_state() -> None: + """Reset per-feature Prow restart optimizations (call from ``before_feature``).""" + _tls_llama_warm_in_prow["done"] = False + os.environ.pop("E2E_LLAMA_RELOAD_CONFIG_ONLY", None) + + +def _prepare_tls_prow_llama_restart_env() -> None: + """Set env vars so e2e-ops can reload run.yaml instead of recreating the pod.""" + os.environ["E2E_COPY_MOCK_TLS_CERTS_TO_LLAMA"] = "1" + if _tls_llama_warm_in_prow["done"]: + os.environ["E2E_LLAMA_RELOAD_CONFIG_ONLY"] = "1" + else: + os.environ.pop("E2E_LLAMA_RELOAD_CONFIG_ONLY", None) + + +def _cluster_mock_tls_inference_host() -> str: + """DNS name of the in-cluster mock TLS inference server (Konflux / Prow).""" + explicit = os.getenv("E2E_MOCK_TLS_INFERENCE_HOST", "").strip() + if explicit: + return explicit + return 
f"e2e-mock-tls-inference.{get_namespace()}.svc.cluster.local" + + +def _mock_tls_base_url(port: int) -> str: + """OpenAI-compatible base URL for the mock TLS inference server.""" + if is_prow_environment(): + host = _cluster_mock_tls_inference_host() + else: + host = "mock-tls-inference" + return f"https://{host}:{port}/v1" + + +def _tls_provider_base() -> dict[str, Any]: + """Default tls-openai provider dict with environment-appropriate base_url.""" + return { + "provider_id": "tls-openai", + "provider_type": "remote::openai", + "config": { + "api_key": "test-key", + "base_url": _mock_tls_base_url(_MOCK_TLS_PORT_TLS), + "allowed_models": ["mock-tls-model"], + }, + } + + +def _deploy_cluster_mock_tls_inference() -> None: + """Deploy the in-cluster mock TLS inference pod (Konflux / Prow).""" + if _mock_tls_cluster_deploy_state["done"]: + print("Using existing e2e-mock-tls-inference deployment") + return + + result = run_e2e_ops("deploy-e2e-mock-tls-inference", timeout=300) + print(result.stdout, end="") + if result.returncode != 0: + raise AssertionError( + "Failed to deploy e2e-mock-tls-inference: " + f"{result.stderr or result.stdout}" + ) + os.environ.setdefault( + "E2E_MOCK_TLS_INFERENCE_HOST", + _cluster_mock_tls_inference_host(), + ) + _mock_tls_cluster_deploy_state["done"] = True + def _ensure_tls_provider(config: dict[str, Any]) -> dict[str, Any]: """Find or create the tls-openai inference provider in the config. 
@@ -59,7 +124,7 @@ def _ensure_tls_provider(config: dict[str, Any]) -> dict[str, Any]: return provider # Provider not found — add it - provider = copy.deepcopy(_TLS_PROVIDER_BASE) + provider = copy.deepcopy(_tls_provider_base()) inference.append(provider) # Also register the model resource @@ -85,8 +150,14 @@ def _configure_tls(tls_config: dict[str, Any], base_url: Optional[str] = None) - provider.setdefault("config", {}).setdefault("network", {}) if base_url is not None: provider["config"]["base_url"] = base_url + else: + provider["config"]["base_url"] = _mock_tls_base_url(_MOCK_TLS_PORT_TLS) provider["config"]["network"]["tls"] = tls_config write_llama_config(config) + if is_prow_environment(): + _prepare_tls_prow_llama_restart_env() + if not _tls_llama_warm_in_prow["done"]: + _tls_llama_warm_in_prow["done"] = True # --- Background Steps --- @@ -94,6 +165,15 @@ def _configure_tls(tls_config: dict[str, Any], base_url: Optional[str] = None) - # run.yaml (see proxy.py). Restart steps are listed in tls.feature / proxy.feature. 
+@given("The mock TLS inference server is deployed") +def deploy_mock_tls_inference_server(context: Context) -> None: + """Ensure mock TLS inference is reachable (Compose locally, pod in Prow).""" + if is_prow_environment(): + _deploy_cluster_mock_tls_inference() + return + print("Using docker-compose mock-tls-inference service") + + # --- TLS Configuration Steps --- @@ -124,7 +204,7 @@ def configure_tls_mtls(context: Context) -> None: "client_cert": "/certs/client.crt", "client_key": "/certs/client.key", }, - base_url="https://mock-tls-inference:8444/v1", + base_url=_mock_tls_base_url(_MOCK_TLS_PORT_MTLS), ) @@ -139,7 +219,7 @@ def configure_mtls_no_client_cert(context: Context) -> None: """Configure run.yaml for mTLS port without client cert (should fail).""" _configure_tls( {"verify": "/certs/ca.crt"}, - base_url="https://mock-tls-inference:8444/v1", + base_url=_mock_tls_base_url(_MOCK_TLS_PORT_MTLS), ) @@ -152,7 +232,7 @@ def configure_mtls_wrong_client_cert(context: Context) -> None: "client_cert": "/certs/ca.crt", "client_key": "/certs/client.key", }, - base_url="https://mock-tls-inference:8444/v1", + base_url=_mock_tls_base_url(_MOCK_TLS_PORT_MTLS), ) @@ -165,7 +245,7 @@ def configure_mtls_untrusted_client_cert(context: Context) -> None: "client_cert": "/certs/untrusted-client.crt", "client_key": "/certs/untrusted-client.key", }, - base_url="https://mock-tls-inference:8444/v1", + base_url=_mock_tls_base_url(_MOCK_TLS_PORT_MTLS), ) @@ -178,7 +258,7 @@ def configure_mtls_expired_client_cert(context: Context) -> None: "client_cert": "/certs/expired-client.crt", "client_key": "/certs/client.key", }, - base_url="https://mock-tls-inference:8444/v1", + base_url=_mock_tls_base_url(_MOCK_TLS_PORT_MTLS), ) @@ -187,7 +267,7 @@ def configure_tls_hostname_mismatch(context: Context) -> None: """Configure run.yaml to connect to hostname-mismatch server (should fail).""" _configure_tls( {"verify": "/certs/ca.crt"}, - base_url="https://mock-tls-inference:8445/v1", + 
base_url=_mock_tls_base_url(_MOCK_TLS_PORT_HOSTNAME_MISMATCH), ) @@ -200,7 +280,7 @@ def configure_mtls_hostname_mismatch(context: Context) -> None: "client_cert": "/certs/client.crt", "client_key": "/certs/client.key", }, - base_url="https://mock-tls-inference:8445/v1", + base_url=_mock_tls_base_url(_MOCK_TLS_PORT_HOSTNAME_MISMATCH), ) @@ -211,7 +291,7 @@ def configure_tls_min_version_hostname_mismatch(context: Context, version: str) """Configure run.yaml with TLS min version against hostname-mismatch server.""" _configure_tls( {"verify": "/certs/ca.crt", "min_version": version}, - base_url="https://mock-tls-inference:8445/v1", + base_url=_mock_tls_base_url(_MOCK_TLS_PORT_HOSTNAME_MISMATCH), ) diff --git a/tests/e2e/features/tls.feature b/tests/e2e/features/tls.feature index a900b1c0f..15215408e 100644 --- a/tests/e2e/features/tls.feature +++ b/tests/e2e/features/tls.feature @@ -1,8 +1,10 @@ -@e2e_group_1 @skip-in-library-mode @skip-in-prow +@e2e_group_1 @skip-in-library-mode Feature: TLS configuration for remote inference providers Validate that Llama Stack's NetworkConfig.tls settings are applied correctly when connecting to a remote inference provider over HTTPS. + # Only Llama run.yaml changes per scenario; LCS uses lightspeed-stack-tls.yaml throughout. 
+ Background: Given The service is started locally And The system is in default state @@ -10,6 +12,7 @@ Feature: TLS configuration for remote inference providers And the Lightspeed stack configuration directory is "tests/e2e/configuration" And The service uses the lightspeed-stack-tls.yaml configuration And The service is restarted + And The mock TLS inference server is deployed And The original Llama Stack config is restored if modified Scenario: Inference succeeds with TLS verification disabled diff --git a/tests/e2e/mock_tls_inference_server/server.py b/tests/e2e/mock_tls_inference_server/server.py index bfb4cbae5..25bd23a0c 100644 --- a/tests/e2e/mock_tls_inference_server/server.py +++ b/tests/e2e/mock_tls_inference_server/server.py @@ -13,6 +13,7 @@ import datetime import json +import os import ssl import threading import time @@ -29,6 +30,25 @@ MTLS_PORT = 8444 HOSTNAME_MISMATCH_PORT = 8445 +_DEFAULT_SERVER_CERT_DNS_NAMES: tuple[str, ...] = ( + "mock-tls-inference", + "localhost", + "127.0.0.1", +) + + +def _server_cert_dns_names() -> tuple[str, ...]: + """Return DNS identities for the main server certificate. + + Reads comma-separated ``TLS_CERT_DNS_NAMES`` (set in Konflux/Prow manifest). + Falls back to Docker Compose defaults when unset. 
+ """ + raw = os.environ.get("TLS_CERT_DNS_NAMES", "").strip() + if not raw: + return _DEFAULT_SERVER_CERT_DNS_NAMES + names = tuple(name.strip() for name in raw.split(",") if name.strip()) + return names or _DEFAULT_SERVER_CERT_DNS_NAMES + class OpenAIHandler(BaseHTTPRequestHandler): """Handles OpenAI-compatible API requests over HTTPS.""" @@ -221,8 +241,9 @@ def main() -> None: # Generate CA and certificates ca = trustme.CA() - # Server cert with SANs for Docker service name and localhost - server_cert = ca.issue_cert("mock-tls-inference", "localhost", "127.0.0.1") + server_dns_names = _server_cert_dns_names() + print(f" Server cert DNS names: {', '.join(server_dns_names)}") + server_cert = ca.issue_cert(*server_dns_names) # Client cert for mTLS testing (use a simple hostname without spaces) client_cert = ca.issue_cert("tls-e2e-test-client") diff --git a/tests/e2e/proxy/interception_proxy.py b/tests/e2e/proxy/interception_proxy.py index 5977fb245..2b465563d 100644 --- a/tests/e2e/proxy/interception_proxy.py +++ b/tests/e2e/proxy/interception_proxy.py @@ -4,23 +4,25 @@ and re-encrypts toward the destination using trustme-generated certificates. This simulates a corporate interception proxy (SSL inspection). -The proxy generates a unique server certificate for each CONNECT target -using the trustme CA, so the client must trust the CA certificate to -successfully connect. - -Usage:: +Local Behave usage:: import trustme ca = trustme.CA() proxy = InterceptionProxy(ca=ca, port=8889) await proxy.start() - # ... run tests with HTTPS_PROXY=http://localhost:8889 - # and ca_cert_path pointing to the trustme CA cert ... + # ... run tests with proxy URL and ca_cert_path pointing to the trustme CA ... 
await proxy.stop() assert proxy.intercepted_hosts # verify interception happened + +In-cluster (Konflux/Prow) usage:: + + python interception_proxy.py + # MITM on 8889; GET http://127.0.0.1:8886/stats for counters; + # CA PEM at /tmp/interception-proxy-ca.pem (copy into llama-stack pod). """ import asyncio +import json import logging import ssl from pathlib import Path @@ -30,6 +32,10 @@ logger = logging.getLogger(__name__) +DEFAULT_INTERCEPTION_PROXY_PORT = 8889 +DEFAULT_INTERCEPTION_STATS_PORT = 8886 +IN_CLUSTER_CA_CERT_PATH = Path("/tmp/interception-proxy-ca.pem") + class InterceptionProxy: """Async TLS-intercepting proxy for testing. @@ -237,3 +243,87 @@ def reset_counters(self) -> None: """Reset request counters.""" self.connect_count = 0 self.intercepted_hosts.clear() + + +class _InterceptionStatsHandler: # pylint: disable=too-few-public-methods + """Expose interception proxy counters over HTTP for in-cluster e2e assertions.""" + + def __init__(self, proxy: InterceptionProxy) -> None: + self._proxy = proxy + + async def handle( + self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter + ) -> None: + """Serve ``GET /stats`` as JSON; other requests get 404.""" + try: + request_line = await reader.readline() + if not request_line: + return + line = request_line.decode("utf-8", errors="replace").strip() + method_path = line.split() + path = method_path[1] if len(method_path) > 1 else "" + while True: + header = await reader.readline() + if header in (b"\r\n", b"\n", b""): + break + if method_path and method_path[0].upper() == "GET" and path == "/stats": + body = json.dumps( + { + "connect_count": self._proxy.connect_count, + "intercepted_hosts": sorted(self._proxy.intercepted_hosts), + } + ).encode("utf-8") + writer.write(b"HTTP/1.1 200 OK\r\n") + writer.write(b"Content-Type: application/json\r\n") + writer.write(f"Content-Length: {len(body)}\r\n\r\n".encode()) + writer.write(body) + else: + writer.write(b"HTTP/1.1 404 Not Found\r\n\r\n") + await 
writer.drain() + finally: + writer.close() + + +async def _run_interception_stats_server( + proxy: InterceptionProxy, host: str, port: int +) -> asyncio.Server: + """Start the stats HTTP server bound to ``host:port``.""" + handler = _InterceptionStatsHandler(proxy) + + async def _client_handler( + reader: asyncio.StreamReader, writer: asyncio.StreamWriter + ) -> None: + await handler.handle(reader, writer) + + server = await asyncio.start_server(_client_handler, host, port) + logger.info("Interception proxy stats listening on %s:%d", host, port) + return server + + +async def run_in_cluster( + proxy_port: int = DEFAULT_INTERCEPTION_PROXY_PORT, + stats_port: int = DEFAULT_INTERCEPTION_STATS_PORT, + ca_cert_path: Path = IN_CLUSTER_CA_CERT_PATH, +) -> None: + """Run MITM proxy and stats server until cancelled (in-cluster pod entrypoint).""" + ca = trustme.CA() + proxy = InterceptionProxy(ca=ca, host="0.0.0.0", port=proxy_port) + proxy.export_ca_cert(ca_cert_path) + await proxy.start() + stats_server = await _run_interception_stats_server(proxy, "0.0.0.0", stats_port) + try: + await asyncio.Event().wait() + finally: + stats_server.close() + await stats_server.wait_closed() + await proxy.stop() + + +def main() -> None: + """CLI entrypoint for the ``e2e-interception-proxy`` Kubernetes pod.""" + logging.basicConfig(level=logging.INFO) + asyncio.run(run_in_cluster()) + + +if __name__ == "__main__": + main() diff --git a/tests/e2e/proxy/tunnel_proxy.py b/tests/e2e/proxy/tunnel_proxy.py index 07ff430ac..b29c01c24 100644 --- a/tests/e2e/proxy/tunnel_proxy.py +++ b/tests/e2e/proxy/tunnel_proxy.py @@ -4,19 +4,29 @@ tunneling. The proxy creates a TCP tunnel between the client and the destination server without inspecting the traffic. -Usage:: +Local Behave usage:: proxy = TunnelProxy(port=8888) await proxy.start() # ... run tests with HTTPS_PROXY=http://localhost:8888 ... 
await proxy.stop() assert proxy.connect_count > 0 # verify proxy was used + +In-cluster (Konflux/Prow) usage:: + + python tunnel_proxy.py + # CONNECT on 8888; GET http://127.0.0.1:8887/stats for connect_count JSON """ import asyncio +import json import logging from typing import Any, Optional +# In-cluster defaults (``python tunnel_proxy.py``). +DEFAULT_PROXY_PORT = 8888 +DEFAULT_STATS_PORT = 8887 + logger = logging.getLogger(__name__) @@ -172,3 +182,82 @@ def reset_counters(self) -> None: """Reset request counters.""" self.connect_count = 0 self.last_connect_target = None + + +class _StatsHandler: # pylint: disable=too-few-public-methods + """Expose tunnel proxy counters over HTTP for in-cluster e2e assertions.""" + + def __init__(self, proxy: TunnelProxy) -> None: + self._proxy = proxy + + async def handle( + self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter + ) -> None: + """Serve ``GET /stats`` as JSON; other requests get 404.""" + try: + request_line = await reader.readline() + if not request_line: + return + line = request_line.decode("utf-8", errors="replace").strip() + method_path = line.split() + path = method_path[1] if len(method_path) > 1 else "" + while True: + header = await reader.readline() + if header in (b"\r\n", b"\n", b""): + break + if method_path and method_path[0].upper() == "GET" and path == "/stats": + body = json.dumps( + { + "connect_count": self._proxy.connect_count, + "last_connect_target": self._proxy.last_connect_target, + } + ).encode("utf-8") + writer.write(b"HTTP/1.1 200 OK\r\n") + writer.write(b"Content-Type: application/json\r\n") + writer.write(f"Content-Length: {len(body)}\r\n\r\n".encode()) + writer.write(body) + else: + writer.write(b"HTTP/1.1 404 Not Found\r\n\r\n") + await writer.drain() + finally: + writer.close() + + +async def _run_stats_server(proxy: TunnelProxy, host: str, port: int) -> asyncio.Server: + """Start the stats HTTP server bound to ``host:port``.""" + handler = _StatsHandler(proxy) + + async 
def _client_handler( + reader: asyncio.StreamReader, writer: asyncio.StreamWriter + ) -> None: + await handler.handle(reader, writer) + + server = await asyncio.start_server(_client_handler, host, port) + logger.info("Tunnel proxy stats listening on %s:%d", host, port) + return server + + +async def run_in_cluster( + proxy_port: int = DEFAULT_PROXY_PORT, + stats_port: int = DEFAULT_STATS_PORT, +) -> None: + """Run CONNECT proxy and stats server until cancelled (in-cluster pod entrypoint).""" + proxy = TunnelProxy(host="0.0.0.0", port=proxy_port) + await proxy.start() + stats_server = await _run_stats_server(proxy, "0.0.0.0", stats_port) + try: + await asyncio.Event().wait() + finally: + stats_server.close() + await stats_server.wait_closed() + await proxy.stop() + + +def main() -> None: + """CLI entrypoint for the ``e2e-tunnel-proxy`` Kubernetes pod.""" + logging.basicConfig(level=logging.INFO) + asyncio.run(run_in_cluster()) + + +if __name__ == "__main__": + main() diff --git a/tests/e2e/test_list.txt b/tests/e2e/test_list.txt index 34e1b8647..26926a81f 100644 --- a/tests/e2e/test_list.txt +++ b/tests/e2e/test_list.txt @@ -26,4 +26,4 @@ features/mcp_servers_api_auth.feature features/mcp_servers_api_no_config.feature features/proxy.feature features/tls.feature -features/opentelemetry.feature +features/opentelemetry.feature \ No newline at end of file diff --git a/tests/e2e/utils/prow_utils.py b/tests/e2e/utils/prow_utils.py index 5e1a9252c..ff771904b 100644 --- a/tests/e2e/utils/prow_utils.py +++ b/tests/e2e/utils/prow_utils.py @@ -110,7 +110,12 @@ def restart_pod(container_name: str) -> None: print(result.stdout, end="") if result.returncode != 0: print(result.stderr, end="") - raise subprocess.CalledProcessError(result.returncode, op) + detail = (result.stderr or result.stdout or "").strip() + raise subprocess.CalledProcessError( + result.returncode, + op, + detail or None, + ) except subprocess.TimeoutExpired as e: print(f"Failed to restart pod {container_name}: 
{e}") raise