From d93365433445b11b3082ab3e4c66da1d97f7dae6 Mon Sep 17 00:00:00 2001 From: Sam DaSilva Date: Thu, 26 Feb 2026 23:00:23 -0500 Subject: [PATCH 1/9] fix(taskfiles/images): Multi-arch enablement image amd64 only Signed-off-by: Sam DaSilva --- taskfiles/images.yaml | 101 +++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 46 deletions(-) diff --git a/taskfiles/images.yaml b/taskfiles/images.yaml index 67f61e33b..355c1e83c 100644 --- a/taskfiles/images.yaml +++ b/taskfiles/images.yaml @@ -1,4 +1,4 @@ -version: '3' +version: "3" tasks: go-cache: @@ -12,10 +12,10 @@ tasks: - task: helper-prep-tmp-dir desc: Build {{.PLATFORM}} image using buildah vars: - NAME: '{{ .NAME }}' - DOCKERFILE: '{{ .DOCKERFILE }}' - PLATFORM: '{{.PLATFORM}}' - INCREMENTAL: '{{ .INCREMENTAL }}' + NAME: "{{ .NAME }}" + DOCKERFILE: "{{ .DOCKERFILE }}" + PLATFORM: "{{.PLATFORM}}" + INCREMENTAL: "{{ .INCREMENTAL }}" BUILDAH_OPTIONS: sh: | if [ "{{.INCREMENTAL}}" = "true" ]; then @@ -54,15 +54,26 @@ tasks: helper-prepare-multi-arch: internal: true status: - - test -f /proc/sys/fs/binfmt_misc/qemu-aarch64 + - | + if [ "$(uname -m)" = "aarch64" ]; then + test -f /proc/sys/fs/binfmt_misc/qemu-x86_64 + else + test -f /proc/sys/fs/binfmt_misc/qemu-aarch64 + fi cmds: - - sudo podman run --rm --privileged quay.io/bnemeth/multiarch-qemu-user-static --reset -p yes - - setenforce 0 + - sudo podman run --rm --privileged docker.io/tonistiigi/binfmt --install all + - | + if command -v getenforce >/dev/null 2>&1; then + mode="$(getenforce || true)" + if [ "$mode" != "Disabled" ]; then + sudo setenforce 0 || true + fi + fi clean-image-layer: internal: true vars: - NAME: '{{.NAME}}' + NAME: "{{.NAME}}" status: - sh -c '! buildah manifest inspect localhost/{{.NAME}}:dev-manifest' cmds: @@ -72,46 +83,46 @@ tasks: internal: true desc: Clean up image {{.NAME}} vars: - NAME: '{{.NAME}}' + NAME: "{{.NAME}}" cmds: - buildah manifest rm localhost/{{.NAME}}:dev-base-manifest || true - task: clean-image-layer vars: - NAME: '{{.NAME}}' + NAME: "{{.NAME}}" build-image: internal: true desc: Building {{.PLATFORM}} image {{.NAME}} vars: - NAME: '{{.NAME}}' - DOCKERFILE: '{{.DOCKERFILE}}' - PLATFORM: '{{.PLATFORM }}' + NAME: "{{.NAME}}" + DOCKERFILE: "{{.DOCKERFILE}}" + PLATFORM: "{{.PLATFORM }}" cmds: - task: helper-buildah vars: - NAME: 'localhost/{{.NAME}}:dev-base' - DOCKERFILE: '{{.DOCKERFILE}}' + NAME: "localhost/{{.NAME}}:dev-base" + DOCKERFILE: "{{.DOCKERFILE}}" INCREMENTAL: "false" - PLATFORM: '{{.PLATFORM}}' + PLATFORM: "{{.PLATFORM}}" # - cmd: buildah tag localhost/{{.NAME}}:dev-base-{{.PLATFORM}} localhost/{{.NAME}}:dev-base - task: prep-incremental-docker-file vars: - BASE_NAME: 'localhost/{{.NAME}}:dev-base-{{.PLATFORM}}' - IN_FILE: '{{.DOCKERFILE}}' - OUT_FILE: '{{.DOCKERFILE}}.inc' + BASE_NAME: "localhost/{{.NAME}}:dev-base-{{.PLATFORM}}" + IN_FILE: "{{.DOCKERFILE}}" + OUT_FILE: "{{.DOCKERFILE}}.inc" - task: helper-buildah vars: - NAME: 'localhost/{{.NAME}}:dev' - DOCKERFILE: '{{.DOCKERFILE}}.inc' + NAME: "localhost/{{.NAME}}:dev" + DOCKERFILE: "{{.DOCKERFILE}}.inc" INCREMENTAL: "true" - PLATFORM: '{{.PLATFORM}}' + PLATFORM: "{{.PLATFORM}}" prep-incremental-docker-file: internal: true vars: - BASE_NAME: '{{ .BASE_NAME }}' - IN_FILE: '{{ .IN_FILE }}' - OUT_FILE: '{{ .OUT_FILE }}' + BASE_NAME: "{{ .BASE_NAME }}" + IN_FILE: "{{ .IN_FILE }}" + OUT_FILE: "{{ .OUT_FILE }}" cmds: - > go run tools/incremental/incremental.go @@ -282,7 +293,6 @@ tasks: vars: NAME: intel-vsp - build-image-intel-vsp-p4: deps: - task: clean-image-layer @@ -331,7 +341,7 @@ tasks: NAME: network-resources-injector clean-image-all: - cmds: # can't run in parallel since multiple concurrent pulls are not supported + cmds: # can't run in parallel since multiple concurrent pulls are not supported - task: clean-image-manager - task: clean-image-daemon - task: clean-image-intel-vsp @@ -346,7 +356,7 @@ tasks: # they will be picked up by the build-image-* targets deps: - build-bin-all - cmds: # can't run in parallel since multiple concurrent pulls are not supported + cmds: # can't run in parallel since multiple concurrent pulls are not supported - task: build-image-manager - task: build-image-daemon - task: build-image-intel-vsp @@ -361,34 +371,33 @@ tasks: deps: - task: push-image-helper vars: - SOURCE: 'localhost/dpu-operator:dev' - IMAGE: '{{.REGISTRY}}/dpu-operator:dev' + SOURCE: "localhost/dpu-operator:dev" + IMAGE: "{{.REGISTRY}}/dpu-operator:dev" - task: push-image-helper vars: - SOURCE: 'localhost/dpu-daemon:dev' - IMAGE: '{{.REGISTRY}}/dpu-daemon:dev' + SOURCE: "localhost/dpu-daemon:dev" + IMAGE: "{{.REGISTRY}}/dpu-daemon:dev" - task: push-image-helper vars: - SOURCE: 'localhost/mrvl-vsp:dev' - IMAGE: '{{.REGISTRY}}/mrvl-vsp:dev' + SOURCE: "localhost/mrvl-vsp:dev" + IMAGE: "{{.REGISTRY}}/mrvl-vsp:dev" - task: push-image-helper vars: - SOURCE: 'localhost/mrvl-cpagent:dev' - IMAGE: '{{.REGISTRY}}/mrvl-cpagent:dev' + SOURCE: "localhost/mrvl-cpagent:dev" + IMAGE: "{{.REGISTRY}}/mrvl-cpagent:dev" - task: push-image-helper vars: - SOURCE: 'localhost/intel-vsp:dev' - IMAGE: '{{.REGISTRY}}/intel-vsp:dev' + SOURCE: "localhost/intel-vsp:dev" + IMAGE: "{{.REGISTRY}}/intel-vsp:dev" - task: push-image-helper vars: - SOURCE: 'localhost/intel-vsp-p4:dev' - IMAGE: '{{.REGISTRY}}/intel-vsp-p4:dev' + SOURCE: "localhost/intel-vsp-p4:dev" + IMAGE: "{{.REGISTRY}}/intel-vsp-p4:dev" - task: push-image-helper vars: - SOURCE: 'localhost/intel-netsec-vsp:dev' - IMAGE: '{{.REGISTRY}}/intel-netsec-vsp:dev' + SOURCE: "localhost/intel-netsec-vsp:dev" + IMAGE: "{{.REGISTRY}}/intel-netsec-vsp:dev" - task: push-image-helper vars: - SOURCE: 'localhost/network-resources-injector:dev' - IMAGE: '{{.REGISTRY}}/network-resources-injector:dev' - + SOURCE: "localhost/network-resources-injector:dev" + IMAGE: "{{.REGISTRY}}/network-resources-injector:dev" From 4c5772b8015a81741ab893ba4ed14ff628535351 Mon Sep 17 00:00:00 2001 From: Sam DaSilva Date: Fri, 27 Feb 2026 16:47:24 -0500 Subject: [PATCH 2/9] feat(upstream-images): add upstream Dockerfiles and suffix-based task switching - add non-.rhel Dockerfiles for manager/daemon/VSP/NRI images - keep .rhel Dockerfiles unchanged and selectable via DOCKERFILE_SUFFIX - add build-image-all-rhel and clean-image-all-rhel wrappers - make REGISTRY overridable in taskfile - fix pip RPM-setuptools conflict in openshift/install-dpu.sh Signed-off-by: Sam DaSilva --- Dockerfile | 20 +++++++ Dockerfile.CNI | 12 +++++ Dockerfile.IntelNetSecVSP | 29 ++++++++++ Dockerfile.IntelP4 | 53 +++++++++++++++++++ Dockerfile.IntelVSP | 52 ++++++++++++++++++ Dockerfile.daemon | 28 ++++++++++ Dockerfile.inc | 6 +++ Dockerfile.mrvlCPAgent | 57 ++++++++++++++++++++ Dockerfile.mrvlVSP | 29 ++++++++++ Dockerfile.networkResourcesInjector | 15 ++++++ openshift/install-dpu.sh | 4 +- taskfile.yaml | 7 ++- taskfiles/images.yaml | 82 ++++++++++++++++++++++++----- 13 files changed, 378 insertions(+), 16 deletions(-) create mode 100644 Dockerfile create mode 100644 Dockerfile.CNI create mode 100644 Dockerfile.IntelNetSecVSP create mode 100644 Dockerfile.IntelP4 create mode 100644 Dockerfile.IntelVSP create mode 100644 Dockerfile.daemon create mode 100644 Dockerfile.inc create mode 100644 Dockerfile.mrvlCPAgent create mode 100644 Dockerfile.mrvlVSP create mode 100644 Dockerfile.networkResourcesInjector diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..1747eff5d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,20 @@ +# Build the manager binary +FROM --platform=$BUILDPLATFORM docker.io/library/golang:1.24-bookworm@sha256:1a6d4452c65dea36aac2e2d606b01b4a029ec90cc1ae53890540ce6173ea77ac AS builder +ARG TARGETOS +ARG TARGETARCH + +WORKDIR /workspace +COPY . . + +# Build directly to avoid GOARCH leaking into go-run helper tooling during cross builds. +RUN mkdir -p /workspace/bin && \ + GOMAXPROCS=2 CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \ + go build -o /workspace/bin/manager.${TARGETARCH} ./cmd/main.go + +# Use a minimal runtime image for the manager binary. +FROM gcr.io/distroless/static-debian12:nonroot@sha256:a9329520abc449e3b14d5bc3a6ffae065bdde0f02667fa10880c49b35c109fd1 +WORKDIR / +ARG TARGETARCH +COPY --from=builder /workspace/bin/manager.${TARGETARCH} /manager +USER 65532:65532 +ENTRYPOINT ["/manager"] diff --git a/Dockerfile.CNI b/Dockerfile.CNI new file mode 100644 index 000000000..93ac47add --- /dev/null +++ b/Dockerfile.CNI @@ -0,0 +1,12 @@ +FROM --platform=$BUILDPLATFORM docker.io/library/golang:1.24-bookworm@sha256:1a6d4452c65dea36aac2e2d606b01b4a029ec90cc1ae53890540ce6173ea77ac AS builder +ARG TARGETOS +ARG TARGETARCH + +COPY . /usr/src/dpu-cni +WORKDIR /usr/src/dpu-cni +RUN GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -o dpucni ./dpu-cni/dpu-cni.go + +FROM gcr.io/distroless/static-debian12:nonroot@sha256:a9329520abc449e3b14d5bc3a6ffae065bdde0f02667fa10880c49b35c109fd1 +COPY --from=builder /usr/src/dpu-cni/dpucni /usr/bin/ +WORKDIR / +LABEL io.k8s.display-name="DPU-CNI" diff --git a/Dockerfile.IntelNetSecVSP b/Dockerfile.IntelNetSecVSP new file mode 100644 index 000000000..cbf7c5a77 --- /dev/null +++ b/Dockerfile.IntelNetSecVSP @@ -0,0 +1,29 @@ +FROM --platform=$BUILDPLATFORM docker.io/library/golang:1.24-bookworm@sha256:1a6d4452c65dea36aac2e2d606b01b4a029ec90cc1ae53890540ce6173ea77ac AS builder +ARG TARGETOS +ARG TARGETARCH + +WORKDIR /workspace +COPY . . + +# Build directly to avoid GOARCH leaking into go-run helper tooling during cross builds. +RUN mkdir -p /workspace/bin && \ + GOMAXPROCS=2 CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \ + go build -o /workspace/bin/vsp-intel-netsec.${TARGETARCH} ./internal/daemon/vendor-specific-plugins/intel-netsec/main.go + +FROM quay.io/centos/centos:stream9@sha256:f6041e6d52b61ece8da2c9733ea2a522ba6b36663303fd91024ea6882c5a8942 +ARG TARGETARCH +COPY --from=builder /workspace/bin/vsp-intel-netsec.${TARGETARCH} /vsp-intel-netsec + +RUN dnf update -y \ + && dnf install -y \ + ethtool \ + net-tools \ + kmod \ + pciutils \ + iputils \ + iproute \ + && dnf clean all \ + && rm -rf /var/cache/dnf + +USER 0 +ENTRYPOINT ["/vsp-intel-netsec"] diff --git a/Dockerfile.IntelP4 b/Dockerfile.IntelP4 new file mode 100644 index 000000000..3193abcc0 --- /dev/null +++ b/Dockerfile.IntelP4 @@ -0,0 +1,53 @@ +FROM quay.io/centos/centos:stream9@sha256:f6041e6d52b61ece8da2c9733ea2a522ba6b36663303fd91024ea6882c5a8942 + +ARG P4_NAME=fxp-net_linux-networking +ENV P4_NAME $P4_NAME + +ARG TARGETOS +ARG TARGETARCH +ENV ARCHSUFFIX="aarch64" + +COPY . /src +WORKDIR /src +RUN dnf install -y \ + kmod \ + gettext \ + python3-pip \ + pciutils \ + libnl3 \ + libedit \ + net-tools \ + libatomic \ + libconfig \ + gcc gcc-c++ \ + && dnf clean all + +RUN mkdir -p /opt/${P4_NAME} +COPY cmd/intelvsp/$P4_NAME/* /opt/${P4_NAME}/ +COPY cmd/intelvsp/p4sdk/entrypoint.sh / +COPY cmd/intelvsp/p4sdk/es2k_skip_p4.conf.template / + +RUN python3 -m pip install --no-cache-dir \ + netaddr==1.2.1 + +WORKDIR / + +# Add steps for cachito +ENV REMOTE_SOURCES=${REMOTE_SOURCES:-"./openshift/"} +ENV REMOTE_SOURCES_DIR=${REMOTE_SOURCES_DIR:-"/cachito"} +COPY ${REMOTE_SOURCES} ${REMOTE_SOURCES_DIR} +COPY openshift/install-dpu.sh . +RUN chmod +x install-dpu.sh \ + && ./install-dpu.sh + +# Remove packages only needed for cachito. +RUN dnf remove -y gcc gcc-c++ \ + && dnf clean all \ + && rm -rf /var/cache/dnf + +COPY ./cmd/intelvsp/p4runtime-2023.11.0/p4 /opt/p4rt_proto +COPY ./cmd/intelvsp/p4runtime-2023.11.0/copy_p4rt_python_deps.sh /opt/p4rt_proto/ +RUN chmod a+x /opt/p4rt_proto/copy_p4rt_python_deps.sh +RUN /opt/p4rt_proto/copy_p4rt_python_deps.sh + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/Dockerfile.IntelVSP b/Dockerfile.IntelVSP new file mode 100644 index 000000000..7f75516e0 --- /dev/null +++ b/Dockerfile.IntelVSP @@ -0,0 +1,52 @@ +FROM --platform=$BUILDPLATFORM docker.io/library/golang:1.24-bookworm@sha256:1a6d4452c65dea36aac2e2d606b01b4a029ec90cc1ae53890540ce6173ea77ac AS builder +ARG TARGETOS +ARG TARGETARCH + +WORKDIR /workspace +COPY . . + +# Build directly to avoid GOARCH leaking into go-run helper tooling during cross builds. +RUN mkdir -p /workspace/bin && \ + GOMAXPROCS=2 CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \ + go build -o /workspace/bin/ipuplugin.${TARGETARCH} ./cmd/intelvsp/intelvsp.go + +FROM quay.io/centos/centos:stream9@sha256:f6041e6d52b61ece8da2c9733ea2a522ba6b36663303fd91024ea6882c5a8942 +ARG TARGETARCH +ENV PYTHONUNBUFFERED=1 +WORKDIR / + +# https://github.com/grpc/grpc/issues/24556 +RUN dnf install -y \ + centos-release-nfv-openvswitch \ + && dnf install -y \ + NetworkManager iproute python3 python3-devel openssh-clients gcc gcc-c++ openvswitch3.4 \ + && python3 -m ensurepip --upgrade + +# By setting WORKDIR, directories are created automatically. +WORKDIR /opt/p4/p4-cp-nws/bin/ +RUN mkdir -p /opt/p4/p4-cp-nws/bin/p4 + +COPY ./cmd/intelvsp/fxp-net_linux-networking/fxp-net_linux-networking.pkg / +COPY ./cmd/intelvsp/p4rt-ctl /opt/p4/p4-cp-nws/bin/ + +# Add steps for cachito +ENV REMOTE_SOURCES=${REMOTE_SOURCES:-"./openshift/"} +ENV REMOTE_SOURCES_DIR=${REMOTE_SOURCES_DIR:-"/cachito"} +COPY ${REMOTE_SOURCES} ${REMOTE_SOURCES_DIR} +COPY openshift/install-dpu.sh . +RUN chmod +x install-dpu.sh \ + && ./install-dpu.sh + +# Remove packages only needed for cachito. +RUN dnf remove -y gcc gcc-c++ \ + && dnf clean all \ + && rm -rf /var/cache/dnf + +COPY ./cmd/intelvsp/p4runtime-2023.11.0/p4 /opt/p4rt_proto +COPY ./cmd/intelvsp/p4runtime-2023.11.0/copy_p4rt_python_deps.sh /opt/p4rt_proto +RUN chmod a+x /opt/p4rt_proto/copy_p4rt_python_deps.sh +RUN /opt/p4rt_proto/copy_p4rt_python_deps.sh + +COPY --chmod=755 --from=builder /workspace/bin/ipuplugin.${TARGETARCH} /ipuplugin +LABEL io.k8s.display-name="IPU OPI Plugin" +ENTRYPOINT ["/ipuplugin"] diff --git a/Dockerfile.daemon b/Dockerfile.daemon new file mode 100644 index 000000000..b0edc6f2e --- /dev/null +++ b/Dockerfile.daemon @@ -0,0 +1,28 @@ +# Build the daemon and CNI binaries +FROM --platform=$BUILDPLATFORM docker.io/library/golang:1.24-bookworm@sha256:1a6d4452c65dea36aac2e2d606b01b4a029ec90cc1ae53890540ce6173ea77ac AS builder +ARG TARGETOS +ARG TARGETARCH + +WORKDIR /workspace +COPY . . + +# Build directly to avoid GOARCH leaking into go-run helper tooling during cross builds. +RUN mkdir -p /workspace/bin && \ + GOMAXPROCS=2 CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \ + go build -o /workspace/bin/daemon.${TARGETARCH} ./cmd/daemon/daemon.go && \ + GOMAXPROCS=2 CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \ + go build -o /workspace/bin/dpu-cni.${TARGETARCH} ./dpu-cni/dpu-cni.go + +FROM quay.io/centos/centos:stream9@sha256:f6041e6d52b61ece8da2c9733ea2a522ba6b36663303fd91024ea6882c5a8942 +ARG TARGETARCH +WORKDIR / +COPY --from=builder /workspace/bin/daemon.${TARGETARCH} /daemon +COPY --from=builder /workspace/bin/dpu-cni.${TARGETARCH} /dpu-cni + +# Install hwdata to include pci.ids so jaypipes/ghw can run offline. +RUN dnf install -y hwdata ethtool \ + && dnf clean all \ + && rm -rf /var/cache/dnf + +USER 65532:65532 +ENTRYPOINT ["/daemon"] diff --git a/Dockerfile.inc b/Dockerfile.inc new file mode 100644 index 000000000..cbdf3ff90 --- /dev/null +++ b/Dockerfile.inc @@ -0,0 +1,6 @@ + +FROM localhost/dpu-operator:dev-base-arm64 + +ARG TARGETARCH + +COPY bin/manager.${TARGETARCH} /manager diff --git a/Dockerfile.mrvlCPAgent b/Dockerfile.mrvlCPAgent new file mode 100644 index 000000000..326ef44bc --- /dev/null +++ b/Dockerfile.mrvlCPAgent @@ -0,0 +1,57 @@ +ARG TARGETARCH +FROM --platform=linux/${TARGETARCH} docker.io/library/golang:1.24-bookworm@sha256:1a6d4452c65dea36aac2e2d606b01b4a029ec90cc1ae53890540ce6173ea77ac AS stage1 +ARG TARGETOS +ARG TARGETARCH + +RUN apt-get update && apt-get install -y --no-install-recommends \ + gawk gcc g++ libconfig-dev make pkg-config \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /workspace +COPY . . + +RUN \ + set -x && \ + mkdir -p /cpagent-bin/ && \ + if [ "$TARGETARCH" = "arm64" ] ; then \ + export OCTEP_PATH="/workspace/pcie_ep_octeon_target/target/libs/octep_cp_lib" && \ + ln -nfs internal/daemon/vendor-specific-plugins/marvell/vendor/pcie_ep_octeon_target.25.03.0/ /workspace/pcie_ep_octeon_target && \ + cd "/workspace/pcie_ep_octeon_target/target/libs/octep_cp_lib" && \ + make CFLAGS="-DUSE_PEM_AND_DPI_PF=1" && \ + cd "/workspace/pcie_ep_octeon_target/target/apps/octep_cp_agent" && \ + make CFLAGS="$(pkg-config --cflags libconfig) -I$OCTEP_PATH/include" \ + LDFLAGS="$(pkg-config --libs libconfig) -L$OCTEP_PATH/bin/lib" && \ + cp bin/bin/octep_cp_agent /cpagent-bin/octep_cp_agent.25.03.0 && \ + ln -nfs internal/daemon/vendor-specific-plugins/marvell/vendor/pcie_ep_octeon_target/ /workspace/pcie_ep_octeon_target && \ + cd "/workspace/pcie_ep_octeon_target/target/libs/octep_cp_lib" && \ + make CFLAGS="-DUSE_PEM_AND_DPI_PF=1" && \ + cd "/workspace/pcie_ep_octeon_target/target/apps/octep_cp_agent" && \ + make CFLAGS="$(pkg-config --cflags libconfig) -I$OCTEP_PATH/include" \ + LDFLAGS="$(pkg-config --libs libconfig) -L$OCTEP_PATH/bin/lib" && \ + cp bin/bin/octep_cp_agent /cpagent-bin/ && \ + cp cn106xx.cfg /cpagent-bin/ && \ + echo "build completed" ; \ + fi + +# Due to https://github.com/golang/go/issues/70329 cross-compilation hangs at times. +# As a temporary workaround, we can try specifying GOMAXPROCS=2 to relieve this issue. +WORKDIR /workspace +RUN GOMAXPROCS=2 CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \ + go build -o /cpagent-bin/cp-agent-run internal/daemon/vendor-specific-plugins/marvell/cp-agent/cp-agent-run.go + +FROM quay.io/centos/centos:stream9@sha256:f6041e6d52b61ece8da2c9733ea2a522ba6b36663303fd91024ea6882c5a8942 +COPY --from=stage1 /cpagent-bin/ /usr/bin/ + +RUN dnf update -y \ + && dnf install -y \ + net-tools \ + kmod \ + pciutils \ + iputils \ + iproute \ + libconfig \ + && dnf clean all \ + && rm -rf /var/cache/dnf + +USER 0 +ENTRYPOINT ["/usr/bin/cp-agent-run"] diff --git a/Dockerfile.mrvlVSP b/Dockerfile.mrvlVSP new file mode 100644 index 000000000..c596e9a77 --- /dev/null +++ b/Dockerfile.mrvlVSP @@ -0,0 +1,29 @@ +FROM --platform=$BUILDPLATFORM docker.io/library/golang:1.24-bookworm@sha256:1a6d4452c65dea36aac2e2d606b01b4a029ec90cc1ae53890540ce6173ea77ac AS builder +ARG TARGETOS +ARG TARGETARCH + +WORKDIR /workspace +COPY . . + +# Build directly to avoid GOARCH leaking into go-run helper tooling during cross builds. +RUN mkdir -p /workspace/bin && \ + GOMAXPROCS=2 CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \ + go build -o /workspace/bin/vsp-mrvl.${TARGETARCH} ./internal/daemon/vendor-specific-plugins/marvell/main.go + +FROM quay.io/centos/centos:stream9@sha256:f6041e6d52b61ece8da2c9733ea2a522ba6b36663303fd91024ea6882c5a8942 +ARG TARGETARCH +COPY --from=builder /workspace/bin/vsp-mrvl.${TARGETARCH} /vsp-mrvl + +RUN dnf update -y \ + && dnf install -y \ + net-tools \ + kmod \ + pciutils \ + iputils \ + iproute \ + ethtool \ + && dnf clean all \ + && rm -rf /var/cache/dnf + +USER 0 +ENTRYPOINT ["/vsp-mrvl"] diff --git a/Dockerfile.networkResourcesInjector b/Dockerfile.networkResourcesInjector new file mode 100644 index 000000000..c6f6069ba --- /dev/null +++ b/Dockerfile.networkResourcesInjector @@ -0,0 +1,15 @@ +FROM --platform=$BUILDPLATFORM docker.io/library/golang:1.24-bookworm@sha256:1a6d4452c65dea36aac2e2d606b01b4a029ec90cc1ae53890540ce6173ea77ac AS builder +ARG TARGETOS +ARG TARGETARCH + +WORKDIR /workspace +COPY . . +RUN mkdir -p /workspace/bin && \ + GOMAXPROCS=2 CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} \ + go build -o /workspace/bin/nri.${TARGETARCH} ./cmd/nri/networkresourcesinjector.go + +FROM gcr.io/distroless/static-debian12:nonroot@sha256:a9329520abc449e3b14d5bc3a6ffae065bdde0f02667fa10880c49b35c109fd1 +ARG TARGETARCH +WORKDIR / +COPY --from=builder /workspace/bin/nri.${TARGETARCH} /webhook +ENTRYPOINT ["/webhook"] diff --git a/openshift/install-dpu.sh b/openshift/install-dpu.sh index 078f35d1a..25f20de3b 100644 --- a/openshift/install-dpu.sh +++ b/openshift/install-dpu.sh @@ -16,7 +16,9 @@ fi python3 -m pip install --upgrade pip # Install the packages in order of build dependency to avoid issues during installation. -python3 -m pip install ${PIP_OPTS} -r requirements-build.txt +# CentOS/RHEL-family images often ship setuptools via RPM; ignore-installed avoids pip trying +# (and failing) to uninstall the RPM-provided setuptools when requirements pin a newer version. +python3 -m pip install ${PIP_OPTS} --ignore-installed -r requirements-build.txt python3 -m pip install ${PIP_OPTS} -r requirements.txt rm -rf ${REMOTE_SOURCES_DIR} diff --git a/taskfile.yaml b/taskfile.yaml index a713af9e2..66d3c5d8e 100644 --- a/taskfile.yaml +++ b/taskfile.yaml @@ -15,7 +15,12 @@ vars: BINDIR_ABS: sh: if [[ "{{.BINDIR}}" = /* ]]; then echo "{{.BINDIR}}"; else echo "$(pwd)/{{.BINDIR}}"; fi REGISTRY: - sh: hostname | sed 's/$/:5000/' + sh: | + if [ -n "${REGISTRY:-}" ]; then + echo "${REGISTRY}" + else + hostname | sed 's/$/:5000/' + fi ENVTEST_K8S_VERSION: 1.27.1 KUSTOMIZE_VERSION: v5.6.0 GINKGO_VERSION: diff --git a/taskfiles/images.yaml b/taskfiles/images.yaml index 355c1e83c..1d25f2a55 100644 --- a/taskfiles/images.yaml +++ b/taskfiles/images.yaml @@ -1,5 +1,8 @@ version: "3" +vars: + DOCKERFILE_SUFFIX: '{{ default "" .DOCKERFILE_SUFFIX }}' + tasks: go-cache: cmds: @@ -16,6 +19,13 @@ tasks: DOCKERFILE: "{{ .DOCKERFILE }}" PLATFORM: "{{.PLATFORM}}" INCREMENTAL: "{{ .INCREMENTAL }}" + BUILDPLATFORM: + sh: | + case "$(uname -m)" in + x86_64) echo "linux/amd64" ;; + aarch64|arm64) echo "linux/arm64" ;; + *) echo "linux/$(uname -m)" ;; + esac BUILDAH_OPTIONS: sh: | if [ "{{.INCREMENTAL}}" = "true" ]; then @@ -40,6 +50,9 @@ tasks: {{.BUILDAH_OPTIONS}} --manifest {{.NAME}}-manifest --platform linux/{{.PLATFORM}} + --build-arg BUILDPLATFORM={{.BUILDPLATFORM}} + --build-arg TARGETOS=linux + --build-arg TARGETARCH={{.PLATFORM}} -v {{.DPU_OPERATOR_TEMP_DIR}}/go-cache:/go:z -f {{.DOCKERFILE}} -t {{.NAME}}-{{.PLATFORM}} @@ -145,12 +158,12 @@ tasks: - task: build-image vars: NAME: dpu-operator - DOCKERFILE: Dockerfile.rhel + DOCKERFILE: Dockerfile{{.DOCKERFILE_SUFFIX}} PLATFORM: amd64 - task: build-image vars: NAME: dpu-operator - DOCKERFILE: Dockerfile.rhel + DOCKERFILE: Dockerfile{{.DOCKERFILE_SUFFIX}} PLATFORM: arm64 clean-image-manager: @@ -174,12 +187,12 @@ tasks: - task: build-image vars: NAME: dpu-daemon - DOCKERFILE: Dockerfile.daemon.rhel + DOCKERFILE: Dockerfile.daemon{{.DOCKERFILE_SUFFIX}} PLATFORM: amd64 - task: build-image vars: NAME: dpu-daemon - DOCKERFILE: Dockerfile.daemon.rhel + DOCKERFILE: Dockerfile.daemon{{.DOCKERFILE_SUFFIX}} PLATFORM: arm64 clean-image-daemon: @@ -203,12 +216,12 @@ tasks: - task: build-image vars: NAME: mrvl-vsp - DOCKERFILE: Dockerfile.mrvlVSP.rhel + DOCKERFILE: Dockerfile.mrvlVSP{{.DOCKERFILE_SUFFIX}} PLATFORM: amd64 - task: build-image vars: NAME: mrvl-vsp - DOCKERFILE: Dockerfile.mrvlVSP.rhel + DOCKERFILE: Dockerfile.mrvlVSP{{.DOCKERFILE_SUFFIX}} PLATFORM: arm64 clean-image-marvell-vsp: @@ -226,7 +239,7 @@ tasks: - task: build-image vars: NAME: mrvl-cpagent - DOCKERFILE: Dockerfile.mrvlCPAgent.rhel + DOCKERFILE: Dockerfile.mrvlCPAgent{{.DOCKERFILE_SUFFIX}} PLATFORM: arm64 clean-image-marvell-cpagent: @@ -250,12 +263,12 @@ tasks: - task: build-image vars: NAME: intel-netsec-vsp - DOCKERFILE: Dockerfile.IntelNetSecVSP.rhel + DOCKERFILE: Dockerfile.IntelNetSecVSP{{.DOCKERFILE_SUFFIX}} PLATFORM: amd64 - task: build-image vars: NAME: intel-netsec-vsp - DOCKERFILE: Dockerfile.IntelNetSecVSP.rhel + DOCKERFILE: Dockerfile.IntelNetSecVSP{{.DOCKERFILE_SUFFIX}} PLATFORM: arm64 clean-image-intel-netsec-vsp: @@ -279,12 +292,12 @@ tasks: - task: build-image vars: NAME: intel-vsp - DOCKERFILE: Dockerfile.IntelVSP.rhel + DOCKERFILE: Dockerfile.IntelVSP{{.DOCKERFILE_SUFFIX}} PLATFORM: amd64 - task: build-image vars: NAME: intel-vsp - DOCKERFILE: Dockerfile.IntelVSP.rhel + DOCKERFILE: Dockerfile.IntelVSP{{.DOCKERFILE_SUFFIX}} PLATFORM: arm64 clean-image-intel-vsp: @@ -302,7 +315,7 @@ tasks: - task: build-image vars: NAME: intel-vsp-p4 - DOCKERFILE: Dockerfile.IntelP4.rhel + DOCKERFILE: Dockerfile.IntelP4{{.DOCKERFILE_SUFFIX}} PLATFORM: arm64 clean-image-intel-vsp-p4: @@ -326,12 +339,12 @@ tasks: - task: build-image vars: NAME: network-resources-injector - DOCKERFILE: Dockerfile.networkResourcesInjector.rhel + DOCKERFILE: Dockerfile.networkResourcesInjector{{.DOCKERFILE_SUFFIX}} PLATFORM: amd64 - task: build-image vars: NAME: network-resources-injector - DOCKERFILE: Dockerfile.networkResourcesInjector.rhel + DOCKERFILE: Dockerfile.networkResourcesInjector{{.DOCKERFILE_SUFFIX}} PLATFORM: arm64 clean-image-network-resources-injector: @@ -351,6 +364,17 @@ tasks: - task: clean-image-intel-netsec-vsp - task: clean-image-network-resources-injector + clean-image-all-rhel: + cmds: + - task: clean-image-manager + - task: clean-image-daemon + - task: clean-image-intel-vsp + - task: clean-image-intel-vsp-p4 + - task: clean-image-marvell-vsp + - task: clean-image-marvell-cpagent + - task: clean-image-intel-netsec-vsp + - task: clean-image-network-resources-injector + build-image-all: # build all the binaries in parallel for speed # they will be picked up by the build-image-* targets @@ -367,6 +391,36 @@ tasks: - task: build-image-network-resources-injector - task: push-image-all + build-image-all-rhel: + deps: + - build-bin-all + cmds: + - task: build-image-manager + vars: + DOCKERFILE_SUFFIX: .rhel + - task: build-image-daemon + vars: + DOCKERFILE_SUFFIX: .rhel + - task: build-image-intel-vsp + vars: + DOCKERFILE_SUFFIX: .rhel + - task: build-image-intel-vsp-p4 + vars: + DOCKERFILE_SUFFIX: .rhel + - task: build-image-marvell-vsp + vars: + DOCKERFILE_SUFFIX: .rhel + - task: build-image-intel-netsec-vsp + vars: + DOCKERFILE_SUFFIX: .rhel + - task: build-image-marvell-cpagent + vars: + DOCKERFILE_SUFFIX: .rhel + - task: build-image-network-resources-injector + vars: + DOCKERFILE_SUFFIX: .rhel + - task: push-image-all + push-image-all: deps: - task: push-image-helper From 6d66b6c80a482bc2d8f84e1f5a00d5fc3d184308 Mon Sep 17 00:00:00 2001 From: Sam DaSilva Date: Fri, 27 Feb 2026 20:10:05 -0500 Subject: [PATCH 3/9] fix(tasks): make local-registry push and tool bootstrap reliable - add PUSH_TLS_VERIFY (default true) to push-image-helper - allow --tls-verify=false for insecure local registries - unset GOOS/GOARCH for go-install tool tasks (kustomize/ginkgo/controller-gen/envtest) Signed-off-by: Sam DaSilva --- taskfile.yaml | 7 ++++--- taskfiles/operator-sdk.yaml | 5 ++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/taskfile.yaml b/taskfile.yaml index 66d3c5d8e..d9fadfa2c 100644 --- a/taskfile.yaml +++ b/taskfile.yaml @@ -21,6 +21,7 @@ vars: else hostname | sed 's/$/:5000/' fi + PUSH_TLS_VERIFY: '{{ default "true" .PUSH_TLS_VERIFY }}' ENVTEST_K8S_VERSION: 1.27.1 KUSTOMIZE_VERSION: v5.6.0 GINKGO_VERSION: @@ -64,7 +65,7 @@ tasks: SOURCE: '{{.SOURCE}}' IMAGE: '{{.IMAGE}}' cmds: - - buildah manifest push --all '{{.SOURCE}}-manifest' 'docker://{{.IMAGE}}' + - buildah manifest push --all --tls-verify={{.PUSH_TLS_VERIFY}} '{{.SOURCE}}-manifest' 'docker://{{.IMAGE}}' undeploy-helper: internal: true @@ -115,7 +116,7 @@ tasks: - test -s {{.BINDIR}}/setup-envtest - ./{{.BINDIR}}/setup-envtest --help | head -1 | grep -q {{.SETUP_ENVTEST_VERSION}} cmds: - - GOBIN={{.BINDIR_ABS}} GOFLAGS='' go install sigs.k8s.io/controller-runtime/tools/setup-envtest@{{.SETUP_ENVTEST_VERSION}} + - env -u GOOS -u GOARCH GOBIN={{.BINDIR_ABS}} GOFLAGS='' go install sigs.k8s.io/controller-runtime/tools/setup-envtest@{{.SETUP_ENVTEST_VERSION}} deploy: deps: @@ -261,7 +262,7 @@ tasks: - test -s {{.BINDIR}}/controller-gen - ./{{.BINDIR}}/controller-gen --version | grep -q {{.CONTROLLER_TOOLS_VERSION}} cmds: - - GOBIN={{.BINDIR_ABS}} GOFLAGS='' go install sigs.k8s.io/controller-tools/cmd/controller-gen@{{.CONTROLLER_TOOLS_VERSION}} + - env -u GOOS -u GOARCH GOBIN={{.BINDIR_ABS}} GOFLAGS='' go install sigs.k8s.io/controller-tools/cmd/controller-gen@{{.CONTROLLER_TOOLS_VERSION}} test: deps: diff --git a/taskfiles/operator-sdk.yaml b/taskfiles/operator-sdk.yaml index 717ae6862..89fb93e51 100644 --- a/taskfiles/operator-sdk.yaml +++ b/taskfiles/operator-sdk.yaml @@ -4,7 +4,7 @@ tasks: kustomize: cmds: - mkdir -p {{.BINDIR}} - - GOBIN={{.BINDIR_ABS}} GOFLAGS='' GO111MODULE=on go install sigs.k8s.io/kustomize/kustomize/v5@{{.KUSTOMIZE_VERSION}} + - env -u GOOS -u GOARCH GOBIN={{.BINDIR_ABS}} GOFLAGS='' GO111MODULE=on go install sigs.k8s.io/kustomize/kustomize/v5@{{.KUSTOMIZE_VERSION}} - echo "{{.KUSTOMIZE_VERSION}}" > {{.BINDIR}}/kustomize_version status: - test -d {{.BINDIR}} @@ -17,7 +17,7 @@ tasks: - test -x {{.BINDIR}}/ginkgo - ./{{.BINDIR}}/ginkgo version | grep -q "{{.GINKGO_VERSION}}" cmds: - - GOBIN={{.BINDIR_ABS}} GOFLAGS='' go install github.com/onsi/ginkgo/v2/ginkgo@v{{.GINKGO_VERSION}} + - env -u GOOS -u GOARCH GOBIN={{.BINDIR_ABS}} GOFLAGS='' go install github.com/onsi/ginkgo/v2/ginkgo@v{{.GINKGO_VERSION}} ## Download operator-sdk locally if necessary. operator-sdk: @@ -52,4 +52,3 @@ tasks: else ln -sf $(which opm) {{.BINDIR}}/opm fi - From b8ac2211a05a83b409b57ec9b43485ec174aecce Mon Sep 17 00:00:00 2001 From: Sam DaSilva Date: Fri, 27 Feb 2026 20:28:50 -0500 Subject: [PATCH 4/9] refactor(cli): switch deploy/undeploy flow from oc to kubectl - replace oc with kubectl in taskfile deploy and undeploy paths - update README example command accordingly Signed-off-by: Sam DaSilva --- README.md | 2 +- taskfile.yaml | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 6d3ffda58..7054863c6 100644 --- a/README.md +++ b/README.md @@ -114,7 +114,7 @@ kubectl create -f examples/config.yaml After creating the `DpuOperatorConfig` CR, you should see the following pods: ```sh -oc get pods -n openshift-dpu-operator -o wide +kubectl get pods -n openshift-dpu-operator -o wide NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES dpu-daemon-rn6mc 1/1 Running 0 22h 192.168.122.218 worker-229 dpu-daemon-xrrlg 1/1 Running 0 22h 192.168.122.90 worker-229-ptl diff --git a/taskfile.yaml b/taskfile.yaml index d9fadfa2c..4dad66332 100644 --- a/taskfile.yaml +++ b/taskfile.yaml @@ -74,13 +74,13 @@ tasks: vars: KUBECONFIG: '{{.KUBECONFIG}}' status: - - NS=$(KUBECONFIG={{.KUBECONFIG}} oc get ns openshift-dpu-operator) + - NS=$(KUBECONFIG={{.KUBECONFIG}} kubectl get ns openshift-dpu-operator) if [[ -n "$NS" ]]; then false else true fi cmds: # this will block untill everything is cleaned up and bringing system back into a clean state as if the operator was never installed - - KUBECONFIG={{.KUBECONFIG}} oc delete -f examples/config.yaml || true - - bin/kustomize build config/default | KUBECONFIG={{.KUBECONFIG}} oc delete --ignore-not-found=true -f - - - KUBECONFIG={{.KUBECONFIG}} oc wait --for=delete ns openshift-dpu-operator --timeout=300s + - KUBECONFIG={{.KUBECONFIG}} kubectl delete -f examples/config.yaml || true + - bin/kustomize build config/default | KUBECONFIG={{.KUBECONFIG}} kubectl delete --ignore-not-found=true -f - + - KUBECONFIG={{.KUBECONFIG}} kubectl wait --for=delete ns openshift-dpu-operator --timeout=300s - echo "Namespace 'openshift-dpu-operator' has been removed." undeploy: @@ -126,12 +126,12 @@ tasks: vars: KUBECONFIG_DPU: "{{.KUBECONFIG_DPU}}" KUBECONFIG_HOST: "{{.KUBECONFIG_HOST}}" - - bin/kustomize build bin | KUBECONFIG="{{.KUBECONFIG_DPU}}" oc apply -f - - - bin/kustomize build bin | KUBECONFIG="{{.KUBECONFIG_HOST}}" oc apply -f - - - KUBECONFIG="{{.KUBECONFIG_DPU}}" oc -n openshift-dpu-operator wait --for=condition=Available deployment/dpu-operator-controller-manager --timeout=300s - - KUBECONFIG="{{.KUBECONFIG_HOST}}" oc -n openshift-dpu-operator wait --for=condition=Available deployment/dpu-operator-controller-manager --timeout=300s - - KUBECONFIG="{{.KUBECONFIG_DPU}}" oc -n openshift-dpu-operator wait --for=condition=ready pod --all --timeout=300s - - KUBECONFIG="{{.KUBECONFIG_HOST}}" oc -n openshift-dpu-operator wait --for=condition=ready pod --all --timeout=300s + - bin/kustomize build bin | KUBECONFIG="{{.KUBECONFIG_DPU}}" kubectl apply -f - + - bin/kustomize build bin | KUBECONFIG="{{.KUBECONFIG_HOST}}" kubectl apply -f - + - KUBECONFIG="{{.KUBECONFIG_DPU}}" kubectl -n openshift-dpu-operator wait --for=condition=Available deployment/dpu-operator-controller-manager --timeout=300s + - KUBECONFIG="{{.KUBECONFIG_HOST}}" kubectl -n openshift-dpu-operator wait --for=condition=Available deployment/dpu-operator-controller-manager --timeout=300s + - KUBECONFIG="{{.KUBECONFIG_DPU}}" kubectl -n openshift-dpu-operator wait --for=condition=ready pod --all --timeout=300s + - KUBECONFIG="{{.KUBECONFIG_HOST}}" kubectl -n openshift-dpu-operator wait --for=condition=ready pod --all --timeout=300s - echo "DPU operator deployment complete - controller manager and webhook are ready" deploy-1c: @@ -141,9 +141,9 @@ tasks: - task: undeploy-1c vars: KUBECONFIG_HOST: "{{.KUBECONFIG_HOST}}" - - bin/kustomize build bin | KUBECONFIG="{{.KUBECONFIG_HOST}}" oc apply -f - - - KUBECONFIG="{{.KUBECONFIG_HOST}}" oc -n openshift-dpu-operator wait --for=condition=Available deployment/dpu-operator-controller-manager --timeout=300s - - KUBECONFIG="{{.KUBECONFIG_HOST}}" oc -n openshift-dpu-operator wait --for=condition=ready pod --all --timeout=300s + - bin/kustomize build bin | KUBECONFIG="{{.KUBECONFIG_HOST}}" kubectl apply -f - + - KUBECONFIG="{{.KUBECONFIG_HOST}}" kubectl -n openshift-dpu-operator wait --for=condition=Available deployment/dpu-operator-controller-manager --timeout=300s + - KUBECONFIG="{{.KUBECONFIG_HOST}}" kubectl -n openshift-dpu-operator wait --for=condition=ready pod --all --timeout=300s prepare-e2e-test: cmds: From 226c077ea187f925c327da0b95aefeb50f757030 Mon Sep 17 00:00:00 2001 From: Sam DaSilva Date: Mon, 2 Mar 2026 21:56:52 -0500 Subject: [PATCH 5/9] feat: Enable cert-manager configurations for upstream Signed-off-by: Sam DaSilva --- config/crd/kustomization.yaml | 6 +- config/default/kustomization.yaml | 196 +++++++++++++++--------------- 2 files changed, 101 insertions(+), 101 deletions(-) diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 8f8026c39..d6cc9b97c 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -17,9 +17,9 @@ patches: # [CERTMANAGER] To enable cert-manager, uncomment all the sections with [CERTMANAGER] prefix. # patches here are for enabling the CA injection for each CRD -#- path: patches/cainjection_in_dpuoperatorconfigs.yaml -#- path: patches/cainjection_in_servicefunctionchains.yaml -#- path: patches/cainjection_in_dataprocessingunits.yaml +- path: patches/cainjection_in_dpuoperatorconfigs.yaml +# - path: patches/cainjection_in_servicefunctionchains.yaml +# - path: patches/cainjection_in_dataprocessingunits.yaml #+kubebuilder:scaffold:crdkustomizecainjectionpatch # [WEBHOOK] To enable webhook, uncomment the following section diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 82f460350..80671fbf3 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -22,7 +22,7 @@ resources: # crd/kustomization.yaml - ../webhook # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. -#- ../certmanager +- ../certmanager # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. #- ../prometheus @@ -38,100 +38,100 @@ patches: # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. # Uncomment the following replacements to add the cert-manager CA injection annotations -#replacements: -# - source: # Add cert-manager annotation to ValidatingWebhookConfiguration, MutatingWebhookConfiguration and CRDs -# kind: Certificate -# group: cert-manager.io -# version: v1 -# name: serving-cert # this name should match the one in certificate.yaml -# fieldPath: .metadata.namespace # namespace of the certificate CR -# targets: -# - select: -# kind: ValidatingWebhookConfiguration -# fieldPaths: -# - .metadata.annotations.[cert-manager.io/inject-ca-from] -# options: -# delimiter: '/' -# index: 0 -# create: true -# - select: -# kind: MutatingWebhookConfiguration -# fieldPaths: -# - .metadata.annotations.[cert-manager.io/inject-ca-from] -# options: -# delimiter: '/' -# index: 0 -# create: true -# - select: -# kind: CustomResourceDefinition -# fieldPaths: -# - .metadata.annotations.[cert-manager.io/inject-ca-from] -# options: -# delimiter: '/' -# index: 0 -# create: true -# - source: -# kind: Certificate -# group: cert-manager.io -# version: v1 -# name: serving-cert # this name should match the one in certificate.yaml -# fieldPath: .metadata.name -# targets: -# - select: -# kind: ValidatingWebhookConfiguration -# fieldPaths: -# - .metadata.annotations.[cert-manager.io/inject-ca-from] -# options: -# delimiter: '/' -# index: 1 -# create: true -# - select: -# kind: MutatingWebhookConfiguration -# fieldPaths: -# - .metadata.annotations.[cert-manager.io/inject-ca-from] -# options: -# delimiter: '/' -# index: 1 -# create: true -# - select: -# kind: CustomResourceDefinition -# fieldPaths: -# - .metadata.annotations.[cert-manager.io/inject-ca-from] -# options: -# delimiter: '/' -# index: 1 -# create: true -# - source: # Add cert-manager annotation to the webhook Service -# kind: Service -# version: v1 -# name: webhook-service -# fieldPath: .metadata.name # namespace of the service -# targets: -# - select: -# kind: Certificate -# group: cert-manager.io -# version: v1 -# fieldPaths: -# - .spec.dnsNames.0 -# - .spec.dnsNames.1 -# options: -# delimiter: '.' -# index: 0 -# create: true -# - source: -# kind: Service -# version: v1 -# name: webhook-service -# fieldPath: .metadata.namespace # namespace of the service -# targets: -# - select: -# kind: Certificate -# group: cert-manager.io -# version: v1 -# fieldPaths: -# - .spec.dnsNames.0 -# - .spec.dnsNames.1 -# options: -# delimiter: '.' -# index: 1 -# create: true +replacements: + - source: # Add cert-manager annotation to ValidatingWebhookConfiguration, MutatingWebhookConfiguration and CRDs + kind: Certificate + group: cert-manager.io + version: v1 + name: serving-cert # this name should match the one in certificate.yaml + fieldPath: .metadata.namespace # namespace of the certificate CR + targets: + - select: + kind: ValidatingWebhookConfiguration + fieldPaths: + - .metadata.annotations.[cert-manager.io/inject-ca-from] + options: + delimiter: '/' + index: 0 + create: true + - select: + kind: MutatingWebhookConfiguration + fieldPaths: + - .metadata.annotations.[cert-manager.io/inject-ca-from] + options: + delimiter: '/' + index: 0 + create: true + - select: + kind: CustomResourceDefinition + fieldPaths: + - .metadata.annotations.[cert-manager.io/inject-ca-from] + options: + delimiter: '/' + index: 0 + create: true + - source: + kind: Certificate + group: cert-manager.io + version: v1 + name: serving-cert # this name should match the one in certificate.yaml + fieldPath: .metadata.name + targets: + - select: + kind: ValidatingWebhookConfiguration + fieldPaths: + - .metadata.annotations.[cert-manager.io/inject-ca-from] + options: + delimiter: '/' + index: 1 + create: true + - select: + kind: MutatingWebhookConfiguration + fieldPaths: + - .metadata.annotations.[cert-manager.io/inject-ca-from] + options: + delimiter: '/' + index: 1 + create: true + - select: + kind: CustomResourceDefinition + fieldPaths: + - .metadata.annotations.[cert-manager.io/inject-ca-from] + options: + delimiter: '/' + index: 1 + create: true + - source: # Add cert-manager annotation to the webhook Service + kind: Service + version: v1 + name: webhook-service + fieldPath: .metadata.name # namespace of the service + targets: + - select: + kind: Certificate + group: cert-manager.io + version: v1 + fieldPaths: + - .spec.dnsNames.0 + - .spec.dnsNames.1 + options: + delimiter: '.' + index: 0 + create: true + - source: + kind: Service + version: v1 + name: webhook-service + fieldPath: .metadata.namespace # namespace of the service + targets: + - select: + kind: Certificate + group: cert-manager.io + version: v1 + fieldPaths: + - .spec.dnsNames.0 + - .spec.dnsNames.1 + options: + delimiter: '.' + index: 1 + create: true From 1103483cc58ca4c64a87f7a7343216ab8d8426d7 Mon Sep 17 00:00:00 2001 From: Sam DaSilva Date: Wed, 18 Mar 2026 17:12:06 -0400 Subject: [PATCH 6/9] feat(cluster): Add Vanilla cluster flavor as the default Signed-off-by: Sam DaSilva --- internal/testutils/testcluster.go | 8 +------- internal/utils/cluster_environment.go | 10 ++++++---- internal/utils/path_manager.go | 4 ++-- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/internal/testutils/testcluster.go b/internal/testutils/testcluster.go index 90325dfbf..be2736566 100644 --- a/internal/testutils/testcluster.go +++ b/internal/testutils/testcluster.go @@ -861,7 +861,7 @@ func LabelNodesWithDpu(c client.Client) error { switch flavour { case utils.MicroShiftFlavour, utils.KindFlavour: return LabelAllNodesWithDpu(c) - case utils.OpenShiftFlavour: + case utils.OpenShiftFlavour, utils.VanillaFlavour: return LabelWorkerNodesWithDpu(c) default: return fmt.Errorf("unsupported cluster flavor %s", flavour) @@ -925,7 +925,6 @@ func WaitForDPUReady(c client.Client) error { return allReady, nil }) - if err != nil { return fmt.Errorf("timeout waiting for all DPU CRs to be Ready: %w", err) } @@ -954,7 +953,6 @@ func WaitForDPUReady(c client.Client) error { } func WaitForDPU(c client.Client) error { - var dpuName string err := wait.PollUntilContextTimeout(context.TODO(), time.Second, TestInitialSetupTimeout*3, true, func(ctx context.Context) (bool, error) { dpuList := &configv1.DataProcessingUnitList{} @@ -970,7 +968,6 @@ func WaitForDPU(c client.Client) error { return false, nil }) - if err != nil { return fmt.Errorf("timeout waiting for DPU resource: %w", err) } @@ -990,7 +987,6 @@ func WaitForDPU(c client.Client) error { return false, nil }) - if err != nil { return fmt.Errorf("timeout waiting for DPU %s to be Ready: %w", dpuName, err) } @@ -999,7 +995,6 @@ func WaitForDPU(c client.Client) error { } func WaitForAllPodsReady(c client.Client, namespace string) error { - err := wait.PollImmediate(time.Second, TestInitialSetupTimeout*3, func() (bool, error) { podList := &corev1.PodList{} err := c.List(context.TODO(), podList, client.InNamespace(namespace)) @@ -1026,7 +1021,6 @@ func WaitForAllPodsReady(c client.Client, namespace string) error { return true, nil }) - if err != nil { return fmt.Errorf("pods not ready after timeout: %w", err) } diff --git a/internal/utils/cluster_environment.go b/internal/utils/cluster_environment.go index b55533b0f..f36b4e750 100644 --- a/internal/utils/cluster_environment.go +++ b/internal/utils/cluster_environment.go @@ -25,6 +25,7 @@ func NewClusterEnvironment(client client.Client) *ClusterEnvironment { type Flavour string const ( + VanillaFlavour Flavour = "Vanilla" OpenShiftFlavour Flavour = "OpenShift" MicroShiftFlavour Flavour = "MicroShift" KindFlavour Flavour = "Kind" @@ -34,7 +35,7 @@ const ( func (ce *ClusterEnvironment) Flavour(ctx context.Context) (Flavour, error) { microShift, err := ce.isMicroShift(ctx) if err != nil { - return UnknownFlavour, err + return VanillaFlavour, err } if microShift { return MicroShiftFlavour, nil @@ -42,7 +43,7 @@ func (ce *ClusterEnvironment) Flavour(ctx context.Context) (Flavour, error) { openShift, err := ce.isOpenShift(ctx) if err != nil { - return UnknownFlavour, err + return VanillaFlavour, err } if openShift { return OpenShiftFlavour, nil @@ -50,12 +51,13 @@ func (ce *ClusterEnvironment) Flavour(ctx context.Context) (Flavour, error) { kind, err := ce.isKind(ctx) if err != nil { - return UnknownFlavour, err + return VanillaFlavour, err } if kind { return KindFlavour, nil } - return UnknownFlavour, nil + + return VanillaFlavour, nil } func (ce *ClusterEnvironment) isMicroShift(ctx context.Context) (bool, error) { diff --git a/internal/utils/path_manager.go b/internal/utils/path_manager.go index a445bcf87..ad5ca6e97 100644 --- a/internal/utils/path_manager.go +++ b/internal/utils/path_manager.go @@ -45,10 +45,10 @@ func (p *PathManager) CniHostDir(clusterFlavour Flavour, filesystemMode Filesyst case clusterFlavour == MicroShiftFlavour && filesystemMode == ImageMode: return p.wrap("/run/cni"), nil // OpenShift typically uses /var/lib/cni regardless of filesystem mode since nodes are always coreos based - case clusterFlavour == OpenShiftFlavour: + case clusterFlavour == OpenShiftFlavour || (clusterFlavour == VanillaFlavour && filesystemMode == ImageMode): return p.wrap("/var/lib/cni"), nil // MicroShift with PackageMode and Kind use /opt/cni - case (clusterFlavour == MicroShiftFlavour && filesystemMode == PackageMode) || clusterFlavour == KindFlavour: + case (clusterFlavour == MicroShiftFlavour && filesystemMode == PackageMode) || (clusterFlavour == VanillaFlavour && filesystemMode == PackageMode) || clusterFlavour == KindFlavour: return p.wrap("/opt/cni"), nil default: return "", fmt.Errorf("unknown combination of cluster flavour (%s) and filesystem mode (%s)", clusterFlavour, filesystemMode) From a7336ffc77229c24a4a0cea2b7fab88abd5aae5f Mon Sep 17 00:00:00 2001 From: Sam DaSilva Date: Mon, 23 Mar 2026 14:32:01 -0400 Subject: [PATCH 7/9] feat: add flavor-aware NRI TLS provisioning Signed-off-by: Sam DaSilva --- .../00_service.yaml | 11 ++++ .../01_webhook.yaml | 27 ++++++++ .../02_serviceaccount.yaml | 5 ++ .../03_secret.yaml | 8 +++ ...lusterrole_network_resources_injector.yaml | 59 +++++++++++++++++ .../05_clusterrole_secrets.yaml | 11 ++++ .../06_clusterrole_webhook_configs.yaml | 11 ++++ .../07_clusterrole_service.yaml | 17 +++++ .../08_clusterrole_configmaps.yaml | 11 ++++ ...twork_resources_injector_role_binding.yaml | 11 ++++ .../10_clusterrolebinding_secrets.yaml | 12 ++++ ...11_clusterrolebinding_webhook_configs.yaml | 12 ++++ .../12_clusterrolebinding_service.yaml | 12 ++++ .../13_clusterrolebinding_configmaps.yaml | 12 ++++ .../14_server.yaml | 63 +++++++++++++++++++ .../15_issuer.yaml | 7 +++ .../16_certificate.yaml | 18 ++++++ .../dpuoperatorconfig_controller.go | 44 ++++++++++++- 18 files changed, 350 insertions(+), 1 deletion(-) create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/00_service.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/01_webhook.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/02_serviceaccount.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/03_secret.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/04_clusterrole_network_resources_injector.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/05_clusterrole_secrets.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/06_clusterrole_webhook_configs.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/07_clusterrole_service.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/08_clusterrole_configmaps.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/09_clusterrolebinding_network_resources_injector_role_binding.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/10_clusterrolebinding_secrets.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/11_clusterrolebinding_webhook_configs.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/12_clusterrolebinding_service.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/13_clusterrolebinding_configmaps.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/14_server.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/15_issuer.yaml create mode 100644 internal/controller/bindata/network-resources-injector-certmanager/16_certificate.yaml diff --git a/internal/controller/bindata/network-resources-injector-certmanager/00_service.yaml b/internal/controller/bindata/network-resources-injector-certmanager/00_service.yaml new file mode 100644 index 000000000..a29632957 --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/00_service.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: Service +metadata: + name: network-resources-injector-service + namespace: {{.Namespace}} +spec: + ports: + - port: 443 + targetPort: 8443 + selector: + app: network-resources-injector diff --git a/internal/controller/bindata/network-resources-injector-certmanager/01_webhook.yaml b/internal/controller/bindata/network-resources-injector-certmanager/01_webhook.yaml new file mode 100644 index 000000000..47f119682 --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/01_webhook.yaml @@ -0,0 +1,27 @@ +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: network-resources-injector-config + annotations: + cert-manager.io/inject-ca-from: "{{.Namespace}}/network-resources-injector-serving-cert" +webhooks: + - name: network-resources-injector-config.k8s.io + sideEffects: None + admissionReviewVersions: ["v1", "v1beta1"] + clientConfig: + service: + name: network-resources-injector-service + namespace: {{.Namespace}} + path: "/mutate" + namespaceSelector: + matchExpressions: + - key: "kubernetes.io/metadata.name" + operator: "NotIn" + values: + - {{.Namespace}} + rules: + - operations: [ "CREATE" ] + apiGroups: ["apps", ""] + apiVersions: ["v1"] + resources: ["pods"] diff --git a/internal/controller/bindata/network-resources-injector-certmanager/02_serviceaccount.yaml b/internal/controller/bindata/network-resources-injector-certmanager/02_serviceaccount.yaml new file mode 100644 index 000000000..5a4113857 --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/02_serviceaccount.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + namespace: {{.Namespace}} + name: network-resources-injector-sa diff --git a/internal/controller/bindata/network-resources-injector-certmanager/03_secret.yaml b/internal/controller/bindata/network-resources-injector-certmanager/03_secret.yaml new file mode 100644 index 000000000..13b62fa1a --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/03_secret.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: network-resources-injector-sa-secret + namespace: {{.Namespace}} + annotations: + kubernetes.io/service-account.name: network-resources-injector-sa +type: kubernetes.io/service-account-token diff --git a/internal/controller/bindata/network-resources-injector-certmanager/04_clusterrole_network_resources_injector.yaml b/internal/controller/bindata/network-resources-injector-certmanager/04_clusterrole_network_resources_injector.yaml new file mode 100644 index 000000000..108989813 --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/04_clusterrole_network_resources_injector.yaml @@ -0,0 +1,59 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: network-resources-injector +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - '*' +- apiGroups: + - "" + resources: + - secrets + verbs: + - '*' +- apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - get + - list + - watch + - create + - delete +- apiGroups: + - k8s.cni.cncf.io + resources: + - network-attachment-definitions + verbs: + - 'watch' + - 'list' + - 'get' +- apiGroups: + - "" + resources: + - configmaps + verbs: + - 'get' +- apiGroups: + - apps + resources: + - deployments + verbs: + - 'watch' + - 'list' + - 'get' +- apiGroups: + - security.openshift.io + resourceNames: + - anyuid + - hostnetwork + - privileged + resources: + - securitycontextconstraints + verbs: + - 'use' diff --git a/internal/controller/bindata/network-resources-injector-certmanager/05_clusterrole_secrets.yaml b/internal/controller/bindata/network-resources-injector-certmanager/05_clusterrole_secrets.yaml new file mode 100644 index 000000000..661f9b333 --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/05_clusterrole_secrets.yaml @@ -0,0 +1,11 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: network-resources-injector-secrets +rules: +- apiGroups: + - "" + resources: + - secrets + verbs: + - '*' diff --git a/internal/controller/bindata/network-resources-injector-certmanager/06_clusterrole_webhook_configs.yaml b/internal/controller/bindata/network-resources-injector-certmanager/06_clusterrole_webhook_configs.yaml new file mode 100644 index 000000000..d8a2e6e70 --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/06_clusterrole_webhook_configs.yaml @@ -0,0 +1,11 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: network-resources-injector-webhook-configs +rules: +- apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + verbs: + - '*' diff --git a/internal/controller/bindata/network-resources-injector-certmanager/07_clusterrole_service.yaml b/internal/controller/bindata/network-resources-injector-certmanager/07_clusterrole_service.yaml new file mode 100644 index 000000000..7bec6c7ea --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/07_clusterrole_service.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: network-resources-injector-service +rules: +- apiGroups: + - "" + resources: + - services + verbs: + - '*' +- apiGroups: + - "" + resources: + - pods + verbs: + - '*' diff --git a/internal/controller/bindata/network-resources-injector-certmanager/08_clusterrole_configmaps.yaml b/internal/controller/bindata/network-resources-injector-certmanager/08_clusterrole_configmaps.yaml new file mode 100644 index 000000000..b58c11ad1 --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/08_clusterrole_configmaps.yaml @@ -0,0 +1,11 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: network-resources-injector-configmaps +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - 'get' diff --git a/internal/controller/bindata/network-resources-injector-certmanager/09_clusterrolebinding_network_resources_injector_role_binding.yaml b/internal/controller/bindata/network-resources-injector-certmanager/09_clusterrolebinding_network_resources_injector_role_binding.yaml new file mode 100644 index 000000000..65e17756b --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/09_clusterrolebinding_network_resources_injector_role_binding.yaml @@ -0,0 +1,11 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: network-resources-injector-role-binding +roleRef: + kind: ClusterRole + name: network-resources-injector +subjects: +- kind: ServiceAccount + name: network-resources-injector-sa + namespace: {{.Namespace}} diff --git a/internal/controller/bindata/network-resources-injector-certmanager/10_clusterrolebinding_secrets.yaml b/internal/controller/bindata/network-resources-injector-certmanager/10_clusterrolebinding_secrets.yaml new file mode 100644 index 000000000..0732a6834 --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/10_clusterrolebinding_secrets.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: network-resources-injector-secrets-role-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: network-resources-injector-secrets +subjects: +- kind: ServiceAccount + name: network-resources-injector-sa + namespace: {{.Namespace}} diff --git a/internal/controller/bindata/network-resources-injector-certmanager/11_clusterrolebinding_webhook_configs.yaml b/internal/controller/bindata/network-resources-injector-certmanager/11_clusterrolebinding_webhook_configs.yaml new file mode 100644 index 000000000..57a8595f0 --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/11_clusterrolebinding_webhook_configs.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: network-resources-injector-webhook-configs-role-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: network-resources-injector-webhook-configs +subjects: +- kind: ServiceAccount + name: network-resources-injector-sa + namespace: {{.Namespace}} diff --git a/internal/controller/bindata/network-resources-injector-certmanager/12_clusterrolebinding_service.yaml b/internal/controller/bindata/network-resources-injector-certmanager/12_clusterrolebinding_service.yaml new file mode 100644 index 000000000..5ea93cf5f --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/12_clusterrolebinding_service.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: network-resources-injector-service-role-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: network-resources-injector-service +subjects: +- kind: ServiceAccount + name: network-resources-injector-sa + namespace: {{.Namespace}} diff --git a/internal/controller/bindata/network-resources-injector-certmanager/13_clusterrolebinding_configmaps.yaml b/internal/controller/bindata/network-resources-injector-certmanager/13_clusterrolebinding_configmaps.yaml new file mode 100644 index 000000000..9f94516a8 --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/13_clusterrolebinding_configmaps.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: network-resources-injector-configmaps-role-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: network-resources-injector-configmaps +subjects: +- kind: ServiceAccount + name: network-resources-injector-sa + namespace: {{.Namespace}} diff --git a/internal/controller/bindata/network-resources-injector-certmanager/14_server.yaml b/internal/controller/bindata/network-resources-injector-certmanager/14_server.yaml new file mode 100644 index 000000000..883b5fa48 --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/14_server.yaml @@ -0,0 +1,63 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: network-resources-injector + name: network-resources-injector + namespace: {{.Namespace}} +spec: + selector: + matchLabels: + app: network-resources-injector + template: + metadata: + labels: + app: network-resources-injector + spec: + serviceAccount: network-resources-injector-sa + containers: + - name: webhook-server + image: {{.NRIWebhookImage}} + imagePullPolicy: {{.ImagePullPolicy}} + command: + - /webhook + args: + - -bind-address=0.0.0.0 + - -port=8443 + - -tls-private-key-file=/etc/tls/tls.key + - -tls-cert-file=/etc/tls/tls.crt + - -insecure=true + - -health-check-port=8444 + - -logtostderr + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + securityContext: + privileged: true + capabilities: + drop: + - ALL + add: ["NET_BIND_SERVICE"] + readOnlyRootFilesystem: true + volumeMounts: + - mountPath: /etc/tls + name: tls + resources: + requests: + memory: "50Mi" + cpu: "250m" + limits: + memory: "200Mi" + cpu: "500m" + livenessProbe: + httpGet: + path: /healthz + port: 8444 + initialDelaySeconds: 10 + periodSeconds: 5 + volumes: + - name: tls + secret: + secretName: network-resources-injector-secret diff --git a/internal/controller/bindata/network-resources-injector-certmanager/15_issuer.yaml b/internal/controller/bindata/network-resources-injector-certmanager/15_issuer.yaml new file mode 100644 index 000000000..5ff9aa6aa --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/15_issuer.yaml @@ -0,0 +1,7 @@ +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: network-resources-injector-selfsigned + namespace: {{.Namespace}} +spec: + selfSigned: {} diff --git a/internal/controller/bindata/network-resources-injector-certmanager/16_certificate.yaml b/internal/controller/bindata/network-resources-injector-certmanager/16_certificate.yaml new file mode 100644 index 000000000..9aa358148 --- /dev/null +++ b/internal/controller/bindata/network-resources-injector-certmanager/16_certificate.yaml @@ -0,0 +1,18 @@ +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: network-resources-injector-serving-cert + namespace: {{.Namespace}} +spec: + secretName: network-resources-injector-secret + issuerRef: + name: network-resources-injector-selfsigned + kind: Issuer + dnsNames: + - network-resources-injector-service + - network-resources-injector-service.{{.Namespace}} + - network-resources-injector-service.{{.Namespace}}.svc + usages: + - digital signature + - key encipherment + - server auth diff --git a/internal/controller/dpuoperatorconfig_controller.go b/internal/controller/dpuoperatorconfig_controller.go index b3d3ebcc0..8722e9e2b 100644 --- a/internal/controller/dpuoperatorconfig_controller.go +++ b/internal/controller/dpuoperatorconfig_controller.go @@ -27,9 +27,11 @@ import ( "github.com/openshift/dpu-operator/internal/utils" "github.com/openshift/dpu-operator/pkgs/render" "github.com/openshift/dpu-operator/pkgs/vars" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -90,6 +92,7 @@ func (r *DpuOperatorConfigReconciler) WithImagePullPolicy(policy string) *DpuOpe //+kubebuilder:rbac:groups="",resources=services,verbs=* //+kubebuilder:rbac:groups=admissionregistration.k8s.io,resources=mutatingwebhookconfigurations,verbs=* //+kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch +//+kubebuilder:rbac:groups=cert-manager.io,resources=issuers;certificates,verbs=* //+kubebuilder:rbac:groups=apps,resources=daemonsets,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=apps,resources=replicasets,verbs=get;list;watch;create;update;patch;delete @@ -299,6 +302,7 @@ func (r *DpuOperatorConfigReconciler) yamlVars() map[string]string { "ImagePullPolicy": r.imagePullPolicy, "ResourceName": "openshift.io/dpu", // FIXME: Hardcode for now "CniDir": p, + "ClusterFlavour": string(flavour), } return data @@ -321,9 +325,47 @@ func (r *DpuOperatorConfigReconciler) ensureDpuDeamonSet(ctx context.Context, cf func (r *DpuOperatorConfigReconciler) ensureNetworkResourcesInjector(ctx context.Context, cfg *configv1.DpuOperatorConfig) error { logger := log.FromContext(ctx) + ce := utils.NewClusterEnvironment(r.Client) + flavour, err := ce.Flavour(ctx) + if err != nil { + return fmt.Errorf("failed to detect cluster flavour for network resources injector: %w", err) + } + + binDataPath := "network-resources-injector" + switch flavour { + case utils.OpenShiftFlavour, utils.MicroShiftFlavour: + binDataPath = "network-resources-injector" + default: + if err := r.ensureCertManagerInstalled(ctx, flavour); err != nil { + return err + } + binDataPath = "network-resources-injector-certmanager" + } + logger.Info("Create Network Resources Injector") - return r.createAndApplyAllFromBinData(logger, "network-resources-injector", cfg) + logger.Info("Selected network resources injector manifest set", "flavour", flavour, "path", binDataPath) + return r.createAndApplyAllFromBinData(logger, binDataPath, cfg) +} + +func (r *DpuOperatorConfigReconciler) ensureCertManagerInstalled(ctx context.Context, flavour utils.Flavour) error { + requiredCRDs := []string{ + "certificates.cert-manager.io", + "issuers.cert-manager.io", + } + + for _, crdName := range requiredCRDs { + crd := &apiextensionsv1.CustomResourceDefinition{} + if err := r.Get(ctx, types.NamespacedName{Name: crdName}, crd); err != nil { + if apierrors.IsNotFound(err) { + return fmt.Errorf("cert-manager is required on %s clusters for network-resources-injector TLS provisioning: missing CRD %q", flavour, crdName) + } + return fmt.Errorf("failed to verify cert-manager CRD %q on %s cluster: %w", crdName, flavour, err) + } + } + + return nil } + func (r *DpuOperatorConfigReconciler) ensureNetworkFunctioNAD(ctx context.Context, cfg *configv1.DpuOperatorConfig) error { logger := log.FromContext(ctx) From 0551cf6329e095ceb8fdc6cf0764b758cca9b469 Mon Sep 17 00:00:00 2001 From: Sam DaSilva Date: Mon, 23 Mar 2026 14:32:11 -0400 Subject: [PATCH 8/9] fix(deploy): resolve OpenShift webhook TLS clash Signed-off-by: Sam DaSilva --- config/rbac/role.yaml | 13 +++++++++ taskfile.yaml | 65 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 70 insertions(+), 8 deletions(-) diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 42ef81004..001b01f40 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -41,6 +41,19 @@ rules: - get - list - watch +- apiGroups: + - cert-manager.io + resources: + - certificates + - issuers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch - apiGroups: - apps resources: diff --git a/taskfile.yaml b/taskfile.yaml index 4dad66332..f727f04bc 100644 --- a/taskfile.yaml +++ b/taskfile.yaml @@ -109,6 +109,54 @@ tasks: -template-file config/dev/local-images-template.yaml -output-file bin/local-images.yaml - cp config/dev/kustomization.yaml bin + + deploy-webhook-compat: + internal: true + vars: + KUBECONFIG: '{{.KUBECONFIG}}' + cmds: + - | + KUBECONFIG={{.KUBECONFIG}} python3 - <<'PY' + import json + import os + import subprocess + + env = dict(os.environ) + env["KUBECONFIG"] = os.environ["KUBECONFIG"] + + data = json.loads(subprocess.check_output([ + "kubectl", + "get", + "validatingwebhookconfigurations", + "-o", + "json", + ], env=env)) + + for item in data.get("items", []): + name = item.get("metadata", {}).get("name", "") + if name == "dpu-operator-validating-webhook-configuration": + continue + + for webhook in item.get("webhooks", []): + if webhook.get("name") == "vdpuoperatorconfig.kb.io": + subprocess.run([ + "kubectl", + "delete", + "validatingwebhookconfiguration", + name, + "--ignore-not-found=true", + ], env=env, check=False) + break + PY + - | + if KUBECONFIG={{.KUBECONFIG}} kubectl get --raw /apis/route.openshift.io/v1 >/dev/null 2>&1; then + KUBECONFIG={{.KUBECONFIG}} kubectl annotate validatingwebhookconfiguration dpu-operator-validating-webhook-configuration cert-manager.io/inject-ca-from- --overwrite || true + KUBECONFIG={{.KUBECONFIG}} kubectl -n openshift-dpu-operator delete certificate dpu-operator-serving-cert --ignore-not-found=true + KUBECONFIG={{.KUBECONFIG}} kubectl -n openshift-dpu-operator delete issuer dpu-operator-selfsigned-issuer --ignore-not-found=true + KUBECONFIG={{.KUBECONFIG}} kubectl -n openshift-dpu-operator delete secret webhook-server-cert --ignore-not-found=true + KUBECONFIG={{.KUBECONFIG}} kubectl -n openshift-dpu-operator wait --for=create secret/webhook-server-cert --timeout=180s + KUBECONFIG={{.KUBECONFIG}} kubectl wait --for=jsonpath='{.webhooks[0].clientConfig.caBundle}' validatingwebhookconfiguration/dpu-operator-validating-webhook-configuration --timeout=180s + fi ## Download envtest-setup locally if necessary envtest: @@ -128,6 +176,12 @@ tasks: KUBECONFIG_HOST: "{{.KUBECONFIG_HOST}}" - bin/kustomize build bin | KUBECONFIG="{{.KUBECONFIG_DPU}}" kubectl apply -f - - bin/kustomize build bin | KUBECONFIG="{{.KUBECONFIG_HOST}}" kubectl apply -f - + - task: deploy-webhook-compat + vars: + KUBECONFIG: "{{.KUBECONFIG_DPU}}" + - task: deploy-webhook-compat + vars: + KUBECONFIG: "{{.KUBECONFIG_HOST}}" - KUBECONFIG="{{.KUBECONFIG_DPU}}" kubectl -n openshift-dpu-operator wait --for=condition=Available deployment/dpu-operator-controller-manager --timeout=300s - KUBECONFIG="{{.KUBECONFIG_HOST}}" kubectl -n openshift-dpu-operator wait --for=condition=Available deployment/dpu-operator-controller-manager --timeout=300s - KUBECONFIG="{{.KUBECONFIG_DPU}}" kubectl -n openshift-dpu-operator wait --for=condition=ready pod --all --timeout=300s @@ -142,6 +196,9 @@ tasks: vars: KUBECONFIG_HOST: "{{.KUBECONFIG_HOST}}" - bin/kustomize build bin | KUBECONFIG="{{.KUBECONFIG_HOST}}" kubectl apply -f - + - task: deploy-webhook-compat + vars: + KUBECONFIG: "{{.KUBECONFIG_HOST}}" - KUBECONFIG="{{.KUBECONFIG_HOST}}" kubectl -n openshift-dpu-operator wait --for=condition=Available deployment/dpu-operator-controller-manager --timeout=300s - KUBECONFIG="{{.KUBECONFIG_HOST}}" kubectl -n openshift-dpu-operator wait --for=condition=ready pod --all --timeout=300s @@ -196,14 +253,6 @@ tasks: {{.BINDIR}}/ginkgo -coverprofile cover.out ./e2e_test/... - KUBECONFIG_HOST={{.KUBECONFIG_HOST}} sh hack/traffic_flow_tests.sh - prepare-e2e-test: - cmds: - - > - if [ "{{.SUBMODULES}}" = "true" ]; then - hack/prepare-submodules.sh - fi - hack/prepare-venv.sh - redeploy: cmds: - task: build-image-all From 6d4f47320b9fe55a05dc2831db5e762ee56ab8e6 Mon Sep 17 00:00:00 2001 From: Sam DaSilva Date: Mon, 23 Mar 2026 14:32:11 -0400 Subject: [PATCH 9/9] fix(vsp): make p4 hostPath writable Signed-off-by: Sam DaSilva --- .../bindata/vsp/intel-ipu/99.vsp-pod.yaml | 4 +- .../bindata/vsp/marvell-dpu/99.vsp-pod.yaml | 4 +- .../dataprocessingunit_controller.go | 33 ++++++++ internal/daemon/daemon.go | 79 +++++++++++++++---- internal/utils/labels.go | 13 +++ 5 files changed, 112 insertions(+), 21 deletions(-) create mode 100644 internal/utils/labels.go diff --git a/internal/controller/bindata/vsp/intel-ipu/99.vsp-pod.yaml b/internal/controller/bindata/vsp/intel-ipu/99.vsp-pod.yaml index 114fc4b27..611d99103 100644 --- a/internal/controller/bindata/vsp/intel-ipu/99.vsp-pod.yaml +++ b/internal/controller/bindata/vsp/intel-ipu/99.vsp-pod.yaml @@ -43,8 +43,8 @@ spec: type: "" name: host-proc - hostPath: - path: /opt/p4/p4-cp-nws/var - type: "" + path: {{.P4StateHostPath}} + type: DirectoryOrCreate name: host-opt - hostPath: path: /var/run/ diff --git a/internal/controller/bindata/vsp/marvell-dpu/99.vsp-pod.yaml b/internal/controller/bindata/vsp/marvell-dpu/99.vsp-pod.yaml index cdbab78da..9dd8465d2 100644 --- a/internal/controller/bindata/vsp/marvell-dpu/99.vsp-pod.yaml +++ b/internal/controller/bindata/vsp/marvell-dpu/99.vsp-pod.yaml @@ -46,8 +46,8 @@ spec: type: "" name: host-proc - hostPath: - path: /opt/p4/p4-cp-nws/var - type: "" + path: {{.P4StateHostPath}} + type: DirectoryOrCreate name: host-opt - hostPath: path: /var/run/ diff --git a/internal/controller/dataprocessingunit_controller.go b/internal/controller/dataprocessingunit_controller.go index efabd7425..164498008 100644 --- a/internal/controller/dataprocessingunit_controller.go +++ b/internal/controller/dataprocessingunit_controller.go @@ -26,6 +26,7 @@ import ( configv1 "github.com/openshift/dpu-operator/api/v1" "github.com/openshift/dpu-operator/internal/images" "github.com/openshift/dpu-operator/internal/platform" + "github.com/openshift/dpu-operator/internal/utils" "github.com/openshift/dpu-operator/pkgs/render" "github.com/openshift/dpu-operator/pkgs/vars" corev1 "k8s.io/api/core/v1" @@ -67,6 +68,7 @@ func (r *DataProcessingUnitReconciler) WithImagePullPolicy(policy string) *DataP } // +kubebuilder:rbac:groups="",resources=pods,verbs=* +// +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch // +kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=secrets,verbs=* // +kubebuilder:rbac:groups="",resources=services,verbs=* @@ -137,11 +139,17 @@ func (r *DataProcessingUnitReconciler) ensureVSPResources(ctx context.Context, d return fmt.Errorf("failed to get VSP image for DPU type %s: %v", dpu.Spec.DpuProductName, err) } + p4StateHostPath, err := r.resolveP4StateHostPath(ctx, dpu) + if err != nil { + return err + } + additionalVars := map[string]string{ "Namespace": vars.Namespace, "VspName": r.getVSPName(dpu), "DpuName": dpu.Name, "NodeName": dpu.Spec.NodeName, + "P4StateHostPath": p4StateHostPath, "VendorSpecificPluginImage": vspImage, "ImagePullPolicy": r.imagePullPolicy, "Command": "[]", @@ -170,6 +178,31 @@ func (r *DataProcessingUnitReconciler) ensureVSPResources(ctx context.Context, d return nil } +func (r *DataProcessingUnitReconciler) resolveP4StateHostPath(ctx context.Context, dpu *configv1.DataProcessingUnit) (string, error) { + node := &corev1.Node{} + if err := r.Get(ctx, client.ObjectKey{Name: dpu.Spec.NodeName}, node); err != nil { + return "", fmt.Errorf("failed to get node %s for DataProcessingUnit %s: %w", dpu.Spec.NodeName, dpu.Name, err) + } + + if node.Labels == nil { + return "", fmt.Errorf("missing %s label on node %s for DataProcessingUnit %s", utils.P4HostPathLabelKey, dpu.Spec.NodeName, dpu.Name) + } + + mode, exists := node.Labels[utils.P4HostPathLabelKey] + if !exists { + return "", fmt.Errorf("missing %s label on node %s for DataProcessingUnit %s", utils.P4HostPathLabelKey, dpu.Spec.NodeName, dpu.Name) + } + + switch mode { + case utils.P4HostPathLabelValueOpt: + return "/opt/p4/p4-cp-nws/var", nil + case utils.P4HostPathLabelValueVarOpt: + return "/var/opt/p4/p4-cp-nws/var", nil + default: + return "", fmt.Errorf("invalid %s label value %q on node %s for DataProcessingUnit %s", utils.P4HostPathLabelKey, mode, dpu.Spec.NodeName, dpu.Name) + } +} + func (r *DataProcessingUnitReconciler) applyVSPResourcesWithTracking(logger logr.Logger, binDataPath string, data map[string]string, owner client.Object, dpuName string) error { // Get or create VSP resource renderer for this DataProcessingUnit renderer, exists := r.vspResourceRenderers[dpuName] diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index 2365bc1e9..030d67ae4 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -16,6 +16,7 @@ import ( "github.com/openshift/dpu-operator/internal/scheme" "github.com/openshift/dpu-operator/internal/utils" + "golang.org/x/sys/unix" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -29,7 +30,7 @@ import ( var () -const DpuSideLabelKey = "dpu.config.openshift.io/dpuside" +const DpuSideLabelKey = utils.DpuSideLabelKey type SideManager interface { StartVsp(ctx context.Context) error @@ -562,50 +563,94 @@ func (d *Daemon) updateNodeLabels() error { return fmt.Errorf("Failed to get node %s: %v", d.nodeName, err) } + p4HostPathMode, err := detectP4HostPathMode() + if err != nil { + d.log.Error(err, "Failed to auto-detect host P4 path mode, defaulting to /opt") + p4HostPathMode = utils.P4HostPathLabelValueOpt + } + + if node.Labels == nil { + node.Labels = make(map[string]string) + } + + changed := false + if node.Labels[utils.P4HostPathLabelKey] != p4HostPathMode { + node.Labels[utils.P4HostPathLabelKey] = p4HostPathMode + changed = true + } + // Determine the label value based on detected DPUs var labelValue string if len(d.managedDpus) == 0 { // No DPUs detected, remove the label if it exists - if node.Labels != nil { - if _, exists := node.Labels[DpuSideLabelKey]; exists { - delete(node.Labels, DpuSideLabelKey) - err := d.client.Update(context.TODO(), node) - if err != nil { - return fmt.Errorf("Failed to remove DPU side label from node %s: %v", d.nodeName, err) - } - d.log.Info("Removed DPU side label from node", "nodeName", d.nodeName) + if _, exists := node.Labels[DpuSideLabelKey]; exists { + delete(node.Labels, DpuSideLabelKey) + changed = true + } + + if changed { + err := d.client.Update(context.TODO(), node) + if err != nil { + return fmt.Errorf("Failed to update labels on node %s: %v", d.nodeName, err) } + d.log.Info("Updated node labels", "nodeName", d.nodeName, "p4HostPathMode", p4HostPathMode) } + return nil } for _, managedDpu := range d.managedDpus { if managedDpu.DpuCR.Spec.IsDpuSide { - labelValue = "dpu" + labelValue = utils.DpuSideLabelValueDpu } else { - labelValue = "dpu-host" + labelValue = utils.DpuSideLabelValueHost } break // It is a bug if there is node with managedDPU that is both hosting a DPU and is a DPU itself. Hense we only need to look at the first managedDPU DPU CR. } - if node.Labels == nil { - node.Labels = make(map[string]string) - } - // Check if the label needs to be updated currentValue, exists := node.Labels[DpuSideLabelKey] if !exists || currentValue != labelValue { node.Labels[DpuSideLabelKey] = labelValue + changed = true + } + + if changed { err := d.client.Update(context.TODO(), node) if err != nil { - return fmt.Errorf("Failed to update DPU side label on node %s: %v", d.nodeName, err) + return fmt.Errorf("Failed to update labels on node %s: %v", d.nodeName, err) } - d.log.Info("Updated DPU side label on node", "nodeName", d.nodeName, "labelValue", labelValue) + d.log.Info("Updated node labels", "nodeName", d.nodeName, "dpuSide", labelValue, "p4HostPathMode", p4HostPathMode) } return nil } +func detectP4HostPathMode() (string, error) { + paths := []string{"/proc/1/root/opt", "/opt"} + var lastErr error + + for _, path := range paths { + var fsStat unix.Statfs_t + if err := unix.Statfs(path, &fsStat); err != nil { + lastErr = err + continue + } + + if fsStat.Flags&unix.ST_RDONLY != 0 { + return utils.P4HostPathLabelValueVarOpt, nil + } + + return utils.P4HostPathLabelValueOpt, nil + } + + if lastErr != nil { + return "", fmt.Errorf("failed to stat host /opt filesystem: %w", lastErr) + } + + return "", fmt.Errorf("failed to stat host /opt filesystem") +} + // setOwnerReference sets the DpuOperatorConfig as the owner of the DataProcessingUnit CR func (d *Daemon) setOwnerReference(dpuCR *configv1.DataProcessingUnit) error { // Find the DpuOperatorConfig that should own this DPU CR diff --git a/internal/utils/labels.go b/internal/utils/labels.go new file mode 100644 index 000000000..ce8d3a0e3 --- /dev/null +++ b/internal/utils/labels.go @@ -0,0 +1,13 @@ +package utils + +const ( + DpuSideLabelKey = "dpu.config.openshift.io/dpuside" + + DpuSideLabelValueDpu = "dpu" + DpuSideLabelValueHost = "dpu-host" + + P4HostPathLabelKey = "dpu.config.openshift.io/p4hostpath" + + P4HostPathLabelValueOpt = "opt" + P4HostPathLabelValueVarOpt = "varopt" +)