stackrox · janisz · Jan 29, 2026 · Jan 29, 2026 · Jan 29, 2026 · Jan 29, 2026
@@ -0,0 +1,57 @@
+# Runs the WireMock smoke test on pushes to main and on pull request updates.
+name: WireMock Smoke Test
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
+
+# Least privilege: the workflow only needs to read the repository contents.
+permissions:
+  contents: read
+
+jobs:
+  wiremock-smoke-test:
+    name: WireMock Smoke Test
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # The Go version is read from go.mod rather than pinned here.
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+
+      - name: Set up Java
+        uses: actions/setup-java@v4
+        with:
+          distribution: 'temurin'
+          java-version: '11'
+
+      # Installs the protoc binary and its bundled include files under /usr/local.
+      - name: Install protoc
+        run: |
+          PROTOC_VERSION=3.20.1
+          PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-x86_64.zip"
+          # --fail turns an HTTP error (e.g. 404) into a curl failure instead of
+          # saving the error page under the zip's name.
+          curl --fail --silent --show-error --location --remote-name \
+            "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP}"
+          sudo unzip -o "${PROTOC_ZIP}" -d /usr/local bin/protoc
+          sudo unzip -o "${PROTOC_ZIP}" -d /usr/local 'include/*'
+          rm -f "${PROTOC_ZIP}"
+
+      - name: Download Go dependencies
+        run: go mod download
+
+      - name: Setup proto files from go mod cache
+        run: ./scripts/setup-proto-files.sh
+
+      - name: Run smoke test
+        run: ./scripts/smoke-test-wiremock.sh
+
+      # Only runs when an earlier step failed; a missing log file is not an error.
+      - name: Upload logs on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: wiremock-logs
+          path: wiremock/wiremock.log
+          if-no-files-found: ignore
@@ -22,3 +22,12 @@
 /e2e-tests/mcp-reports/
 /e2e-tests/bin/
 /e2e-tests/**/*-out.json
+
+# WireMock: file patterns
+/wiremock/*.log
+/wiremock/*.pid
+/wiremock/lib/*.jar
+# WireMock: whole paths
+/wiremock/__files
+/wiremock/certs/
+/wiremock/grpc/
+/wiremock/proto/
@@ -61,9 +61,9 @@ test: ## Run unit tests
 e2e-smoke-test: ## Run E2E smoke test (build and verify mcpchecker)
 	@cd e2e-tests && ./scripts/smoke-test.sh
 
-.PHONY: e2e-test
+.PHONY: e2e-test mock-start
 e2e-test: ## Run E2E tests
-	@cd e2e-tests && ./scripts/run-tests.sh
+	@cd e2e-tests && ./scripts/run-tests.sh --mock
 
 .PHONY: test-coverage-and-junit
 test-coverage-and-junit: ## Run unit tests with coverage and junit output
@@ -91,6 +91,63 @@ lint: ## Run golangci-lint
 	go install -v "github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.6"
 	golangci-lint run
 
+# Proto targets for the WireMock mock; setup and generate are thin wrappers around scripts/.
+.PHONY: proto-setup
+proto-setup: ## Setup proto files from go mod cache
+	@./scripts/setup-proto-files.sh
+
+.PHONY: proto-generate
+proto-generate: ## Generate proto descriptors for WireMock
+	@./scripts/generate-proto-descriptors.sh
+
+# Removes wiremock/proto/ and wiremock/grpc/ entirely.
+.PHONY: proto-clean
+proto-clean: ## Clean generated proto files
+	@rm -rf wiremock/proto/ wiremock/grpc/
+
+# Fails with a hint when the compiled descriptor file is absent.
+.PHONY: proto-check
+proto-check: ## Verify proto setup is correct
+	@test -f wiremock/proto/descriptors/stackrox.pb || { \
+		echo "❌ Proto descriptors not found"; \
+		echo "Run: make proto-generate"; \
+		exit 1; \
+	}
+	@echo "✓ Proto descriptors present"
+
+# WireMock mock Central lifecycle targets; download, start and stop wrap scripts/.
+.PHONY: mock-download
+mock-download: ## Download WireMock JARs
+	@./scripts/download-wiremock.sh
+
+# proto-check runs first, so this fails early when
+# wiremock/proto/descriptors/stackrox.pb is missing.
+.PHONY: mock-start
+mock-start: proto-check ## Start WireMock mock Central locally
+	@./scripts/start-mock-central.sh
+
+.PHONY: mock-stop
+mock-stop: ## Stop WireMock mock Central
+	@./scripts/stop-mock-central.sh
+
+# Follows the log with tail -f; interrupt (Ctrl-C) to exit.
+.PHONY: mock-logs
+mock-logs: ## View WireMock logs
+	@tail -f wiremock/wiremock.log
+
+# Stop and start run as sequential sub-makes so the order also holds under
+# `make -j`, where sibling prerequisites may run concurrently.
+.PHONY: mock-restart
+mock-restart: ## Restart WireMock
+	@$(MAKE) mock-stop
+	@$(MAKE) mock-start
+
+# Reports status from wiremock/wiremock.pid; always exits 0, whatever the state.
+.PHONY: mock-status
+mock-status: ## Check WireMock status
+	@if [ ! -f wiremock/wiremock.pid ]; then \
+		echo "WireMock is not running"; \
+		exit 0; \
+	fi; \
+	PID=$$(cat wiremock/wiremock.pid); \
+	if ps -p $$PID > /dev/null 2>&1; then \
+		echo "WireMock is running (PID: $$PID)"; \
+	else \
+		echo "WireMock PID file exists but process not running"; \
+	fi
+
+# Runs the same script as the "WireMock Smoke Test" CI workflow.
+.PHONY: mock-test
+mock-test: ## Run WireMock smoke tests
+	@./scripts/smoke-test-wiremock.sh
+
 .PHONY: clean
 clean: ## Clean build artifacts and coverage files
 	$(GOCLEAN)

@@ -54,10 +54,33 @@ JUDGE_MODEL_NAME=gpt-5-nano
 
 ## Running Tests
 
+### Mock Mode (Recommended for Development)
+
+Run tests against the WireMock mock service (no credentials required):
+
 ```bash
-./scripts/run-tests.sh
+./scripts/run-tests.sh --mock
 ```
 
+This mode:
+- Starts WireMock automatically on localhost:8081
+- Uses deterministic test fixtures
+- Requires no API tokens or real StackRox instance
+- Is fast and reliable for local development
+
+### Real Mode
+
+Run tests against a real StackRox Central instance:
+
+```bash
+./scripts/run-tests.sh --real
+```
+
+This mode:
+- Uses the real StackRox Central API (staging.demo.stackrox.com by default)
+- Requires a valid API token in `.env`
+- Tests against live data from that instance rather than fixtures
+
 Results are saved to `mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json`.
 
 ### View Results
@@ -72,16 +95,19 @@ jq '[.[] | .callHistory.ToolCalls[]? | {name: .request.Params.name, arguments: .
 
 ## Test Cases
 
-| Test | Description | Tool |
-|------|-------------|------|
-| `list-clusters` | List all clusters | `list_clusters` |
-| `cve-detected-workloads` | CVE detected in deployments | `get_deployments_for_cve` |
-| `cve-detected-clusters` | CVE detected in clusters | `get_clusters_with_orchestrator_cve` |
-| `cve-nonexistent` | Handle non-existent CVE | `get_clusters_with_orchestrator_cve` |
-| `cve-cluster-does-exist` | CVE with cluster filter | `get_clusters_with_orchestrator_cve` |
-| `cve-cluster-does-not-exist` | CVE with cluster filter | `get_clusters_with_orchestrator_cve` |
-| `cve-clusters-general` | General CVE query | `get_clusters_with_orchestrator_cve` |
-| `cve-cluster-list` | CVE across clusters | `get_clusters_with_orchestrator_cve` |
+| Test | Description | Tool | Eval Coverage |
+|------|-------------|------|---------------|
+| `list-clusters` | List all clusters | `list_clusters` | - |
+| `cve-detected-workloads` | CVE detected in deployments | `get_deployments_for_cve` | Eval 1 |
+| `cve-detected-clusters` | CVE detected in clusters | `get_clusters_with_orchestrator_cve` | Eval 1 |
+| `cve-nonexistent` | Handle non-existent CVE | `get_clusters_with_orchestrator_cve` | Eval 2 |
+| `cve-cluster-does-exist` | CVE with cluster filter | `get_clusters_with_orchestrator_cve` | Eval 4 |
+| `cve-cluster-does-not-exist` | CVE with non-existent cluster | `list_clusters` | - |
+| `cve-clusters-general` | General CVE query | `get_clusters_with_orchestrator_cve` | Eval 1 |
+| `cve-cluster-list` | CVE across clusters | `get_clusters_with_orchestrator_cve` | - |
+| `cve-log4shell` | Well-known CVE (log4shell) | `get_deployments_for_cve` | Eval 3 |
+| `cve-multiple` | Multiple CVEs in one prompt | `get_deployments_for_cve` | Eval 5 |
+| `rhsa-not-supported` | RHSA detection (should fail) | None | Eval 7 |
 
 ## Configuration
 

@@ -0,0 +1,143 @@
+kind: Eval
+metadata:
+  name: 'stackrox-mcp-e2e'
+config:
+  agent:
+    type: 'builtin.claude-code'
+    model: 'claude-sonnet-4-5'
+  llmJudge:
+    # Names of the environment variables that supply the judge settings.
+    env:
+      baseUrlKey: JUDGE_BASE_URL
+      apiKeyKey: JUDGE_API_KEY
+      modelNameKey: JUDGE_MODEL_NAME
+  mcpConfigFile: mcp-config-mock.yaml
+  taskSets:
+    # How to read the assertions below:
+    # - toolsUsed: every listed tool MUST be called at least once
+    # - minToolCalls: lower bound on the TOTAL number of tool calls, summed over ALL tools
+    # - maxToolCalls: upper bound on the TOTAL number of tool calls, summed over ALL tools
+    #   (guards against runaway tool usage)
+    # Example: with maxToolCalls=3 the agent may make at most 3 tool calls in the test,
+    # no matter which tools they go to.
+
+    # Test 1: List clusters
+    - path: tasks/list-clusters.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: 'list_clusters'
+        minToolCalls: 1
+        maxToolCalls: 1
+
+    # Test 2: CVE detected in workloads
+    # Claude tends to check comprehensively (orchestrator, deployments, nodes),
+    # so up to 3 calls are allowed.
+    - path: tasks/cve-detected-workloads.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: 'get_deployments_for_cve'
+            argumentsMatch:
+              cveName: 'CVE-2021-31805'
+        minToolCalls: 1
+        maxToolCalls: 3
+
+    # Test 3: CVE detected in clusters - basic
+    - path: tasks/cve-detected-clusters.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: 'get_clusters_with_orchestrator_cve'
+            argumentsMatch:
+              cveName: 'CVE-2016-1000031'
+        minToolCalls: 1
+        maxToolCalls: 3
+
+    # Test 4: Non-existent CVE
+    # Allows up to 3 calls: "Is CVE detected in my clusters?" can trigger a comprehensive
+    # check (orchestrator, deployments, nodes), since the LLM cannot know beforehand
+    # whether the CVE exists. Only the orchestrator lookup is required.
+    - path: tasks/cve-nonexistent.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: 'get_clusters_with_orchestrator_cve'
+            argumentsMatch:
+              cveName: 'CVE-2099-00001'
+        minToolCalls: 1
+        maxToolCalls: 3
+
+    # Test 5: CVE with specific cluster filter (does exist)
+    # Claude checks comprehensively even for a single cluster (orchestrator, deployments, nodes).
+    - path: tasks/cve-cluster-does-exist.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: 'list_clusters'
+          - server: stackrox-mcp
+            toolPattern: 'get_clusters_with_orchestrator_cve'
+            argumentsMatch:
+              cveName: 'CVE-2016-1000031'
+        minToolCalls: 2
+        maxToolCalls: 4
+
+    # Test 6: CVE with specific cluster filter (does not exist)
+    # Claude checks comprehensively even when the cluster doesn't exist.
+    - path: tasks/cve-cluster-does-not-exist.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: 'list_clusters'
+        minToolCalls: 1
+        maxToolCalls: 5
+
+    # Test 7: CVE detected in clusters - general
+    - path: tasks/cve-clusters-general.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: 'get_clusters_with_orchestrator_cve'
+            argumentsMatch:
+              cveName: 'CVE-2021-31805'
+        minToolCalls: 1
+        maxToolCalls: 5
+
+    # Test 8: CVE check with cluster list reference
+    - path: tasks/cve-cluster-list.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: 'get_clusters_with_orchestrator_cve'
+            argumentsMatch:
+              cveName: 'CVE-2024-52577'
+        minToolCalls: 1
+        maxToolCalls: 5
+
+    # Test 9: Log4shell (well-known CVE)
+    - path: tasks/cve-log4shell.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: 'get_deployments_for_cve'
+            argumentsMatch:
+              cveName: 'CVE-2021-44228'
+        minToolCalls: 1
+        maxToolCalls: 3
+
+    # Test 10: Multiple CVEs in one prompt
+    # Each CVE must get its own get_deployments_for_cve call.
+    - path: tasks/cve-multiple.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: 'get_deployments_for_cve'
+            argumentsMatch:
+              cveName: 'CVE-2021-31805'
+          - server: stackrox-mcp
+            toolPattern: 'get_deployments_for_cve'
+            argumentsMatch:
+              cveName: 'CVE-2016-1000031'
+        minToolCalls: 2
+        maxToolCalls: 6
+
+    # Test 11: RHSA detection (should fail gracefully)
+    # No tool is required; at most one call is tolerated.
+    - path: tasks/rhsa-not-supported.yaml
+      assertions:
+        minToolCalls: 0
+        maxToolCalls: 1