Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions .github/workflows/wiremock-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Runs the WireMock smoke-test script on pushes to main and on pull requests.
name: WireMock Smoke Test

on:
  push:
    branches:
      - main
  pull_request:
    types:
      - opened
      - reopened
      - synchronize

# Least privilege: the job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  wiremock-smoke-test:
    name: WireMock Smoke Test
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The Go version is taken from go.mod so CI and local builds agree.
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version-file: go.mod

      - name: Set up Java
        uses: actions/setup-java@v4
        with:
          distribution: 'temurin'
          java-version: '11'

      # Installs the protoc binary and its bundled include files under /usr/local.
      - name: Install protoc
        run: |
          PROTOC_VERSION=3.20.1
          PROTOC_ZIP=protoc-${PROTOC_VERSION}-linux-x86_64.zip
          # --fail makes curl exit non-zero on an HTTP error instead of saving
          # the error page as the zip, which would only surface later in unzip.
          curl --fail -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP}
          sudo unzip -o ${PROTOC_ZIP} -d /usr/local bin/protoc
          sudo unzip -o ${PROTOC_ZIP} -d /usr/local 'include/*'
          rm -f ${PROTOC_ZIP}

      - name: Download Go dependencies
        run: go mod download

      - name: Setup proto files from go mod cache
        run: ./scripts/setup-proto-files.sh

      - name: Run smoke test
        run: ./scripts/smoke-test-wiremock.sh

      # Only runs when an earlier step failed; a missing log file is not an error.
      - name: Upload logs on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: wiremock-logs
          path: wiremock/wiremock.log
          if-no-files-found: ignore
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,12 @@
/e2e-tests/mcp-reports/
/e2e-tests/bin/
/e2e-tests/**/*-out.json

# WireMock
/wiremock/lib/*.jar
/wiremock/*.pid
/wiremock/*.log
/wiremock/__files
/wiremock/proto/
/wiremock/grpc/
/wiremock/certs/
61 changes: 59 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ test: ## Run unit tests
e2e-smoke-test: ## Run E2E smoke test (build and verify mcpchecker)
@cd e2e-tests && ./scripts/smoke-test.sh

.PHONY: e2e-test
.PHONY: e2e-test mock-start
e2e-test: ## Run E2E tests
@cd e2e-tests && ./scripts/run-tests.sh
@cd e2e-tests && ./scripts/run-tests.sh --mock

.PHONY: test-coverage-and-junit
test-coverage-and-junit: ## Run unit tests with coverage and junit output
Expand Down Expand Up @@ -91,6 +91,63 @@ lint: ## Run golangci-lint
go install -v "github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.6"
golangci-lint run

# Populate the proto files via the setup script.
.PHONY: proto-setup
proto-setup: ## Setup proto files from go mod cache
	@./scripts/setup-proto-files.sh

# Build the proto descriptors via the generator script.
.PHONY: proto-generate
proto-generate: ## Generate proto descriptors for WireMock
	@./scripts/generate-proto-descriptors.sh

# Remove the generated proto and grpc directories under wiremock/.
.PHONY: proto-clean
proto-clean: ## Clean generated proto files
	@rm -rf wiremock/proto/ wiremock/grpc/

# Fail with a hint unless the descriptor file exists.
.PHONY: proto-check
proto-check: ## Verify proto setup is correct
	@test -f wiremock/proto/descriptors/stackrox.pb || { \
		echo "❌ Proto descriptors not found"; \
		echo "Run: make proto-generate"; \
		exit 1; \
	}
	@echo "✓ Proto descriptors present"

# Fetch the WireMock JARs via the download script.
.PHONY: mock-download
mock-download: ## Download WireMock JARs
	@./scripts/download-wiremock.sh

# Start the mock; proto-check runs first so a missing descriptor fails early.
.PHONY: mock-start
mock-start: proto-check ## Start WireMock mock Central locally
	@./scripts/start-mock-central.sh

.PHONY: mock-stop
mock-stop: ## Stop WireMock mock Central
	@./scripts/stop-mock-central.sh

# Follows the log until interrupted.
.PHONY: mock-logs
mock-logs: ## View WireMock logs
	@tail -f wiremock/wiremock.log

# Stop and start run as sequential sub-makes rather than as prerequisites:
# under `make -j` prerequisites may run in parallel, so start could race stop.
.PHONY: mock-restart
mock-restart: ## Restart WireMock
	@$(MAKE) mock-stop
	@$(MAKE) mock-start

# Report status from the PID file: no file means not running; a file whose
# PID is not a live process is reported as stale.
.PHONY: mock-status
mock-status: ## Check WireMock status
	@pidfile=wiremock/wiremock.pid; \
	if [ ! -f $$pidfile ]; then \
		echo "WireMock is not running"; \
	else \
		pid=$$(cat $$pidfile); \
		if ps -p $$pid > /dev/null 2>&1; then \
			echo "WireMock is running (PID: $$pid)"; \
		else \
			echo "WireMock PID file exists but process not running"; \
		fi; \
	fi

# Same script the CI workflow runs.
.PHONY: mock-test
mock-test: ## Run WireMock smoke tests
	@./scripts/smoke-test-wiremock.sh

.PHONY: clean
clean: ## Clean build artifacts and coverage files
$(GOCLEAN)
Expand Down
48 changes: 37 additions & 11 deletions e2e-tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,33 @@ JUDGE_MODEL_NAME=gpt-5-nano

## Running Tests

### Mock Mode (Recommended for Development)

Run tests against the WireMock mock service (no credentials required):

```bash
./scripts/run-tests.sh
./scripts/run-tests.sh --mock
```

This mode:
- Starts WireMock automatically on localhost:8081
- Uses deterministic test fixtures
- Requires no API tokens or real StackRox instance
- Fast and reliable for local development

### Real Mode

Run tests against a real StackRox Central instance:

```bash
./scripts/run-tests.sh --real
```

This mode:
- Uses the real StackRox Central API (staging.demo.stackrox.com by default)
- Requires valid API token in `.env`
- Tests against live data from that Central instance (a staging demo by default), not fixtures

Results are saved to `mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json`.

### View Results
Expand All @@ -72,16 +95,19 @@ jq '[.[] | .callHistory.ToolCalls[]? | {name: .request.Params.name, arguments: .

## Test Cases

| Test | Description | Tool |
|------|-------------|------|
| `list-clusters` | List all clusters | `list_clusters` |
| `cve-detected-workloads` | CVE detected in deployments | `get_deployments_for_cve` |
| `cve-detected-clusters` | CVE detected in clusters | `get_clusters_with_orchestrator_cve` |
| `cve-nonexistent` | Handle non-existent CVE | `get_clusters_with_orchestrator_cve` |
| `cve-cluster-does-exist` | CVE with cluster filter | `get_clusters_with_orchestrator_cve` |
| `cve-cluster-does-not-exist` | CVE with cluster filter | `get_clusters_with_orchestrator_cve` |
| `cve-clusters-general` | General CVE query | `get_clusters_with_orchestrator_cve` |
| `cve-cluster-list` | CVE across clusters | `get_clusters_with_orchestrator_cve` |
| Test | Description | Tool | Eval Coverage |
|------|-------------|------|---------------|
| `list-clusters` | List all clusters | `list_clusters` | - |
| `cve-detected-workloads` | CVE detected in deployments | `get_deployments_for_cve` | Eval 1 |
| `cve-detected-clusters` | CVE detected in clusters | `get_clusters_with_orchestrator_cve` | Eval 1 |
| `cve-nonexistent` | Handle non-existent CVE | `get_clusters_with_orchestrator_cve` | Eval 2 |
| `cve-cluster-does-exist` | CVE with cluster filter | `list_clusters`, `get_clusters_with_orchestrator_cve` | Eval 4 |
| `cve-cluster-does-not-exist` | CVE with non-existent cluster | `list_clusters` | - |
| `cve-clusters-general` | General CVE query | `get_clusters_with_orchestrator_cve` | Eval 1 |
| `cve-cluster-list` | CVE across clusters | `get_clusters_with_orchestrator_cve` | - |
| `cve-log4shell` | Well-known CVE (log4shell) | `get_deployments_for_cve` | Eval 3 |
| `cve-multiple` | Multiple CVEs in one prompt | `get_deployments_for_cve` | Eval 5 |
| `rhsa-not-supported` | RHSA detection (should fail) | None | Eval 7 |

## Configuration

Expand Down
143 changes: 143 additions & 0 deletions e2e-tests/mcpchecker/eval-mock.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
kind: Eval
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Separating tasks from eval configuration makes even less sense now.

We could have one task used for demo and mock. In one case, we don't have any CVE detected, in another, we do have. And we are using the same judging prompt. So the judging prompt will match anything.

Can we organise this differently? Maybe we can have in one directory all mock tasks together with the evaluation config, and in another directory all tasks (and config) for staging demo.

i.e.

mcpchecker/tasks/mock/eval.yaml
mcpchecker/tasks/mock/task-<description 1>.yaml
..
mcpchecker/tasks/mock/task-<description n>.yaml


mcpchecker/tasks/demo/eval.yaml
mcpchecker/tasks/demo/task-<description 1>.yaml
..
mcpchecker/tasks/demo/task-<description n>.yaml

# Eval definition for the mock run: it points at mcp-config-mock.yaml
# (see mcpConfigFile) and lists the tasks with their tool-call assertions.
metadata:
name: "stackrox-mcp-e2e"
config:
agent:
type: "builtin.claude-code"
model: "claude-sonnet-4-5"
# NOTE(review): the *Key fields hold names (JUDGE_*), not values; presumably
# environment variables resolved at run time. Confirm against mcpchecker docs.
llmJudge:
env:
baseUrlKey: JUDGE_BASE_URL
apiKeyKey: JUDGE_API_KEY
modelNameKey: JUDGE_MODEL_NAME
mcpConfigFile: mcp-config-mock.yaml
taskSets:
# Assertion Fields Explained:
# - toolsUsed: List of tools that MUST be called at least once
# - minToolCalls: Minimum TOTAL number of tool calls across ALL tools (not per-tool)
# - maxToolCalls: Maximum TOTAL number of tool calls across ALL tools (prevents runaway tool usage)
# Example: If maxToolCalls=3, the agent can make up to 3 tool calls total in the test,
# regardless of which tools are called.

# Test 1: List clusters
# min = max = 1, so exactly one tool call is allowed, and toolsUsed requires
# list_clusters to be called.
- path: tasks/list-clusters.yaml
assertions:
toolsUsed:
- server: stackrox-mcp
toolPattern: "list_clusters"
minToolCalls: 1
maxToolCalls: 1

# Test 2: CVE detected in workloads
# Claude does comprehensive CVE checking (orchestrator, deployments, nodes)
- path: tasks/cve-detected-workloads.yaml
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should change this. Right now "contains" accepts both outcomes: no CVE found and CVE found. We should definitely expect the LLM to report the CVE and the relevant workloads.

assertions:
toolsUsed:
- server: stackrox-mcp
toolPattern: "get_deployments_for_cve"
argumentsMatch:
cveName: "CVE-2021-31805"
minToolCalls: 1
maxToolCalls: 3

# Test 3: CVE detected in clusters - basic
# Requires a get_clusters_with_orchestrator_cve call for CVE-2016-1000031;
# between 1 and 3 tool calls in total.
- path: tasks/cve-detected-clusters.yaml
assertions:
toolsUsed:
- server: stackrox-mcp
toolPattern: "get_clusters_with_orchestrator_cve"
argumentsMatch:
cveName: "CVE-2016-1000031"
minToolCalls: 1
maxToolCalls: 3

# Test 4: Non-existent CVE
# Allows up to 3 calls (minimum 1) because "Is CVE detected in my clusters?" can trigger
# a comprehensive check (orchestrator, deployments, nodes). The LLM cannot know beforehand
# if CVE exists. Only the orchestrator lookup is required by toolsUsed.
- path: tasks/cve-nonexistent.yaml
assertions:
toolsUsed:
- server: stackrox-mcp
toolPattern: "get_clusters_with_orchestrator_cve"
argumentsMatch:
cveName: "CVE-2099-00001"
minToolCalls: 1
maxToolCalls: 3

# Test 5: CVE with specific cluster filter (does exist)
# Claude does comprehensive checking even for single cluster (orchestrator, deployments, nodes)
# Both list_clusters and the orchestrator CVE lookup are required, hence minToolCalls: 2.
- path: tasks/cve-cluster-does-exist.yaml
assertions:
toolsUsed:
- server: stackrox-mcp
toolPattern: "list_clusters"
- server: stackrox-mcp
toolPattern: "get_clusters_with_orchestrator_cve"
argumentsMatch:
cveName: "CVE-2016-1000031"
minToolCalls: 2
maxToolCalls: 4

# Test 6: CVE with specific cluster filter (does not exist)
# Claude does comprehensive checking even when cluster doesn't exist
# Only list_clusters is required; up to 5 calls in total are tolerated.
- path: tasks/cve-cluster-does-not-exist.yaml
assertions:
toolsUsed:
- server: stackrox-mcp
toolPattern: "list_clusters"
minToolCalls: 1
maxToolCalls: 5

# Test 7: CVE detected in clusters - general
# Requires the orchestrator CVE lookup for CVE-2021-31805; 1 to 5 calls in total.
- path: tasks/cve-clusters-general.yaml
assertions:
toolsUsed:
- server: stackrox-mcp
toolPattern: "get_clusters_with_orchestrator_cve"
argumentsMatch:
cveName: "CVE-2021-31805"
minToolCalls: 1
maxToolCalls: 5

# Test 8: CVE check with cluster list reference
# Requires the orchestrator CVE lookup for CVE-2024-52577; 1 to 5 calls in total.
- path: tasks/cve-cluster-list.yaml
assertions:
toolsUsed:
- server: stackrox-mcp
toolPattern: "get_clusters_with_orchestrator_cve"
argumentsMatch:
cveName: "CVE-2024-52577"
minToolCalls: 1
maxToolCalls: 3

# Test 9: Log4shell (well-known CVE)
# Requires a get_deployments_for_cve call for CVE-2021-44228; 1 to 3 calls in total.
- path: tasks/cve-log4shell.yaml
assertions:
toolsUsed:
- server: stackrox-mcp
toolPattern: "get_deployments_for_cve"
argumentsMatch:
cveName: "CVE-2021-44228"
minToolCalls: 1
maxToolCalls: 3

# Test 10: Multiple CVEs in one prompt
# Two toolsUsed entries with the same tool but different cveName values:
# one deployment lookup per CVE is required, hence minToolCalls: 2.
- path: tasks/cve-multiple.yaml
assertions:
toolsUsed:
- server: stackrox-mcp
toolPattern: "get_deployments_for_cve"
argumentsMatch:
cveName: "CVE-2021-31805"
- server: stackrox-mcp
toolPattern: "get_deployments_for_cve"
argumentsMatch:
cveName: "CVE-2016-1000031"
minToolCalls: 2
maxToolCalls: 6

# Test 11: RHSA detection (should fail gracefully)
# No toolsUsed entry: no specific tool is required, and at most one call is allowed.
- path: tasks/rhsa-not-supported.yaml
assertions:
minToolCalls: 0
maxToolCalls: 1
Loading
Loading