From 862fa54a86fac7251e81a7f73bdba1e507a07a37 Mon Sep 17 00:00:00 2001 From: Dustin Row Date: Fri, 15 May 2026 07:32:49 -0700 Subject: [PATCH] SREP-4866: Add RHOBS MCP server for AI agent integration Add `osdctl rhobs mcp` with two subcommands: - `server`: starts a stdio-based MCP server exposing RHOBS tools - `config`: prints MCP client configuration JSON for easy setup Usage: claude --mcp-config "$(osdctl rhobs mcp config)" Uses the official modelcontextprotocol/go-sdk maintained by the MCP org and Google. Compatible with any MCP client (Claude Code, Cursor, Windsurf, custom agents). Tools (all read-only, with OutputSchema): - rhobs_metrics: PromQL instant/range queries against Thanos - rhobs_logs: LogQL queries against Loki - rhobs_alerts: Firing alerts from Alertmanager Covers ROSA HCP infrastructure: hosted clusters, Management Clusters (MC), and Service Clusters (SC). Accepts any cluster ID; the correct RHOBS cell is resolved automatically. --- cmd/cmd.go | 15 +- cmd/rhobs/mcpCmd.go | 132 +++++ cmd/rhobs/mcp_helpers.go | 57 ++ cmd/rhobs/mcp_query.go | 223 ++++++++ cmd/rhobs/mcp_test.go | 896 ++++++++++++++++++++++++++++++++ cmd/rhobs/mcp_tools.go | 282 ++++++++++ cmd/rhobs/rootCmd.go | 6 + docs/README.md | 74 +++ docs/osdctl_rhobs.md | 1 + docs/osdctl_rhobs_mcp.md | 43 ++ docs/osdctl_rhobs_mcp_config.md | 35 ++ docs/osdctl_rhobs_mcp_server.md | 26 + go.mod | 7 +- go.sum | 14 +- 14 files changed, 1804 insertions(+), 7 deletions(-) create mode 100644 cmd/rhobs/mcpCmd.go create mode 100644 cmd/rhobs/mcp_helpers.go create mode 100644 cmd/rhobs/mcp_query.go create mode 100644 cmd/rhobs/mcp_test.go create mode 100644 cmd/rhobs/mcp_tools.go create mode 100644 docs/osdctl_rhobs_mcp.md create mode 100644 docs/osdctl_rhobs_mcp_config.md create mode 100644 docs/osdctl_rhobs_mcp_server.md diff --git a/cmd/cmd.go b/cmd/cmd.go index 5b0b912f6..773a6e488 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -78,8 +78,7 @@ func NewCmdRoot(streams genericclioptions.IOStreams) *cobra.Command { os.Exit(1) } - // Checks the skipVersionCheck flag and the command being run to determine if the version check should run - if shouldRunVersionCheck(skipVersionCheck, cmd.Use) { + if !skipVersionCheck && !shouldSkipVersionCheckForCmd(cmd) { versionCheck() } }, @@ -140,13 +139,21 @@ func shouldRunVersionCheck(skipVersionCheckFlag bool, commandName string) bool { } func canCommandSkipVersionCheck(commandName string) bool { - // Checks if the specific command is in the allowlist return slice.ContainsString(getSkipVersionCommands(), commandName, nil) } +func shouldSkipVersionCheckForCmd(cmd *cobra.Command) bool { + for c := cmd; c != nil; c = c.Parent() { + if canCommandSkipVersionCheck(c.Use) { + return true + } + } + return false +} + // Returns allowlist of commands that can skip version check func getSkipVersionCommands() []string { - return []string{"upgrade", "version"} + return []string{"upgrade", "version", "mcp"} } func versionCheck() { diff --git a/cmd/rhobs/mcpCmd.go b/cmd/rhobs/mcpCmd.go new file mode 100644 index 000000000..294a74840 --- /dev/null +++ b/cmd/rhobs/mcpCmd.go @@ -0,0 +1,132 @@ +package rhobs + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "os/exec" + "time" + + "github.com/modelcontextprotocol/go-sdk/mcp" + log "github.com/sirupsen/logrus" + "github.com/spf13/cobra" +) + +func checkVaultToken(ctx context.Context) error { + ctx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + cmd := exec.CommandContext(ctx, "vault", "token", "lookup") + cmd.Env = append(os.Environ(), "VAULT_ADDR=https://vault.devshift.net") + cmd.Stdout = nil + cmd.Stderr = nil + if err := cmd.Run(); err != nil { + if errors.Is(err, exec.ErrNotFound) { + return fmt.Errorf("vault CLI not found in PATH; install Vault and retry") + } + if errors.Is(ctx.Err(), context.DeadlineExceeded) { + return fmt.Errorf("vault token lookup timed out; verify VAULT_ADDR and network connectivity") + } + return fmt.Errorf("vault token expired or missing, run: VAULT_ADDR=https://vault.devshift.net vault login -method=oidc") + } + return nil +} + +func newCmdMcp() *cobra.Command { + cmd := &cobra.Command{ + Use: "mcp", + Short: "RHOBS MCP server for AI agent integration", + Long: `MCP (Model Context Protocol) server that exposes RHOBS metrics, logs, +and alerts querying as tools for AI agents. + +Compatible with any MCP client (Claude Code, Cursor, Windsurf, custom agents). + +Subcommands: + server Start the stdio MCP server + config Print MCP client configuration JSON + +Quick start: + claude --mcp-config "$(osdctl rhobs mcp config)" + +Prerequisites: + - OCM login: ocm login --use-auth-code --url + - Vault login: VAULT_ADDR=https://vault.devshift.net vault login -method=oidc + - osdctl config: ~/.config/osdctl must have rhobs__vault_path entries`, + PersistentPreRunE: func(cmd *cobra.Command, args []string) error { + return nil + }, + } + + cmd.AddCommand(newCmdMcpServer()) + cmd.AddCommand(newCmdMcpConfig()) + + return cmd +} + +func newCmdMcpServer() *cobra.Command { + return &cobra.Command{ + Use: "server", + Short: "Start the RHOBS MCP server", + Args: cobra.NoArgs, + SilenceErrors: true, + SilenceUsage: true, + RunE: func(cmd *cobra.Command, args []string) error { + log.SetOutput(io.Discard) + + if err := checkVaultToken(cmd.Context()); err != nil { + return err + } + + server := mcp.NewServer(&mcp.Implementation{ + Name: "osdctl-rhobs", + Version: "1.0.0", + }, nil) + + registerMcpTools(server) + + return server.Run(cmd.Context(), &mcp.StdioTransport{}) + }, + } +} + +func newCmdMcpConfig() *cobra.Command { + return &cobra.Command{ + Use: "config", + Short: "Print MCP client configuration JSON", + Long: `Print MCP client configuration JSON for use with AI agents. + +Usage with Claude Code: + claude --mcp-config "$(osdctl rhobs mcp config)" + +Or add to ~/.claude/mcp_settings.json manually.`, + Args: cobra.NoArgs, + SilenceErrors: true, + SilenceUsage: true, + RunE: func(cmd *cobra.Command, args []string) error { + execPath, err := os.Executable() + if err != nil { + return fmt.Errorf("failed to determine osdctl binary path: %v", err) + } + + config := map[string]interface{}{ + "mcpServers": map[string]interface{}{ + "osdctl-rhobs": map[string]interface{}{ + "command": execPath, + "args": []string{"rhobs", "mcp", "server"}, + }, + }, + } + + output, err := json.MarshalIndent(config, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal config: %v", err) + } + + fmt.Println(string(output)) + return nil + }, + } +} diff --git a/cmd/rhobs/mcp_helpers.go b/cmd/rhobs/mcp_helpers.go new file mode 100644 index 000000000..e4798e6ad --- /dev/null +++ b/cmd/rhobs/mcp_helpers.go @@ -0,0 +1,57 @@ +package rhobs + +import ( + "encoding/json" + "fmt" + "sync" + + "github.com/modelcontextprotocol/go-sdk/mcp" + "golang.org/x/sync/singleflight" +) + +var fetcherCache sync.Map +var fetcherInit singleflight.Group + +func getCachedFetcher(clusterId string, usage RhobsFetchUsage) (*RhobsFetcher, error) { + key := fmt.Sprintf("%s:%s", clusterId, usage) + if cached, ok := fetcherCache.Load(key); ok { + return cached.(*RhobsFetcher), nil + } + + v, err, _ := fetcherInit.Do(key, func() (interface{}, error) { + if cached, ok := fetcherCache.Load(key); ok { + return cached, nil + } + fetcher, err := CreateRhobsFetcher(clusterId, usage, commonOptions.hiveOcmUrl) + if err != nil { + return nil, err + } + actual, _ := fetcherCache.LoadOrStore(key, fetcher) + return actual, nil + }) + if err != nil { + return nil, err + } + return v.(*RhobsFetcher), nil +} + +func mcpResultJSON(data interface{}) (*mcp.CallToolResult, error) { + jsonData, err := json.MarshalIndent(data, "", " ") + if err != nil { + return nil, fmt.Errorf("failed to marshal result: %v", err) + } + return &mcp.CallToolResult{ + Content: []mcp.Content{&mcp.TextContent{Text: string(jsonData)}}, + }, nil +} + +func mcpError(format string, args ...interface{}) (*mcp.CallToolResult, error) { + return &mcp.CallToolResult{ + Content: []mcp.Content{&mcp.TextContent{Text: fmt.Sprintf(format, args...)}}, + IsError: true, + }, nil +} + +func boolPtr(b bool) *bool { + return &b +} diff --git a/cmd/rhobs/mcp_query.go b/cmd/rhobs/mcp_query.go new file mode 100644 index 000000000..c9d354fbd --- /dev/null +++ b/cmd/rhobs/mcp_query.go @@ -0,0 +1,223 @@ +package rhobs + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "sort" + "strconv" + "time" + + rhobsclient "github.com/observatorium/api/client" + rhobsparameters "github.com/observatorium/api/client/parameters" +) + +type mcpLogEntry struct { + Timestamp time.Time `json:"timestamp"` + Message string `json:"message"` + Stream map[string]string `json:"stream,omitempty"` +} + +func (q *RhobsFetcher) QueryInstantMetrics(ctx context.Context, promExpr string, filterCluster bool) ([]instantMetricResult, error) { + client, err := q.getClient() + if err != nil { + return nil, err + } + + promQuery := rhobsparameters.PromqlQuery(promExpr) + queryParams := &rhobsclient.GetInstantQueryParams{Query: &promQuery} + + response, err := client.GetInstantQueryWithResponse(ctx, "hcp", queryParams) + if err != nil { + return nil, fmt.Errorf("failed to send request to RHOBS: %v", err) + } + if response.HTTPResponse.StatusCode != http.StatusOK { + return nil, fmt.Errorf("RHOBS query failed with status code: %d - body: %s", response.HTTPResponse.StatusCode, string(response.Body)) + } + + var formattedResponse getInstantMetricsResponse + if err := json.Unmarshal(response.Body, &formattedResponse); err != nil { + return nil, fmt.Errorf("failed to unmarshal response from RHOBS: %v", err) + } + if formattedResponse.Status != "success" { + return nil, fmt.Errorf("RHOBS query failed with status: %s", formattedResponse.Status) + } + + if !filterCluster { + return formattedResponse.Data.Results, nil + } + + var filtered []instantMetricResult + for _, result := range formattedResponse.Data.Results { + if q.isManagementCluster { + mcId := result.Metric["_mc_id"] + mcName := result.Metric["mc_name"] + if mcId == q.clusterId || mcName == q.clusterName { + filtered = append(filtered, result) + } + } else { + if result.Metric["_id"] == q.clusterExternalId { + filtered = append(filtered, result) + } + } + } + return filtered, nil +} + +func (q *RhobsFetcher) QueryRangeMetrics(ctx context.Context, promExpr, start, end, step string) (json.RawMessage, error) { + client, err := q.getClient() + if err != nil { + return nil, err + } + + promQuery := rhobsparameters.PromqlQuery(promExpr) + queryParams := &rhobsclient.GetRangeQueryParams{ + Query: &promQuery, + Start: (*rhobsparameters.StartTS)(&start), + End: (*rhobsparameters.EndTS)(&end), + Step: &step, + } + + response, err := client.GetRangeQueryWithResponse(ctx, "hcp", queryParams) + if err != nil { + return nil, fmt.Errorf("failed to send request to RHOBS: %v", err) + } + if response.HTTPResponse.StatusCode != http.StatusOK { + return nil, fmt.Errorf("RHOBS query failed with status code: %d - body: %s", response.HTTPResponse.StatusCode, string(response.Body)) + } + + return response.Body, nil +} + +func (q *RhobsFetcher) QueryLogs(ctx context.Context, lokiExpr string, startTime, endTime time.Time, logsCount int) ([]mcpLogEntry, error) { + client, err := q.getClient() + if err != nil { + return nil, err + } + + startTimeStamp := startTime.UnixNano() + endTimeStamp := endTime.UnixNano() + logsDir := "backward" + + var entries []mcpLogEntry + + for logsCount > 0 { + lokiQuery := rhobsparameters.LogqlQuery(lokiExpr) + startTimeStr := strconv.FormatInt(startTimeStamp, 10) + endTimeStr := strconv.FormatInt(endTimeStamp, 10) + + limit := float32(500) + if logsCount < 500 { + limit = float32(logsCount) + } + + queryParams := &rhobsclient.GetLogRangeQueryParams{ + Query: &lokiQuery, + Start: (*rhobsparameters.StartTS)(&startTimeStr), + End: (*rhobsparameters.EndTS)(&endTimeStr), + Direction: &logsDir, + Limit: (*rhobsparameters.Limit)(&limit), + } + + response, err := client.GetLogRangeQueryWithResponse(ctx, "hcp", queryParams) + if err != nil { + return nil, fmt.Errorf("failed to send request to RHOBS: %v", err) + } + if response.HTTPResponse.StatusCode != http.StatusOK { + return nil, fmt.Errorf("RHOBS query failed with status code: %d - body: %s", response.HTTPResponse.StatusCode, string(response.Body)) + } + + var formattedResponse getLogsResponse + if err := json.Unmarshal(response.Body, &formattedResponse); err != nil { + return nil, fmt.Errorf("failed to unmarshal response from RHOBS: %v", err) + } + if formattedResponse.Status != "success" { + return nil, fmt.Errorf("RHOBS query failed with status: %s", formattedResponse.Status) + } + if len(formattedResponse.Data.Results) == 0 { + break + } + + var flattenedResults []*logResult + for _, result := range formattedResponse.Data.Results { + for valIdx := range result.Values { + flattenedResults = append(flattenedResults, &logResult{ + Stream: result.Stream, + Values: []*[]string{result.Values[valIdx]}, + }) + } + } + + sort.Slice(flattenedResults, func(i, j int) bool { + return flattenedResults[i].getTimeStamp() > flattenedResults[j].getTimeStamp() + }) + + edgeTimeStamp := flattenedResults[len(flattenedResults)-1].getTimeStamp() + if flattenedResults[0].getTimeStamp() == edgeTimeStamp { + endTimeStamp = edgeTimeStamp - 1 + edgeTimeStamp = 0 + } else { + endTimeStamp = edgeTimeStamp + } + + for _, result := range flattenedResults { + ts := result.getTimeStamp() + if ts != edgeTimeStamp { + entry := mcpLogEntry{ + Timestamp: result.getTime(), + Message: result.getMessage(), + } + if result.Stream != nil { + entry.Stream = *result.Stream + } + entries = append(entries, entry) + logsCount-- + if logsCount <= 0 { + break + } + } + } + } + + return entries, nil +} + +func (q *RhobsFetcher) QueryAlerts(ctx context.Context) (json.RawMessage, error) { + client, err := q.getClient() + if err != nil { + return nil, err + } + + response, err := client.GetAlertsWithResponse(ctx, "hcp", &rhobsclient.GetAlertsParams{}) + if err != nil { + return nil, fmt.Errorf("failed to send request to RHOBS: %v", err) + } + if response.HTTPResponse.StatusCode != http.StatusOK { + return nil, fmt.Errorf("RHOBS query failed with status code: %d - body: %s", response.HTTPResponse.StatusCode, string(response.Body)) + } + + return response.Body, nil +} + +func (q *RhobsFetcher) QueryRules(ctx context.Context, ruleType string) (json.RawMessage, error) { + client, err := q.getClient() + if err != nil { + return nil, err + } + + params := &rhobsclient.GetRulesParams{} + if ruleType != "" { + params.Type = &ruleType + } + + response, err := client.GetRulesWithResponse(ctx, "hcp", params) + if err != nil { + return nil, fmt.Errorf("failed to send request to RHOBS: %v", err) + } + if response.HTTPResponse.StatusCode != http.StatusOK { + return nil, fmt.Errorf("RHOBS query failed with status code: %d - body: %s", response.HTTPResponse.StatusCode, string(response.Body)) + } + + return response.Body, nil +} diff --git a/cmd/rhobs/mcp_test.go b/cmd/rhobs/mcp_test.go new file mode 100644 index 000000000..d3d4ffbe3 --- /dev/null +++ b/cmd/rhobs/mcp_test.go @@ -0,0 +1,896 @@ +package rhobs + +import ( + "bytes" + "context" + "encoding/json" + "io" + "os" + "testing" + + "github.com/modelcontextprotocol/go-sdk/mcp" +) + +func makeRequest(tool string, args map[string]interface{}) *mcp.CallToolRequest { + argsJSON, _ := json.Marshal(args) + return &mcp.CallToolRequest{ + Params: &mcp.CallToolParamsRaw{ + Name: tool, + Arguments: argsJSON, + }, + } +} + +func isToolError(result *mcp.CallToolResult) bool { + return result != nil && result.IsError +} + +func getResultText(result *mcp.CallToolResult) string { + if result == nil || len(result.Content) == 0 { + return "" + } + for _, c := range result.Content { + if tc, ok := c.(*mcp.TextContent); ok { + return tc.Text + } + } + return "" +} + +func parseResultJSON(t *testing.T, result *mcp.CallToolResult) map[string]interface{} { + t.Helper() + text := getResultText(result) + var parsed map[string]interface{} + if err := json.Unmarshal([]byte(text), &parsed); err != nil { + t.Fatalf("result is not valid JSON: %v\ntext: %s", err, text) + } + return parsed +} + +// --- getArgs tests --- + +func TestGetArgs(t *testing.T) { + t.Run("valid JSON", func(t *testing.T) { + req := makeRequest("test", map[string]interface{}{"key": "value"}) + args := getArgs(req) + if args["key"] != "value" { + t.Errorf("expected key=value, got %v", args["key"]) + } + }) + + t.Run("empty arguments", func(t *testing.T) { + req := &mcp.CallToolRequest{ + Params: &mcp.CallToolParamsRaw{ + Name: "test", + Arguments: json.RawMessage(`{}`), + }, + } + args := getArgs(req) + if len(args) != 0 { + t.Errorf("expected empty map, got %v", args) + } + }) + + t.Run("malformed JSON", func(t *testing.T) { + req := &mcp.CallToolRequest{ + Params: &mcp.CallToolParamsRaw{ + Name: "test", + Arguments: json.RawMessage(`not json`), + }, + } + args := getArgs(req) + if len(args) != 0 { + t.Errorf("expected empty map for malformed JSON, got %v", args) + } + }) + + t.Run("nil arguments", func(t *testing.T) { + req := &mcp.CallToolRequest{ + Params: &mcp.CallToolParamsRaw{ + Name: "test", + Arguments: nil, + }, + } + args := getArgs(req) + if len(args) != 0 { + t.Errorf("expected empty map for nil arguments, got %v", args) + } + }) +} + +// --- Arg parser tests --- + +func TestGetStringArg(t *testing.T) { + args := map[string]interface{}{ + "name": "test-cluster", + "empty": "", + "num": 42.0, + } + + tests := []struct { + key string + def string + expected string + }{ + {"name", "default", "test-cluster"}, + {"empty", "default", ""}, + {"missing", "default", "default"}, + {"num", "default", "default"}, + } + + for _, tt := range tests { + t.Run(tt.key, func(t *testing.T) { + got := getStringArg(args, tt.key, tt.def) + if got != tt.expected { + t.Errorf("getStringArg(%q) = %q, want %q", tt.key, got, tt.expected) + } + }) + } +} + +func TestGetIntArg(t *testing.T) { + args := map[string]interface{}{ + "count": float64(42), + "zero": float64(0), + "negative": float64(-5), + "string": "not a number", + "bool": true, + } + + tests := []struct { + key string + def int + expected int + }{ + {"count", 10, 42}, + {"zero", 10, 0}, + {"negative", 10, -5}, + {"missing", 10, 10}, + {"string", 10, 10}, + {"bool", 10, 10}, + } + + for _, tt := range tests { + t.Run(tt.key, func(t *testing.T) { + got := getIntArg(args, tt.key, tt.def) + if got != tt.expected { + t.Errorf("getIntArg(%q) = %d, want %d", tt.key, got, tt.expected) + } + }) + } +} + +func TestGetBoolArg(t *testing.T) { + args := map[string]interface{}{ + "yes": true, + "no": false, + "string": "true", + "num": 1.0, + } + + tests := []struct { + key string + def bool + expected bool + }{ + {"yes", false, true}, + {"no", true, false}, + {"missing", true, true}, + {"missing", false, false}, + {"string", false, false}, + {"num", true, true}, + } + + for _, tt := range tests { + t.Run(tt.key+"_def_"+boolStr(tt.def), func(t *testing.T) { + got := getBoolArg(args, tt.key, tt.def) + if got != tt.expected { + t.Errorf("getBoolArg(%q, %v) = %v, want %v", tt.key, tt.def, got, tt.expected) + } + }) + } +} + +func boolStr(b bool) string { + if b { + return "true" + } + return "false" +} + +// --- Result helper tests --- + +func TestMcpResultJSON(t *testing.T) { + t.Run("valid data", func(t *testing.T) { + data := map[string]interface{}{"key": "value", "count": 42} + result, err := mcpResultJSON(data) + if err != nil { + t.Fatalf("mcpResultJSON returned error: %v", err) + } + if isToolError(result) { + t.Fatal("mcpResultJSON returned tool error") + } + parsed := parseResultJSON(t, result) + if parsed["key"] != "value" { + t.Errorf("expected key=value, got %v", parsed["key"]) + } + }) + + t.Run("nil data", func(t *testing.T) { + result, err := mcpResultJSON(nil) + if err != nil { + t.Fatalf("mcpResultJSON returned error: %v", err) + } + text := getResultText(result) + if text != "null" { + t.Errorf("expected 'null', got %q", text) + } + }) + + t.Run("empty slice", func(t *testing.T) { + result, err := mcpResultJSON([]string{}) + if err != nil { + t.Fatalf("mcpResultJSON returned error: %v", err) + } + text := getResultText(result) + if text != "[]" { + t.Errorf("expected '[]', got %q", text) + } + }) +} + +func TestMcpError(t *testing.T) { + t.Run("formatted message", func(t *testing.T) { + result, err := mcpError("test error: %s %d", "details", 42) + if err != nil { + t.Fatalf("mcpError returned Go error: %v", err) + } + if !isToolError(result) { + t.Fatal("mcpError should return tool error") + } + text := getResultText(result) + if text != "test error: details 42" { + t.Errorf("error text = %q, want %q", text, "test error: details 42") + } + }) + + t.Run("simple message", func(t *testing.T) { + result, _ := mcpError("simple error") + if !isToolError(result) { + t.Fatal("expected tool error") + } + if getResultText(result) != "simple error" { + t.Errorf("unexpected text: %s", getResultText(result)) + } + }) +} + +func TestBoolPtr(t *testing.T) { + truePtr := boolPtr(true) + falsePtr := boolPtr(false) + if *truePtr != true { + t.Error("boolPtr(true) should be true") + } + if *falsePtr != false { + t.Error("boolPtr(false) should be false") + } + // Verify they are distinct pointers + if truePtr == falsePtr { + t.Error("boolPtr should return distinct pointers") + } +} + +// --- handleMetrics validation tests --- + +func TestHandleMetrics_MissingParams(t *testing.T) { + tests := []struct { + name string + args map[string]interface{} + expectedMsg string + }{ + {"no args", map[string]interface{}{}, "cluster_id and query are required"}, + {"only cluster", map[string]interface{}{"cluster_id": "test"}, "cluster_id and query are required"}, + {"only query", map[string]interface{}{"query": "up"}, "cluster_id and query are required"}, + {"empty cluster", map[string]interface{}{"cluster_id": "", "query": "up"}, "cluster_id and query are required"}, + {"empty query", map[string]interface{}{"cluster_id": "test", "query": ""}, "cluster_id and query are required"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + req := makeRequest("rhobs_metrics", tt.args) + result, err := handleMetrics(context.Background(), req) + if err != nil { + t.Fatalf("handler returned Go error: %v", err) + } + if !isToolError(result) { + t.Error("expected tool error for missing params") + } + if got := getResultText(result); got != tt.expectedMsg { + t.Errorf("error = %q, want %q", got, tt.expectedMsg) + } + }) + } +} + +func TestHandleMetrics_PartialRangeArgs(t *testing.T) { + tests := []struct { + name string + args map[string]interface{} + }{ + {"only start", map[string]interface{}{"cluster_id": "test", "query": "up", "start": "1234567890"}}, + {"only end", map[string]interface{}{"cluster_id": "test", "query": "up", "end": "1234567890"}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + req := makeRequest("rhobs_metrics", tt.args) + result, err := handleMetrics(context.Background(), req) + if err != nil { + t.Fatalf("handler returned Go error: %v", err) + } + if !isToolError(result) { + t.Error("expected tool error for partial range args") + } + text := getResultText(result) + if text != "start and end must be provided together for range queries" { + t.Errorf("unexpected error: %s", text) + } + }) + } +} + +func TestHandleMetrics_BothStartEndPassesValidation(t *testing.T) { + req := makeRequest("rhobs_metrics", map[string]interface{}{ + "cluster_id": "will-fail-at-fetcher", + "query": "up", + "start": "1234567890", + "end": "1234567900", + "step": "15s", + }) + result, err := handleMetrics(context.Background(), req) + if err != nil { + t.Fatalf("handler returned Go error: %v", err) + } + if !isToolError(result) { + t.Fatal("expected tool error (fetcher should fail)") + } + text := getResultText(result) + if text == "start and end must be provided together for range queries" { + t.Error("range validation should pass when both start and end are provided") + } +} + +func TestHandleMetrics_NeitherStartNorEndPassesValidation(t *testing.T) { + req := makeRequest("rhobs_metrics", map[string]interface{}{ + "cluster_id": "will-fail-at-fetcher", + "query": "up", + }) + result, err := handleMetrics(context.Background(), req) + if err != nil { + t.Fatalf("handler returned Go error: %v", err) + } + if !isToolError(result) { + t.Fatal("expected tool error (fetcher should fail)") + } + text := getResultText(result) + if text == "start and end must be provided together for range queries" { + t.Error("should not get range validation error when neither start nor end provided") + } +} + +func TestHandleMetrics_DefaultFilterCluster(t *testing.T) { + req := makeRequest("rhobs_metrics", map[string]interface{}{ + "cluster_id": "will-fail-at-fetcher", + "query": "up", + }) + // Verify handler doesn't error on missing filter_cluster (defaults to true) + result, err := handleMetrics(context.Background(), req) + if err != nil { + t.Fatalf("handler returned Go error: %v", err) + } + // Will fail at fetcher, but confirms filter_cluster default doesn't cause issues + if !isToolError(result) { + t.Error("expected fetcher error") + } +} + +func TestHandleMetrics_DefaultStep(t *testing.T) { + req := makeRequest("rhobs_metrics", map[string]interface{}{ + "cluster_id": "will-fail-at-fetcher", + "query": "up", + "start": "1234567890", + "end": "1234567900", + }) + result, err := handleMetrics(context.Background(), req) + if err != nil { + t.Fatalf("handler returned Go error: %v", err) + } + // Will fail at fetcher but confirms missing step doesn't panic (defaults to "60s") + if !isToolError(result) { + t.Error("expected fetcher error") + } +} + +// --- handleLogs validation tests --- + +func TestHandleLogs_MissingParams(t *testing.T) { + tests := []struct { + name string + args map[string]interface{} + expectedMsg string + }{ + {"no args", map[string]interface{}{}, "cluster_id is required"}, + {"only cluster", map[string]interface{}{"cluster_id": "test"}, "either namespace or query is required"}, + {"missing cluster", map[string]interface{}{"namespace": "default"}, "cluster_id is required"}, + {"empty cluster", map[string]interface{}{"cluster_id": "", "namespace": "default"}, "cluster_id is required"}, + {"no namespace or query", map[string]interface{}{"cluster_id": "test", "since": "5m"}, "either namespace or query is required"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + req := makeRequest("rhobs_logs", tt.args) + result, err := handleLogs(context.Background(), req) + if err != nil { + t.Fatalf("handler returned Go error: %v", err) + } + if !isToolError(result) { + t.Error("expected tool error for missing params") + } + if got := getResultText(result); got != tt.expectedMsg { + t.Errorf("error = %q, want %q", got, tt.expectedMsg) + } + }) + } +} + +func TestHandleLogs_InvalidSinceDuration(t *testing.T) { + tests := []struct { + name string + since string + }{ + {"bogus string", "bogus"}, + {"no unit", "30"}, + {"invalid unit", "5x"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + req := makeRequest("rhobs_logs", map[string]interface{}{ + "cluster_id": "will-fail-at-fetcher", + "namespace": "default", + "since": tt.since, + }) + result, err := handleLogs(context.Background(), req) + if err != nil { + t.Fatalf("handler returned Go error: %v", err) + } + if !isToolError(result) { + t.Error("expected tool error") + } + }) + } +} + +func TestHandleLogs_LimitCapping(t *testing.T) { + req := makeRequest("rhobs_logs", map[string]interface{}{ + "cluster_id": "will-fail-at-fetcher", + "namespace": "default", + "limit": float64(99999), + }) + result, err := handleLogs(context.Background(), req) + if err != nil { + t.Fatalf("handler returned Go error: %v", err) + } + // Will fail at fetcher, but proves handler didn't panic on large limit + if !isToolError(result) { + t.Error("expected tool error (invalid cluster)") + } +} + +func TestHandleLogs_NonPositiveLimit(t *testing.T) { + tests := []struct { + name string + limit float64 + }{ + {"zero", 0}, + {"negative", -5}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + req := makeRequest("rhobs_logs", map[string]interface{}{ + "cluster_id": "test", + "namespace": "default", + "limit": tt.limit, + }) + result, err := handleLogs(context.Background(), req) + if err != nil { + t.Fatalf("handler returned Go error: %v", err) + } + if !isToolError(result) { + t.Error("expected tool error for non-positive limit") + } + if got := getResultText(result); got != "limit must be greater than 0" { + t.Errorf("error = %q, want %q", got, "limit must be greater than 0") + } + }) + } +} + +func TestHandleLogs_NonPositiveSince(t *testing.T) { + req := makeRequest("rhobs_logs", map[string]interface{}{ + "cluster_id": "will-fail-at-fetcher", + "namespace": "default", + "since": "-5m", + }) + result, err := handleLogs(context.Background(), req) + if err != nil { + t.Fatalf("handler returned Go error: %v", err) + } + if !isToolError(result) { + t.Error("expected tool error for negative since") + } + if got := getResultText(result); got != "'since' must be greater than 0" { + t.Errorf("error = %q, want %q", got, "'since' must be greater than 0") + } +} + +func TestHandleLogs_DefaultSinceAndLimit(t *testing.T) { + req := makeRequest("rhobs_logs", map[string]interface{}{ + "cluster_id": "will-fail-at-fetcher", + "namespace": "default", + }) + result, err := handleLogs(context.Background(), req) + if err != nil { + t.Fatalf("handler returned Go error: %v", err) + } + // Confirms default since="5m" and limit=500 don't cause issues + if !isToolError(result) { + t.Error("expected fetcher error") + } +} + +func TestHandleLogs_RawQueryPath(t *testing.T) { + req := makeRequest("rhobs_logs", map[string]interface{}{ + "cluster_id": "will-fail-at-fetcher", + "query": `{k8s_namespace_name="custom"} |= "error"`, + }) + result, err := handleLogs(context.Background(), req) + if err != nil { + t.Fatalf("handler returned Go error: %v", err) + } + // Will fail at fetcher, but proves raw query path works (namespace not required) + if !isToolError(result) { + t.Error("expected fetcher error") + } +} + +func TestHandleLogs_NamespaceWithRegex(t *testing.T) { + req := makeRequest("rhobs_logs", map[string]interface{}{ + "cluster_id": "will-fail-at-fetcher", + "namespace": "openshift-monitoring", + "contain_regex": "(?i)(error|timeout)", + }) + result, err := handleLogs(context.Background(), req) + if err != nil { + t.Fatalf("handler returned Go error: %v", err) + } + // Proves namespace + contain_regex combination doesn't panic + if !isToolError(result) { + t.Error("expected fetcher error") + } +} + +// --- handleAlerts validation tests --- + +func TestHandleAlerts_MissingCluster(t *testing.T) { + tests := []struct { + name string + args map[string]interface{} + expectedMsg string + }{ + {"no args", map[string]interface{}{}, "cluster_id is required"}, + {"empty cluster", map[string]interface{}{"cluster_id": ""}, "cluster_id is required"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + req := makeRequest("rhobs_alerts", tt.args) + result, err := handleAlerts(context.Background(), req) + if err != nil { + t.Fatalf("handler returned Go error: %v", err) + } + if !isToolError(result) { + t.Error("expected tool error for missing cluster_id") + } + if got := getResultText(result); got != tt.expectedMsg { + t.Errorf("error = %q, want %q", got, tt.expectedMsg) + } + }) + } +} + +// --- Tool registration test --- + +func TestRegisterMcpTools(t *testing.T) { + s := mcp.NewServer(&mcp.Implementation{Name: "test", Version: "1.0.0"}, nil) + registerMcpTools(s) + + // Use in-memory transport to verify tools are actually registered + serverTransport, clientTransport := mcp.NewInMemoryTransports() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + go func() { _ = s.Run(ctx, serverTransport) }() + + client := mcp.NewClient(&mcp.Implementation{Name: "test-client", Version: "1.0.0"}, nil) + session, err := client.Connect(ctx, clientTransport, nil) + if err != nil { + t.Fatalf("client connect failed: %v", err) + } + defer session.Close() + + result, err := session.ListTools(ctx, nil) + if err != nil { + t.Fatalf("ListTools failed: %v", err) + } + + expectedNames := map[string]bool{ + "rhobs_metrics": false, + "rhobs_logs": false, + "rhobs_alerts": false, + } + + if len(result.Tools) != len(expectedNames) { + t.Fatalf("expected %d tools, got %d", len(expectedNames), len(result.Tools)) + } + + for _, tool := range result.Tools { + if _, ok := expectedNames[tool.Name]; !ok { + t.Errorf("unexpected tool: %s", tool.Name) + } else { + expectedNames[tool.Name] = true + } + + // Verify annotations + if tool.Annotations == nil { + t.Errorf("tool %s missing annotations", tool.Name) + continue + } + if !tool.Annotations.ReadOnlyHint { + t.Errorf("tool %s should be read-only", tool.Name) + } + if tool.Annotations.DestructiveHint == nil || *tool.Annotations.DestructiveHint { + t.Errorf("tool %s should not be destructive", tool.Name) + } + + // Verify OutputSchema is set + if tool.OutputSchema == nil { + t.Errorf("tool %s missing OutputSchema", tool.Name) + } else { + schemaBytes, err := json.Marshal(tool.OutputSchema) + if err != nil { + t.Errorf("tool %s OutputSchema not marshalable: %v", tool.Name, err) + } else { + var schema map[string]interface{} + if err := json.Unmarshal(schemaBytes, &schema); err != nil { + t.Errorf("tool %s OutputSchema not valid JSON: %v", tool.Name, err) + } else if schema["type"] != "object" { + t.Errorf("tool %s OutputSchema type=%v, want object", tool.Name, schema["type"]) + } + } + } + } + + for name, found := range expectedNames { + if !found { + t.Errorf("expected tool %q not registered", name) + } + } +} + +func TestRegisterMcpTools_SchemaValidation(t *testing.T) { + s := mcp.NewServer(&mcp.Implementation{Name: "test", Version: "1.0.0"}, nil) + registerMcpTools(s) + + serverTransport, clientTransport := mcp.NewInMemoryTransports() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + go func() { _ = s.Run(ctx, serverTransport) }() + + client := mcp.NewClient(&mcp.Implementation{Name: "test-client", Version: "1.0.0"}, nil) + session, err := client.Connect(ctx, clientTransport, nil) + if err != nil { + t.Fatalf("client connect failed: %v", err) + } + defer session.Close() + + result, err := session.ListTools(ctx, nil) + if err != nil { + t.Fatalf("ListTools failed: %v", err) + } + + for _, tool := range result.Tools { + t.Run(tool.Name+"_schema", func(t *testing.T) { + // Verify InputSchema is valid JSON + schemaBytes, err := json.Marshal(tool.InputSchema) + if err != nil { + t.Fatalf("failed to marshal schema: %v", err) + } + var schema map[string]interface{} + if err := json.Unmarshal(schemaBytes, &schema); err != nil { + t.Fatalf("InputSchema is not valid JSON: %v", err) + } + + // Verify it has the expected structure + if schema["type"] != "object" { + t.Errorf("expected type=object, got %v", schema["type"]) + } + props, ok := schema["properties"].(map[string]interface{}) + if !ok { + t.Fatal("missing properties") + } + + // All tools should have cluster_id + if _, ok := props["cluster_id"]; !ok { + t.Error("missing cluster_id property") + } + + // All tools should require cluster_id + required, ok := schema["required"].([]interface{}) + if !ok { + t.Fatal("missing required array") + } + hasClusterId := false + for _, r := range required { + if r == "cluster_id" { + hasClusterId = true + } + } + if !hasClusterId { + t.Error("cluster_id should be required") + } + }) + } +} + +// --- Roundtrip tool call test via in-memory transport --- + +func TestToolCall_Roundtrip_ValidationErrors(t *testing.T) { + s := mcp.NewServer(&mcp.Implementation{Name: "test", Version: "1.0.0"}, nil) + registerMcpTools(s) + + serverTransport, clientTransport := mcp.NewInMemoryTransports() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + go func() { _ = s.Run(ctx, serverTransport) }() + + client := mcp.NewClient(&mcp.Implementation{Name: "test-client", Version: "1.0.0"}, nil) + session, err := client.Connect(ctx, clientTransport, nil) + if err != nil { + t.Fatalf("client connect failed: %v", err) + } + defer session.Close() + + tests := []struct { + name string + tool string + args map[string]interface{} + expectedMsg string + }{ + { + "metrics missing query", + "rhobs_metrics", + map[string]interface{}{"cluster_id": "test"}, + "cluster_id and query are required", + }, + { + "metrics partial range", + "rhobs_metrics", + map[string]interface{}{"cluster_id": "test", "query": "up", "start": "123"}, + "start and end must be provided together for range queries", + }, + { + "logs missing namespace", + "rhobs_logs", + map[string]interface{}{"cluster_id": "test"}, + "either namespace or query is required", + }, + { + "alerts empty cluster", + "rhobs_alerts", + map[string]interface{}{"cluster_id": ""}, + "cluster_id is required", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := session.CallTool(ctx, &mcp.CallToolParams{ + Name: tt.tool, + Arguments: tt.args, + }) + if err != nil { + t.Fatalf("CallTool returned protocol error: %v", err) + } + if !result.IsError { + t.Error("expected tool error") + } + text := "" + for _, c := range result.Content { + if tc, ok := c.(*mcp.TextContent); ok { + text = tc.Text + } + } + if text != tt.expectedMsg { + t.Errorf("error = %q, want %q", text, tt.expectedMsg) + } + }) + } +} + +// --- Config subcommand test --- + +func TestMcpConfigCommand(t *testing.T) { + cmd := newCmdMcpConfig() + + // Capture stdout + old := os.Stdout + r, w, _ := os.Pipe() + os.Stdout = w + + err := cmd.RunE(cmd, []string{}) + + w.Close() + os.Stdout = old + + if err != nil { + t.Fatalf("config command returned error: %v", err) + } + + var buf bytes.Buffer + _, _ = io.Copy(&buf, r) + output := buf.String() + + var config map[string]interface{} + if err := json.Unmarshal([]byte(output), &config); err != nil { + t.Fatalf("config output is not valid JSON: %v\noutput: %s", err, output) + } + + servers, ok := config["mcpServers"].(map[string]interface{}) + if !ok { + t.Fatal("missing mcpServers key") + } + + rhobs, ok := servers["osdctl-rhobs"].(map[string]interface{}) + if !ok { + t.Fatal("missing osdctl-rhobs server") + } + + if _, ok := rhobs["command"].(string); !ok { + t.Error("missing command field") + } + + args, ok := rhobs["args"].([]interface{}) + if !ok { + t.Fatal("missing args field") + } + if len(args) != 3 || args[0] != "rhobs" || args[1] != "mcp" || args[2] != "server" { + t.Errorf("args = %v, want [rhobs mcp server]", args) + } +} + +// --- Annotation tests --- + +func TestToolAnnotations(t *testing.T) { + if readOnlyAnnotations.ReadOnlyHint != true { + t.Error("expected ReadOnlyHint=true") + } + if readOnlyAnnotations.DestructiveHint == nil || *readOnlyAnnotations.DestructiveHint { + t.Error("expected DestructiveHint=false") + } +} diff --git a/cmd/rhobs/mcp_tools.go b/cmd/rhobs/mcp_tools.go new file mode 100644 index 000000000..fcdeb943e --- /dev/null +++ b/cmd/rhobs/mcp_tools.go @@ -0,0 +1,282 @@ +package rhobs + +import ( + "context" + "encoding/json" + "fmt" + "time" + + "github.com/modelcontextprotocol/go-sdk/mcp" +) + +var readOnlyAnnotations = &mcp.ToolAnnotations{ + ReadOnlyHint: true, + DestructiveHint: boolPtr(false), +} + +func registerMcpTools(s *mcp.Server) { + s.AddTool(&mcp.Tool{ + Name: "rhobs_metrics", + Description: "Query RHOBS Prometheus/Thanos metrics for ROSA HCP infrastructure. " + + "Covers HCP hosted clusters, Management Clusters (MC), and Service Clusters (SC). " + + "Accepts any cluster ID or name; the correct RHOBS cell is resolved automatically. " + + "Instant query by default; add start/end for range query.", + Annotations: readOnlyAnnotations, + InputSchema: json.RawMessage(`{ + "type": "object", + "properties": { + "cluster_id": {"type": "string", "description": "Cluster ID or name (HCP, MC, or SC)"}, + "query": {"type": "string", "description": "PromQL expression"}, + "filter_cluster": {"type": "boolean", "description": "Filter to target cluster only. Default: true"}, + "start": {"type": "string", "description": "Range query start (RFC3339 or Unix timestamp). Omit for instant query."}, + "end": {"type": "string", "description": "Range query end (RFC3339 or Unix timestamp)"}, + "step": {"type": "string", "description": "Range query step (e.g., 15s, 1m). Default: 60s"} + }, + "required": ["cluster_id", "query"] + }`), + OutputSchema: json.RawMessage(`{ + "type": "object", + "properties": { + "cell": {"type": "string", "description": "RHOBS cell URL (regional Thanos/Loki endpoint)"}, + "cluster_id": {"type": "string", "description": "Internal cluster ID"}, + "environment": {"type": "string", "description": "OCM environment (production, stage, integration)"}, + "results": {"type": "array", "description": "Metric results with labels and values (instant query)"}, + "count": {"type": "integer", "description": "Number of results (instant query)"}, + "result": {"type": "object", "description": "Range query result (when start/end provided)"} + } + }`), + }, handleMetrics) + + s.AddTool(&mcp.Tool{ + Name: "rhobs_logs", + Description: "Query RHOBS Loki logs for ROSA HCP infrastructure. " + + "Covers HCP hosted clusters, Management Clusters (MC), and Service Clusters (SC). " + + "Requires an MC cluster ID since logs are collected at the MC level. " + + "The correct RHOBS cell is resolved automatically.", + Annotations: readOnlyAnnotations, + InputSchema: json.RawMessage(`{ + "type": "object", + "properties": { + "cluster_id": {"type": "string", "description": "MC cluster ID or name (logs are collected at the MC level for HCP)"}, + "namespace": {"type": "string", "description": "Kubernetes namespace. Required unless query is set."}, + "query": {"type": "string", "description": "Raw LogQL expression (overrides namespace)"}, + "contain_regex": {"type": "string", "description": "Server-side regex filter (e.g., (?i)(error|timeout))"}, + "since": {"type": "string", "description": "Duration string (e.g., 1h, 30m). Default: 5m"}, + "limit": {"type": "number", "description": "Max log entries. Default: 500, max: 10000"} + }, + "required": ["cluster_id"] + }`), + OutputSchema: json.RawMessage(`{ + "type": "object", + "properties": { + "cell": {"type": "string", "description": "RHOBS cell URL (regional Thanos/Loki endpoint)"}, + "cluster_id": {"type": "string", "description": "Internal cluster ID"}, + "environment": {"type": "string", "description": "OCM environment (production, stage, integration)"}, + "query": {"type": "string", "description": "LogQL expression that was executed"}, + "entries": {"type": "array", "description": "Log entries with timestamp, message, and stream labels"}, + "count": {"type": "integer", "description": "Number of log entries returned"} + } + }`), + }, handleLogs) + + s.AddTool(&mcp.Tool{ + Name: "rhobs_alerts", + Description: "Query firing alerts from RHOBS Alertmanager for ROSA HCP infrastructure. " + + "Covers HCP hosted clusters, Management Clusters (MC), and Service Clusters (SC). " + + "Accepts any cluster ID or name; the correct RHOBS cell is resolved automatically.", + Annotations: readOnlyAnnotations, + InputSchema: json.RawMessage(`{ + "type": "object", + "properties": { + "cluster_id": {"type": "string", "description": "Cluster ID or name (HCP, MC, or SC)"} + }, + "required": ["cluster_id"] + }`), + OutputSchema: json.RawMessage(`{ + "type": "object", + "properties": { + "cell": {"type": "string", "description": "RHOBS cell URL (regional Thanos/Loki endpoint)"}, + "cluster_id": {"type": "string", "description": "Internal cluster ID"}, + "environment": {"type": "string", "description": "OCM environment (production, stage, integration)"}, + "alerts": {"type": "array", "description": "Firing alerts with labels and annotations"} + } + }`), + }, handleAlerts) +} + +func getArgs(req *mcp.CallToolRequest) map[string]interface{} { + var args map[string]interface{} + if err := json.Unmarshal(req.Params.Arguments, &args); err != nil { + return map[string]interface{}{} + } + return args +} + +func getStringArg(args map[string]interface{}, key, defaultValue string) string { + if val, ok := args[key].(string); ok { + return val + } + return defaultValue +} + +func getIntArg(args map[string]interface{}, key string, defaultValue int) int { + if val, ok := args[key].(float64); ok { + return int(val) + } + return defaultValue +} + +func getBoolArg(args map[string]interface{}, key string, defaultValue bool) bool { + if val, ok := args[key].(bool); ok { + return val + } + return defaultValue +} + +func handleMetrics(ctx context.Context, req *mcp.CallToolRequest) (*mcp.CallToolResult, error) { + args := getArgs(req) + clusterId := getStringArg(args, "cluster_id", "") + query := getStringArg(args, "query", "") + filterCluster := getBoolArg(args, "filter_cluster", true) + start := getStringArg(args, "start", "") + end := getStringArg(args, "end", "") + step := getStringArg(args, "step", "60s") + + if clusterId == "" || query == "" { + return mcpError("cluster_id and query are required") + } + if (start == "") != (end == "") { + return mcpError("start and end must be provided together for range queries") + } + + fetcher, err := getCachedFetcher(clusterId, RhobsFetchForMetrics) + if err != nil { + return mcpError("Failed to initialize RHOBS fetcher: %v", err) + } + + cellInfo := map[string]interface{}{ + "cell": fetcher.RhobsCell, + "cluster_id": fetcher.clusterId, + "environment": fetcher.ocmEnvName, + } + + if start != "" && end != "" { + rawResult, err := fetcher.QueryRangeMetrics(ctx, query, start, end, step) + if err != nil { + return mcpError("Range metrics query failed: %v", err) + } + var parsed interface{} + if err := json.Unmarshal(rawResult, &parsed); err != nil { + cellInfo["result_raw"] = string(rawResult) + return mcpResultJSON(cellInfo) + } + cellInfo["result"] = parsed + return mcpResultJSON(cellInfo) + } + + results, err := fetcher.QueryInstantMetrics(ctx, query, filterCluster) + if err != nil { + return mcpError("Metrics query failed: %v", err) + } + cellInfo["results"] = results + cellInfo["count"] = len(results) + return mcpResultJSON(cellInfo) +} + +func handleLogs(ctx context.Context, req *mcp.CallToolRequest) (*mcp.CallToolResult, error) { + args := getArgs(req) + clusterId := getStringArg(args, "cluster_id", "") + namespace := getStringArg(args, "namespace", "") + rawQuery := getStringArg(args, "query", "") + containRegex := getStringArg(args, "contain_regex", "") + sinceStr := getStringArg(args, "since", "5m") + limit := getIntArg(args, "limit", 500) + + if clusterId == "" { + return mcpError("cluster_id is required") + } + if namespace == "" && rawQuery == "" { + return mcpError("either namespace or query is required") + } + if limit <= 0 { + return mcpError("limit must be greater than 0") + } + if limit > 10000 { + limit = 10000 + } + + duration, err := time.ParseDuration(sinceStr) + if err != nil { + return mcpError("Invalid 'since' duration '%s': %v", sinceStr, err) + } + if duration <= 0 { + return mcpError("'since' must be greater than 0") + } + + fetcher, err := getCachedFetcher(clusterId, RhobsFetchForLogs) + if err != nil { + return mcpError("Failed to initialize RHOBS fetcher: %v", err) + } + + var lokiExpr string + if rawQuery != "" { + lokiExpr = rawQuery + } else { + lokiExpr = fmt.Sprintf(`{k8s_namespace_name="%s"}`, namespace) + if containRegex != "" { + lokiExpr += fmt.Sprintf(` |~ "%s"`, containRegex) + } + lokiExpr += fmt.Sprintf(` | openshift_cluster_id = "%s"`, fetcher.clusterExternalId) + } + + now := time.Now() + startTime := now.Add(-duration) + + entries, err := fetcher.QueryLogs(ctx, lokiExpr, startTime, now, limit) + if err != nil { + return mcpError("Log query failed: %v", err) + } + + return mcpResultJSON(map[string]interface{}{ + "cell": fetcher.RhobsCell, + "cluster_id": fetcher.clusterId, + "environment": fetcher.ocmEnvName, + "query": lokiExpr, + "entries": entries, + "count": len(entries), + }) +} + +func handleAlerts(ctx context.Context, req *mcp.CallToolRequest) (*mcp.CallToolResult, error) { + args := getArgs(req) + clusterId := getStringArg(args, "cluster_id", "") + + if clusterId == "" { + return mcpError("cluster_id is required") + } + + fetcher, err := getCachedFetcher(clusterId, RhobsFetchForMetrics) + if err != nil { + return mcpError("Failed to initialize RHOBS fetcher: %v", err) + } + + rawResult, err := fetcher.QueryAlerts(ctx) + if err != nil { + return mcpError("Alerts query failed: %v", err) + } + + result := map[string]interface{}{ + "cell": fetcher.RhobsCell, + "cluster_id": fetcher.clusterId, + "environment": fetcher.ocmEnvName, + } + + var parsed interface{} + if err := json.Unmarshal(rawResult, &parsed); err != nil { + result["alerts_raw"] = string(rawResult) + } else { + result["alerts"] = parsed + } + + return mcpResultJSON(result) +} diff --git a/cmd/rhobs/rootCmd.go b/cmd/rhobs/rootCmd.go index e4aaf69bc..a1a16db81 100644 --- a/cmd/rhobs/rootCmd.go +++ b/cmd/rhobs/rootCmd.go @@ -17,6 +17,11 @@ func NewCmdRhobs() *cobra.Command { Args: cobra.NoArgs, SilenceErrors: true, PersistentPreRunE: func(cmd *cobra.Command, args []string) error { + for c := cmd; c != nil; c = c.Parent() { + if c.Name() == "mcp" { + return nil + } + } if commonOptions.clusterId == "" { var err error @@ -33,6 +38,7 @@ func NewCmdRhobs() *cobra.Command { cmd.AddCommand(newCmdCell()) cmd.AddCommand(newCmdLogs()) cmd.AddCommand(newCmdMetrics()) + cmd.AddCommand(newCmdMcp()) cmd.PersistentFlags().StringVarP(&commonOptions.clusterId, "cluster-id", "C", "", "Name or Internal ID of the cluster (defaults to current cluster context)") cmd.PersistentFlags().StringVar(&commonOptions.hiveOcmUrl, "hive-ocm-url", "production", `OCM environment URL for hive operations - aliases: "production", "staging", "integration"`) diff --git a/docs/README.md b/docs/README.md index 76b8c93d1..f39f8a19f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -142,6 +142,9 @@ - `rhobs` - RHOBS.next related utilities - `cell` - Get the RHOBS cell for a given cluster - `logs [pod]` - Fetch logs from RHOBS for a given cluster + - `mcp` - RHOBS MCP server for AI agent integration + - `config` - Print MCP client configuration JSON + - `server` - Start the RHOBS MCP server - `metrics prometheus-expression` - Fetch metrics from RHOBS for a given cluster - `servicelog` - OCM/Hive Service log - `list --cluster-id [flags] [options]` - Get service logs for a given cluster identifier. @@ -4344,6 +4347,77 @@ osdctl rhobs logs [pod] [flags] --ts Print metadata timestamps - to be used when log messages do not have a timestamp - not possible with the "json" output format ``` +### osdctl rhobs mcp + +MCP (Model Context Protocol) server that exposes RHOBS metrics, logs, +and alerts querying as tools for AI agents. + +Compatible with any MCP client (Claude Code, Cursor, Windsurf, custom agents). + +Subcommands: + server Start the stdio MCP server + config Print MCP client configuration JSON + +Quick start: + claude --mcp-config "$(osdctl rhobs mcp config)" + +Prerequisites: + - OCM login: ocm login --use-auth-code --url + - Vault login: VAULT_ADDR=https://vault.devshift.net vault login -method=oidc + - osdctl config: ~/.config/osdctl must have rhobs__vault_path entries + +``` +osdctl rhobs mcp [flags] +``` + +#### Flags + +``` + -C, --cluster-id string Name or Internal ID of the cluster (defaults to current cluster context) + -h, --help help for mcp + --hive-ocm-url string OCM environment URL for hive operations - aliases: "production", "staging", "integration" (default "production") + -S, --skip-version-check skip checking to see if this is the most recent release +``` + +### osdctl rhobs mcp config + +Print MCP client configuration JSON for use with AI agents. + +Usage with Claude Code: + claude --mcp-config "$(osdctl rhobs mcp config)" + +Or add to ~/.claude/mcp_settings.json manually. + +``` +osdctl rhobs mcp config [flags] +``` + +#### Flags + +``` + -C, --cluster-id string Name or Internal ID of the cluster (defaults to current cluster context) + -h, --help help for config + --hive-ocm-url string OCM environment URL for hive operations - aliases: "production", "staging", "integration" (default "production") + -S, --skip-version-check skip checking to see if this is the most recent release +``` + +### osdctl rhobs mcp server + +Start the RHOBS MCP server + +``` +osdctl rhobs mcp server [flags] +``` + +#### Flags + +``` + -C, --cluster-id string Name or Internal ID of the cluster (defaults to current cluster context) + -h, --help help for server + --hive-ocm-url string OCM environment URL for hive operations - aliases: "production", "staging", "integration" (default "production") + -S, --skip-version-check skip checking to see if this is the most recent release +``` + ### osdctl rhobs metrics Fetch metrics from RHOBS for a given cluster diff --git a/docs/osdctl_rhobs.md b/docs/osdctl_rhobs.md index 3846b2321..885676dcf 100644 --- a/docs/osdctl_rhobs.md +++ b/docs/osdctl_rhobs.md @@ -21,5 +21,6 @@ RHOBS.next related utilities * [osdctl](osdctl.md) - OSD CLI * [osdctl rhobs cell](osdctl_rhobs_cell.md) - Get the RHOBS cell for a given cluster * [osdctl rhobs logs](osdctl_rhobs_logs.md) - Fetch logs from RHOBS for a given cluster +* [osdctl rhobs mcp](osdctl_rhobs_mcp.md) - RHOBS MCP server for AI agent integration * [osdctl rhobs metrics](osdctl_rhobs_metrics.md) - Fetch metrics from RHOBS for a given cluster diff --git a/docs/osdctl_rhobs_mcp.md b/docs/osdctl_rhobs_mcp.md new file mode 100644 index 000000000..4fb106ad9 --- /dev/null +++ b/docs/osdctl_rhobs_mcp.md @@ -0,0 +1,43 @@ +## osdctl rhobs mcp + +RHOBS MCP server for AI agent integration + +### Synopsis + +MCP (Model Context Protocol) server that exposes RHOBS metrics, logs, +and alerts querying as tools for AI agents. + +Compatible with any MCP client (Claude Code, Cursor, Windsurf, custom agents). + +Subcommands: + server Start the stdio MCP server + config Print MCP client configuration JSON + +Quick start: + claude --mcp-config "$(osdctl rhobs mcp config)" + +Prerequisites: + - OCM login: ocm login --use-auth-code --url + - Vault login: VAULT_ADDR=https://vault.devshift.net vault login -method=oidc + - osdctl config: ~/.config/osdctl must have rhobs__vault_path entries + +### Options + +``` + -h, --help help for mcp +``` + +### Options inherited from parent commands + +``` + -C, --cluster-id string Name or Internal ID of the cluster (defaults to current cluster context) + --hive-ocm-url string OCM environment URL for hive operations - aliases: "production", "staging", "integration" (default "production") + -S, --skip-version-check skip checking to see if this is the most recent release +``` + +### SEE ALSO + +* [osdctl rhobs](osdctl_rhobs.md) - RHOBS.next related utilities +* [osdctl rhobs mcp config](osdctl_rhobs_mcp_config.md) - Print MCP client configuration JSON +* [osdctl rhobs mcp server](osdctl_rhobs_mcp_server.md) - Start the RHOBS MCP server + diff --git a/docs/osdctl_rhobs_mcp_config.md b/docs/osdctl_rhobs_mcp_config.md new file mode 100644 index 000000000..02cf96a61 --- /dev/null +++ b/docs/osdctl_rhobs_mcp_config.md @@ -0,0 +1,35 @@ +## osdctl rhobs mcp config + +Print MCP client configuration JSON + +### Synopsis + +Print MCP client configuration JSON for use with AI agents. + +Usage with Claude Code: + claude --mcp-config "$(osdctl rhobs mcp config)" + +Or add to ~/.claude/mcp_settings.json manually. + +``` +osdctl rhobs mcp config [flags] +``` + +### Options + +``` + -h, --help help for config +``` + +### Options inherited from parent commands + +``` + -C, --cluster-id string Name or Internal ID of the cluster (defaults to current cluster context) + --hive-ocm-url string OCM environment URL for hive operations - aliases: "production", "staging", "integration" (default "production") + -S, --skip-version-check skip checking to see if this is the most recent release +``` + +### SEE ALSO + +* [osdctl rhobs mcp](osdctl_rhobs_mcp.md) - RHOBS MCP server for AI agent integration + diff --git a/docs/osdctl_rhobs_mcp_server.md b/docs/osdctl_rhobs_mcp_server.md new file mode 100644 index 000000000..33a141e58 --- /dev/null +++ b/docs/osdctl_rhobs_mcp_server.md @@ -0,0 +1,26 @@ +## osdctl rhobs mcp server + +Start the RHOBS MCP server + +``` +osdctl rhobs mcp server [flags] +``` + +### Options + +``` + -h, --help help for server +``` + +### Options inherited from parent commands + +``` + -C, --cluster-id string Name or Internal ID of the cluster (defaults to current cluster context) + --hive-ocm-url string OCM environment URL for hive operations - aliases: "production", "staging", "integration" (default "production") + -S, --skip-version-check skip checking to see if this is the most recent release +``` + +### SEE ALSO + +* [osdctl rhobs mcp](osdctl_rhobs_mcp.md) - RHOBS MCP server for AI agent integration + diff --git a/go.mod b/go.mod index 7be8907e4..e87c02a81 100644 --- a/go.mod +++ b/go.mod @@ -30,12 +30,13 @@ require ( github.com/deckarep/golang-set v1.8.0 github.com/fatih/color v1.18.0 github.com/go-git/go-git/v5 v5.17.0 - github.com/golang-jwt/jwt/v5 v5.3.0 + github.com/golang-jwt/jwt/v5 v5.3.1 github.com/google/go-github/v63 v63.0.0 github.com/google/uuid v1.6.0 github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 github.com/hashicorp/hcl/v2 v2.23.0 github.com/manifoldco/promptui v0.9.0 + github.com/modelcontextprotocol/go-sdk v1.6.0 github.com/observatorium/api v0.1.3-0.20260127104902-9709b1e9e333 github.com/olekukonko/tablewriter v0.0.5 github.com/onsi/ginkgo v1.16.5 @@ -160,6 +161,7 @@ require ( github.com/google/gnostic-models v0.7.0 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/go-querystring v1.1.0 // indirect + github.com/google/jsonschema-go v0.4.3 // indirect github.com/google/s2a-go v0.1.9 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.7 // indirect github.com/googleapis/gax-go/v2 v2.16.0 // indirect @@ -215,6 +217,8 @@ require ( github.com/rivo/uniseg v0.4.7 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/sagikazarmark/locafero v0.12.0 // indirect + github.com/segmentio/asm v1.1.3 // indirect + github.com/segmentio/encoding v0.5.4 // indirect github.com/sergi/go-diff v1.4.0 // indirect github.com/skeema/knownhosts v1.3.1 // indirect github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966 // indirect @@ -226,6 +230,7 @@ require ( github.com/x448/float16 v0.8.4 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect github.com/xlab/treeprint v1.2.0 // indirect + github.com/yosida95/uritemplate/v3 v3.0.2 // indirect github.com/zalando/go-keyring v0.2.6 // indirect gitlab.com/c0b/go-ordered-json v0.0.0-20201030195603-febf46534d5a // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect diff --git a/go.sum b/go.sum index d782fd54e..6af069577 100644 --- a/go.sum +++ b/go.sum @@ -260,8 +260,8 @@ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v4 v4.5.2 h1:YtQM7lnr8iZ+j5q71MGKkNw9Mn7AjHM68uc9g5fXeUI= github.com/golang-jwt/jwt/v4 v4.5.2/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= -github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= -github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY= +github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v1.2.5 h1:DrW6hGnjIhtvhOIiAKT6Psh/Kd/ldepEa81DKeiRJ5I= github.com/golang/glog v1.2.5/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= @@ -299,6 +299,8 @@ github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/jsonschema-go v0.4.3 h1:/DBOLZTfDow7pe2GmaJNhltueGTtDKICi8V8p+DQPd0= +github.com/google/jsonschema-go v0.4.3/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83 h1:z2ogiKUYzX5Is6zr/vP9vJGqPwcdqsWjOt+V8J7+bTc= github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83/go.mod h1:MxpfABSjhmINe3F1It9d+8exIHFvUqtLIRCdOGNXqiI= @@ -413,6 +415,8 @@ github.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU github.com/moby/spdystream v0.5.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ= github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc= +github.com/modelcontextprotocol/go-sdk v1.6.0 h1:PPLS3kn7WtOEnR+Af4X5H96SG0qSab8R/ZQT/HkhPkY= +github.com/modelcontextprotocol/go-sdk v1.6.0/go.mod h1:kzm3kzFL1/+AziGOE0nUs3gvPoNxMCvkxokMkuFapXQ= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -530,6 +534,10 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sagikazarmark/locafero v0.12.0 h1:/NQhBAkUb4+fH1jivKHWusDYFjMOOKU88eegjfxfHb4= github.com/sagikazarmark/locafero v0.12.0/go.mod h1:sZh36u/YSZ918v0Io+U9ogLYQJ9tLLBmM4eneO6WwsI= +github.com/segmentio/asm v1.1.3 h1:WM03sfUOENvvKexOLp+pCqgb/WDjsi7EK8gIsICtzhc= +github.com/segmentio/asm v1.1.3/go.mod h1:Ld3L4ZXGNcSLRg4JBsZ3//1+f/TjYl0Mzen/DQy1EJg= +github.com/segmentio/encoding v0.5.4 h1:OW1VRern8Nw6ITAtwSZ7Idrl3MXCFwXHPgqESYfvNt0= +github.com/segmentio/encoding v0.5.4/go.mod h1:HS1ZKa3kSN32ZHVZ7ZLPLXWvOVIiZtyJnO1gPH1sKt0= github.com/sergi/go-diff v1.4.0 h1:n/SP9D5ad1fORl+llWyN+D6qoUETXNZARKjyY2/KVCw= github.com/sergi/go-diff v1.4.0/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= @@ -580,6 +588,8 @@ github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= +github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= +github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=