From 52752a0b3eee74b7c8ea18c790b91d4c5c577656 Mon Sep 17 00:00:00 2001 From: Anwardeen A Date: Wed, 13 May 2026 16:23:27 +0530 Subject: [PATCH 1/2] Add sre-agent AI subcommand integration --- cmd/ai/cmd.go | 19 +++ cmd/ai/sre_agent/helper.go | 30 +++++ cmd/ai/sre_agent/sre_agent.go | 124 ++++++++++++++++++ cmd/ai/sre_agent/validate_sre_agent.go | 50 +++++++ cmd/ai/sre_agent/validate_sre_agent_config.go | 94 +++++++++++++ cmd/cmd.go | 2 + 6 files changed, 319 insertions(+) create mode 100644 cmd/ai/cmd.go create mode 100644 cmd/ai/sre_agent/helper.go create mode 100644 cmd/ai/sre_agent/sre_agent.go create mode 100644 cmd/ai/sre_agent/validate_sre_agent.go create mode 100644 cmd/ai/sre_agent/validate_sre_agent_config.go diff --git a/cmd/ai/cmd.go b/cmd/ai/cmd.go new file mode 100644 index 000000000..bdb8ad9f8 --- /dev/null +++ b/cmd/ai/cmd.go @@ -0,0 +1,19 @@ +package ai + +import ( + sreagent "github.com/openshift/osdctl/cmd/ai/sre_agent" + "github.com/spf13/cobra" +) + +// NewCmdAI implements the base AI command +func NewCmdAI() *cobra.Command { + aiCmd := &cobra.Command{ + Use: "ai", + Short: "AI-powered tools for SRE automation", + Args: cobra.NoArgs, + } + + aiCmd.AddCommand(sreagent.NewCmdSreAgent()) + + return aiCmd +} diff --git a/cmd/ai/sre_agent/helper.go b/cmd/ai/sre_agent/helper.go new file mode 100644 index 000000000..a7bc83d80 --- /dev/null +++ b/cmd/ai/sre_agent/helper.go @@ -0,0 +1,30 @@ +package sreagent + +import ( + "bufio" + "fmt" + "os" + "os/exec" + "strings" +) + +// copyRepository copies a directory recursively +func copyRepository(sourcePath, destPath string) error { + fmt.Fprintf(os.Stderr, "Copying repository to %s...\n", destPath) + cmd := exec.Command("cp", "-r", sourcePath, destPath) + cmd.Stdout = os.Stderr + cmd.Stderr = os.Stderr + + if err := cmd.Run(); err != nil { + return err + } + + return nil +} + +// promptUserInput reads a line of user input from stdin +func promptUserInput() string { + reader := bufio.NewReader(os.Stdin) + input, _ := reader.ReadString('\n') + return strings.ToLower(strings.TrimSpace(input)) +} diff --git a/cmd/ai/sre_agent/sre_agent.go b/cmd/ai/sre_agent/sre_agent.go new file mode 100644 index 000000000..569942037 --- /dev/null +++ b/cmd/ai/sre_agent/sre_agent.go @@ -0,0 +1,124 @@ +package sreagent + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + + "github.com/spf13/cobra" + cmdutil "k8s.io/kubectl/pkg/cmd/util" +) + +var ( + pdURL string + autoExecute bool + outputDir string +) + +const ( + sreAgentDescription = ` + SRE Agent is an AI-powered tool that helps SREs triage alerts and diagnose issues. + It automatically fetches incident details from PagerDuty, finds relevant SOPs, + and executes diagnostic commands on clusters. +` + + sreAgentExample = ` + # Interactive mode (asks for confirmation at each step) + osdctl ai sre-agent --pd-url "${PD_URL}" + + # Fully automated mode (no confirmations) + osdctl ai sre-agent --pd-url "${PD_URL}" --auto-execute + + # Specify output directory for sre-agent files + osdctl ai sre-agent --pd-url "${PD_URL}" --output /tmp/sre-agent-output +` +) + +func NewCmdSreAgent() *cobra.Command { + sreAgentCmd := &cobra.Command{ + Use: "sre-agent", + Short: "Run SRE Agent for automated incident investigation", + Long: sreAgentDescription, + Example: sreAgentExample, + Args: cobra.ArbitraryArgs, + SilenceUsage: true, + SilenceErrors: true, + Run: func(cmd *cobra.Command, args []string) { + homeDir, err := os.UserHomeDir() + if err != nil { + cmdutil.CheckErr(fmt.Errorf("failed to get home directory: %w", err)) + return + } + + // Step 1: Validate sre-agent installation + if !validateSreAgent(homeDir) { + return + } + + // Step 2: Check/Setup config (includes ops-sop setup) + if !checkSreAgentConfig(homeDir) { + return + } + + // Step 3: Execute sre-agent + sreAgentPath := filepath.Join(homeDir, ".local/share/sre-agent/venv/bin/sre-agent") + sreAgentArgs := buildSreAgentArgs(args) + + err = executeSreAgent(sreAgentPath, sreAgentArgs, outputDir) + if err != nil { + cmdutil.CheckErr(err) + } + }, + } + + sreAgentCmd.Flags().StringVar(&pdURL, "pd-url", "", "PagerDuty incident URL (required)") + sreAgentCmd.Flags().BoolVar(&autoExecute, "auto-execute", false, "Fully automated mode without confirmations") + sreAgentCmd.Flags().StringVar(&outputDir, "output", "", "Output directory for sre-agent files (default: current directory)") + + // Mark pd-url as required + sreAgentCmd.MarkFlagRequired("pd-url") + + return sreAgentCmd +} + +// buildSreAgentArgs constructs the argument list for sre-agent command +func buildSreAgentArgs(additionalArgs []string) []string { + args := []string{} + + if pdURL != "" { + args = append(args, "--pd-url", pdURL) + } + + if autoExecute { + args = append(args, "--auto-execute") + } + + // Add any additional arguments passed + args = append(args, additionalArgs...) + + return args +} + +// executeSreAgent runs the sre-agent command with provided arguments +func executeSreAgent(sreAgentPath string, args []string, outputDir string) error { + cmd := exec.Command(sreAgentPath, args...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Stdin = os.Stdin + + // Set working directory if output directory is specified + if outputDir != "" { + // Create directory if it doesn't exist + if err := os.MkdirAll(outputDir, 0755); err != nil { + return fmt.Errorf("failed to create output directory: %w", err) + } + cmd.Dir = outputDir + } + + if err := cmd.Run(); err != nil { + return fmt.Errorf("sre-agent execution failed: %w", err) + } + + return nil +} diff --git a/cmd/ai/sre_agent/validate_sre_agent.go b/cmd/ai/sre_agent/validate_sre_agent.go new file mode 100644 index 000000000..f27343ac4 --- /dev/null +++ b/cmd/ai/sre_agent/validate_sre_agent.go @@ -0,0 +1,50 @@ +package sreagent + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/openshift/osdctl/internal/utils" + cmdutil "k8s.io/kubectl/pkg/cmd/util" +) + +// validateSreAgent checks if sre-agent is installed +func validateSreAgent(homeDir string) bool { + baseDir := filepath.Join(homeDir, ".local/share/sre-agent") + venvBinary := filepath.Join(baseDir, "venv/bin/sre-agent") + + // Check if sre-agent binary exists + if utils.FileExists(venvBinary) { + return true // Already installed + } + + fmt.Fprintf(os.Stderr, "sre-agent is not found in ~/.local/share/sre-agent/venv/\n\n") + + // Ask for path to sre-agent venv + fmt.Fprint(os.Stderr, "Enter the absolute path to sre-agent venv directory: ") + userVenvPath := promptUserInput() + + // Validate venv binary exists in provided path + userVenvBinary := filepath.Join(userVenvPath, "bin/sre-agent") + if !utils.FileExists(userVenvBinary) { + fmt.Fprintln(os.Stderr, "\nsre-agent isn't installed") + return false + } + + // Create base directory + if err := os.MkdirAll(baseDir, 0755); err != nil { + cmdutil.CheckErr(fmt.Errorf("failed to create base directory: %w", err)) + return false + } + + // Copy venv to ~/.local/share/sre-agent/venv + venvPath := filepath.Join(baseDir, "venv") + if err := copyRepository(userVenvPath, venvPath); err != nil { + fmt.Fprintf(os.Stderr, "\nCopy failed: %v\n", err) + return false + } + + fmt.Fprintln(os.Stderr, "\n✓ sre-agent venv copied successfully") + return true +} diff --git a/cmd/ai/sre_agent/validate_sre_agent_config.go b/cmd/ai/sre_agent/validate_sre_agent_config.go new file mode 100644 index 000000000..16b715cf9 --- /dev/null +++ b/cmd/ai/sre_agent/validate_sre_agent_config.go @@ -0,0 +1,94 @@ +package sreagent + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/openshift/osdctl/internal/utils" + "gopkg.in/yaml.v3" +) + +// checkSreAgentConfig validates config.yaml and updates ops-sop path if needed +func checkSreAgentConfig(homeDir string) bool { + baseDir := filepath.Join(homeDir, ".local/share/sre-agent") + configPath := filepath.Join(homeDir, ".config/sre-agent/config.yaml") + + // Check if config exists + if !utils.FileExists(configPath) { + fmt.Fprintln(os.Stderr, "\nsre-agent not configured") + fmt.Fprintln(os.Stderr, "Config file not found at:", configPath) + return false + } + + // Read existing config + data, err := os.ReadFile(configPath) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to read config: %v\n", err) + return false + } + + // Parse YAML + var config map[string]interface{} + if err := yaml.Unmarshal(data, &config); err != nil { + fmt.Fprintf(os.Stderr, "Failed to parse config: %v\n", err) + return false + } + + // Get current sop directory from config + sop, ok := config["sop"].(map[string]interface{}) + if !ok { + fmt.Fprintln(os.Stderr, "Invalid config: sop section not found") + return false + } + + currentSopDir, _ := sop["directory"].(string) + + // Ask user for ops-sop repository path + fmt.Fprintln(os.Stderr, "\nChecking ops-sop repository...") + fmt.Fprint(os.Stderr, "Enter the absolute path to ops-sop repository: ") + userOpsSopPath := promptUserInput() + + // Validate path exists + if !utils.FolderExists(userOpsSopPath) { + fmt.Fprintln(os.Stderr, "\nThe provided ops-sop path does not exist.") + return false + } + + opsSopPath := filepath.Join(baseDir, "ops-sop") + + // Copy ops-sop if not present + if !utils.FolderExists(opsSopPath) { + if err := copyRepository(userOpsSopPath, opsSopPath); err != nil { + fmt.Fprintf(os.Stderr, "\nCopy failed: %v\n", err) + return false + } + fmt.Fprintln(os.Stderr, "✓ ops-sop copied successfully") + } else { + fmt.Fprintln(os.Stderr, "✓ ops-sop repository found") + } + + // Check if sop directory in config is different from expected + if currentSopDir != opsSopPath { + // Update config with new path + sop["directory"] = opsSopPath + + // Write updated config + updatedData, err := yaml.Marshal(config) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to marshal config: %v\n", err) + return false + } + + if err := os.WriteFile(configPath, updatedData, 0600); err != nil { + fmt.Fprintf(os.Stderr, "Failed to write config: %v\n", err) + return false + } + + fmt.Fprintf(os.Stderr, "✓ ops-sop path updated in config: %s\n\n", opsSopPath) + } else { + fmt.Fprintf(os.Stderr, "✓ ops-sop path is correct: %s\n\n", opsSopPath) + } + + return true +} diff --git a/cmd/cmd.go b/cmd/cmd.go index 5b0b912f6..bb76f5f64 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -19,6 +19,7 @@ import ( "github.com/openshift/osdctl/cmd/aao" "github.com/openshift/osdctl/cmd/account" + "github.com/openshift/osdctl/cmd/ai" "github.com/openshift/osdctl/cmd/alerts" "github.com/openshift/osdctl/cmd/cloudtrail" "github.com/openshift/osdctl/cmd/cluster" @@ -99,6 +100,7 @@ func NewCmdRoot(streams genericclioptions.IOStreams) *cobra.Command { // add sub commands addToRootCmdWithOtherGlobalOpts(aao.NewCmdAao(kubeClient)) addToRootCmdWithOtherGlobalOpts(account.NewCmdAccount(streams, kubeClient, globalOpts)) + rootCmd.AddCommand(ai.NewCmdAI()) addToRootCmdWithOtherGlobalOpts(alerts.NewCmdAlerts()) addToRootCmdWithOtherGlobalOpts(cloudtrail.NewCloudtrailCmd()) addToRootCmdWithOtherGlobalOpts(cluster.NewCmdCluster(streams, kubeClient, globalOpts)) From a7a328baa8e853a92f8a1d5ee354796e72f3281d Mon Sep 17 00:00:00 2001 From: Anwardeen A Date: Thu, 14 May 2026 11:31:50 +0530 Subject: [PATCH 2/2] Fixing PR failure and code rabbit comment --- cmd/ai/sre_agent/helper.go | 9 ++-- cmd/ai/sre_agent/sre_agent.go | 5 +- cmd/ai/sre_agent/validate_sre_agent.go | 7 ++- cmd/ai/sre_agent/validate_sre_agent_config.go | 12 ++++- docs/README.md | 39 +++++++++++++++ docs/osdctl.md | 1 + docs/osdctl_ai.md | 21 ++++++++ docs/osdctl_ai_sre-agent.md | 50 +++++++++++++++++++ 8 files changed, 135 insertions(+), 9 deletions(-) create mode 100644 docs/osdctl_ai.md create mode 100644 docs/osdctl_ai_sre-agent.md diff --git a/cmd/ai/sre_agent/helper.go b/cmd/ai/sre_agent/helper.go index a7bc83d80..163376718 100644 --- a/cmd/ai/sre_agent/helper.go +++ b/cmd/ai/sre_agent/helper.go @@ -23,8 +23,11 @@ func copyRepository(sourcePath, destPath string) error { } // promptUserInput reads a line of user input from stdin -func promptUserInput() string { +func promptUserInput() (string, error) { reader := bufio.NewReader(os.Stdin) - input, _ := reader.ReadString('\n') - return strings.ToLower(strings.TrimSpace(input)) + input, err := reader.ReadString('\n') + if err != nil { + return "", fmt.Errorf("failed to read input: %w", err) + } + return strings.ToLower(strings.TrimSpace(input)), nil } diff --git a/cmd/ai/sre_agent/sre_agent.go b/cmd/ai/sre_agent/sre_agent.go index 569942037..d0eb1b2db 100644 --- a/cmd/ai/sre_agent/sre_agent.go +++ b/cmd/ai/sre_agent/sre_agent.go @@ -48,7 +48,6 @@ func NewCmdSreAgent() *cobra.Command { homeDir, err := os.UserHomeDir() if err != nil { cmdutil.CheckErr(fmt.Errorf("failed to get home directory: %w", err)) - return } // Step 1: Validate sre-agent installation @@ -77,7 +76,9 @@ func NewCmdSreAgent() *cobra.Command { sreAgentCmd.Flags().StringVar(&outputDir, "output", "", "Output directory for sre-agent files (default: current directory)") // Mark pd-url as required - sreAgentCmd.MarkFlagRequired("pd-url") + if err := sreAgentCmd.MarkFlagRequired("pd-url"); err != nil { + fmt.Fprintf(os.Stderr, "Failed to mark pd-url as required: %v\n", err) + } return sreAgentCmd } diff --git a/cmd/ai/sre_agent/validate_sre_agent.go b/cmd/ai/sre_agent/validate_sre_agent.go index f27343ac4..884bb1026 100644 --- a/cmd/ai/sre_agent/validate_sre_agent.go +++ b/cmd/ai/sre_agent/validate_sre_agent.go @@ -23,7 +23,11 @@ func validateSreAgent(homeDir string) bool { // Ask for path to sre-agent venv fmt.Fprint(os.Stderr, "Enter the absolute path to sre-agent venv directory: ") - userVenvPath := promptUserInput() + userVenvPath, err := promptUserInput() + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to read input: %v\n", err) + return false + } // Validate venv binary exists in provided path userVenvBinary := filepath.Join(userVenvPath, "bin/sre-agent") @@ -35,7 +39,6 @@ func validateSreAgent(homeDir string) bool { // Create base directory if err := os.MkdirAll(baseDir, 0755); err != nil { cmdutil.CheckErr(fmt.Errorf("failed to create base directory: %w", err)) - return false } // Copy venv to ~/.local/share/sre-agent/venv diff --git a/cmd/ai/sre_agent/validate_sre_agent_config.go b/cmd/ai/sre_agent/validate_sre_agent_config.go index 16b715cf9..bc3f7efe0 100644 --- a/cmd/ai/sre_agent/validate_sre_agent_config.go +++ b/cmd/ai/sre_agent/validate_sre_agent_config.go @@ -42,12 +42,20 @@ func checkSreAgentConfig(homeDir string) bool { return false } - currentSopDir, _ := sop["directory"].(string) + currentSopDir, ok := sop["directory"].(string) + if !ok { + fmt.Fprintln(os.Stderr, "Invalid config: sop directory is not a string") + return false + } // Ask user for ops-sop repository path fmt.Fprintln(os.Stderr, "\nChecking ops-sop repository...") fmt.Fprint(os.Stderr, "Enter the absolute path to ops-sop repository: ") - userOpsSopPath := promptUserInput() + userOpsSopPath, err := promptUserInput() + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to read input: %v\n", err) + return false + } // Validate path exists if !utils.FolderExists(userOpsSopPath) { diff --git a/docs/README.md b/docs/README.md index bbaa4e80c..786db4b09 100644 --- a/docs/README.md +++ b/docs/README.md @@ -29,6 +29,8 @@ - `describe` - Describe AWS service-quotas - `set ` - Set AWS Account CR status - `verify-secrets []` - Verify AWS Account CR IAM User credentials +- `ai` - AI-powered tools for SRE automation + - `sre-agent` - Run SRE Agent for automated incident investigation - `alert` - List alerts - `list --cluster-id --level [warning, critical, firing, pending, all]` - List all alerts or based on severity - `silence` - add, expire and list silence associated with alerts @@ -908,6 +910,43 @@ osdctl account verify-secrets [] [flags] --verbose Verbose output ``` +### osdctl ai + +AI-powered tools for SRE automation + +``` +osdctl ai [flags] +``` + +#### Flags + +``` + -h, --help help for ai + -S, --skip-version-check skip checking to see if this is the most recent release +``` + +### osdctl ai sre-agent + + + SRE Agent is an AI-powered tool that helps SREs triage alerts and diagnose issues. + It automatically fetches incident details from PagerDuty, finds relevant SOPs, + and executes diagnostic commands on clusters. + + +``` +osdctl ai sre-agent [flags] +``` + +#### Flags + +``` + --auto-execute Fully automated mode without confirmations + -h, --help help for sre-agent + --output string Output directory for sre-agent files (default: current directory) + --pd-url string PagerDuty incident URL (required) + -S, --skip-version-check skip checking to see if this is the most recent release +``` + ### osdctl alert List alerts diff --git a/docs/osdctl.md b/docs/osdctl.md index 19307be76..65bbfa91a 100644 --- a/docs/osdctl.md +++ b/docs/osdctl.md @@ -17,6 +17,7 @@ CLI tool to provide OSD related utilities * [osdctl aao](osdctl_aao.md) - AWS Account Operator Debugging Utilities * [osdctl account](osdctl_account.md) - AWS Account related utilities +* [osdctl ai](osdctl_ai.md) - AI-powered tools for SRE automation * [osdctl alert](osdctl_alert.md) - List alerts * [osdctl cloudtrail](osdctl_cloudtrail.md) - AWS CloudTrail related utilities * [osdctl cluster](osdctl_cluster.md) - Provides information for a specified cluster diff --git a/docs/osdctl_ai.md b/docs/osdctl_ai.md new file mode 100644 index 000000000..8910f41d9 --- /dev/null +++ b/docs/osdctl_ai.md @@ -0,0 +1,21 @@ +## osdctl ai + +AI-powered tools for SRE automation + +### Options + +``` + -h, --help help for ai +``` + +### Options inherited from parent commands + +``` + -S, --skip-version-check skip checking to see if this is the most recent release +``` + +### SEE ALSO + +* [osdctl](osdctl.md) - OSD CLI +* [osdctl ai sre-agent](osdctl_ai_sre-agent.md) - Run SRE Agent for automated incident investigation + diff --git a/docs/osdctl_ai_sre-agent.md b/docs/osdctl_ai_sre-agent.md new file mode 100644 index 000000000..ee0cd9986 --- /dev/null +++ b/docs/osdctl_ai_sre-agent.md @@ -0,0 +1,50 @@ +## osdctl ai sre-agent + +Run SRE Agent for automated incident investigation + +### Synopsis + + + SRE Agent is an AI-powered tool that helps SREs triage alerts and diagnose issues. + It automatically fetches incident details from PagerDuty, finds relevant SOPs, + and executes diagnostic commands on clusters. + + +``` +osdctl ai sre-agent [flags] +``` + +### Examples + +``` + + # Interactive mode (asks for confirmation at each step) + osdctl ai sre-agent --pd-url "${PD_URL}" + + # Fully automated mode (no confirmations) + osdctl ai sre-agent --pd-url "${PD_URL}" --auto-execute + + # Specify output directory for sre-agent files + osdctl ai sre-agent --pd-url "${PD_URL}" --output /tmp/sre-agent-output + +``` + +### Options + +``` + --auto-execute Fully automated mode without confirmations + -h, --help help for sre-agent + --output string Output directory for sre-agent files (default: current directory) + --pd-url string PagerDuty incident URL (required) +``` + +### Options inherited from parent commands + +``` + -S, --skip-version-check skip checking to see if this is the most recent release +``` + +### SEE ALSO + +* [osdctl ai](osdctl_ai.md) - AI-powered tools for SRE automation +