diff --git a/go.mod b/go.mod index c0167e95b..8c1dddc36 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,7 @@ require ( go.opentelemetry.io/otel/trace v1.40.0 golang.org/x/oauth2 v0.35.0 golang.org/x/sync v0.19.0 + gopkg.in/yaml.v3 v3.0.1 helm.sh/helm/v3 v3.20.0 k8s.io/api v0.35.1 k8s.io/apiextensions-apiserver v0.35.1 @@ -148,7 +149,6 @@ require ( google.golang.org/protobuf v1.36.11 // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiserver v0.35.1 // indirect k8s.io/component-base v0.35.1 // indirect k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect diff --git a/internal/tools/update-readme/main.go b/internal/tools/update-readme/main.go index 4163318a3..f5aff3fc2 100644 --- a/internal/tools/update-readme/main.go +++ b/internal/tools/update-readme/main.go @@ -19,6 +19,7 @@ import ( _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kcp" _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kiali" _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt" + _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/mustgather" _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/observability" ) diff --git a/pkg/api/toolsets.go b/pkg/api/toolsets.go index 3fdc81e28..7b394dbbb 100644 --- a/pkg/api/toolsets.go +++ b/pkg/api/toolsets.go @@ -97,12 +97,21 @@ func NewToolCallResultStructured(structured any, err error) *ToolCallResult { } } +// ResourceRegistrar allows tools to register MCP resources at runtime. +// For example, the mustgather_use tool registers a must-gather:// resource +// after loading an archive. 
+type ResourceRegistrar interface { + AddResource(uri, name, description, mimeType, content string) + RemoveResources(uris ...string) +} + type ToolHandlerParams struct { context.Context ExtendedConfigProvider KubernetesClient ToolCallRequest - ListOutput output.Output + ListOutput output.Output + ResourceRegistrar ResourceRegistrar } type ToolHandlerFunc func(params ToolHandlerParams) (*ToolCallResult, error) diff --git a/pkg/mcp/gosdk.go b/pkg/mcp/gosdk.go index 02f94a284..c766a238a 100644 --- a/pkg/mcp/gosdk.go +++ b/pkg/mcp/gosdk.go @@ -13,6 +13,27 @@ import ( "k8s.io/utils/ptr" ) +// resourceRegistrar adapts the MCP go-sdk Server for the api.ResourceRegistrar interface +type resourceRegistrar struct { + server *mcp.Server +} + +func (r *resourceRegistrar) AddResource(uri, name, description, mimeType, content string) { + resourceContent := content // capture for closure + r.server.AddResource( + &mcp.Resource{URI: uri, Name: name, Description: description, MIMEType: mimeType}, + func(_ context.Context, _ *mcp.ReadResourceRequest) (*mcp.ReadResourceResult, error) { + return &mcp.ReadResourceResult{ + Contents: []*mcp.ResourceContents{{URI: uri, Text: resourceContent}}, + }, nil + }, + ) +} + +func (r *resourceRegistrar) RemoveResources(uris ...string) { + r.server.RemoveResources(uris...) 
+} + func ServerToolToGoSdkTool(s *Server, tool api.ServerTool) (*mcp.Tool, mcp.ToolHandler, error) { // Ensure InputSchema.Properties is initialized for OpenAI API compatibility // https://github.com/containers/kubernetes-mcp-server/issues/717 @@ -52,6 +73,7 @@ func ServerToolToGoSdkTool(s *Server, tool api.ServerTool) (*mcp.Tool, mcp.ToolH KubernetesClient: k, ToolCallRequest: toolCallRequest, ListOutput: s.configuration.ListOutput(), + ResourceRegistrar: &resourceRegistrar{server: s.server}, }) if err != nil { return nil, err diff --git a/pkg/mcp/mcp.go b/pkg/mcp/mcp.go index 9cdd190aa..f053b7412 100644 --- a/pkg/mcp/mcp.go +++ b/pkg/mcp/mcp.go @@ -83,10 +83,9 @@ func NewServer(configuration Configuration, targetProvider internalk8s.Provider) }, &mcp.ServerOptions{ Capabilities: &mcp.ServerCapabilities{ - Resources: nil, - Prompts: &mcp.PromptCapabilities{ListChanged: !configuration.Stateless}, - Tools: &mcp.ToolCapabilities{ListChanged: !configuration.Stateless}, - Logging: &mcp.LoggingCapabilities{}, + Prompts: &mcp.PromptCapabilities{ListChanged: !configuration.Stateless}, + Tools: &mcp.ToolCapabilities{ListChanged: !configuration.Stateless}, + Logging: &mcp.LoggingCapabilities{}, }, Instructions: configuration.ServerInstructions, }), diff --git a/pkg/mcp/modules.go b/pkg/mcp/modules.go index b0204a6d5..a712547f2 100644 --- a/pkg/mcp/modules.go +++ b/pkg/mcp/modules.go @@ -7,6 +7,7 @@ import ( _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kcp" _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kiali" _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt" + _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/mustgather" _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/netedge" _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/observability" _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/openshift" diff --git a/pkg/mcp/testdata/toolsets-openshift-mustgather-tools.json 
b/pkg/mcp/testdata/toolsets-openshift-mustgather-tools.json new file mode 100644 index 000000000..540234fad --- /dev/null +++ b/pkg/mcp/testdata/toolsets-openshift-mustgather-tools.json @@ -0,0 +1,486 @@ +[ + { + "annotations": { + "readOnlyHint": true, + "title": "ETCD Endpoint Status" + }, + "description": "Get ETCD endpoint status including database size, raft index, and quota from the must-gather archive", + "inputSchema": { + "properties": {}, + "type": "object" + }, + "name": "mustgather_etcd_endpoint_status", + "title": "ETCD Endpoint Status" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "ETCD Health" + }, + "description": "Get ETCD cluster health status including endpoint health and active alarms from the must-gather archive", + "inputSchema": { + "properties": {}, + "type": "object" + }, + "name": "mustgather_etcd_health", + "title": "ETCD Health" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "ETCD Members" + }, + "description": "List ETCD cluster members from the must-gather archive", + "inputSchema": { + "properties": {}, + "type": "object" + }, + "name": "mustgather_etcd_members_list", + "title": "ETCD Members" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "ETCD Object Count" + }, + "description": "Get ETCD object counts by resource type from the must-gather archive", + "inputSchema": { + "properties": { + "limit": { + "description": "Maximum number of resource types to show (default: 50, sorted by count descending)", + "type": "integer" + } + }, + "type": "object" + }, + "name": "mustgather_etcd_object_count", + "title": "ETCD Object Count" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "Events by Resource" + }, + "description": "Get all events related to a specific Kubernetes resource from the must-gather archive", + "inputSchema": { + "properties": { + "kind": { + "description": "Resource kind (optional, narrows search)", + "type": "string" + }, + "name": { + "description": "Resource 
name", + "type": "string" + }, + "namespace": { + "description": "Resource namespace", + "type": "string" + } + }, + "required": [ + "name" + ], + "type": "object" + }, + "name": "mustgather_events_by_resource", + "title": "Events by Resource" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "List Events" + }, + "description": "List Kubernetes events from the must-gather archive with optional filtering by type, namespace, resource, and reason", + "inputSchema": { + "properties": { + "limit": { + "description": "Maximum number of events to return (default: 100)", + "type": "integer" + }, + "namespace": { + "description": "Filter by namespace", + "type": "string" + }, + "reason": { + "description": "Filter by event reason (partial match)", + "type": "string" + }, + "resource": { + "description": "Filter by involved resource name (partial match)", + "type": "string" + }, + "type": { + "description": "Event type filter: all, Warning, Normal", + "enum": [ + "all", + "Warning", + "Normal" + ], + "type": "string" + } + }, + "type": "object" + }, + "name": "mustgather_events_list", + "title": "List Events" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "AlertManager Status" + }, + "description": "Get AlertManager cluster status from the must-gather archive", + "inputSchema": { + "properties": {}, + "type": "object" + }, + "name": "mustgather_monitoring_alertmanager_status", + "title": "AlertManager Status" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "Prometheus Alerts" + }, + "description": "Get active Prometheus alerts from the must-gather archive", + "inputSchema": { + "properties": { + "state": { + "description": "Filter by alert state: firing, pending (default: all)", + "type": "string" + } + }, + "type": "object" + }, + "name": "mustgather_monitoring_prometheus_alerts", + "title": "Prometheus Alerts" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "Prometheus Config" + }, + "description": "Get a summary 
of Prometheus configuration from the must-gather archive", + "inputSchema": { + "properties": {}, + "type": "object" + }, + "name": "mustgather_monitoring_prometheus_config_summary", + "title": "Prometheus Config" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "Prometheus Rules" + }, + "description": "Get Prometheus alerting and recording rules from the must-gather archive", + "inputSchema": { + "properties": { + "type": { + "description": "Filter by rule type: alerting, recording (default: all)", + "type": "string" + } + }, + "type": "object" + }, + "name": "mustgather_monitoring_prometheus_rules", + "title": "Prometheus Rules" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "Prometheus Status" + }, + "description": "Get Prometheus TSDB and runtime status from the must-gather archive", + "inputSchema": { + "properties": { + "replica": { + "description": "Prometheus replica (0, 1, or all). Default: all", + "type": "string" + } + }, + "type": "object" + }, + "name": "mustgather_monitoring_prometheus_status", + "title": "Prometheus Status" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "Prometheus Targets" + }, + "description": "Get Prometheus scrape targets and their health status from the must-gather archive", + "inputSchema": { + "properties": { + "health": { + "description": "Filter by health status: up, down, unknown (default: all)", + "type": "string" + }, + "replica": { + "description": "Prometheus replica (0, 1, or all). 
Default: 0", + "type": "string" + } + }, + "type": "object" + }, + "name": "mustgather_monitoring_prometheus_targets", + "title": "Prometheus Targets" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "Prometheus TSDB" + }, + "description": "Get detailed Prometheus TSDB statistics including top metrics by series count and label cardinality", + "inputSchema": { + "properties": { + "limit": { + "description": "Number of top entries to show per category (default: 10)", + "type": "integer" + }, + "replica": { + "description": "Prometheus replica (0, 1, or all). Default: 0", + "type": "string" + } + }, + "type": "object" + }, + "name": "mustgather_monitoring_prometheus_tsdb", + "title": "Prometheus TSDB" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "List Namespaces" + }, + "description": "List all namespaces found in the must-gather archive", + "inputSchema": { + "properties": {}, + "type": "object" + }, + "name": "mustgather_namespaces_list", + "title": "List Namespaces" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "Node Diagnostics" + }, + "description": "Get comprehensive diagnostic information for a specific node including kubelet logs, system info, CPU/IRQ affinities, and hardware details", + "inputSchema": { + "properties": { + "include": { + "description": "Comma-separated diagnostics to include: kubelet,sysinfo,cpu,irq,pods,lscpu,lspci,dmesg,cmdline (default: all)", + "type": "string" + }, + "kubeletTail": { + "description": "Number of lines from end of kubelet log (0 for all, default: 100)", + "type": "integer" + }, + "node": { + "description": "Node name", + "type": "string" + } + }, + "required": [ + "node" + ], + "type": "object" + }, + "name": "mustgather_node_diagnostics_get", + "title": "Node Diagnostics" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "Kubelet Logs" + }, + "description": "Get kubelet logs for a specific node (decompressed from .gz file)", + "inputSchema": { + "properties": { 
+ "node": { + "description": "Node name", + "type": "string" + }, + "tail": { + "description": "Number of lines from end (0 for all)", + "type": "integer" + } + }, + "required": [ + "node" + ], + "type": "object" + }, + "name": "mustgather_node_kubelet_logs", + "title": "Kubelet Logs" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "Kubelet Logs Grep" + }, + "description": "Filter kubelet logs for a specific node by a search string. Returns only matching lines.", + "inputSchema": { + "properties": { + "caseInsensitive": { + "description": "Perform case-insensitive search (default: false)", + "type": "boolean" + }, + "filter": { + "description": "String to search for in log lines", + "type": "string" + }, + "node": { + "description": "Node name", + "type": "string" + }, + "tail": { + "description": "Maximum number of matching lines to return (0 for all)", + "type": "integer" + } + }, + "required": [ + "node", + "filter" + ], + "type": "object" + }, + "name": "mustgather_node_kubelet_logs_grep", + "title": "Kubelet Logs Grep" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "Get Pod Logs" + }, + "description": "Get container logs for a specific pod from the must-gather archive. 
Returns current or previous logs.", + "inputSchema": { + "properties": { + "container": { + "description": "Container name (uses first container if not specified)", + "type": "string" + }, + "namespace": { + "description": "Pod namespace", + "type": "string" + }, + "pod": { + "description": "Pod name", + "type": "string" + }, + "previous": { + "description": "Get previous container logs (from crash/restart)", + "type": "boolean" + }, + "tail": { + "description": "Number of lines from end of logs (0 for all)", + "type": "integer" + } + }, + "required": [ + "namespace", + "pod" + ], + "type": "object" + }, + "name": "mustgather_pod_logs_get", + "title": "Get Pod Logs" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "Get Resource" + }, + "description": "Get a specific Kubernetes resource from the must-gather archive by kind, name, and optional namespace", + "inputSchema": { + "properties": { + "apiVersion": { + "description": "API version (default: v1)", + "type": "string" + }, + "kind": { + "description": "Resource kind (e.g., Pod, Deployment, Service)", + "type": "string" + }, + "name": { + "description": "Resource name", + "type": "string" + }, + "namespace": { + "description": "Resource namespace (omit for cluster-scoped resources)", + "type": "string" + } + }, + "required": [ + "kind", + "name" + ], + "type": "object" + }, + "name": "mustgather_resources_get", + "title": "Get Resource" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "List Resources" + }, + "description": "List Kubernetes resources from the must-gather archive with optional filtering by namespace, labels, and fields", + "inputSchema": { + "properties": { + "apiVersion": { + "description": "API version (default: v1)", + "type": "string" + }, + "fieldSelector": { + "description": "Field selector (e.g., metadata.name=foo)", + "type": "string" + }, + "kind": { + "description": "Resource kind (e.g., Pod, Deployment, Service)", + "type": "string" + }, + "labelSelector": { + 
"description": "Label selector (e.g., app=nginx,tier=frontend)", + "type": "string" + }, + "limit": { + "description": "Maximum number of resources to return (0 for all)", + "type": "integer" + }, + "namespace": { + "description": "Filter by namespace", + "type": "string" + } + }, + "required": [ + "kind" + ], + "type": "object" + }, + "name": "mustgather_resources_list", + "title": "List Resources" + }, + { + "annotations": { + "readOnlyHint": true, + "title": "Load Must-Gather Archive" + }, + "description": "Load a must-gather archive from a given filesystem path for analysis. Must be called before any other mustgather_* tools.", + "inputSchema": { + "properties": { + "path": { + "description": "Absolute path to the must-gather archive directory", + "type": "string" + } + }, + "required": [ + "path" + ], + "type": "object" + }, + "name": "mustgather_use", + "title": "Load Must-Gather Archive" + } +] diff --git a/pkg/mcp/testdata/toolsets-openshift-prompts.json b/pkg/mcp/testdata/toolsets-openshift-prompts.json index 0d7162dcc..2de56afac 100644 --- a/pkg/mcp/testdata/toolsets-openshift-prompts.json +++ b/pkg/mcp/testdata/toolsets-openshift-prompts.json @@ -1,7 +1,5 @@ [ { - "name": "plan_mustgather", - "description": "Plan for collecting a must-gather archive from an OpenShift cluster. Must-gather is a tool for collecting cluster data related to debugging and troubleshooting like logs, kubernetes resources, etc.", "arguments": [ { "name": "node_name", @@ -47,6 +45,8 @@ "name": "images", "description": "Comma-separated list of custom must-gather container images" } - ] + ], + "description": "Plan for collecting a must-gather archive from an OpenShift cluster. 
Must-gather is a tool for collecting cluster data related to debugging and troubleshooting like logs, kubernetes resources, etc.", + "name": "plan_mustgather" } ] diff --git a/pkg/mcp/toolsets_test.go b/pkg/mcp/toolsets_test.go index 125d48a72..a318ec717 100644 --- a/pkg/mcp/toolsets_test.go +++ b/pkg/mcp/toolsets_test.go @@ -19,6 +19,7 @@ import ( "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kcp" "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kiali" "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt" + mgToolset "github.com/containers/kubernetes-mcp-server/pkg/toolsets/mustgather" "github.com/containers/kubernetes-mcp-server/pkg/toolsets/openshift" "github.com/modelcontextprotocol/go-sdk/mcp" "github.com/stretchr/testify/suite" @@ -217,6 +218,23 @@ func (s *ToolsetsSuite) TestOpenShiftToolsetPrompts() { }) } +func (s *ToolsetsSuite) TestOpenShiftMustGatherToolset() { + s.Run("OpenShift must-gather toolset", func() { + toolsets.Clear() + toolsets.Register(&mgToolset.Toolset{}) + s.Cfg.Toolsets = []string{"openshift/mustgather"} + s.InitMcpClient() + tools, err := s.ListTools() + s.Run("ListTools returns tools", func() { + s.NotNil(tools, "Expected tools from ListTools") + s.NoError(err, "Expected no error from ListTools") + }) + s.Run("ListTools returns correct Tool metadata", func() { + s.assertJsonSnapshot("toolsets-openshift-mustgather-tools.json", tools.Tools) + }) + }) +} + func (s *ToolsetsSuite) TestInputSchemaEdgeCases() { //https://github.com/containers/kubernetes-mcp-server/issues/340 s.Run("InputSchema for no-arg tool is object with empty properties", func() { diff --git a/pkg/ocp/mustgather/etcd.go b/pkg/ocp/mustgather/etcd.go new file mode 100644 index 000000000..c0eb93a59 --- /dev/null +++ b/pkg/ocp/mustgather/etcd.go @@ -0,0 +1,85 @@ +package mustgather + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" +) + +// GetETCDHealth reads ETCD health from the archive +func (p *Provider) GetETCDHealth() 
(*ETCDHealth, error) { + etcdDir := filepath.Join(p.metadata.ContainerDir, "etcd_info") + + health := &ETCDHealth{Healthy: true} + + // Read endpoint health + healthFile := filepath.Join(etcdDir, "endpoint_health.json") + healthData, err := os.ReadFile(healthFile) + if err != nil { + return nil, fmt.Errorf("ETCD health data not found: %w", err) + } + + var endpoints []struct { + Endpoint string `json:"endpoint"` + Health string `json:"health"` + } + if err := json.Unmarshal(healthData, &endpoints); err != nil { + return nil, fmt.Errorf("failed to parse ETCD health: %w", err) + } + + for _, ep := range endpoints { + health.Endpoints = append(health.Endpoints, ETCDEndpoint{ + Address: ep.Endpoint, + Health: ep.Health, + }) + if ep.Health != "true" && ep.Health != "healthy" { + health.Healthy = false + } + } + + // Read alarms + alarmFile := filepath.Join(etcdDir, "alarm_list.json") + if alarmData, err := os.ReadFile(alarmFile); err == nil { + var alarmResponse struct { + Alarms []struct { + MemberID uint64 `json:"memberID"` + Alarm string `json:"alarm"` + } `json:"alarms"` + } + if err := json.Unmarshal(alarmData, &alarmResponse); err == nil { + for _, alarm := range alarmResponse.Alarms { + health.Alarms = append(health.Alarms, fmt.Sprintf("Member %d: %s", alarm.MemberID, alarm.Alarm)) + health.Healthy = false + } + } + } + + return health, nil +} + +// GetETCDObjectCount reads ETCD object counts from the archive +func (p *Provider) GetETCDObjectCount() (map[string]int64, error) { + countFile := filepath.Join(p.metadata.ContainerDir, "etcd_info", "object_count.json") + data, err := os.ReadFile(countFile) + if err != nil { + return nil, fmt.Errorf("ETCD object count data not found: %w", err) + } + + var counts map[string]int64 + if err := json.Unmarshal(data, &counts); err != nil { + return nil, fmt.Errorf("failed to parse ETCD object count: %w", err) + } + + return counts, nil +} + +// ReadETCDFile reads a raw ETCD info JSON file and returns its content +func (p 
*Provider) ReadETCDFile(filename string) ([]byte, error) { + filePath := filepath.Join(p.metadata.ContainerDir, "etcd_info", filename) + data, err := os.ReadFile(filePath) + if err != nil { + return nil, fmt.Errorf("ETCD file %s not found: %w", filename, err) + } + return data, nil +} diff --git a/pkg/ocp/mustgather/index.go b/pkg/ocp/mustgather/index.go new file mode 100644 index 000000000..f9494b021 --- /dev/null +++ b/pkg/ocp/mustgather/index.go @@ -0,0 +1,217 @@ +package mustgather + +import ( + "context" + "sort" + "strings" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +// ResourceIndex provides fast O(1) lookups for must-gather resources +type ResourceIndex struct { + // byGVK maps GVK -> key -> resource (key is "namespace/name" or "name") + byGVK map[schema.GroupVersionKind]map[string]*unstructured.Unstructured + // byNamespace maps namespace -> GVK -> name -> resource + byNamespace map[string]map[schema.GroupVersionKind]map[string]*unstructured.Unstructured + // namespaces is a sorted list of all namespaces + namespaces []string + // count is the total number of resources + count int +} + +// NewResourceIndex creates an empty resource index +func NewResourceIndex() *ResourceIndex { + return &ResourceIndex{ + byGVK: make(map[schema.GroupVersionKind]map[string]*unstructured.Unstructured), + byNamespace: make(map[string]map[schema.GroupVersionKind]map[string]*unstructured.Unstructured), + } +} + +// BuildIndex creates a resource index from a list of unstructured resources +func BuildIndex(resources []*unstructured.Unstructured) *ResourceIndex { + idx := NewResourceIndex() + for _, r := range resources { + idx.Add(r) + } + idx.buildNamespaceList() + return idx +} + +// Add adds a resource to the index +func (idx *ResourceIndex) Add(obj *unstructured.Unstructured) { + gvk := obj.GetObjectKind().GroupVersionKind() + name := obj.GetName() + namespace := obj.GetNamespace() + + key := name + if namespace != 
"" { + key = namespace + "/" + name + } + + // Index by GVK + if _, ok := idx.byGVK[gvk]; !ok { + idx.byGVK[gvk] = make(map[string]*unstructured.Unstructured) + } + idx.byGVK[gvk][key] = obj + + // Index by namespace + if namespace != "" { + if _, ok := idx.byNamespace[namespace]; !ok { + idx.byNamespace[namespace] = make(map[schema.GroupVersionKind]map[string]*unstructured.Unstructured) + } + if _, ok := idx.byNamespace[namespace][gvk]; !ok { + idx.byNamespace[namespace][gvk] = make(map[string]*unstructured.Unstructured) + } + idx.byNamespace[namespace][gvk][name] = obj + } + + idx.count++ +} + +func (idx *ResourceIndex) buildNamespaceList() { + nsSet := make(map[string]struct{}) + for ns := range idx.byNamespace { + nsSet[ns] = struct{}{} + } + idx.namespaces = make([]string, 0, len(nsSet)) + for ns := range nsSet { + idx.namespaces = append(idx.namespaces, ns) + } + sort.Strings(idx.namespaces) +} + +// Get retrieves a specific resource by GVK, name, and namespace +func (idx *ResourceIndex) Get(gvk schema.GroupVersionKind, name, namespace string) *unstructured.Unstructured { + gvkMap, ok := idx.byGVK[gvk] + if !ok { + return nil + } + + key := name + if namespace != "" { + key = namespace + "/" + name + } + + obj, ok := gvkMap[key] + if !ok { + return nil + } + return obj.DeepCopy() +} + +// List returns all resources matching the given GVK and optional namespace +func (idx *ResourceIndex) List(_ context.Context, gvk schema.GroupVersionKind, namespace string, opts ListOptions) *unstructured.UnstructuredList { + result := &unstructured.UnstructuredList{} + + if namespace != "" { + // Namespace-scoped query + nsMap, ok := idx.byNamespace[namespace] + if !ok { + return result + } + gvkMap, ok := nsMap[gvk] + if !ok { + return result + } + for _, obj := range gvkMap { + if matchesListOptions(obj, opts) { + result.Items = append(result.Items, *obj.DeepCopy()) + } + } + } else { + // Cluster-wide query + gvkMap, ok := idx.byGVK[gvk] + if !ok { + return result + } + 
for _, obj := range gvkMap { + if matchesListOptions(obj, opts) { + result.Items = append(result.Items, *obj.DeepCopy()) + } + } + } + + // Apply limit + if opts.Limit > 0 && len(result.Items) > opts.Limit { + result.Items = result.Items[:opts.Limit] + } + + return result +} + +// ListNamespaces returns all namespaces in the index +func (idx *ResourceIndex) ListNamespaces() []string { + ns := make([]string, len(idx.namespaces)) + copy(ns, idx.namespaces) + return ns +} + +// Count returns the total number of indexed resources +func (idx *ResourceIndex) Count() int { + return idx.count +} + +func matchesListOptions(obj *unstructured.Unstructured, opts ListOptions) bool { + if opts.LabelSelector != "" { + labels := obj.GetLabels() + if !matchesLabelSelector(labels, opts.LabelSelector) { + return false + } + } + if opts.FieldSelector != "" { + if !matchesFieldSelector(obj, opts.FieldSelector) { + return false + } + } + return true +} + +func matchesLabelSelector(labels map[string]string, selector string) bool { + parts := strings.Split(selector, ",") + for _, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + kv := strings.SplitN(part, "=", 2) + if len(kv) != 2 { + continue + } + key := strings.TrimSpace(kv[0]) + value := strings.TrimSpace(kv[1]) + if labels[key] != value { + return false + } + } + return true +} + +func matchesFieldSelector(obj *unstructured.Unstructured, selector string) bool { + parts := strings.Split(selector, ",") + for _, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + kv := strings.SplitN(part, "=", 2) + if len(kv) != 2 { + continue + } + key := strings.TrimSpace(kv[0]) + value := strings.TrimSpace(kv[1]) + + switch key { + case "metadata.name": + if obj.GetName() != value { + return false + } + case "metadata.namespace": + if obj.GetNamespace() != value { + return false + } + } + } + return true +} diff --git a/pkg/ocp/mustgather/loader.go 
b/pkg/ocp/mustgather/loader.go new file mode 100644 index 000000000..d150a7089 --- /dev/null +++ b/pkg/ocp/mustgather/loader.go @@ -0,0 +1,264 @@ +package mustgather + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "gopkg.in/yaml.v3" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +// LoadResult contains the results of loading a must-gather archive +type LoadResult struct { + Metadata MustGatherMetadata + Resources []*unstructured.Unstructured +} + +// Load reads and parses a must-gather archive from the given path +func Load(path string) (*LoadResult, error) { + containerDir, err := FindContainerDir(path) + if err != nil { + // If no container dir found, use the path directly + containerDir = path + } + + metadata := MustGatherMetadata{ + Path: path, + ContainerDir: containerDir, + } + + // Load metadata files + loadMetadata(containerDir, &metadata) + + // Load all resources + var resources []*unstructured.Unstructured + + clusterResources, err := loadClusterScopedResources(containerDir) + if err == nil { + resources = append(resources, clusterResources...) + } + + namespacedResources, err := loadNamespacedResources(containerDir) + if err == nil { + resources = append(resources, namespacedResources...) + } + + metadata.ResourceCount = len(resources) + + return &LoadResult{ + Metadata: metadata, + Resources: resources, + }, nil +} + +// FindContainerDir locates the must-gather container directory +// (e.g., quay-io-okd-scos-content-sha256-...) 
+func FindContainerDir(basePath string) (string, error) { + entries, err := os.ReadDir(basePath) + if err != nil { + return "", err + } + + for _, entry := range entries { + if entry.IsDir() { + name := entry.Name() + if strings.HasPrefix(name, "quay") || strings.Contains(name, "sha256") { + return filepath.Join(basePath, name), nil + } + } + } + + return "", fmt.Errorf("container directory not found in %s", basePath) +} + +func loadMetadata(containerDir string, metadata *MustGatherMetadata) { + if data, err := os.ReadFile(filepath.Join(containerDir, "version")); err == nil { + metadata.Version = strings.TrimSpace(string(data)) + } + if data, err := os.ReadFile(filepath.Join(containerDir, "timestamp")); err == nil { + metadata.Timestamp = strings.TrimSpace(string(data)) + } +} + +func loadClusterScopedResources(containerDir string) ([]*unstructured.Unstructured, error) { + clusterDir := filepath.Join(containerDir, "cluster-scoped-resources") + if _, err := os.Stat(clusterDir); os.IsNotExist(err) { + return nil, nil + } + + var resources []*unstructured.Unstructured + err := filepath.Walk(clusterDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return nil // skip errors + } + if info.IsDir() || (!strings.HasSuffix(path, ".yaml") && !strings.HasSuffix(path, ".yml")) { + return nil + } + + loaded, loadErr := loadYAMLFile(path, "") + if loadErr != nil { + return nil // skip files that can't be parsed + } + resources = append(resources, loaded...) 
+ return nil + }) + + return resources, err +} + +func loadNamespacedResources(containerDir string) ([]*unstructured.Unstructured, error) { + namespacesDir := filepath.Join(containerDir, "namespaces") + if _, err := os.Stat(namespacesDir); os.IsNotExist(err) { + return nil, nil + } + + var resources []*unstructured.Unstructured + + nsEntries, err := os.ReadDir(namespacesDir) + if err != nil { + return nil, err + } + + for _, nsEntry := range nsEntries { + if !nsEntry.IsDir() { + continue + } + namespace := nsEntry.Name() + nsDir := filepath.Join(namespacesDir, namespace) + + err := filepath.Walk(nsDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return nil + } + if info.IsDir() || (!strings.HasSuffix(path, ".yaml") && !strings.HasSuffix(path, ".yml")) { + return nil + } + + // Skip pod log directories + rel, _ := filepath.Rel(nsDir, path) + if strings.HasPrefix(rel, "pods"+string(filepath.Separator)) { + return nil + } + + loaded, loadErr := loadYAMLFile(path, namespace) + if loadErr != nil { + return nil + } + resources = append(resources, loaded...) 
+ return nil + }) + if err != nil { + continue + } + } + + return resources, nil +} + +func loadYAMLFile(path, namespace string) ([]*unstructured.Unstructured, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + + var raw map[string]interface{} + if err := yaml.Unmarshal(data, &raw); err != nil { + return nil, err + } + if raw == nil { + return nil, nil + } + + normalizeYAMLTypes(raw) + + // Check if it's a List + kind, _ := raw["kind"].(string) + if strings.HasSuffix(kind, "List") { + return loadList(raw, namespace) + } + + obj := &unstructured.Unstructured{Object: raw} + if namespace != "" && obj.GetNamespace() == "" { + obj.SetNamespace(namespace) + } + + // Set GVK from apiVersion + kind if not already set + if obj.GetObjectKind().GroupVersionKind().Kind == "" { + apiVersion, _ := raw["apiVersion"].(string) + kind, _ := raw["kind"].(string) + if apiVersion != "" && kind != "" { + gv, _ := schema.ParseGroupVersion(apiVersion) + obj.SetGroupVersionKind(schema.GroupVersionKind{ + Group: gv.Group, + Version: gv.Version, + Kind: kind, + }) + } + } + + return []*unstructured.Unstructured{obj}, nil +} + +func loadList(raw map[string]interface{}, namespace string) ([]*unstructured.Unstructured, error) { + items, ok := raw["items"].([]interface{}) + if !ok { + return nil, nil + } + + var resources []*unstructured.Unstructured + for _, item := range items { + itemMap, ok := item.(map[string]interface{}) + if !ok { + continue + } + normalizeYAMLTypes(itemMap) + obj := &unstructured.Unstructured{Object: itemMap} + if namespace != "" && obj.GetNamespace() == "" { + obj.SetNamespace(namespace) + } + // Set GVK + apiVersion, _ := itemMap["apiVersion"].(string) + kind, _ := itemMap["kind"].(string) + if apiVersion != "" && kind != "" { + gv, _ := schema.ParseGroupVersion(apiVersion) + obj.SetGroupVersionKind(schema.GroupVersionKind{ + Group: gv.Group, + Version: gv.Version, + Kind: kind, + }) + } + resources = append(resources, obj) + } + return 
resources, nil +} + +// normalizeYAMLTypes converts YAML int types to int64 for JSON compatibility. +// YAML v3 unmarshals integers as int, but JSON expects int64/float64. +func normalizeYAMLTypes(m map[string]interface{}) { + for k, v := range m { + switch val := v.(type) { + case int: + m[k] = int64(val) + case map[string]interface{}: + normalizeYAMLTypes(val) + case []interface{}: + normalizeSlice(val) + } + } +} + +func normalizeSlice(s []interface{}) { + for i, v := range s { + switch val := v.(type) { + case int: + s[i] = int64(val) + case map[string]interface{}: + normalizeYAMLTypes(val) + case []interface{}: + normalizeSlice(val) + } + } +} diff --git a/pkg/ocp/mustgather/monitoring.go b/pkg/ocp/mustgather/monitoring.go new file mode 100644 index 000000000..9868d5700 --- /dev/null +++ b/pkg/ocp/mustgather/monitoring.go @@ -0,0 +1,128 @@ +package mustgather + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" +) + +// GetPrometheusReplicaPath builds path to Prometheus replica data +func (p *Provider) GetPrometheusReplicaPath(replicaNum int) string { + return filepath.Join(p.metadata.ContainerDir, "monitoring", "prometheus", + fmt.Sprintf("prometheus-k8s-%d", replicaNum)) +} + +// GetPrometheusCommonPath builds path to common Prometheus data +func (p *Provider) GetPrometheusCommonPath() string { + return filepath.Join(p.metadata.ContainerDir, "monitoring", "prometheus") +} + +// GetAlertManagerPath builds path to AlertManager data +func (p *Provider) GetAlertManagerPath() string { + return filepath.Join(p.metadata.ContainerDir, "monitoring", "alertmanager") +} + +// ReadPrometheusJSON reads and parses a JSON file from a Prometheus replica directory +func (p *Provider) ReadPrometheusJSON(replicaPath, filename string, v any) error { + dataFile := filepath.Join(replicaPath, filename) + return ReadJSON(dataFile, v) +} + +// GetPrometheusTSDB reads TSDB status for a replica +func (p *Provider) GetPrometheusTSDB(replicaNum int) (*TSDBStatus, error) { + 
replicaPath := p.GetPrometheusReplicaPath(replicaNum) + var resp TSDBStatusResponse + if err := p.ReadPrometheusJSON(replicaPath, "status/tsdb.json", &resp); err != nil { + return nil, err + } + return &resp.Data, nil +} + +// GetPrometheusRuntimeInfo reads runtime info for a replica +func (p *Provider) GetPrometheusRuntimeInfo(replicaNum int) (*RuntimeInfo, error) { + replicaPath := p.GetPrometheusReplicaPath(replicaNum) + var resp RuntimeInfoResponse + if err := p.ReadPrometheusJSON(replicaPath, "status/runtimeinfo.json", &resp); err != nil { + return nil, err + } + return &resp.Data, nil +} + +// GetPrometheusActiveTargets reads active targets for a replica +func (p *Provider) GetPrometheusActiveTargets(replicaNum int) ([]ActiveTarget, error) { + replicaPath := p.GetPrometheusReplicaPath(replicaNum) + var resp ActiveTargetsAPIResponse + if err := p.ReadPrometheusJSON(replicaPath, "active-targets.json", &resp); err != nil { + return nil, err + } + return resp.Data.ActiveTargets, nil +} + +// GetPrometheusRules reads Prometheus rules from the common directory +func (p *Provider) GetPrometheusRules() (*RuleGroupsResponse, error) { + promPath := p.GetPrometheusCommonPath() + rulesFile := filepath.Join(promPath, "rules.json") + var resp RuleGroupsAPIResponse + if err := ReadJSON(rulesFile, &resp); err != nil { + return nil, err + } + return &resp.Data, nil +} + +// GetAlertManagerStatus reads AlertManager status +func (p *Provider) GetAlertManagerStatus() (*AlertManagerStatus, error) { + amPath := p.GetAlertManagerPath() + statusFile := filepath.Join(amPath, "status.json") + var status AlertManagerStatus + if err := ReadJSON(statusFile, &status); err != nil { + return nil, err + } + return &status, nil +} + +// GetPrometheusConfig reads Prometheus config from the common directory +func (p *Provider) GetPrometheusConfig() (*ConfigResponse, error) { + promPath := p.GetPrometheusCommonPath() + configFile := filepath.Join(promPath, "status", "config.json") + var config 
ConfigResponse + if err := ReadJSON(configFile, &config); err != nil { + return nil, err + } + return &config, nil +} + +// GetPrometheusFlags reads Prometheus flags +func (p *Provider) GetPrometheusFlags() (FlagsResponse, error) { + promPath := p.GetPrometheusCommonPath() + flagsFile := filepath.Join(promPath, "status", "flags.json") + var flags FlagsResponse + if err := ReadJSON(flagsFile, &flags); err != nil { + return nil, err + } + return flags, nil +} + +// ReadJSON reads and unmarshals a JSON file +func ReadJSON(filePath string, v any) error { + data, err := os.ReadFile(filePath) + if err != nil { + return fmt.Errorf("failed to read file %s: %w", filepath.Base(filePath), err) + } + if err := json.Unmarshal(data, v); err != nil { + return fmt.Errorf("failed to parse JSON from %s: %w", filepath.Base(filePath), err) + } + return nil +} + +// GetReplicaNumbers converts a replica parameter to replica numbers +func GetReplicaNumbers(replicaParam string) []int { + switch replicaParam { + case "prometheus-k8s-0", "0": + return []int{0} + case "prometheus-k8s-1", "1": + return []int{1} + default: + return []int{0, 1} + } +} diff --git a/pkg/ocp/mustgather/provider.go b/pkg/ocp/mustgather/provider.go new file mode 100644 index 000000000..f8c349e01 --- /dev/null +++ b/pkg/ocp/mustgather/provider.go @@ -0,0 +1,211 @@ +package mustgather + +import ( + "compress/gzip" + "context" + "fmt" + "os" + "path/filepath" + "strings" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +// Provider gives access to must-gather archive data +type Provider struct { + metadata MustGatherMetadata + index *ResourceIndex +} + +// NewProvider loads a must-gather archive and builds an in-memory index +func NewProvider(path string) (*Provider, error) { + result, err := Load(path) + if err != nil { + return nil, fmt.Errorf("failed to load must-gather: %w", err) + } + + index := BuildIndex(result.Resources) + result.Metadata.NamespaceCount = 
len(index.ListNamespaces()) + + return &Provider{ + metadata: result.Metadata, + index: index, + }, nil +} + +// GetMetadata returns the must-gather metadata +func (p *Provider) GetMetadata() MustGatherMetadata { + return p.metadata +} + +// GetResource retrieves a specific resource +func (p *Provider) GetResource(gvk schema.GroupVersionKind, name, namespace string) *unstructured.Unstructured { + return p.index.Get(gvk, name, namespace) +} + +// ListResources returns resources matching the given criteria +func (p *Provider) ListResources(ctx context.Context, gvk schema.GroupVersionKind, namespace string, opts ListOptions) *unstructured.UnstructuredList { + return p.index.List(ctx, gvk, namespace, opts) +} + +// ListNamespaces returns all namespaces found in the archive +func (p *Provider) ListNamespaces() []string { + return p.index.ListNamespaces() +} + +// GetPodLog reads pod container logs from the archive +func (p *Provider) GetPodLog(opts PodLogOptions) (string, error) { + container := opts.Container + + // If container not specified, try to find first available + if container == "" { + containers, err := p.ListPodContainers(opts.Namespace, opts.Pod) + if err != nil { + return "", err + } + if len(containers) == 0 { + return "", fmt.Errorf("no containers found for pod %s/%s", opts.Namespace, opts.Pod) + } + container = containers[0] + } + + logType := string(opts.LogType) + if logType == "" { + logType = string(LogTypeCurrent) + } + + logPath := filepath.Join( + p.metadata.ContainerDir, "namespaces", opts.Namespace, + "pods", opts.Pod, container, container, "logs", logType+".log", + ) + + content, err := readTextFile(logPath) + if err != nil { + return "", fmt.Errorf("failed to read log file: %w", err) + } + + if opts.TailLines > 0 { + content = TailLines(content, opts.TailLines) + } + + return content, nil +} + +// ListPodContainers returns container names that have logs available +func (p *Provider) ListPodContainers(namespace, pod string) ([]string, error) 
{ + podDir := filepath.Join(p.metadata.ContainerDir, "namespaces", namespace, "pods", pod) + entries, err := os.ReadDir(podDir) + if err != nil { + return nil, fmt.Errorf("pod directory not found: %w", err) + } + + var containers []string + for _, entry := range entries { + if entry.IsDir() { + containers = append(containers, entry.Name()) + } + } + return containers, nil +} + +// GetNodeDiagnostics reads all diagnostic data for a node +func (p *Provider) GetNodeDiagnostics(nodeName string) (*NodeDiagnostics, error) { + nodeDir := filepath.Join(p.metadata.ContainerDir, "nodes", nodeName) + if _, err := os.Stat(nodeDir); os.IsNotExist(err) { + return nil, fmt.Errorf("node directory not found: %s", nodeName) + } + + diag := &NodeDiagnostics{NodeName: nodeName} + + // Read kubelet log (may be gzipped) + kubeletGz := filepath.Join(nodeDir, nodeName+"_logs_kubelet.gz") + if content, err := readGzipFile(kubeletGz); err == nil { + diag.KubeletLog = content + } + + // Read text diagnostic files + textFiles := map[string]*string{ + "sysinfo.log": &diag.SysInfo, + "cpu_affinities.json": &diag.CPUAffinities, + "irq_affinities.json": &diag.IRQAffinities, + "pods_info.json": &diag.PodsInfo, + "podresources.json": &diag.PodResources, + "lscpu": &diag.Lscpu, + "lspci": &diag.Lspci, + "dmesg": &diag.Dmesg, + "proc_cmdline": &diag.ProcCmdline, + } + + for filename, target := range textFiles { + if content, err := readTextFile(filepath.Join(nodeDir, filename)); err == nil { + *target = content + } + } + + return diag, nil +} + +// ListNodes returns all node names with diagnostic data +func (p *Provider) ListNodes() ([]string, error) { + nodesDir := filepath.Join(p.metadata.ContainerDir, "nodes") + entries, err := os.ReadDir(nodesDir) + if err != nil { + return nil, nil // no nodes directory is not an error + } + + var nodes []string + for _, entry := range entries { + if entry.IsDir() { + nodes = append(nodes, entry.Name()) + } + } + return nodes, nil +} + +// readTextFile reads a 
text file and returns its content +func readTextFile(path string) (string, error) { + data, err := os.ReadFile(path) + if err != nil { + return "", err + } + return string(data), nil +} + +// readGzipFile reads and decompresses a gzipped file +func readGzipFile(path string) (string, error) { + f, err := os.Open(path) + if err != nil { + return "", err + } + defer func() { _ = f.Close() }() + + reader, err := gzip.NewReader(f) + if err != nil { + return "", err + } + defer func() { _ = reader.Close() }() + + var sb strings.Builder + buf := make([]byte, 32*1024) + for { + n, readErr := reader.Read(buf) + if n > 0 { + sb.Write(buf[:n]) + } + if readErr != nil { + break + } + } + + return sb.String(), nil +} + +// TailLines returns the last n lines of a string +func TailLines(content string, n int) string { + lines := strings.Split(content, "\n") + if len(lines) <= n { + return content + } + return strings.Join(lines[len(lines)-n:], "\n") +} diff --git a/pkg/ocp/mustgather/types.go b/pkg/ocp/mustgather/types.go new file mode 100644 index 000000000..118366dc2 --- /dev/null +++ b/pkg/ocp/mustgather/types.go @@ -0,0 +1,225 @@ +package mustgather + +// LogType represents the type of log +type LogType string + +const ( + LogTypeCurrent LogType = "current" + LogTypePrevious LogType = "previous" +) + +// PodLogOptions contains options for retrieving pod logs +type PodLogOptions struct { + Namespace string + Pod string + Container string + LogType LogType + TailLines int // Number of lines from end (0 = all) +} + +// NodeDiagnostics contains node diagnostic information +type NodeDiagnostics struct { + NodeName string + KubeletLog string + SysInfo string + CPUAffinities string + IRQAffinities string + PodsInfo string + PodResources string + Lscpu string + Lspci string + Dmesg string + ProcCmdline string +} + +// MustGatherMetadata contains metadata about the must-gather archive +type MustGatherMetadata struct { + Path string + ContainerDir string + Version string + Timestamp 
string + ResourceCount int + NamespaceCount int +} + +// ListOptions contains options for listing resources +type ListOptions struct { + LabelSelector string + FieldSelector string + Limit int +} + +// ETCDHealth represents ETCD cluster health information +type ETCDHealth struct { + Healthy bool + Endpoints []ETCDEndpoint + Alarms []string +} + +// ETCDEndpoint represents a single ETCD endpoint health status +type ETCDEndpoint struct { + Address string + Health string +} + +// Monitoring types + +// TSDBStatusResponse wraps TSDB status +type TSDBStatusResponse struct { + Status string `json:"status"` + Data TSDBStatus `json:"data"` +} + +// RuntimeInfoResponse wraps runtime info +type RuntimeInfoResponse struct { + Status string `json:"status"` + Data RuntimeInfo `json:"data"` +} + +// ActiveTargetsAPIResponse wraps active targets response +type ActiveTargetsAPIResponse struct { + Status string `json:"status"` + Data ActiveTargetsResponse `json:"data"` +} + +// RuleGroupsAPIResponse wraps rules response +type RuleGroupsAPIResponse struct { + Status string `json:"status"` + Data RuleGroupsResponse `json:"data"` +} + +// TSDBStatus represents Prometheus TSDB status data +type TSDBStatus struct { + SeriesCountByMetricName []NameValue `json:"seriesCountByMetricName"` + LabelValueCountByLabelName []NameValue `json:"labelValueCountByLabelName"` + MemoryInBytesByLabelName []NameValue `json:"memoryInBytesByLabelName"` + HeadStats HeadStats `json:"headStats"` +} + +// NameValue represents a name-value pair used for metric/label counts +type NameValue struct { + Name string `json:"name"` + Value int64 `json:"value"` +} + +// HeadStats represents TSDB head statistics +type HeadStats struct { + NumSeries int64 `json:"numSeries"` + NumLabelPairs int64 `json:"numLabelPairs"` + ChunkCount int64 `json:"chunkCount"` + MinTime int64 `json:"minTime"` + MaxTime int64 `json:"maxTime"` +} + +// RuntimeInfo represents Prometheus runtime information +type RuntimeInfo struct { + StartTime 
string `json:"startTime"` + CWD string `json:"CWD"` + ReloadConfigSuccess bool `json:"reloadConfigSuccess"` + LastConfigTime string `json:"lastConfigTime"` + CorruptionCount int64 `json:"corruptionCount"` + GoroutineCount int64 `json:"goroutineCount"` + GOMAXPROCS int64 `json:"GOMAXPROCS"` + GOGC string `json:"GOGC"` + GOMEMLIMIT int64 `json:"GOMEMLIMIT"` + StorageRetention string `json:"storageRetention"` +} + +// ActiveTargetsResponse represents the active targets API response +type ActiveTargetsResponse struct { + ActiveTargets []ActiveTarget `json:"activeTargets"` +} + +// ActiveTarget represents a single scrape target +type ActiveTarget struct { + DiscoveredLabels map[string]string `json:"discoveredLabels"` + Labels map[string]string `json:"labels"` + ScrapePool string `json:"scrapePool"` + ScrapeURL string `json:"scrapeUrl"` + GlobalURL string `json:"globalUrl"` + LastError string `json:"lastError"` + LastScrape string `json:"lastScrape"` + LastScrapeDuration float64 `json:"lastScrapeDuration"` + Health string `json:"health"` + ScrapeInterval string `json:"scrapeInterval"` + ScrapeTimeout string `json:"scrapeTimeout"` +} + +// RuleGroupsResponse represents the rules API response +type RuleGroupsResponse struct { + Groups []RuleGroup `json:"groups"` +} + +// RuleGroup represents a group of rules +type RuleGroup struct { + Name string `json:"name"` + File string `json:"file"` + Rules []Rule `json:"rules"` + Interval float64 `json:"interval"` + Limit int64 `json:"limit"` + EvaluationTime float64 `json:"evaluationTime"` + LastEvaluation string `json:"lastEvaluation"` +} + +// Rule represents a single alerting or recording rule +type Rule struct { + State string `json:"state,omitempty"` + Name string `json:"name"` + Query string `json:"query"` + Duration float64 `json:"duration,omitempty"` + KeepFiringFor float64 `json:"keepFiringFor,omitempty"` + Labels map[string]string `json:"labels,omitempty"` + Annotations map[string]string `json:"annotations,omitempty"` + 
Alerts []Alert `json:"alerts,omitempty"` + Health string `json:"health"` + Type string `json:"type"` + LastError string `json:"lastError,omitempty"` + EvaluationTime float64 `json:"evaluationTime,omitempty"` + LastEvaluation string `json:"lastEvaluation,omitempty"` +} + +// Alert represents an active alert +type Alert struct { + Labels map[string]string `json:"labels"` + Annotations map[string]string `json:"annotations"` + State string `json:"state"` + ActiveAt string `json:"activeAt"` + Value string `json:"value"` +} + +// AlertManagerStatus represents AlertManager status +type AlertManagerStatus struct { + Cluster AlertManagerCluster `json:"cluster"` + VersionInfo VersionInfo `json:"versionInfo"` + Uptime string `json:"uptime"` +} + +// AlertManagerCluster represents AlertManager cluster info +type AlertManagerCluster struct { + Status string `json:"status"` + Peers []AlertManagerPeer `json:"peers"` +} + +// AlertManagerPeer represents a single AlertManager peer +type AlertManagerPeer struct { + Name string `json:"name"` + Address string `json:"address"` +} + +// VersionInfo represents version information +type VersionInfo struct { + Version string `json:"version"` + Revision string `json:"revision"` + Branch string `json:"branch"` + BuildUser string `json:"buildUser"` + BuildDate string `json:"buildDate"` + GoVersion string `json:"goVersion"` +} + +// ConfigResponse wraps the config YAML +type ConfigResponse struct { + YAML string `json:"yaml"` +} + +// FlagsResponse represents Prometheus flags +type FlagsResponse map[string]string diff --git a/pkg/toolsets/mustgather/etcd.go b/pkg/toolsets/mustgather/etcd.go new file mode 100644 index 000000000..af298a0ea --- /dev/null +++ b/pkg/toolsets/mustgather/etcd.go @@ -0,0 +1,200 @@ +package mustgather + +import ( + "fmt" + "sort" + + "github.com/containers/kubernetes-mcp-server/pkg/api" + "github.com/google/jsonschema-go/jsonschema" + "k8s.io/utils/ptr" +) + +func initEtcd() []api.ServerTool { + return 
[]api.ServerTool{ + { + Tool: api.Tool{ + Name: "mustgather_etcd_health", + Description: "Get ETCD cluster health status including endpoint health and active alarms from the must-gather archive", + Annotations: api.ToolAnnotations{ + Title: "ETCD Health", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + }, + }, + Handler: mustgatherETCDHealth, + ClusterAware: ptr.To(false), + }, + { + Tool: api.Tool{ + Name: "mustgather_etcd_members_list", + Description: "List ETCD cluster members from the must-gather archive", + Annotations: api.ToolAnnotations{ + Title: "ETCD Members", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + }, + }, + Handler: mustgatherETCDMembersList, + ClusterAware: ptr.To(false), + }, + { + Tool: api.Tool{ + Name: "mustgather_etcd_endpoint_status", + Description: "Get ETCD endpoint status including database size, raft index, and quota from the must-gather archive", + Annotations: api.ToolAnnotations{ + Title: "ETCD Endpoint Status", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + }, + }, + Handler: mustgatherETCDEndpointStatus, + ClusterAware: ptr.To(false), + }, + { + Tool: api.Tool{ + Name: "mustgather_etcd_object_count", + Description: "Get ETCD object counts by resource type from the must-gather archive", + Annotations: api.ToolAnnotations{ + Title: "ETCD Object Count", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "limit": {Type: "integer", Description: "Maximum number of resource types to show (default: 50, sorted by count descending)"}, + }, + }, + }, + Handler: mustgatherETCDObjectCount, + ClusterAware: ptr.To(false), + }, + } +} + +func mustgatherETCDHealth(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + health, err := 
p.GetETCDHealth() + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to get ETCD health: %w", err)), nil + } + + output := "## ETCD Cluster Health\n\n" + if health.Healthy { + output += "Status: [OK] Healthy\n\n" + } else { + output += "Status: [FAIL] Unhealthy\n\n" + } + + output += "### Endpoints\n\n" + for _, ep := range health.Endpoints { + output += fmt.Sprintf("- %s %s\n", healthSymbol(ep.Health), ep.Address) + } + + if len(health.Alarms) > 0 { + output += "\n### Active Alarms\n\n" + for _, alarm := range health.Alarms { + output += fmt.Sprintf("- [WARNING] %s\n", alarm) + } + } else { + output += "\nNo active alarms.\n" + } + + return api.NewToolCallResult(output, nil), nil +} + +func mustgatherETCDMembersList(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + data, err := p.ReadETCDFile("member_list.json") + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to read ETCD member list: %w", err)), nil + } + + output := "## ETCD Members\n\n" + output += string(data) + "\n" + + return api.NewToolCallResult(output, nil), nil +} + +func mustgatherETCDEndpointStatus(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + data, err := p.ReadETCDFile("endpoint_status.json") + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to read ETCD endpoint status: %w", err)), nil + } + + output := "## ETCD Endpoint Status\n\n" + output += string(data) + "\n" + + return api.NewToolCallResult(output, nil), nil +} + +func mustgatherETCDObjectCount(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + limit := getInt(params.GetArguments(), "limit", 50) + + counts, err := p.GetETCDObjectCount() + if err != nil { + 
return api.NewToolCallResult("", fmt.Errorf("failed to get ETCD object counts: %w", err)), nil + } + + // Sort by count descending + type entry struct { + resource string + count int64 + } + entries := make([]entry, 0, len(counts)) + var total int64 + for k, v := range counts { + entries = append(entries, entry{resource: k, count: v}) + total += v + } + sort.Slice(entries, func(i, j int) bool { + return entries[i].count > entries[j].count + }) + + if limit > 0 && len(entries) > limit { + entries = entries[:limit] + } + + output := fmt.Sprintf("## ETCD Object Counts\n\nTotal objects: %s across %d resource types\n\n", formatNumber(total), len(counts)) + + // Find max resource name length for alignment + maxLen := 0 + for _, e := range entries { + if len(e.resource) > maxLen { + maxLen = len(e.resource) + } + } + + for _, e := range entries { + output += fmt.Sprintf("%-*s %s\n", maxLen, e.resource, formatNumber(e.count)) + } + + if limit > 0 && len(counts) > limit { + output += fmt.Sprintf("\n... 
showing top %d of %d resource types\n", limit, len(counts)) + } + + return api.NewToolCallResult(output, nil), nil +} diff --git a/pkg/toolsets/mustgather/events.go b/pkg/toolsets/mustgather/events.go new file mode 100644 index 000000000..0f396526e --- /dev/null +++ b/pkg/toolsets/mustgather/events.go @@ -0,0 +1,209 @@ +package mustgather + +import ( + "fmt" + "sort" + "strings" + + "github.com/containers/kubernetes-mcp-server/pkg/api" + mg "github.com/containers/kubernetes-mcp-server/pkg/ocp/mustgather" + "github.com/google/jsonschema-go/jsonschema" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/utils/ptr" +) + +var eventGVK = schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Event"} + +func initEvents() []api.ServerTool { + return []api.ServerTool{ + { + Tool: api.Tool{ + Name: "mustgather_events_list", + Description: "List Kubernetes events from the must-gather archive with optional filtering by type, namespace, resource, and reason", + Annotations: api.ToolAnnotations{ + Title: "List Events", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "type": {Type: "string", Description: "Event type filter: all, Warning, Normal", Enum: []any{"all", "Warning", "Normal"}}, + "namespace": {Type: "string", Description: "Filter by namespace"}, + "resource": {Type: "string", Description: "Filter by involved resource name (partial match)"}, + "reason": {Type: "string", Description: "Filter by event reason (partial match)"}, + "limit": {Type: "integer", Description: "Maximum number of events to return (default: 100)"}, + }, + }, + }, + Handler: mustgatherEventsList, + ClusterAware: ptr.To(false), + }, + { + Tool: api.Tool{ + Name: "mustgather_events_by_resource", + Description: "Get all events related to a specific Kubernetes resource from the must-gather archive", + Annotations: api.ToolAnnotations{ + Title: "Events by 
Resource", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "name": {Type: "string", Description: "Resource name"}, + "namespace": {Type: "string", Description: "Resource namespace"}, + "kind": {Type: "string", Description: "Resource kind (optional, narrows search)"}, + }, + Required: []string{"name"}, + }, + }, + Handler: mustgatherEventsByResource, + ClusterAware: ptr.To(false), + }, + } +} + +func mustgatherEventsList(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + args := params.GetArguments() + typeFilter := getString(args, "type", "all") + namespace := getString(args, "namespace", "") + resourceFilter := getString(args, "resource", "") + reasonFilter := getString(args, "reason", "") + limit := getInt(args, "limit", 100) + + list := p.ListResources(params.Context, eventGVK, namespace, mg.ListOptions{}) + + // Filter and collect events + var filtered []unstructured.Unstructured + for i := range list.Items { + event := &list.Items[i] + + if typeFilter != "all" { + eventType, _, _ := unstructured.NestedString(event.Object, "type") + if eventType != typeFilter { + continue + } + } + + if resourceFilter != "" { + involvedName, _, _ := unstructured.NestedString(event.Object, "involvedObject", "name") + if !strings.Contains(strings.ToLower(involvedName), strings.ToLower(resourceFilter)) { + continue + } + } + + if reasonFilter != "" { + reason, _, _ := unstructured.NestedString(event.Object, "reason") + if !strings.Contains(strings.ToLower(reason), strings.ToLower(reasonFilter)) { + continue + } + } + + filtered = append(filtered, *event) + } + + // Sort by lastTimestamp descending + sort.Slice(filtered, func(i, j int) bool { + ti, _, _ := unstructured.NestedString(filtered[i].Object, "lastTimestamp") + tj, _, _ := unstructured.NestedString(filtered[j].Object, 
"lastTimestamp") + return ti > tj + }) + + // Apply limit + if limit > 0 && len(filtered) > limit { + filtered = filtered[:limit] + } + + if len(filtered) == 0 { + return api.NewToolCallResult("No events found matching the criteria", nil), nil + } + + output := fmt.Sprintf("Found %d events:\n\n", len(filtered)) + for i := range filtered { + output += formatEvent(&filtered[i]) + } + + return api.NewToolCallResult(output, nil), nil +} + +func mustgatherEventsByResource(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + args := params.GetArguments() + name := getString(args, "name", "") + namespace := getString(args, "namespace", "") + kindFilter := getString(args, "kind", "") + + if name == "" { + return api.NewToolCallResult("", fmt.Errorf("name is required")), nil + } + + list := p.ListResources(params.Context, eventGVK, namespace, mg.ListOptions{}) + + // Filter events for the specific resource + var matched []unstructured.Unstructured + for i := range list.Items { + event := &list.Items[i] + involvedName, _, _ := unstructured.NestedString(event.Object, "involvedObject", "name") + if involvedName != name { + continue + } + if kindFilter != "" { + involvedKind, _, _ := unstructured.NestedString(event.Object, "involvedObject", "kind") + if !strings.EqualFold(involvedKind, kindFilter) { + continue + } + } + matched = append(matched, *event) + } + + // Sort chronologically + sort.Slice(matched, func(i, j int) bool { + ti, _, _ := unstructured.NestedString(matched[i].Object, "lastTimestamp") + tj, _, _ := unstructured.NestedString(matched[j].Object, "lastTimestamp") + return ti < tj + }) + + if len(matched) == 0 { + return api.NewToolCallResult(fmt.Sprintf("No events found for resource %s", name), nil), nil + } + + output := fmt.Sprintf("Found %d events for %s:\n\n", len(matched), name) + for i := range matched { + output += formatEvent(&matched[i]) + } + + return 
api.NewToolCallResult(output, nil), nil +} + +func formatEvent(event *unstructured.Unstructured) string { + eventType, _, _ := unstructured.NestedString(event.Object, "type") + reason, _, _ := unstructured.NestedString(event.Object, "reason") + message, _, _ := unstructured.NestedString(event.Object, "message") + lastTimestamp, _, _ := unstructured.NestedString(event.Object, "lastTimestamp") + involvedName, _, _ := unstructured.NestedString(event.Object, "involvedObject", "name") + involvedKind, _, _ := unstructured.NestedString(event.Object, "involvedObject", "kind") + ns := event.GetNamespace() + + marker := "[Normal]" + if eventType == "Warning" { + marker = "[Warning]" + } + + output := fmt.Sprintf("%s %s %s/%s", marker, lastTimestamp, involvedKind, involvedName) + if ns != "" { + output += fmt.Sprintf(" (ns: %s)", ns) + } + output += "\n" + output += fmt.Sprintf(" Reason: %s\n", reason) + output += fmt.Sprintf(" Message: %s\n\n", message) + return output +} diff --git a/pkg/toolsets/mustgather/helpers.go b/pkg/toolsets/mustgather/helpers.go new file mode 100644 index 000000000..379592576 --- /dev/null +++ b/pkg/toolsets/mustgather/helpers.go @@ -0,0 +1,149 @@ +package mustgather + +import ( + "fmt" + "strings" + "sync" + + mg "github.com/containers/kubernetes-mcp-server/pkg/ocp/mustgather" +) + +var ( + providerMu sync.RWMutex + provider *mg.Provider +) + +// setProvider stores the loaded must-gather provider +func setProvider(p *mg.Provider) { + providerMu.Lock() + defer providerMu.Unlock() + provider = p +} + +// getProvider returns the loaded must-gather provider or an error +func getProvider() (*mg.Provider, error) { + providerMu.RLock() + defer providerMu.RUnlock() + if provider == nil { + return nil, fmt.Errorf("no must-gather archive loaded. 
Call mustgather_use first with a path to a must-gather archive") + } + return provider, nil +} + +// getString extracts a string argument with a default +func getString(args map[string]any, key, defaultValue string) string { + if v, ok := args[key]; ok { + if s, ok := v.(string); ok && s != "" { + return s + } + } + return defaultValue +} + +// getInt extracts an integer argument with a default +func getInt(args map[string]any, key string, defaultValue int) int { + if v, ok := args[key]; ok { + switch n := v.(type) { + case float64: + return int(n) + case int: + return n + case int64: + return int(n) + } + } + return defaultValue +} + +// getBool extracts a boolean argument with a default +func getBool(args map[string]any, key string, defaultValue bool) bool { + if v, ok := args[key]; ok { + if b, ok := v.(bool); ok { + return b + } + } + return defaultValue +} + +// formatBytes formats bytes as human-readable string +func formatBytes(bytes int64) string { + const unit = 1024 + if bytes < unit { + return fmt.Sprintf("%d B", bytes) + } + div, exp := int64(unit), 0 + for n := bytes / unit; n >= unit; n /= unit { + div *= unit + exp++ + } + return fmt.Sprintf("%.1f %cB", float64(bytes)/float64(div), "KMGTPE"[exp]) +} + +// formatNumber formats a number with thousands separators +func formatNumber(n int64) string { + str := fmt.Sprintf("%d", n) + if len(str) <= 3 { + return str + } + result := "" + for i, c := range str { + if i > 0 && (len(str)-i)%3 == 0 { + result += "," + } + result += string(c) + } + return result +} + +// formatDuration formats duration in seconds to human-readable string +func formatDuration(seconds float64) string { + if seconds < 0.001 { + return fmt.Sprintf("%.2fus", seconds*1000000) + } else if seconds < 1 { + return fmt.Sprintf("%.2fms", seconds*1000) + } else if seconds < 60 { + return fmt.Sprintf("%.2fs", seconds) + } else if seconds < 3600 { + return fmt.Sprintf("%.1fm", seconds/60) + } else if seconds < 86400 { + return 
fmt.Sprintf("%.1fh", seconds/3600) + } + return fmt.Sprintf("%.1fd", seconds/86400) +} + +// truncate truncates a string to maxLen +func truncate(s string, maxLen int) string { + if len(s) <= maxLen { + return s + } + if maxLen <= 3 { + return s[:maxLen] + } + return s[:maxLen-3] + "..." +} + +// healthSymbol returns a symbol for health status +func healthSymbol(health string) string { + switch strings.ToLower(health) { + case "up", "healthy", "ok", "true", "firing": + return "[OK]" + case "down", "unhealthy", "error", "false": + return "[FAIL]" + default: + return "[WARN]" + } +} + +// severitySymbol returns a symbol for severity level +func severitySymbol(severity string) string { + switch strings.ToLower(severity) { + case "critical": + return "[CRITICAL]" + case "warning": + return "[WARNING]" + case "info": + return "[INFO]" + default: + return "[UNKNOWN]" + } +} diff --git a/pkg/toolsets/mustgather/monitoring.go b/pkg/toolsets/mustgather/monitoring.go new file mode 100644 index 000000000..1f3fb094c --- /dev/null +++ b/pkg/toolsets/mustgather/monitoring.go @@ -0,0 +1,526 @@ +package mustgather + +import ( + "fmt" + "strings" + + "github.com/containers/kubernetes-mcp-server/pkg/api" + mg "github.com/containers/kubernetes-mcp-server/pkg/ocp/mustgather" + "github.com/google/jsonschema-go/jsonschema" + "k8s.io/utils/ptr" +) + +func initMonitoring() []api.ServerTool { + return []api.ServerTool{ + { + Tool: api.Tool{ + Name: "mustgather_monitoring_prometheus_status", + Description: "Get Prometheus TSDB and runtime status from the must-gather archive", + Annotations: api.ToolAnnotations{ + Title: "Prometheus Status", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "replica": {Type: "string", Description: "Prometheus replica (0, 1, or all). 
Default: all"}, + }, + }, + }, + Handler: mustgatherMonitoringPrometheusStatus, + ClusterAware: ptr.To(false), + }, + { + Tool: api.Tool{ + Name: "mustgather_monitoring_prometheus_targets", + Description: "Get Prometheus scrape targets and their health status from the must-gather archive", + Annotations: api.ToolAnnotations{ + Title: "Prometheus Targets", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "replica": {Type: "string", Description: "Prometheus replica (0, 1, or all). Default: 0"}, + "health": {Type: "string", Description: "Filter by health status: up, down, unknown (default: all)"}, + }, + }, + }, + Handler: mustgatherMonitoringPrometheusTargets, + ClusterAware: ptr.To(false), + }, + { + Tool: api.Tool{ + Name: "mustgather_monitoring_prometheus_tsdb", + Description: "Get detailed Prometheus TSDB statistics including top metrics by series count and label cardinality", + Annotations: api.ToolAnnotations{ + Title: "Prometheus TSDB", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "replica": {Type: "string", Description: "Prometheus replica (0, 1, or all). 
Default: 0"}, + "limit": {Type: "integer", Description: "Number of top entries to show per category (default: 10)"}, + }, + }, + }, + Handler: mustgatherMonitoringPrometheusTSDB, + ClusterAware: ptr.To(false), + }, + { + Tool: api.Tool{ + Name: "mustgather_monitoring_prometheus_alerts", + Description: "Get active Prometheus alerts from the must-gather archive", + Annotations: api.ToolAnnotations{ + Title: "Prometheus Alerts", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "state": {Type: "string", Description: "Filter by alert state: firing, pending (default: all)"}, + }, + }, + }, + Handler: mustgatherMonitoringPrometheusAlerts, + ClusterAware: ptr.To(false), + }, + { + Tool: api.Tool{ + Name: "mustgather_monitoring_prometheus_rules", + Description: "Get Prometheus alerting and recording rules from the must-gather archive", + Annotations: api.ToolAnnotations{ + Title: "Prometheus Rules", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "type": {Type: "string", Description: "Filter by rule type: alerting, recording (default: all)"}, + }, + }, + }, + Handler: mustgatherMonitoringPrometheusRules, + ClusterAware: ptr.To(false), + }, + { + Tool: api.Tool{ + Name: "mustgather_monitoring_prometheus_config_summary", + Description: "Get a summary of Prometheus configuration from the must-gather archive", + Annotations: api.ToolAnnotations{ + Title: "Prometheus Config", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + }, + }, + Handler: mustgatherMonitoringPrometheusConfigSummary, + ClusterAware: ptr.To(false), + }, + { + Tool: api.Tool{ + Name: "mustgather_monitoring_alertmanager_status", + Description: "Get AlertManager cluster status from the must-gather archive", + Annotations: api.ToolAnnotations{ + Title: "AlertManager Status", + ReadOnlyHint: ptr.To(true), 
+ }, + InputSchema: &jsonschema.Schema{ + Type: "object", + }, + }, + Handler: mustgatherMonitoringAlertManagerStatus, + ClusterAware: ptr.To(false), + }, + } +} + +func mustgatherMonitoringPrometheusStatus(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + replica := getString(params.GetArguments(), "replica", "all") + replicas := mg.GetReplicaNumbers(replica) + + output := "## Prometheus Status\n\n" + + for _, r := range replicas { + output += fmt.Sprintf("### Replica prometheus-k8s-%d\n\n", r) + + runtime, err := p.GetPrometheusRuntimeInfo(r) + if err != nil { + output += fmt.Sprintf("Runtime info not available: %v\n\n", err) + } else { + output += "#### Runtime Info\n\n" + output += fmt.Sprintf("Start Time: %s\n", runtime.StartTime) + output += fmt.Sprintf("Storage Retention: %s\n", runtime.StorageRetention) + output += fmt.Sprintf("Config Reload: %s (last: %s)\n", + healthSymbol(fmt.Sprintf("%v", runtime.ReloadConfigSuccess)), runtime.LastConfigTime) + output += fmt.Sprintf("Goroutines: %d\n", runtime.GoroutineCount) + output += fmt.Sprintf("GOMAXPROCS: %d\n", runtime.GOMAXPROCS) + if runtime.CorruptionCount > 0 { + output += fmt.Sprintf("Corruptions: [WARNING] %d\n", runtime.CorruptionCount) + } + output += "\n" + } + + tsdb, err := p.GetPrometheusTSDB(r) + if err != nil { + output += fmt.Sprintf("TSDB info not available: %v\n\n", err) + } else { + output += "#### TSDB Head Stats\n\n" + output += fmt.Sprintf("Series: %s\n", formatNumber(tsdb.HeadStats.NumSeries)) + output += fmt.Sprintf("Label Pairs: %s\n", formatNumber(tsdb.HeadStats.NumLabelPairs)) + output += fmt.Sprintf("Chunks: %s\n", formatNumber(tsdb.HeadStats.ChunkCount)) + output += "\n" + } + } + + return api.NewToolCallResult(output, nil), nil +} + +func mustgatherMonitoringPrometheusTargets(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil 
{ + return api.NewToolCallResult("", err), nil + } + + args := params.GetArguments() + replica := getString(args, "replica", "0") + healthFilter := getString(args, "health", "") + + replicas := mg.GetReplicaNumbers(replica) + r := replicas[0] + + targets, err := p.GetPrometheusActiveTargets(r) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to get Prometheus targets: %w", err)), nil + } + + var upCount, downCount, unknownCount int + var filtered []mg.ActiveTarget + for i := range targets { + t := &targets[i] + switch strings.ToLower(t.Health) { + case "up": + upCount++ + case "down": + downCount++ + default: + unknownCount++ + } + if healthFilter == "" || strings.EqualFold(t.Health, healthFilter) { + filtered = append(filtered, *t) + } + } + + output := fmt.Sprintf("## Prometheus Targets (replica %d)\n\n", r) + output += fmt.Sprintf("Total: %d targets (Up: %d, Down: %d, Unknown: %d)\n\n", + len(targets), upCount, downCount, unknownCount) + + if healthFilter != "" { + output += fmt.Sprintf("Showing targets with health: %s\n\n", healthFilter) + } + + for i := range filtered { + t := &filtered[i] + output += fmt.Sprintf("%s %s\n", healthSymbol(t.Health), t.ScrapePool) + output += fmt.Sprintf(" URL: %s\n", t.ScrapeURL) + output += fmt.Sprintf(" Last Scrape: %s (duration: %s)\n", t.LastScrape, formatDuration(t.LastScrapeDuration)) + if t.LastError != "" { + output += fmt.Sprintf(" Error: %s\n", t.LastError) + } + output += "\n" + } + + return api.NewToolCallResult(output, nil), nil +} + +func mustgatherMonitoringPrometheusTSDB(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + args := params.GetArguments() + replica := getString(args, "replica", "0") + limit := getInt(args, "limit", 10) + + replicas := mg.GetReplicaNumbers(replica) + + output := "## Prometheus TSDB Statistics\n\n" + + for _, r := range replicas { + tsdb, err := 
p.GetPrometheusTSDB(r) + if err != nil { + output += fmt.Sprintf("### Replica %d: not available (%v)\n\n", r, err) + continue + } + + output += fmt.Sprintf("### Replica prometheus-k8s-%d\n\n", r) + output += "#### Head Stats\n\n" + output += fmt.Sprintf("Series: %s\n", formatNumber(tsdb.HeadStats.NumSeries)) + output += fmt.Sprintf("Label Pairs: %s\n", formatNumber(tsdb.HeadStats.NumLabelPairs)) + output += fmt.Sprintf("Chunks: %s\n\n", formatNumber(tsdb.HeadStats.ChunkCount)) + + output += formatNameValueSection("Top Metrics by Series Count", tsdb.SeriesCountByMetricName, limit) + output += formatNameValueSection("Top Labels by Value Count", tsdb.LabelValueCountByLabelName, limit) + output += formatNameValueBytesSection("Top Labels by Memory Usage", tsdb.MemoryInBytesByLabelName, limit) + } + + return api.NewToolCallResult(output, nil), nil +} + +func mustgatherMonitoringPrometheusAlerts(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + stateFilter := getString(params.GetArguments(), "state", "") + + rules, err := p.GetPrometheusRules() + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to get Prometheus rules: %w", err)), nil + } + + var alerts []alertEntry + for _, group := range rules.Groups { + for i := range group.Rules { + rule := &group.Rules[i] + if rule.Type != "alerting" { + continue + } + for j := range rule.Alerts { + alert := &rule.Alerts[j] + if stateFilter != "" && !strings.EqualFold(alert.State, stateFilter) { + continue + } + alerts = append(alerts, alertEntry{ + name: rule.Name, + state: alert.State, + severity: alert.Labels["severity"], + activeAt: alert.ActiveAt, + labels: alert.Labels, + summary: alert.Annotations["summary"], + message: alert.Annotations["message"], + }) + } + } + } + + output := "## Prometheus Alerts\n\n" + if stateFilter != "" { + output += fmt.Sprintf("Filter: %s\n\n", stateFilter) + } + output += 
fmt.Sprintf("Found %d active alert(s)\n\n", len(alerts)) + + if len(alerts) == 0 { + return api.NewToolCallResult(output+"No active alerts found.", nil), nil + } + + for _, a := range alerts { + stateTag := "[" + strings.ToUpper(a.state) + "]" + output += fmt.Sprintf("%s %s %s\n", severitySymbol(a.severity), stateTag, a.name) + output += fmt.Sprintf(" Active Since: %s\n", a.activeAt) + if a.summary != "" { + output += fmt.Sprintf(" Summary: %s\n", a.summary) + } else if a.message != "" { + output += fmt.Sprintf(" Message: %s\n", a.message) + } + // Show namespace if present + if ns, ok := a.labels["namespace"]; ok { + output += fmt.Sprintf(" Namespace: %s\n", ns) + } + output += "\n" + } + + return api.NewToolCallResult(output, nil), nil +} + +func mustgatherMonitoringPrometheusRules(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + typeFilter := getString(params.GetArguments(), "type", "") + + rules, err := p.GetPrometheusRules() + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to get Prometheus rules: %w", err)), nil + } + + output := "## Prometheus Rules\n\n" + if typeFilter != "" { + output += fmt.Sprintf("Filter: %s rules only\n\n", typeFilter) + } + + totalGroups := 0 + totalRules := 0 + + for _, group := range rules.Groups { + var groupRules []mg.Rule + for i := range group.Rules { + rule := &group.Rules[i] + if typeFilter == "" || strings.EqualFold(rule.Type, typeFilter) { + groupRules = append(groupRules, *rule) + } + } + if len(groupRules) == 0 { + continue + } + + totalGroups++ + totalRules += len(groupRules) + + output += fmt.Sprintf("### %s (%s)\n\n", group.Name, group.File) + output += fmt.Sprintf("Eval Interval: %.0fs | Last Eval: %s | Eval Time: %s\n\n", + group.Interval, group.LastEvaluation, formatDuration(group.EvaluationTime)) + + for i := range groupRules { + r := &groupRules[i] + output += fmt.Sprintf("- %s [%s] %s 
%s\n", healthSymbol(r.Health), r.Type, r.Name, r.State) + if r.LastError != "" { + output += fmt.Sprintf(" Error: %s\n", r.LastError) + } + } + output += "\n" + } + + summary := fmt.Sprintf("Total: %d groups, %d rules\n\n", totalGroups, totalRules) + // Insert summary after header + output = strings.Replace(output, "\n\n", "\n\n"+summary, 1) + + return api.NewToolCallResult(output, nil), nil +} + +func mustgatherMonitoringPrometheusConfigSummary(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + output := "## Prometheus Configuration Summary\n\n" + + config, err := p.GetPrometheusConfig() + if err != nil { + output += fmt.Sprintf("Config not available: %v\n\n", err) + } else { + // Show a truncated version of the config YAML + configYAML := config.YAML + lines := strings.Split(configYAML, "\n") + if len(lines) > 100 { + output += fmt.Sprintf("Configuration (%d lines, showing first 100):\n\n", len(lines)) + output += strings.Join(lines[:100], "\n") + "\n...\n\n" + } else { + output += "Configuration:\n\n" + configYAML + "\n\n" + } + } + + flags, err := p.GetPrometheusFlags() + if err != nil { + output += fmt.Sprintf("Flags not available: %v\n\n", err) + } else { + output += "### Key Flags\n\n" + keyFlags := []string{ + "storage.tsdb.retention.time", + "storage.tsdb.retention.size", + "storage.tsdb.path", + "web.listen-address", + "web.external-url", + "rules.alert.for-outage-tolerance", + "rules.alert.for-grace-period", + } + for _, key := range keyFlags { + if val, ok := flags[key]; ok { + output += fmt.Sprintf("%-40s %s\n", key, val) + } + } + output += "\n" + } + + return api.NewToolCallResult(output, nil), nil +} + +func mustgatherMonitoringAlertManagerStatus(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + status, err := p.GetAlertManagerStatus() + 
if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to get AlertManager status: %w", err)), nil + } + + output := "## AlertManager Status\n\n" + output += fmt.Sprintf("Cluster Status: %s\n", healthSymbol(status.Cluster.Status)) + output += fmt.Sprintf("Uptime: %s\n\n", status.Uptime) + + output += "### Version\n\n" + output += fmt.Sprintf("Version: %s\n", status.VersionInfo.Version) + output += fmt.Sprintf("Revision: %s\n", status.VersionInfo.Revision) + output += fmt.Sprintf("Branch: %s\n", status.VersionInfo.Branch) + output += fmt.Sprintf("Build Date: %s\n", status.VersionInfo.BuildDate) + output += fmt.Sprintf("Go Version: %s\n\n", status.VersionInfo.GoVersion) + + if len(status.Cluster.Peers) > 0 { + output += "### Cluster Peers\n\n" + for _, peer := range status.Cluster.Peers { + output += fmt.Sprintf("- %s (%s)\n", peer.Name, peer.Address) + } + output += "\n" + } + + return api.NewToolCallResult(output, nil), nil +} + +// alertEntry holds formatted alert data for display +type alertEntry struct { + name string + state string + severity string + activeAt string + labels map[string]string + summary string + message string +} + +// formatNameValueSection formats a list of name-value pairs as a section +func formatNameValueSection(title string, items []mg.NameValue, limit int) string { + output := fmt.Sprintf("#### %s\n\n", title) + if len(items) == 0 { + return output + "No data available.\n\n" + } + count := len(items) + if limit > 0 && count > limit { + count = limit + } + for i := 0; i < count; i++ { + output += fmt.Sprintf("%-60s %s\n", truncate(items[i].Name, 60), formatNumber(items[i].Value)) + } + output += "\n" + return output +} + +// formatNameValueBytesSection formats a list of name-value pairs with byte formatting +func formatNameValueBytesSection(title string, items []mg.NameValue, limit int) string { + output := fmt.Sprintf("#### %s\n\n", title) + if len(items) == 0 { + return output + "No data available.\n\n" + } + count := 
len(items) + if limit > 0 && count > limit { + count = limit + } + for i := 0; i < count; i++ { + output += fmt.Sprintf("%-60s %s\n", truncate(items[i].Name, 60), formatBytes(items[i].Value)) + } + output += "\n" + return output +} diff --git a/pkg/toolsets/mustgather/namespaces.go b/pkg/toolsets/mustgather/namespaces.go new file mode 100644 index 000000000..6c93d1fcb --- /dev/null +++ b/pkg/toolsets/mustgather/namespaces.go @@ -0,0 +1,47 @@ +package mustgather + +import ( + "fmt" + "sort" + "strings" + + "github.com/containers/kubernetes-mcp-server/pkg/api" + "github.com/google/jsonschema-go/jsonschema" + "k8s.io/utils/ptr" +) + +func initNamespaces() []api.ServerTool { + return []api.ServerTool{ + { + Tool: api.Tool{ + Name: "mustgather_namespaces_list", + Description: "List all namespaces found in the must-gather archive", + Annotations: api.ToolAnnotations{ + Title: "List Namespaces", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + }, + }, + Handler: mustgatherNamespacesList, + ClusterAware: ptr.To(false), + }, + } +} + +func mustgatherNamespacesList(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + namespaces := p.ListNamespaces() + sort.Strings(namespaces) + + output := fmt.Sprintf("Found %d namespaces:\n\n", len(namespaces)) + output += strings.Join(namespaces, "\n") + output += "\n" + + return api.NewToolCallResult(output, nil), nil +} diff --git a/pkg/toolsets/mustgather/nodes.go b/pkg/toolsets/mustgather/nodes.go new file mode 100644 index 000000000..e09f08c8d --- /dev/null +++ b/pkg/toolsets/mustgather/nodes.go @@ -0,0 +1,249 @@ +package mustgather + +import ( + "fmt" + "strings" + + "github.com/containers/kubernetes-mcp-server/pkg/api" + mg "github.com/containers/kubernetes-mcp-server/pkg/ocp/mustgather" + "github.com/google/jsonschema-go/jsonschema" + "k8s.io/utils/ptr" +) + +func initNodes() 
[]api.ServerTool { + return []api.ServerTool{ + { + Tool: api.Tool{ + Name: "mustgather_node_diagnostics_get", + Description: "Get comprehensive diagnostic information for a specific node including kubelet logs, system info, CPU/IRQ affinities, and hardware details", + Annotations: api.ToolAnnotations{ + Title: "Node Diagnostics", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "node": {Type: "string", Description: "Node name"}, + "include": {Type: "string", Description: "Comma-separated diagnostics to include: kubelet,sysinfo,cpu,irq,pods,lscpu,lspci,dmesg,cmdline (default: all)"}, + "kubeletTail": {Type: "integer", Description: "Number of lines from end of kubelet log (0 for all, default: 100)"}, + }, + Required: []string{"node"}, + }, + }, + Handler: mustgatherNodeDiagnosticsGet, + ClusterAware: ptr.To(false), + }, + { + Tool: api.Tool{ + Name: "mustgather_node_kubelet_logs", + Description: "Get kubelet logs for a specific node (decompressed from .gz file)", + Annotations: api.ToolAnnotations{ + Title: "Kubelet Logs", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "node": {Type: "string", Description: "Node name"}, + "tail": {Type: "integer", Description: "Number of lines from end (0 for all)"}, + }, + Required: []string{"node"}, + }, + }, + Handler: mustgatherNodeKubeletLogs, + ClusterAware: ptr.To(false), + }, + { + Tool: api.Tool{ + Name: "mustgather_node_kubelet_logs_grep", + Description: "Filter kubelet logs for a specific node by a search string. 
Returns only matching lines.", + Annotations: api.ToolAnnotations{ + Title: "Kubelet Logs Grep", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "node": {Type: "string", Description: "Node name"}, + "filter": {Type: "string", Description: "String to search for in log lines"}, + "tail": {Type: "integer", Description: "Maximum number of matching lines to return (0 for all)"}, + "caseInsensitive": {Type: "boolean", Description: "Perform case-insensitive search (default: false)"}, + }, + Required: []string{"node", "filter"}, + }, + }, + Handler: mustgatherNodeKubeletLogsGrep, + ClusterAware: ptr.To(false), + }, + } +} + +func mustgatherNodeDiagnosticsGet(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + args := params.GetArguments() + node := getString(args, "node", "") + include := getString(args, "include", "all") + kubeletTail := getInt(args, "kubeletTail", 100) + + if node == "" { + return api.NewToolCallResult("", fmt.Errorf("node is required")), nil + } + + diag, err := p.GetNodeDiagnostics(node) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to get node diagnostics: %w", err)), nil + } + + includeAll := include == "all" + includeMap := make(map[string]bool) + if !includeAll { + for _, item := range strings.Split(include, ",") { + includeMap[strings.TrimSpace(item)] = true + } + } + shouldInclude := func(name string) bool { + return includeAll || includeMap[name] + } + + output := fmt.Sprintf("Node Diagnostics for %s\n", node) + output += strings.Repeat("=", 80) + "\n\n" + + if shouldInclude("kubelet") && diag.KubeletLog != "" { + output += "## Kubelet Logs" + log := diag.KubeletLog + if kubeletTail > 0 { + output += fmt.Sprintf(" (last %d lines)", kubeletTail) + log = mg.TailLines(log, kubeletTail) + } + output += "\n\n" + log + "\n\n" + } + + 
sections := []struct { + key, title, content string + }{ + {"sysinfo", "System Info", diag.SysInfo}, + {"lscpu", "CPU Info (lscpu)", diag.Lscpu}, + {"cpu", "CPU Affinities", diag.CPUAffinities}, + {"irq", "IRQ Affinities", diag.IRQAffinities}, + {"lspci", "PCI Devices (lspci)", diag.Lspci}, + {"dmesg", "Kernel Messages (dmesg)", diag.Dmesg}, + {"cmdline", "Kernel Boot Parameters", diag.ProcCmdline}, + {"pods", "Pods Info", diag.PodsInfo}, + {"pods", "Pod Resources", diag.PodResources}, + } + + for _, s := range sections { + if shouldInclude(s.key) && s.content != "" { + output += fmt.Sprintf("## %s\n\n%s\n\n", s.title, s.content) + } + } + + return api.NewToolCallResult(output, nil), nil +} + +func mustgatherNodeKubeletLogs(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + args := params.GetArguments() + node := getString(args, "node", "") + tail := getInt(args, "tail", 0) + + if node == "" { + return api.NewToolCallResult("", fmt.Errorf("node is required")), nil + } + + diag, err := p.GetNodeDiagnostics(node) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to get node diagnostics: %w", err)), nil + } + + if diag.KubeletLog == "" { + return api.NewToolCallResult("", fmt.Errorf("kubelet log not found for node %s", node)), nil + } + + logs := diag.KubeletLog + if tail > 0 { + logs = mg.TailLines(logs, tail) + } + + header := fmt.Sprintf("Kubelet logs for node %s", node) + if tail > 0 { + header += fmt.Sprintf(" (last %d lines)", tail) + } + header += ":\n\n" + + return api.NewToolCallResult(header+logs, nil), nil +} + +func mustgatherNodeKubeletLogsGrep(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + args := params.GetArguments() + node := getString(args, "node", "") + filter := getString(args, "filter", "") + tail := 
getInt(args, "tail", 0) + caseInsensitive := getBool(args, "caseInsensitive", false) + + if node == "" { + return api.NewToolCallResult("", fmt.Errorf("node is required")), nil + } + if filter == "" { + return api.NewToolCallResult("", fmt.Errorf("filter string is required")), nil + } + + diag, err := p.GetNodeDiagnostics(node) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to get node diagnostics: %w", err)), nil + } + + if diag.KubeletLog == "" { + return api.NewToolCallResult("", fmt.Errorf("kubelet log not found for node %s", node)), nil + } + + lines := strings.Split(diag.KubeletLog, "\n") + searchFilter := filter + if caseInsensitive { + searchFilter = strings.ToLower(filter) + } + + var matchingLines []string + for _, line := range lines { + compareLine := line + if caseInsensitive { + compareLine = strings.ToLower(line) + } + if strings.Contains(compareLine, searchFilter) { + matchingLines = append(matchingLines, line) + } + } + + // Apply tail from the end of matches + if tail > 0 && len(matchingLines) > tail { + matchingLines = matchingLines[len(matchingLines)-tail:] + } + + header := fmt.Sprintf("Kubelet logs for node %s filtered by '%s'", node, filter) + if caseInsensitive { + header += " (case-insensitive)" + } + if tail > 0 { + header += fmt.Sprintf(" (last %d matches)", tail) + } + header += fmt.Sprintf(":\n\nFound %d matching line(s)\n\n", len(matchingLines)) + + if len(matchingLines) == 0 { + return api.NewToolCallResult(header+"No matching lines found.", nil), nil + } + + return api.NewToolCallResult(header+strings.Join(matchingLines, "\n"), nil), nil +} diff --git a/pkg/toolsets/mustgather/pod_logs.go b/pkg/toolsets/mustgather/pod_logs.go new file mode 100644 index 000000000..9b2d9089d --- /dev/null +++ b/pkg/toolsets/mustgather/pod_logs.go @@ -0,0 +1,86 @@ +package mustgather + +import ( + "fmt" + + "github.com/containers/kubernetes-mcp-server/pkg/api" + mg "github.com/containers/kubernetes-mcp-server/pkg/ocp/mustgather" 
+ "github.com/google/jsonschema-go/jsonschema" + "k8s.io/utils/ptr" +) + +func initPodLogs() []api.ServerTool { + return []api.ServerTool{ + { + Tool: api.Tool{ + Name: "mustgather_pod_logs_get", + Description: "Get container logs for a specific pod from the must-gather archive. Returns current or previous logs.", + Annotations: api.ToolAnnotations{ + Title: "Get Pod Logs", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "namespace": {Type: "string", Description: "Pod namespace"}, + "pod": {Type: "string", Description: "Pod name"}, + "container": {Type: "string", Description: "Container name (uses first container if not specified)"}, + "previous": {Type: "boolean", Description: "Get previous container logs (from crash/restart)"}, + "tail": {Type: "integer", Description: "Number of lines from end of logs (0 for all)"}, + }, + Required: []string{"namespace", "pod"}, + }, + }, + Handler: mustgatherPodLogsGet, + ClusterAware: ptr.To(false), + }, + } +} + +func mustgatherPodLogsGet(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + p, err := getProvider() + if err != nil { + return api.NewToolCallResult("", err), nil + } + + args := params.GetArguments() + namespace := getString(args, "namespace", "") + pod := getString(args, "pod", "") + container := getString(args, "container", "") + previous := getBool(args, "previous", false) + tail := getInt(args, "tail", 0) + + if namespace == "" || pod == "" { + return api.NewToolCallResult("", fmt.Errorf("namespace and pod are required")), nil + } + + logType := mg.LogTypeCurrent + if previous { + logType = mg.LogTypePrevious + } + + logs, err := p.GetPodLog(mg.PodLogOptions{ + Namespace: namespace, + Pod: pod, + Container: container, + LogType: logType, + TailLines: tail, + }) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to get pod logs: %w", err)), nil + } + + header := fmt.Sprintf("Logs for pod %s/%s", 
namespace, pod)
+	if container != "" {
+		header += fmt.Sprintf(", container %s", container)
+	}
+	if previous {
+		header += " (previous)"
+	}
+	if tail > 0 {
+		header += fmt.Sprintf(" (last %d lines)", tail)
+	}
+	header += ":\n\n"
+
+	return api.NewToolCallResult(header+logs, nil), nil
+}
diff --git a/pkg/toolsets/mustgather/resources.go b/pkg/toolsets/mustgather/resources.go
new file mode 100644
index 000000000..32787440d
--- /dev/null
+++ b/pkg/toolsets/mustgather/resources.go
@@ -0,0 +1,157 @@
+package mustgather
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/containers/kubernetes-mcp-server/pkg/api"
+	mg "github.com/containers/kubernetes-mcp-server/pkg/ocp/mustgather"
+	"github.com/google/jsonschema-go/jsonschema"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/utils/ptr"
+	"sigs.k8s.io/yaml"
+)
+
+// initResources declares the read-only tools for fetching and listing
+// Kubernetes resources out of the loaded must-gather archive.
+func initResources() []api.ServerTool {
+	return []api.ServerTool{
+		{
+			Tool: api.Tool{
+				Name:        "mustgather_resources_get",
+				Description: "Get a specific Kubernetes resource from the must-gather archive by kind, name, and optional namespace",
+				Annotations: api.ToolAnnotations{
+					Title:        "Get Resource",
+					ReadOnlyHint: ptr.To(true),
+				},
+				InputSchema: &jsonschema.Schema{
+					Type: "object",
+					Properties: map[string]*jsonschema.Schema{
+						"kind":       {Type: "string", Description: "Resource kind (e.g., Pod, Deployment, Service)"},
+						"name":       {Type: "string", Description: "Resource name"},
+						"namespace":  {Type: "string", Description: "Resource namespace (omit for cluster-scoped resources)"},
+						"apiVersion": {Type: "string", Description: "API version (default: v1)"},
+					},
+					Required: []string{"kind", "name"},
+				},
+			},
+			Handler:      mustgatherResourcesGet,
+			ClusterAware: ptr.To(false),
+		},
+		{
+			Tool: api.Tool{
+				Name:        "mustgather_resources_list",
+				Description: "List Kubernetes resources from the must-gather archive with optional filtering by namespace, labels, and fields",
+				Annotations: api.ToolAnnotations{
+					Title:        "List Resources",
+					ReadOnlyHint: ptr.To(true),
+				},
+				InputSchema: &jsonschema.Schema{
+					Type: "object",
+					Properties: map[string]*jsonschema.Schema{
+						"kind":          {Type: "string", Description: "Resource kind (e.g., Pod, Deployment, Service)"},
+						"namespace":     {Type: "string", Description: "Filter by namespace"},
+						"apiVersion":    {Type: "string", Description: "API version (default: v1)"},
+						"labelSelector": {Type: "string", Description: "Label selector (e.g., app=nginx,tier=frontend)"},
+						"fieldSelector": {Type: "string", Description: "Field selector (e.g., metadata.name=foo)"},
+						"limit":         {Type: "integer", Description: "Maximum number of resources to return (0 for all)"},
+					},
+					Required: []string{"kind"},
+				},
+			},
+			Handler:      mustgatherResourcesList,
+			ClusterAware: ptr.To(false),
+		},
+	}
+}
+
+// mustgatherResourcesGet returns the YAML of a single resource from the
+// loaded archive, identified by kind, name, and optional namespace.
+func mustgatherResourcesGet(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
+	p, err := getProvider()
+	if err != nil {
+		return api.NewToolCallResult("", err), nil
+	}
+
+	args := params.GetArguments()
+	kind := getString(args, "kind", "")
+	name := getString(args, "name", "")
+	namespace := getString(args, "namespace", "")
+	apiVersion := getString(args, "apiVersion", "v1")
+
+	// Defensive: the input schema already marks kind and name as required,
+	// but validate anyway in case the caller bypasses schema validation.
+	if kind == "" || name == "" {
+		return api.NewToolCallResult("", fmt.Errorf("kind and name are required")), nil
+	}
+
+	gvk := parseGVK(apiVersion, kind)
+	obj := p.GetResource(gvk, name, namespace)
+	if obj == nil {
+		return api.NewToolCallResult("", fmt.Errorf("resource %s/%s not found", kind, name)), nil
+	}
+
+	yamlBytes, err := yaml.Marshal(obj.Object)
+	if err != nil {
+		return api.NewToolCallResult("", fmt.Errorf("failed to marshal resource: %w", err)), nil
+	}
+
+	return api.NewToolCallResult(string(yamlBytes), nil), nil
+}
+
+// mustgatherResourcesList lists resources of a kind from the loaded archive,
+// optionally filtered by namespace, label selector, and field selector.
+// For result sets of five items or fewer the full YAML is appended.
+func mustgatherResourcesList(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
+	p, err := getProvider()
+	if err != nil {
+		return api.NewToolCallResult("", err), nil
+	}
+
+	args := params.GetArguments()
+	kind := getString(args, "kind", "")
+	namespace := getString(args, "namespace", "")
+	apiVersion := getString(args, "apiVersion", "v1")
+	labelSelector := getString(args, "labelSelector", "")
+	fieldSelector := getString(args, "fieldSelector", "")
+	limit := getInt(args, "limit", 0)
+
+	if kind == "" {
+		return api.NewToolCallResult("", fmt.Errorf("kind is required")), nil
+	}
+
+	gvk := parseGVK(apiVersion, kind)
+	list := p.ListResources(params.Context, gvk, namespace, mg.ListOptions{
+		LabelSelector: labelSelector,
+		FieldSelector: fieldSelector,
+		Limit:         limit,
+	})
+
+	if len(list.Items) == 0 {
+		return api.NewToolCallResult(fmt.Sprintf("No %s resources found", kind), nil), nil
+	}
+
+	// Accumulate output in a strings.Builder; += concatenation in a loop is
+	// quadratic in the number of items.
+	var out strings.Builder
+	fmt.Fprintf(&out, "Found %d %s resource(s):\n\n", len(list.Items), kind)
+	for i := range list.Items {
+		item := &list.Items[i]
+		if ns := item.GetNamespace(); ns != "" {
+			fmt.Fprintf(&out, "- %s/%s\n", ns, item.GetName())
+		} else {
+			fmt.Fprintf(&out, "- %s\n", item.GetName())
+		}
+	}
+
+	// Show full YAML for small result sets. Items that fail to marshal are
+	// skipped silently: the listing above already names them (best-effort).
+	if len(list.Items) <= 5 {
+		out.WriteString("\n---\n\n")
+		for i := range list.Items {
+			yamlBytes, err := yaml.Marshal(list.Items[i].Object)
+			if err == nil {
+				out.Write(yamlBytes)
+				out.WriteString("\n---\n\n")
+			}
+		}
+	}
+
+	return api.NewToolCallResult(out.String(), nil), nil
+}
+
+// parseGVK derives a GroupVersionKind from an apiVersion string, which is
+// either "group/version" or a bare core-group version such as "v1".
+func parseGVK(apiVersion, kind string) schema.GroupVersionKind {
+	if group, version, ok := strings.Cut(apiVersion, "/"); ok {
+		return schema.GroupVersionKind{Group: group, Version: version, Kind: kind}
+	}
+	return schema.GroupVersionKind{Group: "", Version: apiVersion, Kind: kind}
+}
diff --git a/pkg/toolsets/mustgather/toolset.go b/pkg/toolsets/mustgather/toolset.go
new file mode 100644
index 000000000..77fc10fd3
--- /dev/null
+++ b/pkg/toolsets/mustgather/toolset.go
@@ -0,0 +1,40 @@
+package mustgather
+
+import (
+	"slices"
+
+	"github.com/containers/kubernetes-mcp-server/pkg/api"
+	"github.com/containers/kubernetes-mcp-server/pkg/toolsets"
+)
+
+// Toolset provides tools for analyzing OpenShift must-gather archives offline. 
+type Toolset struct{}
+
+// GetName returns the toolset identifier.
+func (t *Toolset) GetName() string {
+	return "openshift/mustgather"
+}
+
+// GetDescription returns a human-readable summary of the toolset.
+func (t *Toolset) GetDescription() string {
+	return "Analyze OpenShift must-gather archives offline without a live cluster connection"
+}
+
+// GetTools assembles all mustgather_* tools. The Openshift parameter is
+// ignored: these tools operate on a local archive, not a live cluster.
+func (t *Toolset) GetTools(_ api.Openshift) []api.ServerTool {
+	return slices.Concat(
+		initUse(),
+		initResources(),
+		initNamespaces(),
+		initEvents(),
+		initPodLogs(),
+		initNodes(),
+		initEtcd(),
+		initMonitoring(),
+	)
+}
+
+// GetPrompts returns nil: this toolset ships no prompts.
+func (t *Toolset) GetPrompts() []api.ServerPrompt {
+	return nil
+}
+
+// init registers the toolset with the global toolset registry at load time.
+func init() {
+	toolsets.Register(&Toolset{})
+}
diff --git a/pkg/toolsets/mustgather/use.go b/pkg/toolsets/mustgather/use.go
new file mode 100644
index 000000000..6bca9b62b
--- /dev/null
+++ b/pkg/toolsets/mustgather/use.go
@@ -0,0 +1,89 @@
+package mustgather
+
+import (
+	"fmt"
+
+	"github.com/containers/kubernetes-mcp-server/pkg/api"
+	mg "github.com/containers/kubernetes-mcp-server/pkg/ocp/mustgather"
+	"github.com/google/jsonschema-go/jsonschema"
+	"k8s.io/utils/ptr"
+)
+
+// initUse declares the tool that loads an archive into the toolset.
+func initUse() []api.ServerTool {
+	return []api.ServerTool{
+		{
+			Tool: api.Tool{
+				Name:        "mustgather_use",
+				Description: "Load a must-gather archive from a given filesystem path for analysis. 
Must be called before any other mustgather_* tools.", + Annotations: api.ToolAnnotations{ + Title: "Load Must-Gather Archive", + ReadOnlyHint: ptr.To(true), + }, + InputSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "path": { + Type: "string", + Description: "Absolute path to the must-gather archive directory", + }, + }, + Required: []string{"path"}, + }, + }, + Handler: mustgatherUse, + ClusterAware: ptr.To(false), + }, + } +} + +func mustgatherUse(params api.ToolHandlerParams) (*api.ToolCallResult, error) { + args := params.GetArguments() + path := getString(args, "path", "") + if path == "" { + return api.NewToolCallResult("", fmt.Errorf("path is required")), nil + } + + p, err := mg.NewProvider(path) + if err != nil { + return api.NewToolCallResult("", fmt.Errorf("failed to load must-gather archive: %w", err)), nil + } + + setProvider(p) + + metadata := p.GetMetadata() + + // Register MCP resource + if params.ResourceRegistrar != nil { + resourceURI := "must-gather://" + path + content := fmt.Sprintf("Must-Gather Archive\n"+ + "Path: %s\n"+ + "Version: %s\n"+ + "Timestamp: %s\n"+ + "Resources: %d\n"+ + "Namespaces: %d\n", + metadata.Path, metadata.Version, metadata.Timestamp, + metadata.ResourceCount, metadata.NamespaceCount) + params.ResourceRegistrar.AddResource( + resourceURI, + "must-gather", + "Loaded must-gather archive metadata", + "text/plain", + content, + ) + } + + // Build summary + output := "Must-gather archive loaded successfully\n" + output += "=======================================\n\n" + output += fmt.Sprintf("Path: %s\n", metadata.Path) + if metadata.Version != "" { + output += fmt.Sprintf("Version: %s\n", metadata.Version) + } + if metadata.Timestamp != "" { + output += fmt.Sprintf("Timestamp: %s\n", metadata.Timestamp) + } + output += fmt.Sprintf("Resources indexed: %d\n", metadata.ResourceCount) + output += fmt.Sprintf("Namespaces: %d\n", metadata.NamespaceCount) + + return 
api.NewToolCallResult(output, nil), nil +}