From b616279d7d91ea6da9cfe8d6903da8b9a899ba67 Mon Sep 17 00:00:00 2001
From: Harshal Patil <12152047+harche@users.noreply.github.com>
Date: Tue, 7 Oct 2025 11:01:37 -0400
Subject: [PATCH 1/7] Add nodes_debug_exec tool in pkg/ocp package
---
README.md | 14 +
internal/tools/update-readme/main.go | 1 +
pkg/config/config_default_overrides.go | 3 +-
pkg/config/config_test.go | 2 +-
pkg/kubernetes-mcp-server/cmd/root_test.go | 2 +-
pkg/mcp/nodes_test.go | 253 +++++++++++
pkg/mcp/openshift_modules.go | 3 +
...toolsets-full-tools-multicluster-enum.json | 46 +-
.../toolsets-full-tools-multicluster.json | 46 +-
.../toolsets-full-tools-openshift.json | 46 +-
pkg/mcp/testdata/toolsets-full-tools.json | 46 +-
.../toolsets-openshift-core-tools.json | 46 ++
pkg/ocp/nodes_debug.go | 326 +++++++++++++
pkg/ocp/nodes_debug_test.go | 429 ++++++++++++++++++
pkg/ocp/ocp_client.go | 44 ++
pkg/ocp/testhelpers.go | 163 +++++++
pkg/toolsets/openshift/nodes.go | 126 +++++
pkg/toolsets/openshift/nodes_test.go | 95 ++++
pkg/toolsets/openshift/toolset.go | 31 ++
19 files changed, 1715 insertions(+), 7 deletions(-)
create mode 100644 pkg/mcp/openshift_modules.go
create mode 100644 pkg/mcp/testdata/toolsets-openshift-core-tools.json
create mode 100644 pkg/ocp/nodes_debug.go
create mode 100644 pkg/ocp/nodes_debug_test.go
create mode 100644 pkg/ocp/ocp_client.go
create mode 100644 pkg/ocp/testhelpers.go
create mode 100644 pkg/toolsets/openshift/nodes.go
create mode 100644 pkg/toolsets/openshift/nodes_test.go
create mode 100644 pkg/toolsets/openshift/toolset.go
diff --git a/README.md b/README.md
index 126b52ada..375bbdd07 100644
--- a/README.md
+++ b/README.md
@@ -407,6 +407,7 @@ The following sets of tools are available (toolsets marked with ✓ in the Defau
| kubevirt | KubeVirt virtual machine management tools | |
| observability | Cluster observability tools for querying Prometheus metrics and Alertmanager alerts | ✓ |
| helm | Tools for managing Helm charts and releases | ✓ |
+| openshift-core | Core OpenShift-specific tools (Node debugging, etc.) | |
@@ -696,6 +697,19 @@ Common use cases:
+
+
+openshift-core
+
+- **nodes_debug_exec** - Run commands on an OpenShift node using a privileged debug pod with comprehensive troubleshooting utilities. The debug pod uses the UBI9 toolbox image which includes: systemd tools (systemctl, journalctl), networking tools (ss, ip, ping, traceroute, nmap), process tools (ps, top, lsof, strace), file system tools (find, tar, rsync), and debugging tools (gdb). The host filesystem is mounted at /host, allowing commands to chroot /host if needed to access node-level resources. Output is truncated to the most recent 100 lines, so prefer filters like grep when expecting large logs.
+ - `command` (`array`) **(required)** - Command to execute on the node. All standard debugging utilities from the UBI9 toolbox are available. The host filesystem is mounted at /host - use 'chroot /host ' to access node-level resources, or run commands directly in the toolbox environment. Provide each argument as a separate array item (e.g. ['chroot', '/host', 'systemctl', 'status', 'kubelet'] or ['journalctl', '-u', 'kubelet', '--since', '1 hour ago']).
+ - `image` (`string`) - Container image to use for the debug pod (optional). Defaults to registry.access.redhat.com/ubi9/toolbox:latest which provides comprehensive debugging and troubleshooting utilities.
+ - `namespace` (`string`) - Namespace to create the temporary debug pod in (optional, defaults to the current namespace or 'default').
+ - `node` (`string`) **(required)** - Name of the node to debug (e.g. worker-0).
+ - `timeout_seconds` (`integer`) - Maximum time to wait for the command to complete before timing out (optional, defaults to 60 seconds).
+
+
+
diff --git a/internal/tools/update-readme/main.go b/internal/tools/update-readme/main.go
index 4163318a3..4807dfb4a 100644
--- a/internal/tools/update-readme/main.go
+++ b/internal/tools/update-readme/main.go
@@ -20,6 +20,7 @@ import (
_ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kiali"
_ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt"
_ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/observability"
+ _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/openshift"
)
type OpenShift struct{}
diff --git a/pkg/config/config_default_overrides.go b/pkg/config/config_default_overrides.go
index 70d065bce..05c3b4ac4 100644
--- a/pkg/config/config_default_overrides.go
+++ b/pkg/config/config_default_overrides.go
@@ -3,6 +3,7 @@ package config
func defaultOverrides() StaticConfig {
return StaticConfig{
// IMPORTANT: this file is used to override default config values in downstream builds.
- // This is intentionally left blank.
+ // OpenShift-specific defaults: add openshift-core toolset
+ Toolsets: []string{"core", "config", "helm", "openshift-core"},
}
}
diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go
index e3a88f75c..c49bb25a0 100644
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -248,7 +248,7 @@ func (s *ConfigSuite) TestReadConfigValidPreservesDefaultsForMissingFields() {
})
s.Run("toolsets defaulted correctly", func() {
s.Require().Lenf(config.Toolsets, 4, "Expected 4 toolsets, got %d", len(config.Toolsets))
- for _, toolset := range []string{"core", "config", "helm", "observability"} {
+ for _, toolset := range []string{"core", "config", "helm", "openshift-core"} {
s.Containsf(config.Toolsets, toolset, "Expected toolsets to contain %s", toolset)
}
})
diff --git a/pkg/kubernetes-mcp-server/cmd/root_test.go b/pkg/kubernetes-mcp-server/cmd/root_test.go
index a374534b8..a40dd3068 100644
--- a/pkg/kubernetes-mcp-server/cmd/root_test.go
+++ b/pkg/kubernetes-mcp-server/cmd/root_test.go
@@ -302,7 +302,7 @@ func TestToolsets(t *testing.T) {
ioStreams, out := testStream()
rootCmd := NewMCPServer(ioStreams)
rootCmd.SetArgs([]string{"--version", "--port=1337", "--log-level=1"})
- if err := rootCmd.Execute(); !strings.Contains(out.String(), "- Toolsets: core, config, helm") {
+ if err := rootCmd.Execute(); !strings.Contains(out.String(), "- Toolsets: core, config, helm, openshift-core") {
t.Fatalf("Expected toolsets 'full', got %s %v", out, err)
}
})
diff --git a/pkg/mcp/nodes_test.go b/pkg/mcp/nodes_test.go
index 5d788612b..12cc8bb56 100644
--- a/pkg/mcp/nodes_test.go
+++ b/pkg/mcp/nodes_test.go
@@ -1,14 +1,21 @@
package mcp
import (
+ "encoding/json"
+ "io"
"net/http"
"strconv"
+ "strings"
"testing"
"github.com/BurntSushi/toml"
"github.com/containers/kubernetes-mcp-server/internal/test"
"github.com/mark3labs/mcp-go/mcp"
"github.com/stretchr/testify/suite"
+ v1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/serializer"
)
type NodesSuite struct {
@@ -339,3 +346,249 @@ func (s *NodesSuite) TestNodesStatsSummaryDenied() {
func TestNodes(t *testing.T) {
suite.Run(t, new(NodesSuite))
}
+
+// Tests below are for the nodes_debug_exec tool (OpenShift-specific)
+
+type NodesDebugExecSuite struct {
+ BaseMcpSuite
+ mockServer *test.MockServer
+}
+
+func (s *NodesDebugExecSuite) SetupTest() {
+ s.BaseMcpSuite.SetupTest()
+ s.mockServer = test.NewMockServer()
+ s.Cfg.KubeConfig = s.mockServer.KubeconfigFile(s.T())
+}
+
+func (s *NodesDebugExecSuite) TearDownTest() {
+ s.BaseMcpSuite.TearDownTest()
+ if s.mockServer != nil {
+ s.mockServer.Close()
+ }
+}
+
+func (s *NodesDebugExecSuite) TestNodesDebugExecTool() {
+ s.Run("nodes_debug_exec with successful execution", func() {
+
+ var (
+ createdPod v1.Pod
+ deleteCalled bool
+ )
+ const namespace = "debug"
+ const logOutput = "filesystem repaired"
+
+ scheme := runtime.NewScheme()
+ _ = v1.AddToScheme(scheme)
+ codec := serializer.NewCodecFactory(scheme).UniversalDeserializer()
+
+ s.mockServer.Handle(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
+ switch {
+ case req.URL.Path == "/api":
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write([]byte(`{"kind":"APIVersions","versions":["v1"],"serverAddressByClientCIDRs":[{"clientCIDR":"0.0.0.0/0"}]}`))
+ case req.URL.Path == "/apis":
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write([]byte(`{"kind":"APIGroupList","apiVersion":"v1","groups":[]}`))
+ case req.URL.Path == "/api/v1":
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write([]byte(`{"kind":"APIResourceList","apiVersion":"v1","resources":[{"name":"pods","singularName":"","namespaced":true,"kind":"Pod","verbs":["get","list","watch","create","update","patch","delete"]}]}`))
+ case req.Method == http.MethodPatch && strings.HasPrefix(req.URL.Path, "/api/v1/namespaces/"+namespace+"/pods/"):
+ // Handle server-side apply (PATCH with fieldManager query param)
+ body, err := io.ReadAll(req.Body)
+ if err != nil {
+ s.T().Fatalf("failed to read apply body: %v", err)
+ }
+ created := &v1.Pod{}
+ if _, _, err = codec.Decode(body, nil, created); err != nil {
+ s.T().Fatalf("failed to decode apply body: %v", err)
+ }
+ createdPod = *created
+ // Keep the name from the request URL if it was provided
+ pathParts := strings.Split(req.URL.Path, "/")
+ if len(pathParts) > 0 {
+ createdPod.Name = pathParts[len(pathParts)-1]
+ }
+ createdPod.Namespace = namespace
+ w.Header().Set("Content-Type", "application/json")
+ _ = json.NewEncoder(w).Encode(&createdPod)
+ case req.Method == http.MethodPost && req.URL.Path == "/api/v1/namespaces/"+namespace+"/pods":
+ body, err := io.ReadAll(req.Body)
+ if err != nil {
+ s.T().Fatalf("failed to read create body: %v", err)
+ }
+ created := &v1.Pod{}
+ if _, _, err = codec.Decode(body, nil, created); err != nil {
+ s.T().Fatalf("failed to decode create body: %v", err)
+ }
+ createdPod = *created
+ createdPod.ObjectMeta = metav1.ObjectMeta{
+ Namespace: namespace,
+ Name: createdPod.GenerateName + "abc",
+ }
+ w.Header().Set("Content-Type", "application/json")
+ _ = json.NewEncoder(w).Encode(&createdPod)
+ case req.Method == http.MethodGet && createdPod.Name != "" && req.URL.Path == "/api/v1/namespaces/"+namespace+"/pods/"+createdPod.Name:
+ podStatus := createdPod.DeepCopy()
+ podStatus.Status = v1.PodStatus{
+ Phase: v1.PodSucceeded,
+ ContainerStatuses: []v1.ContainerStatus{{
+ Name: "debug",
+ State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{
+ ExitCode: 0,
+ }},
+ }},
+ }
+ w.Header().Set("Content-Type", "application/json")
+ _ = json.NewEncoder(w).Encode(podStatus)
+ case req.Method == http.MethodDelete && createdPod.Name != "" && req.URL.Path == "/api/v1/namespaces/"+namespace+"/pods/"+createdPod.Name:
+ deleteCalled = true
+ w.Header().Set("Content-Type", "application/json")
+ _ = json.NewEncoder(w).Encode(&metav1.Status{Status: "Success"})
+ case req.Method == http.MethodGet && createdPod.Name != "" && req.URL.Path == "/api/v1/namespaces/"+namespace+"/pods/"+createdPod.Name+"/log":
+ w.Header().Set("Content-Type", "text/plain")
+ _, _ = w.Write([]byte(logOutput))
+ }
+ }))
+
+ s.InitMcpClient()
+ toolResult, err := s.CallTool("nodes_debug_exec", map[string]interface{}{
+ "node": "worker-0",
+ "namespace": namespace,
+ "command": []interface{}{"uname", "-a"},
+ })
+
+ s.Run("call succeeds", func() {
+ s.Nilf(err, "call tool should not error: %v", err)
+ s.Falsef(toolResult.IsError, "tool should not return error: %v", toolResult.Content)
+ s.NotEmpty(toolResult.Content, "expected output content")
+ text := toolResult.Content[0].(mcp.TextContent).Text
+ s.Equalf(logOutput, text, "unexpected tool output %q", text)
+ })
+
+ s.Run("debug pod shaped correctly", func() {
+ s.Require().NotNil(createdPod.Spec.Containers, "expected containers in debug pod")
+ s.Require().Len(createdPod.Spec.Containers, 1, "expected single container in debug pod")
+ container := createdPod.Spec.Containers[0]
+ expectedCommand := []string{"uname", "-a"}
+ s.Truef(equalStringSlices(container.Command, expectedCommand),
+ "unexpected debug command: %v", container.Command)
+ s.Require().NotNil(container.SecurityContext, "expected security context")
+ s.Require().NotNil(container.SecurityContext.Privileged, "expected privileged field")
+ s.Truef(*container.SecurityContext.Privileged, "expected privileged container")
+ s.Require().NotEmpty(createdPod.Spec.Volumes, "expected volumes on debug pod")
+ s.Require().NotNil(createdPod.Spec.Volumes[0].HostPath, "expected hostPath volume")
+ s.Truef(deleteCalled, "expected debug pod to be deleted")
+ })
+ })
+}
+
+func equalStringSlices(a, b []string) bool {
+ if len(a) != len(b) {
+ return false
+ }
+ for i := range a {
+ if a[i] != b[i] {
+ return false
+ }
+ }
+ return true
+}
+
+func (s *NodesDebugExecSuite) TestNodesDebugExecToolNonZeroExit() {
+ s.Run("nodes_debug_exec with non-zero exit code", func() {
+ const namespace = "default"
+ const errorMessage = "failed"
+
+ scheme := runtime.NewScheme()
+ _ = v1.AddToScheme(scheme)
+ codec := serializer.NewCodecFactory(scheme).UniversalDeserializer()
+
+ s.mockServer.Handle(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
+ switch {
+ case req.URL.Path == "/api":
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write([]byte(`{"kind":"APIVersions","versions":["v1"],"serverAddressByClientCIDRs":[{"clientCIDR":"0.0.0.0/0"}]}`))
+ case req.URL.Path == "/apis":
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write([]byte(`{"kind":"APIGroupList","apiVersion":"v1","groups":[]}`))
+ case req.URL.Path == "/api/v1":
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write([]byte(`{"kind":"APIResourceList","apiVersion":"v1","resources":[{"name":"pods","singularName":"","namespaced":true,"kind":"Pod","verbs":["get","list","watch","create","update","patch","delete"]}]}`))
+ case req.Method == http.MethodPatch && strings.HasPrefix(req.URL.Path, "/api/v1/namespaces/"+namespace+"/pods/"):
+ // Handle server-side apply (PATCH with fieldManager query param)
+ body, err := io.ReadAll(req.Body)
+ if err != nil {
+ s.T().Fatalf("failed to read apply body: %v", err)
+ }
+ pod := &v1.Pod{}
+ if _, _, err = codec.Decode(body, nil, pod); err != nil {
+ s.T().Fatalf("failed to decode apply body: %v", err)
+ }
+ // Keep the name from the request URL if it was provided
+ pathParts := strings.Split(req.URL.Path, "/")
+ if len(pathParts) > 0 {
+ pod.Name = pathParts[len(pathParts)-1]
+ }
+ pod.Namespace = namespace
+ w.Header().Set("Content-Type", "application/json")
+ _ = json.NewEncoder(w).Encode(pod)
+ case req.Method == http.MethodPost && req.URL.Path == "/api/v1/namespaces/"+namespace+"/pods":
+ body, err := io.ReadAll(req.Body)
+ if err != nil {
+ s.T().Fatalf("failed to read create body: %v", err)
+ }
+ pod := &v1.Pod{}
+ if _, _, err = codec.Decode(body, nil, pod); err != nil {
+ s.T().Fatalf("failed to decode create body: %v", err)
+ }
+ pod.ObjectMeta = metav1.ObjectMeta{Name: pod.GenerateName + "xyz", Namespace: namespace}
+ w.Header().Set("Content-Type", "application/json")
+ _ = json.NewEncoder(w).Encode(pod)
+ case strings.HasPrefix(req.URL.Path, "/api/v1/namespaces/"+namespace+"/pods/") && strings.HasSuffix(req.URL.Path, "/log"):
+ w.Header().Set("Content-Type", "text/plain")
+ _, _ = w.Write([]byte(errorMessage))
+ case req.Method == http.MethodGet && strings.HasPrefix(req.URL.Path, "/api/v1/namespaces/"+namespace+"/pods/"):
+ pathParts := strings.Split(req.URL.Path, "/")
+ podName := pathParts[len(pathParts)-1]
+ pod := &v1.Pod{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: "v1",
+ Kind: "Pod",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: podName,
+ Namespace: namespace,
+ },
+ }
+ pod.Status = v1.PodStatus{
+ Phase: v1.PodSucceeded,
+ ContainerStatuses: []v1.ContainerStatus{{
+ Name: "debug",
+ State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{
+ ExitCode: 2,
+ Reason: "Error",
+ }},
+ }},
+ }
+ w.Header().Set("Content-Type", "application/json")
+ _ = json.NewEncoder(w).Encode(pod)
+ }
+ }))
+
+ s.InitMcpClient()
+ toolResult, err := s.CallTool("nodes_debug_exec", map[string]interface{}{
+ "node": "infra-1",
+ "command": []interface{}{"journalctl"},
+ })
+
+ s.Nilf(err, "call tool should not error: %v", err)
+ s.Truef(toolResult.IsError, "expected tool to return error")
+ text := toolResult.Content[0].(mcp.TextContent).Text
+ s.Containsf(text, "command exited with code 2", "expected exit code message, got %q", text)
+ s.Containsf(text, "Error", "expected error reason included, got %q", text)
+ })
+}
+
+func TestNodesDebugExec(t *testing.T) {
+ suite.Run(t, new(NodesDebugExecSuite))
+}
diff --git a/pkg/mcp/openshift_modules.go b/pkg/mcp/openshift_modules.go
new file mode 100644
index 000000000..84d982f4a
--- /dev/null
+++ b/pkg/mcp/openshift_modules.go
@@ -0,0 +1,3 @@
+package mcp
+
+import _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/openshift"
diff --git a/pkg/mcp/testdata/toolsets-full-tools-multicluster-enum.json b/pkg/mcp/testdata/toolsets-full-tools-multicluster-enum.json
index ce0ef9a61..ea3412f1a 100644
--- a/pkg/mcp/testdata/toolsets-full-tools-multicluster-enum.json
+++ b/pkg/mcp/testdata/toolsets-full-tools-multicluster-enum.json
@@ -232,6 +232,50 @@
},
"name": "namespaces_list"
},
+ {
+ "annotations": {
+ "title": "Nodes: Debug Exec",
+ "readOnlyHint": false,
+ "destructiveHint": true,
+ "idempotentHint": false,
+ "openWorldHint": true
+ },
+ "description": "Run commands on an OpenShift node using a privileged debug pod with comprehensive troubleshooting utilities. The debug pod uses the UBI9 toolbox image which includes: systemd tools (systemctl, journalctl), networking tools (ss, ip, ping, traceroute, nmap), process tools (ps, top, lsof, strace), file system tools (find, tar, rsync), and debugging tools (gdb). The host filesystem is mounted at /host, allowing commands to chroot /host if needed to access node-level resources. Output is truncated to the most recent 100 lines, so prefer filters like grep when expecting large logs.",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "node": {
+ "description": "Name of the node to debug (e.g. worker-0).",
+ "type": "string"
+ },
+ "command": {
+ "description": "Command to execute on the node. All standard debugging utilities from the UBI9 toolbox are available. The host filesystem is mounted at /host - use 'chroot /host ' to access node-level resources, or run commands directly in the toolbox environment. Provide each argument as a separate array item (e.g. ['chroot', '/host', 'systemctl', 'status', 'kubelet'] or ['journalctl', '-u', 'kubelet', '--since', '1 hour ago']).",
+ "items": {
+ "type": "string"
+ },
+ "type": "array"
+ },
+ "namespace": {
+ "description": "Namespace to create the temporary debug pod in (optional, defaults to the current namespace or 'default').",
+ "type": "string"
+ },
+ "image": {
+ "description": "Container image to use for the debug pod (optional). Defaults to registry.access.redhat.com/ubi9/toolbox:latest which provides comprehensive debugging and troubleshooting utilities.",
+ "type": "string"
+ },
+ "timeout_seconds": {
+ "description": "Maximum time to wait for the command to complete before timing out (optional, defaults to 60 seconds).",
+ "minimum": 1,
+ "type": "integer"
+ }
+ },
+ "required": [
+ "node",
+ "command"
+ ]
+ },
+ "name": "nodes_debug_exec"
+ },
{
"annotations": {
"title": "Node: Log",
@@ -256,7 +300,7 @@
"type": "string"
},
"query": {
- "description": "query specifies services(s) or files from which to return logs (required). Example: \"kubelet\" to fetch kubelet logs, \"/\u003clog-file-name\u003e\" to fetch a specific log file from the node (e.g., \"/var/log/kubelet.log\" or \"/var/log/kube-proxy.log\")",
+ "description": "query specifies services(s) or files from which to return logs (required). Example: \"kubelet\" to fetch kubelet logs, \"/\u003clog-file-name\u003e\" to fetch a specific log file from the node (e.g., \"/var/log/kubelet.log\" or \"/var/log/kube-proxy.log\")",
"type": "string"
},
"tailLines": {
diff --git a/pkg/mcp/testdata/toolsets-full-tools-multicluster.json b/pkg/mcp/testdata/toolsets-full-tools-multicluster.json
index 559573122..6ce241eb0 100644
--- a/pkg/mcp/testdata/toolsets-full-tools-multicluster.json
+++ b/pkg/mcp/testdata/toolsets-full-tools-multicluster.json
@@ -208,6 +208,50 @@
},
"name": "namespaces_list"
},
+ {
+ "annotations": {
+ "title": "Nodes: Debug Exec",
+ "readOnlyHint": false,
+ "destructiveHint": true,
+ "idempotentHint": false,
+ "openWorldHint": true
+ },
+ "description": "Run commands on an OpenShift node using a privileged debug pod with comprehensive troubleshooting utilities. The debug pod uses the UBI9 toolbox image which includes: systemd tools (systemctl, journalctl), networking tools (ss, ip, ping, traceroute, nmap), process tools (ps, top, lsof, strace), file system tools (find, tar, rsync), and debugging tools (gdb). The host filesystem is mounted at /host, allowing commands to chroot /host if needed to access node-level resources. Output is truncated to the most recent 100 lines, so prefer filters like grep when expecting large logs.",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "node": {
+ "description": "Name of the node to debug (e.g. worker-0).",
+ "type": "string"
+ },
+ "command": {
+ "description": "Command to execute on the node. All standard debugging utilities from the UBI9 toolbox are available. The host filesystem is mounted at /host - use 'chroot /host ' to access node-level resources, or run commands directly in the toolbox environment. Provide each argument as a separate array item (e.g. ['chroot', '/host', 'systemctl', 'status', 'kubelet'] or ['journalctl', '-u', 'kubelet', '--since', '1 hour ago']).",
+ "items": {
+ "type": "string"
+ },
+ "type": "array"
+ },
+ "namespace": {
+ "description": "Namespace to create the temporary debug pod in (optional, defaults to the current namespace or 'default').",
+ "type": "string"
+ },
+ "image": {
+ "description": "Container image to use for the debug pod (optional). Defaults to registry.access.redhat.com/ubi9/toolbox:latest which provides comprehensive debugging and troubleshooting utilities.",
+ "type": "string"
+ },
+ "timeout_seconds": {
+ "description": "Maximum time to wait for the command to complete before timing out (optional, defaults to 60 seconds).",
+ "minimum": 1,
+ "type": "integer"
+ }
+ },
+ "required": [
+ "node",
+ "command"
+ ]
+ },
+ "name": "nodes_debug_exec"
+ },
{
"annotations": {
"title": "Node: Log",
@@ -228,7 +272,7 @@
"type": "string"
},
"query": {
- "description": "query specifies services(s) or files from which to return logs (required). Example: \"kubelet\" to fetch kubelet logs, \"/\u003clog-file-name\u003e\" to fetch a specific log file from the node (e.g., \"/var/log/kubelet.log\" or \"/var/log/kube-proxy.log\")",
+ "description": "query specifies services(s) or files from which to return logs (required). Example: \"kubelet\" to fetch kubelet logs, \"/\u003clog-file-name\u003e\" to fetch a specific log file from the node (e.g., \"/var/log/kubelet.log\" or \"/var/log/kube-proxy.log\")",
"type": "string"
},
"tailLines": {
diff --git a/pkg/mcp/testdata/toolsets-full-tools-openshift.json b/pkg/mcp/testdata/toolsets-full-tools-openshift.json
index 6b5ef3112..ae4a57f24 100644
--- a/pkg/mcp/testdata/toolsets-full-tools-openshift.json
+++ b/pkg/mcp/testdata/toolsets-full-tools-openshift.json
@@ -168,6 +168,50 @@
},
"name": "namespaces_list"
},
+ {
+ "annotations": {
+ "title": "Nodes: Debug Exec",
+ "readOnlyHint": false,
+ "destructiveHint": true,
+ "idempotentHint": false,
+ "openWorldHint": true
+ },
+ "description": "Run commands on an OpenShift node using a privileged debug pod with comprehensive troubleshooting utilities. The debug pod uses the UBI9 toolbox image which includes: systemd tools (systemctl, journalctl), networking tools (ss, ip, ping, traceroute, nmap), process tools (ps, top, lsof, strace), file system tools (find, tar, rsync), and debugging tools (gdb). The host filesystem is mounted at /host, allowing commands to chroot /host if needed to access node-level resources. Output is truncated to the most recent 100 lines, so prefer filters like grep when expecting large logs.",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "node": {
+ "description": "Name of the node to debug (e.g. worker-0).",
+ "type": "string"
+ },
+ "command": {
+ "description": "Command to execute on the node. All standard debugging utilities from the UBI9 toolbox are available. The host filesystem is mounted at /host - use 'chroot /host ' to access node-level resources, or run commands directly in the toolbox environment. Provide each argument as a separate array item (e.g. ['chroot', '/host', 'systemctl', 'status', 'kubelet'] or ['journalctl', '-u', 'kubelet', '--since', '1 hour ago']).",
+ "items": {
+ "type": "string"
+ },
+ "type": "array"
+ },
+ "namespace": {
+ "description": "Namespace to create the temporary debug pod in (optional, defaults to the current namespace or 'default').",
+ "type": "string"
+ },
+ "image": {
+ "description": "Container image to use for the debug pod (optional). Defaults to registry.access.redhat.com/ubi9/toolbox:latest which provides comprehensive debugging and troubleshooting utilities.",
+ "type": "string"
+ },
+ "timeout_seconds": {
+ "description": "Maximum time to wait for the command to complete before timing out (optional, defaults to 60 seconds).",
+ "minimum": 1,
+ "type": "integer"
+ }
+ },
+ "required": [
+ "node",
+ "command"
+ ]
+ },
+ "name": "nodes_debug_exec"
+ },
{
"annotations": {
"title": "Node: Log",
@@ -184,7 +228,7 @@
"type": "string"
},
"query": {
- "description": "query specifies services(s) or files from which to return logs (required). Example: \"kubelet\" to fetch kubelet logs, \"/\u003clog-file-name\u003e\" to fetch a specific log file from the node (e.g., \"/var/log/kubelet.log\" or \"/var/log/kube-proxy.log\")",
+ "description": "query specifies services(s) or files from which to return logs (required). Example: \"kubelet\" to fetch kubelet logs, \"/\u003clog-file-name\u003e\" to fetch a specific log file from the node (e.g., \"/var/log/kubelet.log\" or \"/var/log/kube-proxy.log\")",
"type": "string"
},
"tailLines": {
diff --git a/pkg/mcp/testdata/toolsets-full-tools.json b/pkg/mcp/testdata/toolsets-full-tools.json
index 73e53dc78..27948487e 100644
--- a/pkg/mcp/testdata/toolsets-full-tools.json
+++ b/pkg/mcp/testdata/toolsets-full-tools.json
@@ -168,6 +168,50 @@
},
"name": "namespaces_list"
},
+ {
+ "annotations": {
+ "title": "Nodes: Debug Exec",
+ "readOnlyHint": false,
+ "destructiveHint": true,
+ "idempotentHint": false,
+ "openWorldHint": true
+ },
+ "description": "Run commands on an OpenShift node using a privileged debug pod with comprehensive troubleshooting utilities. The debug pod uses the UBI9 toolbox image which includes: systemd tools (systemctl, journalctl), networking tools (ss, ip, ping, traceroute, nmap), process tools (ps, top, lsof, strace), file system tools (find, tar, rsync), and debugging tools (gdb). The host filesystem is mounted at /host, allowing commands to chroot /host if needed to access node-level resources. Output is truncated to the most recent 100 lines, so prefer filters like grep when expecting large logs.",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "node": {
+ "description": "Name of the node to debug (e.g. worker-0).",
+ "type": "string"
+ },
+ "command": {
+ "description": "Command to execute on the node. All standard debugging utilities from the UBI9 toolbox are available. The host filesystem is mounted at /host - use 'chroot /host ' to access node-level resources, or run commands directly in the toolbox environment. Provide each argument as a separate array item (e.g. ['chroot', '/host', 'systemctl', 'status', 'kubelet'] or ['journalctl', '-u', 'kubelet', '--since', '1 hour ago']).",
+ "items": {
+ "type": "string"
+ },
+ "type": "array"
+ },
+ "namespace": {
+ "description": "Namespace to create the temporary debug pod in (optional, defaults to the current namespace or 'default').",
+ "type": "string"
+ },
+ "image": {
+ "description": "Container image to use for the debug pod (optional). Defaults to registry.access.redhat.com/ubi9/toolbox:latest which provides comprehensive debugging and troubleshooting utilities.",
+ "type": "string"
+ },
+ "timeout_seconds": {
+ "description": "Maximum time to wait for the command to complete before timing out (optional, defaults to 60 seconds).",
+ "minimum": 1,
+ "type": "integer"
+ }
+ },
+ "required": [
+ "node",
+ "command"
+ ]
+ },
+ "name": "nodes_debug_exec"
+ },
{
"annotations": {
"title": "Node: Log",
@@ -184,7 +228,7 @@
"type": "string"
},
"query": {
- "description": "query specifies services(s) or files from which to return logs (required). Example: \"kubelet\" to fetch kubelet logs, \"/\u003clog-file-name\u003e\" to fetch a specific log file from the node (e.g., \"/var/log/kubelet.log\" or \"/var/log/kube-proxy.log\")",
+ "description": "query specifies services(s) or files from which to return logs (required). Example: \"kubelet\" to fetch kubelet logs, \"/\u003clog-file-name\u003e\" to fetch a specific log file from the node (e.g., \"/var/log/kubelet.log\" or \"/var/log/kube-proxy.log\")",
"type": "string"
},
"tailLines": {
diff --git a/pkg/mcp/testdata/toolsets-openshift-core-tools.json b/pkg/mcp/testdata/toolsets-openshift-core-tools.json
new file mode 100644
index 000000000..65f3203d7
--- /dev/null
+++ b/pkg/mcp/testdata/toolsets-openshift-core-tools.json
@@ -0,0 +1,46 @@
+[
+ {
+ "annotations": {
+ "title": "Nodes: Debug Exec",
+ "readOnlyHint": false,
+ "destructiveHint": true,
+ "idempotentHint": false,
+ "openWorldHint": true
+ },
+ "description": "Run commands on an OpenShift node using a privileged debug pod with comprehensive troubleshooting utilities. The debug pod uses the UBI9 toolbox image which includes: systemd tools (systemctl, journalctl), networking tools (ss, ip, ping, traceroute, nmap), process tools (ps, top, lsof, strace), file system tools (find, tar, rsync), and debugging tools (gdb). The host filesystem is mounted at /host, allowing commands to chroot /host if needed to access node-level resources. Output is truncated to the most recent 100 lines, so prefer filters like grep when expecting large logs.",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "node": {
+ "description": "Name of the node to debug (e.g. worker-0).",
+ "type": "string"
+ },
+ "command": {
+ "description": "Command to execute on the node. All standard debugging utilities from the UBI9 toolbox are available. The host filesystem is mounted at /host - use 'chroot /host ' to access node-level resources, or run commands directly in the toolbox environment. Provide each argument as a separate array item (e.g. ['chroot', '/host', 'systemctl', 'status', 'kubelet'] or ['journalctl', '-u', 'kubelet', '--since', '1 hour ago']).",
+ "items": {
+ "type": "string"
+ },
+ "type": "array"
+ },
+ "namespace": {
+ "description": "Namespace to create the temporary debug pod in (optional, defaults to the current namespace or 'default').",
+ "type": "string"
+ },
+ "image": {
+ "description": "Container image to use for the debug pod (optional). Defaults to registry.access.redhat.com/ubi9/toolbox:latest which provides comprehensive debugging and troubleshooting utilities.",
+ "type": "string"
+ },
+ "timeout_seconds": {
+ "description": "Maximum time to wait for the command to complete before timing out (optional, defaults to 60 seconds).",
+ "minimum": 1,
+ "type": "integer"
+ }
+ },
+ "required": [
+ "node",
+ "command"
+ ]
+ },
+ "name": "nodes_debug_exec"
+ }
+]
diff --git a/pkg/ocp/nodes_debug.go b/pkg/ocp/nodes_debug.go
new file mode 100644
index 000000000..cb079a9ae
--- /dev/null
+++ b/pkg/ocp/nodes_debug.go
@@ -0,0 +1,326 @@
+package ocp
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "strings"
+ "time"
+
+ "github.com/containers/kubernetes-mcp-server/pkg/kubernetes"
+ "github.com/containers/kubernetes-mcp-server/pkg/version"
+
+ corev1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ utilrand "k8s.io/apimachinery/pkg/util/rand"
+ "k8s.io/utils/ptr"
+)
+
const (
	// DefaultNodeDebugImage is the UBI9 toolbox image that provides comprehensive debugging and troubleshooting utilities.
	// This image includes: systemd tools (systemctl, journalctl), networking tools (ss, ip, ping, traceroute, nmap),
	// process tools (ps, top, lsof, strace), file system tools (find, tar, rsync), debugging tools (gdb),
	// and many other utilities commonly needed for node-level debugging and diagnostics.
	DefaultNodeDebugImage = "registry.access.redhat.com/ubi9/toolbox:latest"
	// NodeDebugContainerName is the name used for the debug container, matching 'oc debug node' defaults.
	NodeDebugContainerName = "debug"
	// DefaultNodeDebugTimeout is the maximum time to wait for the debug pod to finish executing.
	// It is used when callers pass a non-positive timeout to NodesDebugExec.
	DefaultNodeDebugTimeout = 1 * time.Minute
)
+
+// NodesDebugExec mimics `oc debug node/ -- ` by creating a privileged pod on the target
+// node, running the provided command, collecting its output, and removing the pod afterwards.
+// The host filesystem is mounted at /host, allowing commands to chroot /host if needed to access node resources.
+//
+// When namespace is empty, the configured namespace (or "default" if none) is used. When image is empty the
+// default debug image is used. Timeout controls how long we wait for the pod to complete.
+func NodesDebugExec(
+ ctx context.Context,
+ k OpenshiftClient,
+ namespace string,
+ nodeName string,
+ image string,
+ command []string,
+ timeout time.Duration,
+) (string, error) {
+ if nodeName == "" {
+ return "", errors.New("node name is required")
+ }
+ if len(command) == 0 {
+ return "", errors.New("command is required")
+ }
+
+ ns := k.NamespaceOrDefault(namespace)
+ if ns == "" {
+ ns = "default"
+ }
+ debugImage := image
+ if debugImage == "" {
+ debugImage = DefaultNodeDebugImage
+ }
+ if timeout <= 0 {
+ timeout = DefaultNodeDebugTimeout
+ }
+
+ // Create the debug pod
+ created, err := createDebugPod(ctx, k, nodeName, ns, debugImage, command)
+ if err != nil {
+ return "", err
+ }
+
+ // Ensure the pod is deleted regardless of completion state.
+ defer func() {
+ deleteCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+ defer cancel()
+ pods, err := k.AccessControlClientset().Pods(ns)
+ if err == nil {
+ _ = pods.Delete(deleteCtx, created.Name, metav1.DeleteOptions{})
+ }
+ }()
+
+ // Poll for debug pod completion
+ terminated, lastPod, waitMsg, err := pollForCompletion(ctx, k, ns, created.Name, timeout)
+ if err != nil {
+ return "", err
+ }
+
+ // Retrieve the logs
+ logs, err := retrieveLogs(ctx, k, ns, created.Name)
+ if err != nil {
+ return "", err
+ }
+
+ // Process the results
+ return processResults(terminated, lastPod, waitMsg, logs)
+}
+
+// createDebugPod creates a privileged pod on the target node to run debug commands.
+func createDebugPod(
+ ctx context.Context,
+ k OpenshiftClient,
+ nodeName string,
+ namespace string,
+ image string,
+ command []string,
+) (*corev1.Pod, error) {
+ sanitizedNode := sanitizeForName(nodeName)
+ hostPathType := corev1.HostPathDirectory
+
+ // Generate a unique name
+ suffix := utilrand.String(5)
+ maxNodeLen := 63 - len("node-debug-") - 1 - len(suffix)
+ if maxNodeLen < 1 {
+ maxNodeLen = 1
+ }
+ if len(sanitizedNode) > maxNodeLen {
+ sanitizedNode = sanitizedNode[:maxNodeLen]
+ }
+ podName := fmt.Sprintf("node-debug-%s-%s", sanitizedNode, suffix)
+
+ debugPod := &corev1.Pod{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: podName,
+ Namespace: namespace,
+ Labels: map[string]string{
+ kubernetes.AppKubernetesManagedBy: version.BinaryName,
+ kubernetes.AppKubernetesComponent: "node-debug",
+ kubernetes.AppKubernetesName: fmt.Sprintf("node-debug-%s", sanitizedNode),
+ },
+ },
+ Spec: corev1.PodSpec{
+ AutomountServiceAccountToken: ptr.To(false),
+ HostNetwork: true,
+ HostPID: true,
+ HostIPC: true,
+ NodeName: nodeName,
+ RestartPolicy: corev1.RestartPolicyNever,
+ SecurityContext: &corev1.PodSecurityContext{
+ RunAsUser: ptr.To[int64](0),
+ },
+ Tolerations: []corev1.Toleration{
+ {Operator: corev1.TolerationOpExists},
+ {Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoSchedule},
+ {Operator: corev1.TolerationOpExists, Effect: corev1.TaintEffectNoExecute},
+ },
+ Volumes: []corev1.Volume{
+ {
+ Name: "host-root",
+ VolumeSource: corev1.VolumeSource{
+ HostPath: &corev1.HostPathVolumeSource{
+ Path: "/",
+ Type: &hostPathType,
+ },
+ },
+ },
+ },
+ Containers: []corev1.Container{
+ {
+ Name: NodeDebugContainerName,
+ Image: image,
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Command: command,
+ SecurityContext: &corev1.SecurityContext{
+ Privileged: ptr.To(true),
+ RunAsUser: ptr.To[int64](0),
+ },
+ VolumeMounts: []corev1.VolumeMount{
+ {Name: "host-root", MountPath: "/host"},
+ },
+ },
+ },
+ },
+ }
+
+ // Create the pod using AccessControlClientset
+ pods, err := k.AccessControlClientset().Pods(namespace)
+ if err != nil {
+ return nil, fmt.Errorf("failed to get pods interface: %w", err)
+ }
+
+ created, err := pods.Create(ctx, debugPod, metav1.CreateOptions{})
+ if err != nil {
+ return nil, fmt.Errorf("failed to create debug pod: %w", err)
+ }
+
+ return created, nil
+}
+
+// pollForCompletion polls the debug pod until it completes or times out.
+func pollForCompletion(
+ ctx context.Context,
+ k OpenshiftClient,
+ namespace string,
+ podName string,
+ timeout time.Duration,
+) (*corev1.ContainerStateTerminated, *corev1.Pod, string, error) {
+ pollCtx, cancel := context.WithTimeout(ctx, timeout)
+ defer cancel()
+
+ ticker := time.NewTicker(2 * time.Second)
+ defer ticker.Stop()
+
+ var (
+ lastPod *corev1.Pod
+ terminated *corev1.ContainerStateTerminated
+ waitMsg string
+ )
+
+ for {
+ // Get pod status using AccessControlClientset
+ pods, getErr := k.AccessControlClientset().Pods(namespace)
+ if getErr != nil {
+ return nil, nil, "", fmt.Errorf("failed to get pods interface: %w", getErr)
+ }
+
+ current, err := pods.Get(pollCtx, podName, metav1.GetOptions{})
+ if err != nil {
+ return nil, nil, "", fmt.Errorf("failed to get debug pod status: %w", err)
+ }
+ lastPod = current
+
+ if status := containerStatusByName(current.Status.ContainerStatuses, NodeDebugContainerName); status != nil {
+ if status.State.Waiting != nil {
+ waitMsg = fmt.Sprintf("container waiting: %s", status.State.Waiting.Reason)
+ // Image pull issues should fail fast.
+ if status.State.Waiting.Reason == "ErrImagePull" || status.State.Waiting.Reason == "ImagePullBackOff" {
+ return nil, nil, "", fmt.Errorf("debug container failed to start (%s): %s", status.State.Waiting.Reason, status.State.Waiting.Message)
+ }
+ }
+ if status.State.Terminated != nil {
+ terminated = status.State.Terminated
+ break
+ }
+ }
+
+ if current.Status.Phase == corev1.PodFailed {
+ break
+ }
+
+ // Wait for the next tick interval before checking pod status again, or timeout if context is done.
+ select {
+ case <-pollCtx.Done():
+ return nil, nil, "", fmt.Errorf("timed out waiting for debug pod %s to complete: %w", podName, pollCtx.Err())
+ case <-ticker.C:
+ }
+ }
+
+ return terminated, lastPod, waitMsg, nil
+}
+
// retrieveLogs retrieves the logs from the debug pod.
// The returned output is whitespace-trimmed, and retrieval is bounded by its
// own 30-second timeout layered on top of the caller's context.
// NOTE(review): tail is passed as 0 here, while the tool description advertises
// truncation to the most recent 100 lines — confirm PodsLog's tail semantics
// (0 may mean "no limit") and where the 100-line truncation actually happens.
func retrieveLogs(ctx context.Context, k OpenshiftClient, namespace, podName string) (string, error) {
	logCtx, logCancel := context.WithTimeout(ctx, 30*time.Second)
	defer logCancel()
	logs, logErr := k.PodsLog(logCtx, namespace, podName, NodeDebugContainerName, false, 0)
	if logErr != nil {
		return "", fmt.Errorf("failed to retrieve debug pod logs: %w", logErr)
	}
	return strings.TrimSpace(logs), nil
}
+
+// processResults processes the debug pod completion status and returns the appropriate result.
+func processResults(terminated *corev1.ContainerStateTerminated, lastPod *corev1.Pod, waitMsg, logs string) (string, error) {
+ if terminated != nil {
+ if terminated.ExitCode != 0 {
+ errMsg := fmt.Sprintf("command exited with code %d", terminated.ExitCode)
+ if terminated.Reason != "" {
+ errMsg = fmt.Sprintf("%s (%s)", errMsg, terminated.Reason)
+ }
+ if terminated.Message != "" {
+ errMsg = fmt.Sprintf("%s: %s", errMsg, terminated.Message)
+ }
+ if logs != "" {
+ errMsg = fmt.Sprintf("%s\nOutput:\n%s", errMsg, logs)
+ }
+ return "", errors.New(errMsg)
+ }
+ return logs, nil
+ }
+
+ if lastPod != nil && lastPod.Status.Reason != "" {
+ if logs != "" {
+ return "", fmt.Errorf("debug pod failed: %s\nOutput:\n%s", lastPod.Status.Reason, logs)
+ }
+ return "", fmt.Errorf("debug pod failed: %s", lastPod.Status.Reason)
+ }
+ if waitMsg != "" {
+ if logs != "" {
+ return "", fmt.Errorf("debug container did not complete: %s\nOutput:\n%s", waitMsg, logs)
+ }
+ return "", fmt.Errorf("debug container did not complete: %s", waitMsg)
+ }
+ if logs != "" {
+ return "", fmt.Errorf("debug container did not reach a terminal state\nOutput:\n%s", logs)
+ }
+ return "", errors.New("debug container did not reach a terminal state")
+}
+
// sanitizeForName converts an arbitrary node name into a string safe to embed
// in pod names and label values: lowercase alphanumerics and hyphens only,
// no leading/trailing hyphen, at most 40 characters. Returns "node" when
// nothing usable remains.
func sanitizeForName(name string) string {
	sanitized := strings.Map(func(r rune) rune {
		if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' {
			return r
		}
		// Any character outside the DNS-label alphabet becomes a hyphen.
		return '-'
	}, strings.ToLower(name))
	sanitized = strings.Trim(sanitized, "-")
	if len(sanitized) > 40 {
		// Bug fix: truncation could expose a trailing hyphen (e.g. cutting
		// right after a '-'), which is invalid at the end of a DNS label or
		// label value; re-trim after the cut.
		sanitized = strings.TrimRight(sanitized[:40], "-")
	}
	if sanitized == "" {
		sanitized = "node"
	}
	return sanitized
}
+
+func containerStatusByName(statuses []corev1.ContainerStatus, name string) *corev1.ContainerStatus {
+ for idx := range statuses {
+ if statuses[idx].Name == name {
+ return &statuses[idx]
+ }
+ }
+ return nil
+}
diff --git a/pkg/ocp/nodes_debug_test.go b/pkg/ocp/nodes_debug_test.go
new file mode 100644
index 000000000..1c86c92f8
--- /dev/null
+++ b/pkg/ocp/nodes_debug_test.go
@@ -0,0 +1,429 @@
+package ocp
+
+import (
+ "context"
+ "fmt"
+ "strings"
+ "testing"
+ "time"
+
+ corev1 "k8s.io/api/core/v1"
+)
+
// TestNodesDebugExecCreatesPrivilegedPod verifies the happy path: the debug pod
// is created with the expected privileged spec on the requested node, the
// command output is returned, and the pod is cleaned up afterwards.
func TestNodesDebugExecCreatesPrivilegedPod(t *testing.T) {
	env := NewNodeDebugTestEnv(t)
	env.Pods.Logs = "kernel 6.8"

	out, err := NodesDebugExec(context.Background(), env.Kubernetes, "", "worker-0", "", []string{"uname", "-a"}, 2*time.Minute)
	if err != nil {
		t.Fatalf("NodesDebugExec returned error: %v", err)
	}
	if out != "kernel 6.8" {
		t.Fatalf("unexpected command output: %q", out)
	}

	created := env.Pods.Created
	if created == nil {
		t.Fatalf("expected debug pod to be created")
	}
	// Empty namespace argument should fall back to the fake client's default.
	if created.Namespace != "default" {
		t.Fatalf("expected default namespace fallback, got %q", created.Namespace)
	}
	if created.Spec.NodeName != "worker-0" {
		t.Fatalf("expected pod to target node worker-0, got %q", created.Spec.NodeName)
	}
	// Cleanup must happen even on success.
	if !env.Pods.Deleted {
		t.Fatalf("expected debug pod to be deleted after execution")
	}

	if len(created.Spec.Containers) != 1 {
		t.Fatalf("expected single container in debug pod")
	}
	container := created.Spec.Containers[0]
	if container.Image != DefaultNodeDebugImage {
		t.Fatalf("expected default image %q, got %q", DefaultNodeDebugImage, container.Image)
	}
	expectedCommand := []string{"uname", "-a"}
	if len(container.Command) != len(expectedCommand) {
		t.Fatalf("unexpected command length, got %v", container.Command)
	}
	for i, part := range expectedCommand {
		if container.Command[i] != part {
			t.Fatalf("command[%d] = %q, expected %q", i, container.Command[i], part)
		}
	}
	if container.SecurityContext == nil || container.SecurityContext.Privileged == nil || !*container.SecurityContext.Privileged {
		t.Fatalf("expected container to run privileged")
	}
	if len(container.VolumeMounts) != 1 || container.VolumeMounts[0].MountPath != "/host" {
		t.Fatalf("expected container to mount host root at /host")
	}

	if created.Spec.SecurityContext == nil || created.Spec.SecurityContext.RunAsUser == nil || *created.Spec.SecurityContext.RunAsUser != 0 {
		t.Fatalf("expected pod security context to run as root")
	}

	if len(created.Spec.Volumes) != 1 || created.Spec.Volumes[0].HostPath == nil {
		t.Fatalf("expected host root volume to be configured")
	}
}
+
// TestNodesDebugExecReturnsErrorForNonZeroExit verifies that a failing command
// surfaces an error containing both the exit code and the captured logs, and
// that the caller-provided namespace and image are honored.
func TestNodesDebugExecReturnsErrorForNonZeroExit(t *testing.T) {
	env := NewNodeDebugTestEnv(t)
	env.Pods.ExitCode = 5
	env.Pods.TerminatedReason = "Error"
	env.Pods.TerminatedMessage = "some failure"
	env.Pods.Logs = "bad things happened"

	out, err := NodesDebugExec(context.Background(), env.Kubernetes, "debug-ns", "infra-node", "registry.example/custom:latest", []string{"journalctl", "-xe"}, time.Minute)
	if err == nil {
		t.Fatalf("expected error for non-zero exit code")
	}
	// Logs should be included in the error message
	if !strings.Contains(err.Error(), "bad things happened") {
		t.Fatalf("expected error to contain logs, got: %v", err)
	}
	if !strings.Contains(err.Error(), "command exited with code 5") {
		t.Fatalf("expected error to contain exit code, got: %v", err)
	}
	if out != "" {
		t.Fatalf("expected empty output on error, got %q", out)
	}

	created := env.Pods.Created
	if created == nil {
		t.Fatalf("expected pod to be created")
	}
	if created.Namespace != "debug-ns" {
		t.Fatalf("expected provided namespace to be used, got %q", created.Namespace)
	}
	if containerImage := created.Spec.Containers[0].Image; containerImage != "registry.example/custom:latest" {
		t.Fatalf("expected custom image to be used, got %q", containerImage)
	}
}
+
// TestCreateDebugPod exercises createDebugPod directly: unique DNS-safe naming,
// namespace/node targeting, custom image, command wiring, and privilege.
func TestCreateDebugPod(t *testing.T) {
	env := NewNodeDebugTestEnv(t)

	created, err := createDebugPod(context.Background(), env.Kubernetes, "worker-1", "test-ns", "custom:v1", []string{"ls", "-la"})
	if err != nil {
		t.Fatalf("createDebugPod failed: %v", err)
	}
	if created == nil {
		t.Fatalf("expected pod to be created")
	}
	if created.Namespace != "test-ns" {
		t.Fatalf("expected namespace test-ns, got %q", created.Namespace)
	}
	if created.Spec.NodeName != "worker-1" {
		t.Fatalf("expected node worker-1, got %q", created.Spec.NodeName)
	}
	// Name is prefix + sanitized node + random suffix.
	if !strings.HasPrefix(created.Name, "node-debug-worker-1-") {
		t.Fatalf("unexpected pod name: %q", created.Name)
	}
	if len(created.Name) > 63 {
		t.Fatalf("pod name exceeds DNS label length: %d characters", len(created.Name))
	}
	if len(created.Spec.Containers) != 1 {
		t.Fatalf("expected 1 container, got %d", len(created.Spec.Containers))
	}
	container := created.Spec.Containers[0]
	if container.Image != "custom:v1" {
		t.Fatalf("expected image custom:v1, got %q", container.Image)
	}
	expectedCmd := []string{"ls", "-la"}
	if len(container.Command) != len(expectedCmd) {
		t.Fatalf("expected %d command parts, got %d", len(expectedCmd), len(container.Command))
	}
	for i, part := range expectedCmd {
		if container.Command[i] != part {
			t.Fatalf("command[%d] = %q, expected %q", i, container.Command[i], part)
		}
	}
	if container.SecurityContext == nil || !*container.SecurityContext.Privileged {
		t.Fatalf("expected privileged container")
	}
}
+
+func TestPollForCompletion(t *testing.T) {
+ tests := []struct {
+ name string
+ exitCode int32
+ terminatedReason string
+ waitingReason string
+ waitingMessage string
+ expectError bool
+ expectTerminated bool
+ errorContains []string
+ expectedExitCode int32
+ expectedReason string
+ }{
+ {
+ name: "successful completion",
+ exitCode: 0,
+ expectTerminated: true,
+ expectedExitCode: 0,
+ },
+ {
+ name: "non-zero exit code",
+ exitCode: 42,
+ terminatedReason: "Error",
+ expectTerminated: true,
+ expectedExitCode: 42,
+ expectedReason: "Error",
+ },
+ {
+ name: "image pull error",
+ waitingReason: "ErrImagePull",
+ waitingMessage: "image not found",
+ expectError: true,
+ errorContains: []string{"ErrImagePull", "image not found"},
+ },
+ {
+ name: "image pull backoff",
+ waitingReason: "ImagePullBackOff",
+ waitingMessage: "back-off pulling image",
+ expectError: true,
+ errorContains: []string{"ImagePullBackOff", "back-off pulling image"},
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ env := NewNodeDebugTestEnv(t)
+ env.Pods.ExitCode = tt.exitCode
+ env.Pods.TerminatedReason = tt.terminatedReason
+ env.Pods.WaitingReason = tt.waitingReason
+ env.Pods.WaitingMessage = tt.waitingMessage
+
+ created, _ := createDebugPod(context.Background(), env.Kubernetes, "node-1", "default", DefaultNodeDebugImage, []string{"echo", "test"})
+
+ terminated, lastPod, waitMsg, err := pollForCompletion(context.Background(), env.Kubernetes, "default", created.Name, time.Minute)
+
+ if tt.expectError {
+ if err == nil {
+ t.Fatalf("expected error but got none")
+ }
+ for _, substr := range tt.errorContains {
+ if !strings.Contains(err.Error(), substr) {
+ t.Fatalf("expected error to contain %q, got: %v", substr, err)
+ }
+ }
+ return
+ }
+
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+
+ if tt.expectTerminated {
+ if terminated == nil {
+ t.Fatalf("expected terminated state")
+ }
+ if terminated.ExitCode != tt.expectedExitCode {
+ t.Fatalf("expected exit code %d, got %d", tt.expectedExitCode, terminated.ExitCode)
+ }
+ if tt.expectedReason != "" && terminated.Reason != tt.expectedReason {
+ t.Fatalf("expected reason %q, got %q", tt.expectedReason, terminated.Reason)
+ }
+ if lastPod == nil {
+ t.Fatalf("expected lastPod to be set")
+ }
+ }
+
+ if tt.waitingReason == "" && waitMsg != "" {
+ t.Fatalf("expected no wait message, got %q", waitMsg)
+ }
+ })
+ }
+}
+
+func TestRetrieveLogs(t *testing.T) {
+ env := NewNodeDebugTestEnv(t)
+ env.Pods.Logs = " some output with whitespace \n"
+
+ created, _ := createDebugPod(context.Background(), env.Kubernetes, "node-1", "default", DefaultNodeDebugImage, []string{"echo", "test"})
+
+ logs, err := retrieveLogs(context.Background(), env.Kubernetes, "default", created.Name)
+ if err != nil {
+ t.Fatalf("retrieveLogs failed: %v", err)
+ }
+ if logs != "some output with whitespace" {
+ t.Fatalf("expected trimmed logs, got %q", logs)
+ }
+}
+
// TestProcessResults table-tests every branch of processResults: successful
// termination, non-zero exit codes (with and without reason/message/logs),
// pod-level failure reasons, waiting containers, and the no-terminal-state
// fallback. On error the result must be empty and the logs embedded in the
// error text after an "Output:" marker.
func TestProcessResults(t *testing.T) {
	tests := []struct {
		name           string
		terminated     *corev1.ContainerStateTerminated
		pod            *corev1.Pod
		waitMsg        string
		logs           string
		expectError    bool
		errorContains  []string
		expectLogs     bool
		expectedResult string
	}{
		{
			name: "successful completion",
			terminated: &corev1.ContainerStateTerminated{
				ExitCode: 0,
			},
			logs:           "success output",
			expectError:    false,
			expectLogs:     true,
			expectedResult: "success output",
		},
		{
			name: "non-zero exit code with logs",
			terminated: &corev1.ContainerStateTerminated{
				ExitCode: 127,
				Reason:   "CommandNotFound",
				Message:  "command not found",
			},
			logs:           "error logs",
			expectError:    true,
			errorContains:  []string{"127", "CommandNotFound", "command not found", "error logs", "Output:"},
			expectLogs:     false,
			expectedResult: "",
		},
		{
			name: "non-zero exit code without reason or message but with logs",
			terminated: &corev1.ContainerStateTerminated{
				ExitCode: 1,
			},
			logs:           "failed output",
			expectError:    true,
			errorContains:  []string{"command exited with code 1", "failed output", "Output:"},
			expectLogs:     false,
			expectedResult: "",
		},
		{
			name: "non-zero exit code without logs",
			terminated: &corev1.ContainerStateTerminated{
				ExitCode: 1,
			},
			logs:           "",
			expectError:    true,
			errorContains:  []string{"command exited with code 1"},
			expectLogs:     false,
			expectedResult: "",
		},
		{
			name: "pod failed with logs",
			pod: &corev1.Pod{
				Status: corev1.PodStatus{
					Reason: "Evicted",
				},
			},
			logs:           "pod evicted logs",
			expectError:    true,
			errorContains:  []string{"Evicted", "pod evicted logs", "Output:"},
			expectLogs:     false,
			expectedResult: "",
		},
		{
			name: "pod failed without logs",
			pod: &corev1.Pod{
				Status: corev1.PodStatus{
					Reason: "Evicted",
				},
			},
			logs:           "",
			expectError:    true,
			errorContains:  []string{"Evicted"},
			expectLogs:     false,
			expectedResult: "",
		},
		{
			name:           "container waiting with logs",
			waitMsg:        "container waiting: ImagePullBackOff",
			logs:           "waiting logs",
			expectError:    true,
			errorContains:  []string{"did not complete", "waiting logs", "Output:"},
			expectLogs:     false,
			expectedResult: "",
		},
		{
			name:           "container waiting without logs",
			waitMsg:        "container waiting: ImagePullBackOff",
			logs:           "",
			expectError:    true,
			errorContains:  []string{"did not complete"},
			expectLogs:     false,
			expectedResult: "",
		},
		{
			name:           "no terminal state with logs",
			logs:           "incomplete logs",
			expectError:    true,
			errorContains:  []string{"did not reach a terminal state", "incomplete logs", "Output:"},
			expectLogs:     false,
			expectedResult: "",
		},
		{
			name:           "no terminal state without logs",
			logs:           "",
			expectError:    true,
			errorContains:  []string{"did not reach a terminal state"},
			expectLogs:     false,
			expectedResult: "",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := processResults(tt.terminated, tt.pod, tt.waitMsg, tt.logs)

			if tt.expectError {
				if err == nil {
					t.Fatalf("expected error but got none")
				}
				for _, substr := range tt.errorContains {
					if !strings.Contains(err.Error(), substr) {
						t.Fatalf("expected error to contain %q, got: %v", substr, err)
					}
				}
				// Verify logs are NOT in the result when there's an error
				if result != "" {
					t.Fatalf("expected empty result on error, got %q", result)
				}
			} else {
				if err != nil {
					t.Fatalf("expected no error, got: %v", err)
				}
				// Verify logs ARE in the result when successful
				if result != tt.expectedResult {
					t.Fatalf("expected result %q, got %q", tt.expectedResult, result)
				}
			}
		})
	}
}
+
// TestSanitizeForName checks lowercasing, replacement of non-alphanumeric
// characters with hyphens, trimming, the "node" fallback for empty results,
// and the 40-character cap.
func TestSanitizeForName(t *testing.T) {
	tests := []struct {
		input    string
		expected string
	}{
		{"worker-0", "worker-0"},
		{"WORKER-0", "worker-0"},
		{"worker.0", "worker-0"},
		{"worker_0", "worker-0"},
		{"ip-10-0-1-42.ec2.internal", "ip-10-0-1-42-ec2-internal"},
		{"", "node"},
		{"---", "node"},
		{strings.Repeat("a", 50), strings.Repeat("a", 40)},
		{"Worker-Node_123.domain", "worker-node-123-domain"},
	}

	for _, tt := range tests {
		t.Run(fmt.Sprintf("sanitize(%q)", tt.input), func(t *testing.T) {
			result := sanitizeForName(tt.input)
			if result != tt.expected {
				t.Fatalf("expected %q, got %q", tt.expected, result)
			}
		})
	}
}
diff --git a/pkg/ocp/ocp_client.go b/pkg/ocp/ocp_client.go
new file mode 100644
index 000000000..5c2766b4a
--- /dev/null
+++ b/pkg/ocp/ocp_client.go
@@ -0,0 +1,44 @@
+package ocp
+
+import (
+ "context"
+
+ "github.com/containers/kubernetes-mcp-server/pkg/kubernetes"
+
+ corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
+)
+
// ClientsetInterface defines the interface for access-controlled clientset operations.
// This allows code to work with kubernetes.AccessControlClientset through an interface,
// making it easier to test and decouple from the concrete implementation.
type ClientsetInterface interface {
	// Pods returns a namespaced PodInterface, or an error when the pods
	// resource cannot be accessed.
	Pods(namespace string) (corev1client.PodInterface, error)
}
+
// OpenshiftClient defines a minimal interface for kubernetes operations commonly needed
// by OCP toolsets. This allows for easier testing and decoupling from the concrete
// kubernetes.Kubernetes type.
type OpenshiftClient interface {
	// NamespaceOrDefault resolves an empty namespace to the client's configured default.
	NamespaceOrDefault(namespace string) string
	// AccessControlClientset exposes access-controlled typed clients (e.g. Pods).
	AccessControlClientset() ClientsetInterface
	// PodsLog fetches the logs of the named container; tail is forwarded to
	// the underlying log request (semantics depend on the implementation).
	PodsLog(ctx context.Context, namespace, name, container string, previous bool, tail int64) (string, error)
}
+
// OpenshiftClientAdapter adapts kubernetes.Kubernetes to implement OpenshiftClient.
// This allows production code to use the concrete *kubernetes.Kubernetes type
// while tests can use a mock implementation.
// NamespaceOrDefault and PodsLog are satisfied by the embedded type; only
// AccessControlClientset needs a wrapper (to return the interface type).
type OpenshiftClientAdapter struct {
	*kubernetes.Kubernetes
}
+
// NewOpenshiftClient creates a new adapter that wraps kubernetes.Kubernetes
// to implement the OpenshiftClient interface.
func NewOpenshiftClient(k *kubernetes.Kubernetes) *OpenshiftClientAdapter {
	return &OpenshiftClientAdapter{Kubernetes: k}
}
+
// AccessControlClientset returns the access control clientset as an interface.
// This satisfies the OpenshiftClient interface; it shadows the embedded
// method, whose concrete return type implements ClientsetInterface.
func (c *OpenshiftClientAdapter) AccessControlClientset() ClientsetInterface {
	return c.Kubernetes.AccessControlClientset()
}
diff --git a/pkg/ocp/testhelpers.go b/pkg/ocp/testhelpers.go
new file mode 100644
index 000000000..e35d1d9fc
--- /dev/null
+++ b/pkg/ocp/testhelpers.go
@@ -0,0 +1,163 @@
+package ocp
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "net/http"
+ "net/url"
+ "strings"
+ "testing"
+
+ corev1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/schema"
+ schemek8s "k8s.io/client-go/kubernetes/scheme"
+ corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
+ restclient "k8s.io/client-go/rest"
+)
+
// NodeDebugTestEnv bundles a test Kubernetes client with a controllable pods client for tests.
type NodeDebugTestEnv struct {
	Kubernetes *FakeKubernetesClient // fake OpenshiftClient implementation under test
	Pods       *FakePodInterface     // direct handle for configuring/inspecting pod behavior
}
+
+// NewNodeDebugTestEnv constructs a testing harness for exercising NodesDebugExec.
+func NewNodeDebugTestEnv(t *testing.T) *NodeDebugTestEnv {
+ t.Helper()
+
+ podsClient := &FakePodInterface{}
+ fakeK8s := &FakeKubernetesClient{
+ pods: podsClient,
+ namespace: "default",
+ }
+
+ return &NodeDebugTestEnv{
+ Kubernetes: fakeK8s,
+ Pods: podsClient,
+ }
+}
+
// FakeKubernetesClient implements the OpenshiftClient interface for testing
type FakeKubernetesClient struct {
	pods      *FakePodInterface // backing pod client returned by AccessControlClientset
	namespace string            // default namespace returned by NamespaceOrDefault
}
+
// AccessControlClientset returns a fake clientset for testing
func (f *FakeKubernetesClient) AccessControlClientset() ClientsetInterface {
	return &FakeAccessControlClientset{pods: f.pods}
}
+
+func (f *FakeKubernetesClient) NamespaceOrDefault(namespace string) string {
+ if namespace == "" {
+ return f.namespace
+ }
+ return namespace
+}
+
// PodsLog fetches container logs through the fake pod client's GetLogs
// request, exercising the real client-go request/response machinery.
// The tail parameter is accepted for interface compatibility but not applied
// by the fake.
func (f *FakeKubernetesClient) PodsLog(ctx context.Context, namespace, name, container string, previous bool, tail int64) (string, error) {
	req := f.pods.GetLogs(name, &corev1.PodLogOptions{Container: container, Previous: previous})
	res := req.Do(ctx)
	if res.Error() != nil {
		return "", res.Error()
	}
	rawData, err := res.Raw()
	if err != nil {
		return "", err
	}
	return string(rawData), nil
}
+
// FakeAccessControlClientset mimics kubernetes.AccessControlClientset for testing
type FakeAccessControlClientset struct {
	pods *FakePodInterface // single shared pod client returned for every namespace
}
+
// Pods returns the shared fake pod client regardless of namespace and never errors.
func (f *FakeAccessControlClientset) Pods(namespace string) (corev1client.PodInterface, error) {
	return f.pods, nil
}
+
// FakePodInterface implements corev1client.PodInterface with deterministic behaviour for tests.
// The embedded interface panics for any method not overridden below.
type FakePodInterface struct {
	corev1client.PodInterface
	Created           *corev1.Pod // last pod passed to Create
	Deleted           bool        // set when Delete is called
	ExitCode          int32       // exit code reported in the terminated container state
	TerminatedReason  string      // optional terminated-state reason
	TerminatedMessage string      // optional terminated-state message
	WaitingReason     string      // when set, Get reports a waiting (pending) container instead
	WaitingMessage    string      // optional waiting-state message
	Logs              string      // body served by GetLogs
}
+
+func (f *FakePodInterface) Create(ctx context.Context, pod *corev1.Pod, opts metav1.CreateOptions) (*corev1.Pod, error) {
+ copy := pod.DeepCopy()
+ if copy.Name == "" && copy.GenerateName != "" {
+ copy.Name = copy.GenerateName + "test"
+ }
+ f.Created = copy
+ return copy.DeepCopy(), nil
+}
+
// Get returns a copy of the created pod with a synthesized container status:
// a waiting (Pending) state when WaitingReason is set, otherwise a terminated
// (Succeeded) state built from ExitCode/TerminatedReason/TerminatedMessage.
func (f *FakePodInterface) Get(ctx context.Context, name string, opts metav1.GetOptions) (*corev1.Pod, error) {
	if f.Created == nil {
		return nil, fmt.Errorf("pod not created yet")
	}
	pod := f.Created.DeepCopy()

	// If waiting state is set, return that instead of terminated
	if f.WaitingReason != "" {
		waiting := &corev1.ContainerStateWaiting{Reason: f.WaitingReason}
		if f.WaitingMessage != "" {
			waiting.Message = f.WaitingMessage
		}
		pod.Status.ContainerStatuses = []corev1.ContainerStatus{{
			Name:  NodeDebugContainerName,
			State: corev1.ContainerState{Waiting: waiting},
		}}
		pod.Status.Phase = corev1.PodPending
		return pod, nil
	}

	// Otherwise return terminated state
	terminated := &corev1.ContainerStateTerminated{ExitCode: f.ExitCode}
	if f.TerminatedReason != "" {
		terminated.Reason = f.TerminatedReason
	}
	if f.TerminatedMessage != "" {
		terminated.Message = f.TerminatedMessage
	}
	pod.Status.ContainerStatuses = []corev1.ContainerStatus{{
		Name:  NodeDebugContainerName,
		State: corev1.ContainerState{Terminated: terminated},
	}}
	pod.Status.Phase = corev1.PodSucceeded
	return pod, nil
}
+
// Delete records that a deletion was requested; it never fails and ignores the name.
func (f *FakePodInterface) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error {
	f.Deleted = true
	return nil
}
+
// GetLogs returns a rest.Request wired to an in-memory HTTP client that always
// serves the configured Logs string, so PodsLog can exercise the real
// client-go request/response path without a server.
// NOTE(review): the response body is a single reader created here, so executing
// the request more than once would observe an exhausted body — fine for these
// single-shot tests, but confirm before reusing the request.
func (f *FakePodInterface) GetLogs(name string, opts *corev1.PodLogOptions) *restclient.Request {
	body := io.NopCloser(strings.NewReader(f.Logs))
	client := &http.Client{Transport: roundTripperFunc(func(*http.Request) (*http.Response, error) {
		return &http.Response{StatusCode: http.StatusOK, Body: body}, nil
	})}
	content := restclient.ClientContentConfig{
		ContentType:  runtime.ContentTypeJSON,
		GroupVersion: schema.GroupVersion{Version: "v1"},
		Negotiator:   runtime.NewClientNegotiator(schemek8s.Codecs.WithoutConversion(), schema.GroupVersion{Version: "v1"}),
	}
	return restclient.NewRequestWithClient(&url.URL{Scheme: "https", Host: "localhost"}, "", content, client).Verb("GET")
}
+
// roundTripperFunc adapts a plain function to http.RoundTripper, in the style
// of http.HandlerFunc.
type roundTripperFunc func(*http.Request) (*http.Response, error)

// RoundTrip implements http.RoundTripper by invoking the wrapped function.
func (f roundTripperFunc) RoundTrip(req *http.Request) (*http.Response, error) {
	return f(req)
}
diff --git a/pkg/toolsets/openshift/nodes.go b/pkg/toolsets/openshift/nodes.go
new file mode 100644
index 000000000..481cd53bb
--- /dev/null
+++ b/pkg/toolsets/openshift/nodes.go
@@ -0,0 +1,126 @@
+package openshift
+
+import (
+ "errors"
+ "fmt"
+ "time"
+
+ "github.com/google/jsonschema-go/jsonschema"
+ "k8s.io/utils/ptr"
+
+ "github.com/containers/kubernetes-mcp-server/pkg/api"
+ "github.com/containers/kubernetes-mcp-server/pkg/ocp"
+)
+
// initNodes returns the node-related tools exposed by the openshift-core
// toolset. Currently this is the single nodes_debug_exec tool, which runs a
// command on a node through a temporary privileged debug pod.
func initNodes() []api.ServerTool {
	return []api.ServerTool{
		{
			Tool: api.Tool{
				Name:        "nodes_debug_exec",
				Description: "Run commands on an OpenShift node using a privileged debug pod with comprehensive troubleshooting utilities. The debug pod uses the UBI9 toolbox image which includes: systemd tools (systemctl, journalctl), networking tools (ss, ip, ping, traceroute, nmap), process tools (ps, top, lsof, strace), file system tools (find, tar, rsync), and debugging tools (gdb). The host filesystem is mounted at /host, allowing commands to chroot /host if needed to access node-level resources. Output is truncated to the most recent 100 lines, so prefer filters like grep when expecting large logs.",
				InputSchema: &jsonschema.Schema{
					Type: "object",
					Properties: map[string]*jsonschema.Schema{
						"node": {
							Type:        "string",
							Description: "Name of the node to debug (e.g. worker-0).",
						},
						"command": {
							Type:        "array",
							Description: "Command to execute on the node. All standard debugging utilities from the UBI9 toolbox are available. The host filesystem is mounted at /host - use 'chroot /host ' to access node-level resources, or run commands directly in the toolbox environment. Provide each argument as a separate array item (e.g. ['chroot', '/host', 'systemctl', 'status', 'kubelet'] or ['journalctl', '-u', 'kubelet', '--since', '1 hour ago']).",
							Items:       &jsonschema.Schema{Type: "string"},
						},
						"namespace": {
							Type:        "string",
							Description: "Namespace to create the temporary debug pod in (optional, defaults to the current namespace or 'default').",
						},
						"image": {
							Type:        "string",
							Description: "Container image to use for the debug pod (optional). Defaults to registry.access.redhat.com/ubi9/toolbox:latest which provides comprehensive debugging and troubleshooting utilities.",
						},
						"timeout_seconds": {
							Type:        "integer",
							Description: "Maximum time to wait for the command to complete before timing out (optional, defaults to 60 seconds).",
							Minimum:     ptr.To(float64(1)),
						},
					},
					Required: []string{"node", "command"},
				},
				// The tool creates pods and runs arbitrary privileged
				// commands on the node, hence the destructive, non-read-only,
				// non-idempotent hints.
				Annotations: api.ToolAnnotations{
					Title:           "Nodes: Debug Exec",
					ReadOnlyHint:    ptr.To(false),
					DestructiveHint: ptr.To(true),
					IdempotentHint:  ptr.To(false),
					OpenWorldHint:   ptr.To(true),
				},
			},
			Handler: nodesDebugExec,
		},
	}
}
+
+func nodesDebugExec(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
+ nodeArg := params.GetArguments()["node"]
+ nodeName, ok := nodeArg.(string)
+ if nodeArg == nil || !ok || nodeName == "" {
+ return api.NewToolCallResult("", errors.New("missing required argument: node")), nil
+ }
+
+ commandArg := params.GetArguments()["command"]
+ command, err := toStringSlice(commandArg)
+ if err != nil {
+ return api.NewToolCallResult("", fmt.Errorf("invalid command argument: %w", err)), nil
+ }
+
+ namespace := ""
+ if nsArg, ok := params.GetArguments()["namespace"].(string); ok {
+ namespace = nsArg
+ }
+
+ image := ""
+ if imageArg, ok := params.GetArguments()["image"].(string); ok {
+ image = imageArg
+ }
+
+ var timeout time.Duration
+ if timeoutRaw, exists := params.GetArguments()["timeout_seconds"]; exists && timeoutRaw != nil {
+ switch v := timeoutRaw.(type) {
+ case float64:
+ timeout = time.Duration(int64(v)) * time.Second
+ case int:
+ timeout = time.Duration(v) * time.Second
+ case int64:
+ timeout = time.Duration(v) * time.Second
+ default:
+ return api.NewToolCallResult("", errors.New("timeout_seconds must be a numeric value")), nil
+ }
+ }
+
+ output, execErr := ocp.NodesDebugExec(params.Context, ocp.NewOpenshiftClient(params.Kubernetes), namespace, nodeName, image, command, timeout)
+ if output == "" && execErr == nil {
+ output = fmt.Sprintf("Command executed successfully on node %s but produced no output.", nodeName)
+ }
+ return api.NewToolCallResult(output, execErr), nil
+}
+
// toStringSlice converts a raw tool argument into a non-empty []string.
// The argument must be a non-empty array whose items are all strings; any
// other shape produces a descriptive error.
func toStringSlice(arg any) ([]string, error) {
	if arg == nil {
		return nil, errors.New("command is required")
	}
	items, ok := arg.([]interface{})
	if !ok {
		return nil, errors.New("command must be an array of strings")
	}
	if len(items) == 0 {
		return nil, errors.New("command array cannot be empty")
	}
	out := make([]string, len(items))
	for i, item := range items {
		s, ok := item.(string)
		if !ok {
			return nil, errors.New("command items must be strings")
		}
		out[i] = s
	}
	return out, nil
}
diff --git a/pkg/toolsets/openshift/nodes_test.go b/pkg/toolsets/openshift/nodes_test.go
new file mode 100644
index 000000000..e3cddf2ca
--- /dev/null
+++ b/pkg/toolsets/openshift/nodes_test.go
@@ -0,0 +1,95 @@
+package openshift
+
+import (
+ "context"
+ "testing"
+ "time"
+
+ "github.com/containers/kubernetes-mcp-server/pkg/api"
+ "github.com/containers/kubernetes-mcp-server/pkg/ocp"
+)
+
// staticRequest is a minimal tool-call request stub that returns a fixed
// argument map, letting handler tests feed arbitrary tool arguments without
// constructing a real request.
type staticRequest struct {
	args map[string]any
}

// GetArguments returns the canned argument map.
func (s staticRequest) GetArguments() map[string]any {
	return s.args
}
+
// TestNodesDebugExecHandlerValidatesInput verifies that the handler rejects
// malformed arguments by returning an error in the tool result (not as a Go
// error), with the exact messages surfaced to the model.
func TestNodesDebugExecHandlerValidatesInput(t *testing.T) {
	t.Run("missing node", func(t *testing.T) {
		params := api.ToolHandlerParams{
			Context:         context.Background(),
			ToolCallRequest: staticRequest{args: map[string]any{}},
		}
		result, err := nodesDebugExec(params)
		if err != nil {
			t.Fatalf("handler returned error: %v", err)
		}
		if result.Error == nil || result.Error.Error() != "missing required argument: node" {
			t.Fatalf("unexpected error: %v", result.Error)
		}
	})

	t.Run("invalid command type", func(t *testing.T) {
		// "command" must be an array of strings; a plain string is rejected.
		params := api.ToolHandlerParams{
			Context: context.Background(),
			ToolCallRequest: staticRequest{args: map[string]any{
				"node":    "worker-0",
				"command": "ls -la",
			}},
		}
		result, err := nodesDebugExec(params)
		if err != nil {
			t.Fatalf("handler returned error: %v", err)
		}
		if result.Error == nil || result.Error.Error() != "invalid command argument: command must be an array of strings" {
			t.Fatalf("unexpected error: %v", result.Error)
		}
	})
}
+
// TestNodesDebugExecHandlerExecutesCommand exercises the happy path: the
// debug pod is created in the requested namespace with the custom image and
// command, and the fake pod's logs are returned as the output.
func TestNodesDebugExecHandlerExecutesCommand(t *testing.T) {
	env := ocp.NewNodeDebugTestEnv(t)
	env.Pods.Logs = "done"

	// Call NodesDebugExec directly instead of going through the handler
	// This avoids the need to mock the full kubernetes.Kubernetes type
	output, err := ocp.NodesDebugExec(
		context.Background(),
		env.Kubernetes,
		"debug",
		"infra-node",
		"registry.local/debug:latest",
		[]string{"systemctl", "status", "kubelet"},
		15*time.Second,
	)

	if err != nil {
		t.Fatalf("NodesDebugExec returned error: %v", err)
	}
	if output != "done" {
		t.Fatalf("unexpected output: %q", output)
	}

	// Inspect the pod the fake clientset recorded to confirm the request
	// parameters were honored.
	created := env.Pods.Created
	if created == nil {
		t.Fatalf("expected pod creation")
	}
	if created.Namespace != "debug" {
		t.Fatalf("expected namespace override, got %q", created.Namespace)
	}
	if created.Spec.Containers[0].Image != "registry.local/debug:latest" {
		t.Fatalf("expected custom image, got %q", created.Spec.Containers[0].Image)
	}
	expectedCommand := []string{"systemctl", "status", "kubelet"}
	if len(created.Spec.Containers[0].Command) != len(expectedCommand) {
		t.Fatalf("unexpected command length: %v", created.Spec.Containers[0].Command)
	}
	for i, part := range expectedCommand {
		if created.Spec.Containers[0].Command[i] != part {
			t.Fatalf("command[%d]=%q expected %q", i, created.Spec.Containers[0].Command[i], part)
		}
	}
}
diff --git a/pkg/toolsets/openshift/toolset.go b/pkg/toolsets/openshift/toolset.go
new file mode 100644
index 000000000..d11e4b668
--- /dev/null
+++ b/pkg/toolsets/openshift/toolset.go
@@ -0,0 +1,31 @@
+package openshift
+
+import (
+ "slices"
+
+ "github.com/containers/kubernetes-mcp-server/pkg/api"
+ internalk8s "github.com/containers/kubernetes-mcp-server/pkg/kubernetes"
+ "github.com/containers/kubernetes-mcp-server/pkg/toolsets"
+)
+
// Toolset bundles the core OpenShift-specific tools (currently node
// debugging) and registers itself with the toolset registry at import time.
type Toolset struct{}

// Compile-time check that Toolset implements api.Toolset.
var _ api.Toolset = (*Toolset)(nil)

// GetName returns the registry identifier used to enable this toolset.
func (t *Toolset) GetName() string {
	return "openshift-core"
}

// GetDescription returns the human-readable summary shown in toolset listings.
func (t *Toolset) GetDescription() string {
	return "Core OpenShift-specific tools (Node debugging, etc.)"
}

// GetTools returns every tool in this toolset. The Openshift parameter is
// not used here but is required by the interface.
func (t *Toolset) GetTools(o internalk8s.Openshift) []api.ServerTool {
	return slices.Concat(
		initNodes(),
	)
}

// init registers the toolset so that importing this package is enough to
// make its tools available.
func init() {
	toolsets.Register(&Toolset{})
}
From f8247b4d65621fc16eabb441db1e3560de19de44 Mon Sep 17 00:00:00 2001
From: Swarup Ghosh
Date: Tue, 3 Feb 2026 15:59:35 +0530
Subject: [PATCH 2/7] rebase(main): update node tools to match latest clientset
---
pkg/ocp/ocp_client.go | 32 ++++++++++++++++++++++---------
pkg/toolsets/openshift/nodes.go | 2 +-
pkg/toolsets/openshift/toolset.go | 7 +++++--
3 files changed, 29 insertions(+), 12 deletions(-)
diff --git a/pkg/ocp/ocp_client.go b/pkg/ocp/ocp_client.go
index 5c2766b4a..37023253c 100644
--- a/pkg/ocp/ocp_client.go
+++ b/pkg/ocp/ocp_client.go
@@ -3,13 +3,14 @@ package ocp
import (
"context"
+ "github.com/containers/kubernetes-mcp-server/pkg/api"
"github.com/containers/kubernetes-mcp-server/pkg/kubernetes"
corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
)
// ClientsetInterface defines the interface for access-controlled clientset operations.
-// This allows code to work with kubernetes.AccessControlClientset through an interface,
+// This allows code to work with kubernetes client through an interface,
// making it easier to test and decouple from the concrete implementation.
type ClientsetInterface interface {
Pods(namespace string) (corev1client.PodInterface, error)
@@ -24,21 +25,34 @@ type OpenshiftClient interface {
PodsLog(ctx context.Context, namespace, name, container string, previous bool, tail int64) (string, error)
}
-// OpenshiftClientAdapter adapts kubernetes.Kubernetes to implement OpenshiftClient.
-// This allows production code to use the concrete *kubernetes.Kubernetes type
// clientsetAdapter adapts api.KubernetesClient to implement ClientsetInterface.
type clientsetAdapter struct {
	client api.KubernetesClient
}

// Pods returns the typed core/v1 pod client for the given namespace. The
// error return exists only to satisfy ClientsetInterface; it is always nil
// in this implementation.
func (a *clientsetAdapter) Pods(namespace string) (corev1client.PodInterface, error) {
	return a.client.CoreV1().Pods(namespace), nil
}
+
+// OpenshiftClientAdapter adapts api.KubernetesClient to implement OpenshiftClient.
+// This allows production code to use the api.KubernetesClient interface
// while tests can use a mock implementation.
type OpenshiftClientAdapter struct {
- *kubernetes.Kubernetes
+ *kubernetes.Core
+ clientset *clientsetAdapter
}
-// NewOpenshiftClient creates a new adapter that wraps kubernetes.Kubernetes
+// NewOpenshiftClient creates a new adapter that wraps api.KubernetesClient
// to implement the OpenshiftClient interface.
-func NewOpenshiftClient(k *kubernetes.Kubernetes) *OpenshiftClientAdapter {
- return &OpenshiftClientAdapter{Kubernetes: k}
+func NewOpenshiftClient(k api.KubernetesClient) *OpenshiftClientAdapter {
+ return &OpenshiftClientAdapter{
+ Core: kubernetes.NewCore(k),
+ clientset: &clientsetAdapter{client: k},
+ }
}
-// AccessControlClientset returns the access control clientset as an interface.
+// AccessControlClientset returns the clientset adapter as an interface.
// This satisfies the OpenshiftClient interface.
func (c *OpenshiftClientAdapter) AccessControlClientset() ClientsetInterface {
- return c.Kubernetes.AccessControlClientset()
+ return c.clientset
}
diff --git a/pkg/toolsets/openshift/nodes.go b/pkg/toolsets/openshift/nodes.go
index 481cd53bb..830793322 100644
--- a/pkg/toolsets/openshift/nodes.go
+++ b/pkg/toolsets/openshift/nodes.go
@@ -96,7 +96,7 @@ func nodesDebugExec(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
}
}
- output, execErr := ocp.NodesDebugExec(params.Context, ocp.NewOpenshiftClient(params.Kubernetes), namespace, nodeName, image, command, timeout)
+ output, execErr := ocp.NodesDebugExec(params.Context, ocp.NewOpenshiftClient(params.KubernetesClient), namespace, nodeName, image, command, timeout)
if output == "" && execErr == nil {
output = fmt.Sprintf("Command executed successfully on node %s but produced no output.", nodeName)
}
diff --git a/pkg/toolsets/openshift/toolset.go b/pkg/toolsets/openshift/toolset.go
index d11e4b668..00d3978f9 100644
--- a/pkg/toolsets/openshift/toolset.go
+++ b/pkg/toolsets/openshift/toolset.go
@@ -4,7 +4,6 @@ import (
"slices"
"github.com/containers/kubernetes-mcp-server/pkg/api"
- internalk8s "github.com/containers/kubernetes-mcp-server/pkg/kubernetes"
"github.com/containers/kubernetes-mcp-server/pkg/toolsets"
)
@@ -20,12 +19,16 @@ func (t *Toolset) GetDescription() string {
return "Core OpenShift-specific tools (Node debugging, etc.)"
}
-func (t *Toolset) GetTools(o internalk8s.Openshift) []api.ServerTool {
+func (t *Toolset) GetTools(o api.Openshift) []api.ServerTool {
return slices.Concat(
initNodes(),
)
}
+func (t *Toolset) GetPrompts() []api.ServerPrompt {
+ return nil
+}
+
func init() {
toolsets.Register(&Toolset{})
}
From ee9f443ca827976c95c39f18358a63792d3a6670 Mon Sep 17 00:00:00 2001
From: Swarup Ghosh
Date: Wed, 5 Nov 2025 02:30:21 +0530
Subject: [PATCH 3/7] move node tools into pkg/toolsets/openshift/nodes
Signed-off-by: Swarup Ghosh
---
pkg/ocp/{ => nodes}/nodes_debug.go | 2 +-
pkg/ocp/{ => nodes}/nodes_debug_test.go | 2 +-
pkg/ocp/{ => nodes}/ocp_client.go | 2 +-
pkg/ocp/{ => nodes}/testhelpers.go | 2 +-
pkg/toolsets/openshift/{ => nodes}/nodes.go | 8 ++++----
pkg/toolsets/openshift/{ => nodes}/nodes_test.go | 8 ++++----
pkg/toolsets/openshift/toolset.go | 3 ++-
7 files changed, 14 insertions(+), 13 deletions(-)
rename pkg/ocp/{ => nodes}/nodes_debug.go (99%)
rename pkg/ocp/{ => nodes}/nodes_debug_test.go (99%)
rename pkg/ocp/{ => nodes}/ocp_client.go (99%)
rename pkg/ocp/{ => nodes}/testhelpers.go (99%)
rename pkg/toolsets/openshift/{ => nodes}/nodes.go (94%)
rename pkg/toolsets/openshift/{ => nodes}/nodes_test.go (94%)
diff --git a/pkg/ocp/nodes_debug.go b/pkg/ocp/nodes/nodes_debug.go
similarity index 99%
rename from pkg/ocp/nodes_debug.go
rename to pkg/ocp/nodes/nodes_debug.go
index cb079a9ae..f8b7e412a 100644
--- a/pkg/ocp/nodes_debug.go
+++ b/pkg/ocp/nodes/nodes_debug.go
@@ -1,4 +1,4 @@
-package ocp
+package nodes
import (
"context"
diff --git a/pkg/ocp/nodes_debug_test.go b/pkg/ocp/nodes/nodes_debug_test.go
similarity index 99%
rename from pkg/ocp/nodes_debug_test.go
rename to pkg/ocp/nodes/nodes_debug_test.go
index 1c86c92f8..8fb415f5f 100644
--- a/pkg/ocp/nodes_debug_test.go
+++ b/pkg/ocp/nodes/nodes_debug_test.go
@@ -1,4 +1,4 @@
-package ocp
+package nodes
import (
"context"
diff --git a/pkg/ocp/ocp_client.go b/pkg/ocp/nodes/ocp_client.go
similarity index 99%
rename from pkg/ocp/ocp_client.go
rename to pkg/ocp/nodes/ocp_client.go
index 37023253c..ce58080cf 100644
--- a/pkg/ocp/ocp_client.go
+++ b/pkg/ocp/nodes/ocp_client.go
@@ -1,4 +1,4 @@
-package ocp
+package nodes
import (
"context"
diff --git a/pkg/ocp/testhelpers.go b/pkg/ocp/nodes/testhelpers.go
similarity index 99%
rename from pkg/ocp/testhelpers.go
rename to pkg/ocp/nodes/testhelpers.go
index e35d1d9fc..7ab6a0fdb 100644
--- a/pkg/ocp/testhelpers.go
+++ b/pkg/ocp/nodes/testhelpers.go
@@ -1,4 +1,4 @@
-package ocp
+package nodes
import (
"context"
diff --git a/pkg/toolsets/openshift/nodes.go b/pkg/toolsets/openshift/nodes/nodes.go
similarity index 94%
rename from pkg/toolsets/openshift/nodes.go
rename to pkg/toolsets/openshift/nodes/nodes.go
index 830793322..2f70f2a14 100644
--- a/pkg/toolsets/openshift/nodes.go
+++ b/pkg/toolsets/openshift/nodes/nodes.go
@@ -1,4 +1,4 @@
-package openshift
+package nodes
import (
"errors"
@@ -9,10 +9,10 @@ import (
"k8s.io/utils/ptr"
"github.com/containers/kubernetes-mcp-server/pkg/api"
- "github.com/containers/kubernetes-mcp-server/pkg/ocp"
+ "github.com/containers/kubernetes-mcp-server/pkg/ocp/nodes"
)
-func initNodes() []api.ServerTool {
+func NodeTools() []api.ServerTool {
return []api.ServerTool{
{
Tool: api.Tool{
@@ -96,7 +96,7 @@ func nodesDebugExec(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
}
}
- output, execErr := ocp.NodesDebugExec(params.Context, ocp.NewOpenshiftClient(params.KubernetesClient), namespace, nodeName, image, command, timeout)
+ output, execErr := nodes.NodesDebugExec(params.Context, nodes.NewOpenshiftClient(params.KubernetesClient), namespace, nodeName, image, command, timeout)
if output == "" && execErr == nil {
output = fmt.Sprintf("Command executed successfully on node %s but produced no output.", nodeName)
}
diff --git a/pkg/toolsets/openshift/nodes_test.go b/pkg/toolsets/openshift/nodes/nodes_test.go
similarity index 94%
rename from pkg/toolsets/openshift/nodes_test.go
rename to pkg/toolsets/openshift/nodes/nodes_test.go
index e3cddf2ca..45929f9ae 100644
--- a/pkg/toolsets/openshift/nodes_test.go
+++ b/pkg/toolsets/openshift/nodes/nodes_test.go
@@ -1,4 +1,4 @@
-package openshift
+package nodes
import (
"context"
@@ -6,7 +6,7 @@ import (
"time"
"github.com/containers/kubernetes-mcp-server/pkg/api"
- "github.com/containers/kubernetes-mcp-server/pkg/ocp"
+ "github.com/containers/kubernetes-mcp-server/pkg/ocp/nodes"
)
type staticRequest struct {
@@ -51,12 +51,12 @@ func TestNodesDebugExecHandlerValidatesInput(t *testing.T) {
}
func TestNodesDebugExecHandlerExecutesCommand(t *testing.T) {
- env := ocp.NewNodeDebugTestEnv(t)
+ env := nodes.NewNodeDebugTestEnv(t)
env.Pods.Logs = "done"
// Call NodesDebugExec directly instead of going through the handler
// This avoids the need to mock the full kubernetes.Kubernetes type
- output, err := ocp.NodesDebugExec(
+ output, err := nodes.NodesDebugExec(
context.Background(),
env.Kubernetes,
"debug",
diff --git a/pkg/toolsets/openshift/toolset.go b/pkg/toolsets/openshift/toolset.go
index 00d3978f9..62a39c7a2 100644
--- a/pkg/toolsets/openshift/toolset.go
+++ b/pkg/toolsets/openshift/toolset.go
@@ -5,6 +5,7 @@ import (
"github.com/containers/kubernetes-mcp-server/pkg/api"
"github.com/containers/kubernetes-mcp-server/pkg/toolsets"
+ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/openshift/nodes"
)
type Toolset struct{}
@@ -21,7 +22,7 @@ func (t *Toolset) GetDescription() string {
func (t *Toolset) GetTools(o api.Openshift) []api.ServerTool {
return slices.Concat(
- initNodes(),
+ nodes.NodeTools(),
)
}
From 89fb360afdde81016aff1b8595fe58065b93f1b5 Mon Sep 17 00:00:00 2001
From: Swarup Ghosh
Date: Wed, 5 Nov 2025 03:04:15 +0530
Subject: [PATCH 4/7] make k.canIUse and exported func
Signed-off-by: Swarup Ghosh
---
pkg/kubernetes/resources.go | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/pkg/kubernetes/resources.go b/pkg/kubernetes/resources.go
index 54f2e9ca7..132c41df7 100644
--- a/pkg/kubernetes/resources.go
+++ b/pkg/kubernetes/resources.go
@@ -10,6 +10,7 @@ import (
"k8s.io/client-go/dynamic"
"github.com/containers/kubernetes-mcp-server/pkg/api"
+
"github.com/containers/kubernetes-mcp-server/pkg/version"
authv1 "k8s.io/api/authorization/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -34,8 +35,9 @@ func (c *Core) ResourcesList(ctx context.Context, gvk *schema.GroupVersionKind,
// Check if operation is allowed for all namespaces (applicable for namespaced resources)
isNamespaced, _ := c.isNamespaced(gvk)
- if isNamespaced && !c.canIUse(ctx, gvr, namespace, "list") && namespace == "" {
+ if isNamespaced && !c.CanIUse(ctx, gvr, namespace, "list") && namespace == "" {
namespace = c.NamespaceOrDefault("")
+
}
if options.AsTable {
return c.resourcesListAsTable(ctx, gvk, gvr, namespace, options)
@@ -227,8 +229,9 @@ func (c *Core) supportsGroupVersion(groupVersion string) bool {
return true
}
-func (c *Core) canIUse(ctx context.Context, gvr *schema.GroupVersionResource, namespace, verb string) bool {
+func (c *Core) CanIUse(ctx context.Context, gvr *schema.GroupVersionResource, namespace, verb string) bool {
accessReviews := c.AuthorizationV1().SelfSubjectAccessReviews()
+
response, err := accessReviews.Create(ctx, &authv1.SelfSubjectAccessReview{
Spec: authv1.SelfSubjectAccessReviewSpec{ResourceAttributes: &authv1.ResourceAttributes{
Namespace: namespace,
From 96395b129b659c60bd04856482344b685b5a6100 Mon Sep 17 00:00:00 2001
From: Swarup Ghosh
Date: Thu, 29 Jan 2026 15:04:26 +0530
Subject: [PATCH 5/7] Implement plan_mustgather tool in the openshift toolset
Signed-off-by: Swarup Ghosh
---
pkg/mcp/modules.go | 1 +
pkg/ocp/mustgather/plan_mustgather.go | 435 ++++++++++++++++++
.../openshift/mustgather/mustgather.go | 80 ++++
pkg/toolsets/openshift/toolset.go | 11 +-
4 files changed, 526 insertions(+), 1 deletion(-)
create mode 100644 pkg/ocp/mustgather/plan_mustgather.go
create mode 100644 pkg/toolsets/openshift/mustgather/mustgather.go
diff --git a/pkg/mcp/modules.go b/pkg/mcp/modules.go
index d9d7323f2..01779939c 100644
--- a/pkg/mcp/modules.go
+++ b/pkg/mcp/modules.go
@@ -8,4 +8,5 @@ import (
_ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kiali"
_ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/kubevirt"
_ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/observability"
+ _ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/openshift"
)
diff --git a/pkg/ocp/mustgather/plan_mustgather.go b/pkg/ocp/mustgather/plan_mustgather.go
new file mode 100644
index 000000000..d5d501064
--- /dev/null
+++ b/pkg/ocp/mustgather/plan_mustgather.go
@@ -0,0 +1,435 @@
+package mustgather
+
+import (
+ "fmt"
+ "path"
+ "strings"
+ "time"
+
+ "github.com/containers/kubernetes-mcp-server/pkg/api"
+ "github.com/containers/kubernetes-mcp-server/pkg/kubernetes"
+ corev1 "k8s.io/api/core/v1"
+ rbacv1 "k8s.io/api/rbac/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/runtime/schema"
+ "k8s.io/apimachinery/pkg/util/rand"
+ "sigs.k8s.io/yaml"
+)
+
const (
	// defaultGatherSourceDir is the directory inside the gather pod where the
	// gather scripts write their output (also used as the volume mount path).
	defaultGatherSourceDir = "/must-gather/"
	// defaultMustGatherImage is OpenShift's stock must-gather image, used
	// when the caller supplies no custom images.
	defaultMustGatherImage = "registry.redhat.io/openshift4/ose-must-gather:latest"
	// defaultGatherCmd is the gather entrypoint executed in each container.
	defaultGatherCmd = "/usr/bin/gather"
	// mgAnnotation marks ClusterOperators/CSVs that publish a
	// component-specific must-gather image.
	mgAnnotation = "operators.openshift.io/must-gather-image"
	// maxConcurrentGathers caps how many gather containers may run in a
	// single plan/pod.
	maxConcurrentGathers = 8
)
+
+func PlanMustGather(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
+ args := params.GetArguments()
+ k8sCore := kubernetes.NewCore(params)
+
+ var nodeName, sourceDir, namespace, gatherCmd, timeout, since string
+ var hostNetwork, keepResources, allImages bool
+ var images []string
+ var nodeSelector map[string]string
+
+ if args["node_name"] != nil {
+ nodeName = args["node_name"].(string)
+ }
+
+ if args["node_selector"] != nil {
+ nodeSelector = parseNodeSelector(args["node_selector"].(string))
+ }
+
+ if args["host_network"] != nil {
+ hostNetwork = args["host_network"].(bool)
+ }
+
+ sourceDir = defaultGatherSourceDir
+ if args["source_dir"] != nil {
+ sourceDir = path.Clean(args["source_dir"].(string))
+ }
+
+ namespace = fmt.Sprintf("openshift-must-gather-%s", rand.String(6))
+ if args["namespace"] != nil {
+ namespace = args["namespace"].(string)
+ }
+
+ if args["keep_resources"] != nil {
+ keepResources = args["keep_resources"].(bool)
+ }
+
+ gatherCmd = defaultGatherCmd
+ if args["gather_command"] != nil {
+ gatherCmd = args["gather_command"].(string)
+ }
+
+ if args["all_component_images"] != nil {
+ allImages = args["all_component_images"].(bool)
+ }
+
+ if args["images"] != nil {
+ if imagesArg, ok := args["images"].([]interface{}); ok {
+ for _, img := range imagesArg {
+ if imgStr, ok := img.(string); ok {
+ images = append(images, imgStr)
+ }
+ }
+ }
+ }
+
+ if allImages {
+ componentImages, err := getComponentImages(params)
+ if err != nil {
+ return api.NewToolCallResult("",
+ fmt.Errorf("failed to get operator images: %v", err),
+ ), nil
+ }
+
+ images = append(images, componentImages...)
+ }
+
+ if len(images) > maxConcurrentGathers {
+ return api.NewToolCallResult("",
+ fmt.Errorf("more than %d gather images are not supported", maxConcurrentGathers),
+ ), nil
+ }
+
+ if args["timeout"] != nil {
+ timeout = args["timeout"].(string)
+
+ _, err := time.ParseDuration(timeout)
+ if err != nil {
+ return api.NewToolCallResult("", fmt.Errorf("timeout duration is not valid")), nil
+ }
+
+ gatherCmd = fmt.Sprintf("/usr/bin/timeout %s %s", timeout, gatherCmd)
+ }
+
+ if args["since"] != nil {
+ since = args["since"].(string)
+
+ _, err := time.ParseDuration(since)
+ if err != nil {
+ return api.NewToolCallResult("", fmt.Errorf("since duration is not valid")), nil
+ }
+ }
+
+ envVars := []corev1.EnvVar{}
+ if since != "" {
+ envVars = append(envVars, corev1.EnvVar{
+ Name: "MUST_GATHER_SINCE",
+ Value: since,
+ })
+ }
+
+ // template container for gather,
+ // if multiple images are added multiple containers in the same pod will be spin up
+ gatherContainerTemplate := corev1.Container{
+ Name: "gather",
+ Image: defaultMustGatherImage,
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Command: []string{gatherCmd},
+ Env: envVars,
+ VolumeMounts: []corev1.VolumeMount{
+ {
+ Name: "must-gather-output",
+ MountPath: sourceDir,
+ },
+ },
+ }
+
+ var gatherContainers = []corev1.Container{
+ *gatherContainerTemplate.DeepCopy(),
+ }
+
+ if len(images) > 0 {
+ gatherContainers = make([]corev1.Container, len(images))
+ }
+
+ for i, image := range images {
+ gatherContainers[i] = *gatherContainerTemplate.DeepCopy()
+
+ // if more than one gather container(s) are added,
+ // suffix container name with int id
+ if len(images) > 1 {
+ gatherContainers[i].Name = fmt.Sprintf("gather-%d", i+1)
+ }
+ gatherContainers[i].Image = image
+ }
+
+ serviceAccountName := "must-gather-collector"
+
+ pod := &corev1.Pod{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: "v1",
+ Kind: "Pod",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ // Avoiding generateName as resources_create_or_update fails without explicit name.
+ Name: fmt.Sprintf("must-gather-%s", rand.String(6)),
+ Namespace: namespace,
+ },
+ Spec: corev1.PodSpec{
+ ServiceAccountName: serviceAccountName,
+ NodeName: nodeName,
+ PriorityClassName: "system-cluster-critical",
+ RestartPolicy: corev1.RestartPolicyNever,
+ Volumes: []corev1.Volume{
+ {
+ Name: "must-gather-output",
+ VolumeSource: corev1.VolumeSource{
+ EmptyDir: &corev1.EmptyDirVolumeSource{},
+ },
+ },
+ },
+ Containers: append(gatherContainers, corev1.Container{
+ Name: "wait",
+ Image: "registry.redhat.io/ubi9/ubi-minimal",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Command: []string{"/bin/bash", "-c", "sleep infinity"},
+ VolumeMounts: []corev1.VolumeMount{
+ {
+ Name: "must-gather-output",
+ MountPath: "/must-gather",
+ },
+ },
+ }),
+ HostNetwork: hostNetwork,
+ NodeSelector: nodeSelector,
+ Tolerations: []corev1.Toleration{
+ {
+ Operator: "Exists",
+ },
+ },
+ },
+ }
+
+ namespaceExists := false
+
+ _, err := k8sCore.ResourcesGet(params, &schema.GroupVersionKind{
+ Group: "",
+ Version: "v1",
+ Kind: "Namespace",
+ }, "", namespace)
+ if err == nil {
+ namespaceExists = true
+ }
+
+ var namespaceObj *corev1.Namespace
+ if !namespaceExists {
+ namespaceObj = &corev1.Namespace{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: "v1",
+ Kind: "Namespace",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: namespace,
+ },
+ }
+ }
+
+ serviceAccount := &corev1.ServiceAccount{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: "v1",
+ Kind: "ServiceAccount",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: serviceAccountName,
+ Namespace: namespace,
+ },
+ }
+
+ clusterRoleBindingName := fmt.Sprintf("%s-must-gather-collector", namespace)
+ clusterRoleBinding := &rbacv1.ClusterRoleBinding{
+ TypeMeta: metav1.TypeMeta{
+ APIVersion: "rbac.authorization.k8s.io/v1",
+ Kind: "ClusterRoleBinding",
+ },
+ ObjectMeta: metav1.ObjectMeta{
+ Name: clusterRoleBindingName,
+ },
+ RoleRef: rbacv1.RoleRef{
+ APIGroup: "rbac.authorization.k8s.io",
+ Kind: "ClusterRole",
+ Name: "cluster-admin",
+ },
+ Subjects: []rbacv1.Subject{
+ {
+ Kind: "ServiceAccount",
+ Name: serviceAccountName,
+ Namespace: namespace,
+ },
+ },
+ }
+
+ allowChecks := map[string]struct {
+ schema.GroupVersionResource
+ name string
+ verb string
+ }{
+ "create_namespace": {
+ GroupVersionResource: schema.GroupVersionResource{Version: "v1", Resource: "namespaces"},
+ verb: "create",
+ },
+ "create_serviceaccount": {
+ GroupVersionResource: schema.GroupVersionResource{Version: "v1", Resource: "serviceaccounts"},
+ verb: "create",
+ },
+ "create_clusterrolebinding": {
+ GroupVersionResource: schema.GroupVersionResource{Group: "rbac.authorization.k8s.io", Version: "v1", Resource: "clusterrolebindings"},
+ verb: "create",
+ },
+ "create_pod": {
+ GroupVersionResource: schema.GroupVersionResource{Version: "v1", Resource: "pods"},
+ verb: "create",
+ },
+ "use_scc_hostnetwork": {
+ GroupVersionResource: schema.GroupVersionResource{Group: "security.openshift.io", Version: "v1", Resource: "securitycontextconstraints"},
+ name: "hostnetwork-v2",
+ verb: "use",
+ },
+ }
+ isAllowed := make(map[string]bool)
+
+ for k, check := range allowChecks {
+ isAllowed[k] = k8sCore.CanIUse(params, &check.GroupVersionResource, "", check.verb)
+ }
+
+ var result strings.Builder
+ result.WriteString("The generated plan contains YAML manifests for must-gather pods and required resources (namespace, serviceaccount, clusterrolebinding). " +
+ "Suggest how the user can apply the manifest and copy results locally (`oc cp` / `kubectl cp`). \n\n",
+ )
+ result.WriteString("Ask the user if they want to apply the plan \n" +
+ "- use the resource_create_or_update tool to apply the manifest \n" +
+ "- alternatively, advise the user to execute `oc apply` / `kubectl apply` instead. \n\n",
+ )
+
+ if !keepResources {
+ result.WriteString("Once the must-gather collection is completed, the user may wish to cleanup the created resources. \n" +
+ "- use the resources_delete tool to delete the namespace and the clusterrolebinding \n" +
+ "- or, execute cleanup using `kubectl delete`. \n\n")
+ }
+
+ if !namespaceExists && isAllowed["create_namespace"] {
+ namespaceYaml, err := yaml.Marshal(namespaceObj)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal namespace to yaml: %w", err)
+ }
+
+ result.WriteString("```yaml\n")
+ result.Write(namespaceYaml)
+ result.WriteString("```\n\n")
+ }
+
+ if !namespaceExists && !isAllowed["create_namespace"] {
+ result.WriteString("WARNING: The resources_create_or_update call does not have permission to create namespace(s).\n")
+ }
+
+ // yaml(s) are dumped into individual code blocks of ``` ```
+ // because resources_create_or_update tool call fails when content has more than one more resource,
+ // some models are smart to detect an error and retry with one resource a time though.
+
+ serviceAccountYaml, err := yaml.Marshal(serviceAccount)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal service account to yaml: %w", err)
+ }
+ result.WriteString("```yaml\n")
+ result.Write(serviceAccountYaml)
+ result.WriteString("```\n\n")
+
+ if !isAllowed["create_serviceaccount"] {
+ result.WriteString("WARNING: The resources_create_or_update call does not have permission to create serviceaccount(s).\n")
+ }
+
+ clusterRoleBindingYaml, err := yaml.Marshal(clusterRoleBinding)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal cluster role binding to yaml: %w", err)
+ }
+
+ result.WriteString("```yaml\n")
+ result.Write(clusterRoleBindingYaml)
+ result.WriteString("```\n\n")
+
+ if !isAllowed["create_clusterrolebinding"] {
+ result.WriteString("WARNING: The resources_create_or_update call does not have permission to create clusterrolebinding(s).\n")
+ }
+
+ podYaml, err := yaml.Marshal(pod)
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal pod to yaml: %w", err)
+ }
+
+ result.WriteString("```yaml\n")
+ result.Write(podYaml)
+ result.WriteString("```\n")
+
+ if !isAllowed["create_pod"] {
+ result.WriteString("WARNING: The resources_create_or_update call does not have permission to create pod(s).\n")
+ }
+
+ if hostNetwork && !isAllowed["use_scc_hostnetwork"] {
+ result.WriteString("WARNING: The resources_create_or_update call does not have permission to create pod(s) with hostNetwork: true.\n")
+ }
+
+ return api.NewToolCallResult(result.String(), nil), nil
+}
+
+func getComponentImages(params api.ToolHandlerParams) ([]string, error) {
+ var images []string
+ k8sCore := kubernetes.NewCore(params)
+
+ appendImageFromAnnotation := func(obj runtime.Object) error {
+ unstruct, err := runtime.DefaultUnstructuredConverter.ToUnstructured(obj)
+ if err != nil {
+ return err
+ }
+
+ u := unstructured.Unstructured{Object: unstruct}
+ annotations := u.GetAnnotations()
+ if annotations[mgAnnotation] != "" {
+ images = append(images, annotations[mgAnnotation])
+ }
+
+ return nil
+ }
+
+ clusterOperatorsList, err := k8sCore.ResourcesList(params, &schema.GroupVersionKind{
+ Group: "config.openshift.io",
+ Version: "v1",
+ Kind: "ClusterOperator",
+ }, "", api.ListOptions{})
+ if err != nil {
+ return nil, err
+ }
+
+ if err := clusterOperatorsList.EachListItem(appendImageFromAnnotation); err != nil {
+ return images, err
+ }
+
+ csvList, err := k8sCore.ResourcesList(params, &schema.GroupVersionKind{
+ Group: "operators.coreos.com",
+ Version: "v1alpha1",
+ Kind: "ClusterServiceVersion",
+ }, "", api.ListOptions{})
+ if err != nil {
+ return images, err
+ }
+
+ err = csvList.EachListItem(appendImageFromAnnotation)
+ return images, err
+}
+
// parseNodeSelector turns a comma-separated list of key=value pairs into a
// label-selector map. Whitespace around entries, keys and values is trimmed;
// entries without an '=' or with an empty key are silently dropped.
func parseNodeSelector(selector string) map[string]string {
	labels := make(map[string]string)
	for _, entry := range strings.Split(selector, ",") {
		key, value, found := strings.Cut(strings.TrimSpace(entry), "=")
		if !found {
			continue
		}
		key = strings.TrimSpace(key)
		if key == "" {
			continue
		}
		labels[key] = strings.TrimSpace(value)
	}
	return labels
}
diff --git a/pkg/toolsets/openshift/mustgather/mustgather.go b/pkg/toolsets/openshift/mustgather/mustgather.go
new file mode 100644
index 000000000..3fc04c6fa
--- /dev/null
+++ b/pkg/toolsets/openshift/mustgather/mustgather.go
@@ -0,0 +1,80 @@
+package mustgather
+
+import (
+ "github.com/containers/kubernetes-mcp-server/pkg/api"
+ "github.com/containers/kubernetes-mcp-server/pkg/ocp/mustgather"
+ "github.com/google/jsonschema-go/jsonschema"
+ "k8s.io/utils/ptr"
+)
+
+func Tools() []api.ServerTool {
+ return []api.ServerTool{{
+ Tool: api.Tool{
+ Name: "plan_mustgather",
+ Description: "Plan for collecting a must-gather archive from an OpenShift cluster, must-gather is a tool for collecting cluster data related to debugging and troubleshooting like logs, kubernetes resources, etc.",
+ InputSchema: &jsonschema.Schema{
+ Type: "object",
+ Properties: map[string]*jsonschema.Schema{
+ "node_name": {
+ Type: "string",
+ Description: "Optional node to run the mustgather pod. If not provided, a random control-plane node will be selected automatically",
+ },
+ "node_selector": {
+ Type: "string",
+ Description: "Optional node label selector to use, only relevant when specifying a command and image which needs to capture data on a set of cluster nodes simultaneously",
+ },
+ "host_network": {
+ Type: "boolean",
+ Description: "Optionally run the must-gather pods in the host network of the node. This is only relevant if a specific gather image needs to capture host-level data",
+ },
+ "gather_command": {
+ Type: "string",
+ Description: "Optionally specify a custom gather command to run a specialized script, eg. /usr/bin/gather_audit_logs",
+ Default: api.ToRawMessage("/usr/bin/gather"),
+ },
+ "all_component_images": {
+ Type: "boolean",
+ Description: "Optional when enabled, collects and runs multiple must gathers for all operators and components on the cluster that have an annotated must-gather image available",
+ },
+ "images": {
+ Type: "array",
+ Description: "Optional list of images to use for gathering custom information about specific operators or cluster components. If not specified, OpenShift's default must-gather image will be used by default",
+ Items: &jsonschema.Schema{
+ Type: "string",
+ },
+ },
+ "source_dir": {
+ Type: "string",
+ Description: "Optional to set a specific directory where the pod will copy gathered data from",
+ Default: api.ToRawMessage("/must-gather"),
+ },
+ "timeout": {
+ Type: "string",
+ Description: "Timeout of the gather process eg. 30s, 6m20s, or 2h10m30s",
+ },
+ "namespace": {
+ Type: "string",
+ Description: "Optional to specify an existing privileged namespace where must-gather pods should run. If not provided, a temporary namespace will be created",
+ },
+ "keep_resources": {
+ Type: "boolean",
+ Description: "Optional to retain all temporary resources when the mustgather completes, otherwise temporary resources created will be advised to be cleaned up",
+ },
+ "since": {
+ Type: "string",
+ Description: "Optional to collect logs newer than a relative duration like 5s, 2m5s, or 3h6m10s. If unspecified, all available logs will be collected",
+ },
+ },
+ },
+ Annotations: api.ToolAnnotations{
+ Title: "MustGather: Plan",
+ ReadOnlyHint: ptr.To(true),
+ DestructiveHint: ptr.To(false),
+ IdempotentHint: ptr.To(false),
+ OpenWorldHint: ptr.To(true),
+ },
+ },
+
+ Handler: mustgather.PlanMustGather,
+ }}
+}
diff --git a/pkg/toolsets/openshift/toolset.go b/pkg/toolsets/openshift/toolset.go
index 62a39c7a2..70e58c358 100644
--- a/pkg/toolsets/openshift/toolset.go
+++ b/pkg/toolsets/openshift/toolset.go
@@ -1,10 +1,12 @@
package openshift
import (
+ "context"
"slices"
"github.com/containers/kubernetes-mcp-server/pkg/api"
"github.com/containers/kubernetes-mcp-server/pkg/toolsets"
+ "github.com/containers/kubernetes-mcp-server/pkg/toolsets/openshift/mustgather"
"github.com/containers/kubernetes-mcp-server/pkg/toolsets/openshift/nodes"
)
@@ -17,16 +19,23 @@ func (t *Toolset) GetName() string {
}
func (t *Toolset) GetDescription() string {
- return "Core OpenShift-specific tools (Node debugging, etc.)"
+ return "OpenShift-specific tools for cluster debugging and troubleshooting."
}
func (t *Toolset) GetTools(o api.Openshift) []api.ServerTool {
+ // OpenShift tools are only available when an OpenShift cluster is detected.
+ if !o.IsOpenShift(context.Background()) {
+ return []api.ServerTool{}
+ }
+
return slices.Concat(
+ mustgather.Tools(),
nodes.NodeTools(),
)
}
func (t *Toolset) GetPrompts() []api.ServerPrompt {
+ // OpenShift toolset does not provide prompts
return nil
}
From b09a502bc51a55c30b9af9555e2b2619671fa8de Mon Sep 17 00:00:00 2001
From: Swarup Ghosh
Date: Tue, 3 Feb 2026 16:29:59 +0530
Subject: [PATCH 6/7] decouple pkg/ocp client out from pkg/ocp/nodes
Signed-off-by: Swarup Ghosh
---
pkg/ocp/{nodes/ocp_client.go => client.go} | 2 +-
pkg/ocp/nodes/nodes_debug.go | 9 +++++----
pkg/ocp/nodes/testhelpers.go | 8 +++++---
pkg/toolsets/openshift/nodes/nodes.go | 3 ++-
4 files changed, 13 insertions(+), 9 deletions(-)
rename pkg/ocp/{nodes/ocp_client.go => client.go} (99%)
diff --git a/pkg/ocp/nodes/ocp_client.go b/pkg/ocp/client.go
similarity index 99%
rename from pkg/ocp/nodes/ocp_client.go
rename to pkg/ocp/client.go
index ce58080cf..37023253c 100644
--- a/pkg/ocp/nodes/ocp_client.go
+++ b/pkg/ocp/client.go
@@ -1,4 +1,4 @@
-package nodes
+package ocp
import (
"context"
diff --git a/pkg/ocp/nodes/nodes_debug.go b/pkg/ocp/nodes/nodes_debug.go
index f8b7e412a..ac6c9bcdd 100644
--- a/pkg/ocp/nodes/nodes_debug.go
+++ b/pkg/ocp/nodes/nodes_debug.go
@@ -8,6 +8,7 @@ import (
"time"
"github.com/containers/kubernetes-mcp-server/pkg/kubernetes"
+ "github.com/containers/kubernetes-mcp-server/pkg/ocp"
"github.com/containers/kubernetes-mcp-server/pkg/version"
corev1 "k8s.io/api/core/v1"
@@ -36,7 +37,7 @@ const (
// default debug image is used. Timeout controls how long we wait for the pod to complete.
func NodesDebugExec(
ctx context.Context,
- k OpenshiftClient,
+ k ocp.OpenshiftClient,
namespace string,
nodeName string,
image string,
@@ -97,7 +98,7 @@ func NodesDebugExec(
// createDebugPod creates a privileged pod on the target node to run debug commands.
func createDebugPod(
ctx context.Context,
- k OpenshiftClient,
+ k ocp.OpenshiftClient,
nodeName string,
namespace string,
image string,
@@ -188,7 +189,7 @@ func createDebugPod(
// pollForCompletion polls the debug pod until it completes or times out.
func pollForCompletion(
ctx context.Context,
- k OpenshiftClient,
+ k ocp.OpenshiftClient,
namespace string,
podName string,
timeout time.Duration,
@@ -248,7 +249,7 @@ func pollForCompletion(
}
// retrieveLogs retrieves the logs from the debug pod.
-func retrieveLogs(ctx context.Context, k OpenshiftClient, namespace, podName string) (string, error) {
+func retrieveLogs(ctx context.Context, k ocp.OpenshiftClient, namespace, podName string) (string, error) {
logCtx, logCancel := context.WithTimeout(ctx, 30*time.Second)
defer logCancel()
logs, logErr := k.PodsLog(logCtx, namespace, podName, NodeDebugContainerName, false, 0)
diff --git a/pkg/ocp/nodes/testhelpers.go b/pkg/ocp/nodes/testhelpers.go
index 7ab6a0fdb..ba5c69df4 100644
--- a/pkg/ocp/nodes/testhelpers.go
+++ b/pkg/ocp/nodes/testhelpers.go
@@ -9,6 +9,8 @@ import (
"strings"
"testing"
+ "github.com/containers/kubernetes-mcp-server/pkg/ocp"
+
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
@@ -40,14 +42,14 @@ func NewNodeDebugTestEnv(t *testing.T) *NodeDebugTestEnv {
}
}
-// FakeKubernetesClient implements the OpenshiftClient interface for testing
+// FakeKubernetesClient implements the ocp.OpenshiftClient interface for testing
type FakeKubernetesClient struct {
pods *FakePodInterface
namespace string
}
// AccessControlClientset returns a fake clientset for testing
-func (f *FakeKubernetesClient) AccessControlClientset() ClientsetInterface {
+func (f *FakeKubernetesClient) AccessControlClientset() ocp.ClientsetInterface {
return &FakeAccessControlClientset{pods: f.pods}
}
@@ -71,7 +73,7 @@ func (f *FakeKubernetesClient) PodsLog(ctx context.Context, namespace, name, con
return string(rawData), nil
}
-// FakeAccessControlClientset mimics kubernetes.AccessControlClientset for testing
+// FakeAccessControlClientset mimics ocp.ClientsetInterface for testing
type FakeAccessControlClientset struct {
pods *FakePodInterface
}
diff --git a/pkg/toolsets/openshift/nodes/nodes.go b/pkg/toolsets/openshift/nodes/nodes.go
index 2f70f2a14..c339ee4d1 100644
--- a/pkg/toolsets/openshift/nodes/nodes.go
+++ b/pkg/toolsets/openshift/nodes/nodes.go
@@ -9,6 +9,7 @@ import (
"k8s.io/utils/ptr"
"github.com/containers/kubernetes-mcp-server/pkg/api"
+ "github.com/containers/kubernetes-mcp-server/pkg/ocp"
"github.com/containers/kubernetes-mcp-server/pkg/ocp/nodes"
)
@@ -96,7 +97,7 @@ func nodesDebugExec(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
}
}
- output, execErr := nodes.NodesDebugExec(params.Context, nodes.NewOpenshiftClient(params.KubernetesClient), namespace, nodeName, image, command, timeout)
+ output, execErr := nodes.NodesDebugExec(params.Context, ocp.NewOpenshiftClient(params.KubernetesClient), namespace, nodeName, image, command, timeout)
if output == "" && execErr == nil {
output = fmt.Sprintf("Command executed successfully on node %s but produced no output.", nodeName)
}
From 30efff5e055ae8df4e91bb3e1c3b5adc2484a10c Mon Sep 17 00:00:00 2001
From: Swarup Ghosh
Date: Tue, 3 Feb 2026 17:48:16 +0530
Subject: [PATCH 7/7] Improve pkg/ocp/client, use DynamicClient in mustgather
Signed-off-by: Swarup Ghosh
---
pkg/ocp/mustgather/plan_mustgather.go | 164 ++++++++----------
.../openshift/mustgather/mustgather.go | 71 +++++++-
2 files changed, 143 insertions(+), 92 deletions(-)
diff --git a/pkg/ocp/mustgather/plan_mustgather.go b/pkg/ocp/mustgather/plan_mustgather.go
index d5d501064..fd3658d7c 100644
--- a/pkg/ocp/mustgather/plan_mustgather.go
+++ b/pkg/ocp/mustgather/plan_mustgather.go
@@ -1,6 +1,7 @@
package mustgather
import (
+ "context"
"fmt"
"path"
"strings"
@@ -15,6 +16,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/rand"
+ "k8s.io/client-go/dynamic"
"sigs.k8s.io/yaml"
)
@@ -26,94 +28,71 @@ const (
maxConcurrentGathers = 8
)
-func PlanMustGather(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
- args := params.GetArguments()
- k8sCore := kubernetes.NewCore(params)
-
- var nodeName, sourceDir, namespace, gatherCmd, timeout, since string
- var hostNetwork, keepResources, allImages bool
- var images []string
- var nodeSelector map[string]string
-
- if args["node_name"] != nil {
- nodeName = args["node_name"].(string)
- }
-
- if args["node_selector"] != nil {
- nodeSelector = parseNodeSelector(args["node_selector"].(string))
- }
-
- if args["host_network"] != nil {
- hostNetwork = args["host_network"].(bool)
- }
-
- sourceDir = defaultGatherSourceDir
- if args["source_dir"] != nil {
- sourceDir = path.Clean(args["source_dir"].(string))
- }
-
- namespace = fmt.Sprintf("openshift-must-gather-%s", rand.String(6))
- if args["namespace"] != nil {
- namespace = args["namespace"].(string)
- }
// PlanMustGatherParams contains the parameters for planning a must-gather collection.
// The zero value is usable: defaults are filled in by PlanMustGather.
type PlanMustGatherParams struct {
	NodeName      string            // optional node to pin the gather pod to; empty leaves scheduling to the planner
	NodeSelector  map[string]string // optional node label selector for running gather pods on a set of nodes
	HostNetwork   bool              // run gather pods in the node's host network (for host-level data capture)
	SourceDir     string            // custom gather directory inside pod, default is "/must-gather"
	Namespace     string            // existing privileged namespace to run in; empty generates a random temporary one
	KeepResources bool              // when true, the plan omits the cleanup advice for temporary resources
	GatherCommand string            // custom gather command, default is "/usr/bin/gather"
	AllImages     bool              // whether to use custom gather images from installed operators on cluster
	Images        []string          // custom list of must-gather images
	Timeout       string            // optional duration string (e.g. "10m") that wraps the gather command in /usr/bin/timeout
	Since         string            // optional duration; only collect logs newer than this
}
- if args["keep_resources"] != nil {
- keepResources = args["keep_resources"].(bool)
- }
+// PlanMustGather generates a must-gather plan with YAML manifests for creating the required resources.
+// It returns the plan as a string containing YAML manifests and instructions.
+func PlanMustGather(ctx context.Context, k api.KubernetesClient, params PlanMustGatherParams) (string, error) {
+ dynamicClient := k.DynamicClient()
+ k8sCore := kubernetes.NewCore(k)
- gatherCmd = defaultGatherCmd
- if args["gather_command"] != nil {
- gatherCmd = args["gather_command"].(string)
+ sourceDir := params.SourceDir
+ if sourceDir == "" {
+ sourceDir = defaultGatherSourceDir
+ } else {
+ sourceDir = path.Clean(sourceDir)
}
- if args["all_component_images"] != nil {
- allImages = args["all_component_images"].(bool)
+ namespace := params.Namespace
+ if namespace == "" {
+ namespace = fmt.Sprintf("openshift-must-gather-%s", rand.String(6))
}
- if args["images"] != nil {
- if imagesArg, ok := args["images"].([]interface{}); ok {
- for _, img := range imagesArg {
- if imgStr, ok := img.(string); ok {
- images = append(images, imgStr)
- }
- }
- }
+ gatherCmd := params.GatherCommand
+ if gatherCmd == "" {
+ gatherCmd = defaultGatherCmd
}
- if allImages {
- componentImages, err := getComponentImages(params)
+ images := params.Images
+ if params.AllImages {
+ componentImages, err := getComponentImages(ctx, dynamicClient)
if err != nil {
- return api.NewToolCallResult("",
- fmt.Errorf("failed to get operator images: %v", err),
- ), nil
+ return "", fmt.Errorf("failed to get operator images: %v", err)
}
-
images = append(images, componentImages...)
}
if len(images) > maxConcurrentGathers {
- return api.NewToolCallResult("",
- fmt.Errorf("more than %d gather images are not supported", maxConcurrentGathers),
- ), nil
+ return "", fmt.Errorf("more than %d gather images are not supported", maxConcurrentGathers)
}
- if args["timeout"] != nil {
- timeout = args["timeout"].(string)
-
+ timeout := params.Timeout
+ if timeout != "" {
_, err := time.ParseDuration(timeout)
if err != nil {
- return api.NewToolCallResult("", fmt.Errorf("timeout duration is not valid")), nil
+ return "", fmt.Errorf("timeout duration is not valid")
}
-
gatherCmd = fmt.Sprintf("/usr/bin/timeout %s %s", timeout, gatherCmd)
}
- if args["since"] != nil {
- since = args["since"].(string)
-
+ since := params.Since
+ if since != "" {
_, err := time.ParseDuration(since)
if err != nil {
- return api.NewToolCallResult("", fmt.Errorf("since duration is not valid")), nil
+ return "", fmt.Errorf("since duration is not valid")
}
}
@@ -174,7 +153,7 @@ func PlanMustGather(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
},
Spec: corev1.PodSpec{
ServiceAccountName: serviceAccountName,
- NodeName: nodeName,
+ NodeName: params.NodeName,
PriorityClassName: "system-cluster-critical",
RestartPolicy: corev1.RestartPolicyNever,
Volumes: []corev1.Volume{
@@ -197,8 +176,8 @@ func PlanMustGather(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
},
},
}),
- HostNetwork: hostNetwork,
- NodeSelector: nodeSelector,
+ HostNetwork: params.HostNetwork,
+ NodeSelector: params.NodeSelector,
Tolerations: []corev1.Toleration{
{
Operator: "Exists",
@@ -208,8 +187,7 @@ func PlanMustGather(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
}
namespaceExists := false
-
- _, err := k8sCore.ResourcesGet(params, &schema.GroupVersionKind{
+ _, err := k8sCore.ResourcesGet(ctx, &schema.GroupVersionKind{
Group: "",
Version: "v1",
Kind: "Namespace",
@@ -294,8 +272,8 @@ func PlanMustGather(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
}
isAllowed := make(map[string]bool)
- for k, check := range allowChecks {
- isAllowed[k] = k8sCore.CanIUse(params, &check.GroupVersionResource, "", check.verb)
+ for key, check := range allowChecks {
+ isAllowed[key] = k8sCore.CanIUse(ctx, &check.GroupVersionResource, "", check.verb)
}
var result strings.Builder
@@ -307,7 +285,7 @@ func PlanMustGather(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
"- alternatively, advise the user to execute `oc apply` / `kubectl apply` instead. \n\n",
)
- if !keepResources {
+ if !params.KeepResources {
result.WriteString("Once the must-gather collection is completed, the user may wish to cleanup the created resources. \n" +
"- use the resources_delete tool to delete the namespace and the clusterrolebinding \n" +
"- or, execute cleanup using `kubectl delete`. \n\n")
@@ -316,7 +294,7 @@ func PlanMustGather(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
if !namespaceExists && isAllowed["create_namespace"] {
namespaceYaml, err := yaml.Marshal(namespaceObj)
if err != nil {
- return nil, fmt.Errorf("failed to marshal namespace to yaml: %w", err)
+ return "", fmt.Errorf("failed to marshal namespace to yaml: %w", err)
}
result.WriteString("```yaml\n")
@@ -334,7 +312,7 @@ func PlanMustGather(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
serviceAccountYaml, err := yaml.Marshal(serviceAccount)
if err != nil {
- return nil, fmt.Errorf("failed to marshal service account to yaml: %w", err)
+ return "", fmt.Errorf("failed to marshal service account to yaml: %w", err)
}
result.WriteString("```yaml\n")
result.Write(serviceAccountYaml)
@@ -346,7 +324,7 @@ func PlanMustGather(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
clusterRoleBindingYaml, err := yaml.Marshal(clusterRoleBinding)
if err != nil {
- return nil, fmt.Errorf("failed to marshal cluster role binding to yaml: %w", err)
+ return "", fmt.Errorf("failed to marshal cluster role binding to yaml: %w", err)
}
result.WriteString("```yaml\n")
@@ -359,7 +337,7 @@ func PlanMustGather(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
podYaml, err := yaml.Marshal(pod)
if err != nil {
- return nil, fmt.Errorf("failed to marshal pod to yaml: %w", err)
+ return "", fmt.Errorf("failed to marshal pod to yaml: %w", err)
}
result.WriteString("```yaml\n")
@@ -370,16 +348,15 @@ func PlanMustGather(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
result.WriteString("WARNING: The resources_create_or_update call does not have permission to create pod(s).\n")
}
- if hostNetwork && !isAllowed["use_scc_hostnetwork"] {
+ if params.HostNetwork && !isAllowed["use_scc_hostnetwork"] {
result.WriteString("WARNING: The resources_create_or_update call does not have permission to create pod(s) with hostNetwork: true.\n")
}
- return api.NewToolCallResult(result.String(), nil), nil
+ return result.String(), nil
}
-func getComponentImages(params api.ToolHandlerParams) ([]string, error) {
+func getComponentImages(ctx context.Context, dynamicClient dynamic.Interface) ([]string, error) {
var images []string
- k8sCore := kubernetes.NewCore(params)
appendImageFromAnnotation := func(obj runtime.Object) error {
unstruct, err := runtime.DefaultUnstructuredConverter.ToUnstructured(obj)
@@ -396,11 +373,13 @@ func getComponentImages(params api.ToolHandlerParams) ([]string, error) {
return nil
}
- clusterOperatorsList, err := k8sCore.ResourcesList(params, &schema.GroupVersionKind{
- Group: "config.openshift.io",
- Version: "v1",
- Kind: "ClusterOperator",
- }, "", api.ListOptions{})
+ // List ClusterOperators
+ clusterOperatorGVR := schema.GroupVersionResource{
+ Group: "config.openshift.io",
+ Version: "v1",
+ Resource: "clusteroperators",
+ }
+ clusterOperatorsList, err := dynamicClient.Resource(clusterOperatorGVR).List(ctx, metav1.ListOptions{})
if err != nil {
return nil, err
}
@@ -409,11 +388,13 @@ func getComponentImages(params api.ToolHandlerParams) ([]string, error) {
return images, err
}
- csvList, err := k8sCore.ResourcesList(params, &schema.GroupVersionKind{
- Group: "operators.coreos.com",
- Version: "v1alpha1",
- Kind: "ClusterServiceVersion",
- }, "", api.ListOptions{})
+ // List ClusterServiceVersions
+ csvGVR := schema.GroupVersionResource{
+ Group: "operators.coreos.com",
+ Version: "v1alpha1",
+ Resource: "clusterserviceversions",
+ }
+ csvList, err := dynamicClient.Resource(csvGVR).List(ctx, metav1.ListOptions{})
if err != nil {
return images, err
}
@@ -422,7 +403,8 @@ func getComponentImages(params api.ToolHandlerParams) ([]string, error) {
return images, err
}
-func parseNodeSelector(selector string) map[string]string {
+// ParseNodeSelector parses a comma-separated key=value selector string into a map.
+func ParseNodeSelector(selector string) map[string]string {
result := make(map[string]string)
pairs := strings.Split(selector, ",")
for _, pair := range pairs {
diff --git a/pkg/toolsets/openshift/mustgather/mustgather.go b/pkg/toolsets/openshift/mustgather/mustgather.go
index 3fc04c6fa..02ef5194f 100644
--- a/pkg/toolsets/openshift/mustgather/mustgather.go
+++ b/pkg/toolsets/openshift/mustgather/mustgather.go
@@ -1,6 +1,8 @@
package mustgather
import (
+ "context"
+
"github.com/containers/kubernetes-mcp-server/pkg/api"
"github.com/containers/kubernetes-mcp-server/pkg/ocp/mustgather"
"github.com/google/jsonschema-go/jsonschema"
@@ -75,6 +77,73 @@ func Tools() []api.ServerTool {
},
},
- Handler: mustgather.PlanMustGather,
+ Handler: planMustGather,
}}
}
+
+// planMustGather is the handler that parses arguments and calls the core
+// PlanMustGather tool.
+func planMustGather(params api.ToolHandlerParams) (*api.ToolCallResult, error) {
+ args := params.GetArguments()
+ ctx := context.Background()
+
+ mgParams := mustgather.PlanMustGatherParams{}
+
+ if args["node_name"] != nil {
+ mgParams.NodeName = args["node_name"].(string)
+ }
+
+ if args["node_selector"] != nil {
+ mgParams.NodeSelector = mustgather.ParseNodeSelector(args["node_selector"].(string))
+ }
+
+ if args["host_network"] != nil {
+ mgParams.HostNetwork = args["host_network"].(bool)
+ }
+
+ if args["source_dir"] != nil {
+ mgParams.SourceDir = args["source_dir"].(string)
+ }
+
+ if args["namespace"] != nil {
+ mgParams.Namespace = args["namespace"].(string)
+ }
+
+ if args["keep_resources"] != nil {
+ mgParams.KeepResources = args["keep_resources"].(bool)
+ }
+
+ if args["gather_command"] != nil {
+ mgParams.GatherCommand = args["gather_command"].(string)
+ }
+
+ if args["all_component_images"] != nil {
+ mgParams.AllImages = args["all_component_images"].(bool)
+ }
+
+ if args["images"] != nil {
+ if imagesArg, ok := args["images"].([]interface{}); ok {
+ for _, img := range imagesArg {
+ if imgStr, ok := img.(string); ok {
+ mgParams.Images = append(mgParams.Images, imgStr)
+ }
+ }
+ }
+ }
+
+ if args["timeout"] != nil {
+ mgParams.Timeout = args["timeout"].(string)
+ }
+
+ if args["since"] != nil {
+ mgParams.Since = args["since"].(string)
+ }
+
+ // params implements api.KubernetesClient
+ result, err := mustgather.PlanMustGather(ctx, params, mgParams)
+ if err != nil {
+ return api.NewToolCallResult("", err), nil
+ }
+
+ return api.NewToolCallResult(result, nil), nil
+}