diff --git a/internal/validation/suite.go b/internal/validation/suite.go
index ad79d178..786df2b9 100644
--- a/internal/validation/suite.go
+++ b/internal/validation/suite.go
@@ -73,10 +73,6 @@ func RunInstanceLifecycleValidation(t *testing.T, config ProviderConfig) {
 	require.NoError(t, err)
 	require.NotEmpty(t, types, "Should have instance types")
 
-	locations, err := client.GetLocations(ctx, v1.GetLocationsArgs{})
-	require.NoError(t, err)
-	require.NotEmpty(t, locations, "Should have locations")
-
 	t.Run("ValidateCreateInstance", func(t *testing.T) {
 		attrs := v1.CreateInstanceAttrs{}
 		selectedType := v1.InstanceType{}
@@ -139,6 +135,8 @@ func RunInstanceLifecycleValidation(t *testing.T, config ProviderConfig) {
 		require.NoError(t, err, "ValidateDockerFirewallAllowsContainerToContainerCommunication should pass - container to container communication should be allowed")
 	})
 
+	runMicroK8sFirewallValidation(ctx, t, client, instance)
+
 	if capabilities.IsCapable(v1.CapabilityStopStartInstance) && instance.Stoppable {
 		t.Run("ValidateStopStartInstance", func(t *testing.T) {
 			err := v1.ValidateStopStartInstance(ctx, client, instance)
@@ -153,6 +151,18 @@ func RunInstanceLifecycleValidation(t *testing.T, config ProviderConfig) {
 	})
 }
 
+func runMicroK8sFirewallValidation(ctx context.Context, t *testing.T, client v1.CloudInstanceReader, instance *v1.Instance) {
+	t.Run("ValidateMicroK8sFirewallAllowsEgress", func(t *testing.T) {
+		err := v1.ValidateMicroK8sFirewallAllowsEgress(ctx, client, instance, ssh.GetTestPrivateKey())
+		require.NoError(t, err, "ValidateMicroK8sFirewallAllowsEgress should pass - microk8s pod egress should be allowed")
+	})
+
+	t.Run("ValidateMicroK8sFirewallAllowsPodToPodCommunication", func(t *testing.T) {
+		err := v1.ValidateMicroK8sFirewallAllowsPodToPodCommunication(ctx, client, instance, ssh.GetTestPrivateKey())
+		require.NoError(t, err, "ValidateMicroK8sFirewallAllowsPodToPodCommunication should pass - microk8s pod to pod communication should be allowed")
+	})
+}
+
 type NetworkValidationOpts struct {
 	Name  string
 	RefID string
@@ -342,6 +352,16 @@ func RunFirewallValidation(t *testing.T, config ProviderConfig, opts FirewallVal
 		require.NoError(t, err, "ValidateDockerFirewallAllowsContainerToContainerCommunication should pass - container to container communication should be allowed")
 	})
 
+	t.Run("ValidateMicroK8sFirewallAllowsEgress", func(t *testing.T) {
+		err := v1.ValidateMicroK8sFirewallAllowsEgress(ctx, client, instance, ssh.GetTestPrivateKey())
+		require.NoError(t, err, "ValidateMicroK8sFirewallAllowsEgress should pass - microk8s pod egress should be allowed")
+	})
+
+	t.Run("ValidateMicroK8sFirewallAllowsPodToPodCommunication", func(t *testing.T) {
+		err := v1.ValidateMicroK8sFirewallAllowsPodToPodCommunication(ctx, client, instance, ssh.GetTestPrivateKey())
+		require.NoError(t, err, "ValidateMicroK8sFirewallAllowsPodToPodCommunication should pass - microk8s pod to pod communication should be allowed")
+	})
+
 	// Test that SSH port is accessible (sanity check)
 	t.Run("ValidateSSHPortAccessible", func(t *testing.T) {
 		err := v1.ValidateFirewallAllowsPort(ctx, client, instance, ssh.GetTestPrivateKey(), instance.SSHPort)
diff --git a/v1/networking_validation.go b/v1/networking_validation.go
index 4fc4b8d9..b6c7fe88 100644
--- a/v1/networking_validation.go
+++ b/v1/networking_validation.go
@@ -280,6 +280,176 @@ func ValidateDockerFirewallAllowsContainerToContainerCommunication(ctx context.C
 	return nil
 }
 
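+// ValidateMicroK8sFirewallAllowsEgress verifies that the host firewall still allows
+// MicroK8s pod egress by pinging 8.8.8.8 from a short-lived alpine pod over SSH.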
+func ValidateMicroK8sFirewallAllowsEgress(ctx context.Context, client CloudInstanceReader, instance *Instance, privateKey string) error {
+	var err error
+	instance, err = WaitForInstanceLifecycleStatus(ctx, client, instance, LifecycleStatusRunning, PendingToRunningTimeout)
+	if err != nil {
+		return fmt.Errorf("failed to wait for instance running: %w", err)
+	}
+
+	publicIP := instance.PublicIP
+	if publicIP == "" {
+		return fmt.Errorf("public IP is not available for instance %s", instance.CloudID)
+	}
+
+	sshClient, err := ssh.ConnectToHost(ctx, ssh.ConnectionConfig{
+		User:     instance.SSHUser,
+		HostPort: fmt.Sprintf("%s:%d", publicIP, instance.SSHPort),
+		PrivKey:  privateKey,
+	})
+	if err != nil {
+		return fmt.Errorf("failed to SSH into instance: %w", err)
+	}
+	defer func() { _ = sshClient.Close() }()
+
+	microK8sCmd, err := setupMicroK8sCommand(ctx, sshClient, instance.CloudID)
+	if err != nil {
+		return err
+	}
+
+	// Ensure prior run artifacts do not interfere.
+	_, _, _ = sshClient.RunCommand(ctx, fmt.Sprintf("%s kubectl delete pod mk8s-egress-test --ignore-not-found=true", microK8sCmd))
+
+	cmd := fmt.Sprintf(
+		"%s kubectl run mk8s-egress-test --image=alpine:3.20 --restart=Never --command -- sh -c 'ping -c 3 8.8.8.8'",
+		microK8sCmd,
+	)
+	_, stderr, err := sshClient.RunCommand(ctx, cmd)
+	if err != nil {
+		return fmt.Errorf("failed to create microk8s egress test pod: %w, stderr: %s", err, stderr)
+	}
+
+	defer func() {
+		_, _, _ = sshClient.RunCommand(ctx, fmt.Sprintf("%s kubectl delete pod mk8s-egress-test --ignore-not-found=true", microK8sCmd))
+	}()
+
+	cmd = fmt.Sprintf("%s kubectl wait --for=jsonpath='{.status.phase}'=Succeeded pod/mk8s-egress-test --timeout=180s", microK8sCmd)
+	_, stderr, err = sshClient.RunCommand(ctx, cmd)
+	if err != nil {
+		logsCmd := fmt.Sprintf("%s kubectl logs mk8s-egress-test 2>/dev/null || true", microK8sCmd)
+		logs, _, _ := sshClient.RunCommand(ctx, logsCmd)
+		return fmt.Errorf("microk8s egress test pod did not succeed: %w, stderr: %s, logs: %s", err, stderr, logs)
+	}
+
+	cmd = fmt.Sprintf("%s kubectl logs mk8s-egress-test", microK8sCmd)
+	stdout, stderr, err := sshClient.RunCommand(ctx, cmd)
+	if err != nil {
+		return fmt.Errorf("failed to get microk8s egress test pod logs: %w, stderr: %s", err, stderr)
+	}
+	if !strings.Contains(stdout, "3 packets transmitted, 3 packets received") {
+		return fmt.Errorf("expected successful pod egress ping, got logs: %s", stdout)
+	}
+
+	return nil
+}
+
+func ValidateMicroK8sFirewallAllowsPodToPodCommunication(ctx context.Context, client CloudInstanceReader, instance *Instance, privateKey string) error {
+	var err error
+	instance, err = WaitForInstanceLifecycleStatus(ctx, client, instance, LifecycleStatusRunning, PendingToRunningTimeout)
+	if err != nil {
+		return fmt.Errorf("failed to wait for instance running: %w", err)
+	}
+
+	publicIP := instance.PublicIP
+	if publicIP == "" {
+		return fmt.Errorf("public IP is not available for instance %s", instance.CloudID)
+	}
+
+	sshClient, err := ssh.ConnectToHost(ctx, ssh.ConnectionConfig{
+		User:     instance.SSHUser,
+		HostPort: fmt.Sprintf("%s:%d", publicIP, instance.SSHPort),
+		PrivKey:  privateKey,
+	})
+	if err != nil {
+		return fmt.Errorf("failed to SSH into instance: %w", err)
+	}
+	defer func() { _ = sshClient.Close() }()
+
+	microK8sCmd, err := setupMicroK8sCommand(ctx, sshClient, instance.CloudID)
+	if err != nil {
+		return err
+	}
+
+	cleanupMicroK8sPodToPodArtifacts(ctx, sshClient, microK8sCmd)
+	defer cleanupMicroK8sPodToPodArtifacts(ctx, sshClient, microK8sCmd)
+
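+	// Bring up an nginx pod, wait for it to become Ready, expose it as a service,
+	// then confirm a client pod can fetch the nginx welcome page through it.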
+	if err := createMicroK8sNginxPod(ctx, sshClient, microK8sCmd); err != nil {
+		return err
+	}
+	if err := waitForMicroK8sNginxReady(ctx, sshClient, microK8sCmd); err != nil {
+		return err
+	}
+	if err := exposeMicroK8sNginxService(ctx, sshClient, microK8sCmd); err != nil {
+		return err
+	}
+	return runMicroK8sPodToPodTest(ctx, sshClient, microK8sCmd)
+}
+
+func cleanupMicroK8sPodToPodArtifacts(ctx context.Context, sshClient *ssh.Client, microK8sCmd string) {
+	_, _, _ = sshClient.RunCommand(ctx, fmt.Sprintf("%s kubectl delete pod mk8s-nginx mk8s-c2c-test --ignore-not-found=true", microK8sCmd))
+	_, _, _ = sshClient.RunCommand(ctx, fmt.Sprintf("%s kubectl delete service mk8s-nginx-svc --ignore-not-found=true", microK8sCmd))
+}
+
+func createMicroK8sNginxPod(ctx context.Context, sshClient *ssh.Client, microK8sCmd string) error {
+	cmd := fmt.Sprintf("%s kubectl run mk8s-nginx --image=nginx:alpine --restart=Never --port=80", microK8sCmd)
+	_, stderr, err := sshClient.RunCommand(ctx, cmd)
+	if err != nil {
+		return fmt.Errorf("failed to create microk8s nginx pod: %w, stderr: %s", err, stderr)
+	}
+
+	return nil
+}
+
+func waitForMicroK8sNginxReady(ctx context.Context, sshClient *ssh.Client, microK8sCmd string) error {
+	cmd := fmt.Sprintf("%s kubectl wait --for=condition=Ready pod/mk8s-nginx --timeout=180s", microK8sCmd)
+	_, stderr, err := sshClient.RunCommand(ctx, cmd)
+	if err != nil {
+		return fmt.Errorf("microk8s nginx pod did not become ready: %w, stderr: %s", err, stderr)
+	}
+
+	return nil
+}
+
+func exposeMicroK8sNginxService(ctx context.Context, sshClient *ssh.Client, microK8sCmd string) error {
+	cmd := fmt.Sprintf("%s kubectl expose pod mk8s-nginx --name=mk8s-nginx-svc --port=80 --target-port=80", microK8sCmd)
+	_, stderr, err := sshClient.RunCommand(ctx, cmd)
+	if err != nil {
+		return fmt.Errorf("failed to create microk8s nginx service: %w, stderr: %s", err, stderr)
+	}
+
+	return nil
+}
+
+func runMicroK8sPodToPodTest(ctx context.Context, sshClient *ssh.Client, microK8sCmd string) error {
+	cmd := fmt.Sprintf(
+		"%s kubectl run mk8s-c2c-test --image=alpine:3.20 --restart=Never --command -- sh -c 'wget -q -O- http://mk8s-nginx-svc'",
+		microK8sCmd,
+	)
+	_, stderr, err := sshClient.RunCommand(ctx, cmd)
+	if err != nil {
+		return fmt.Errorf("failed to create microk8s pod-to-pod test pod: %w, stderr: %s", err, stderr)
+	}
+
+	cmd = fmt.Sprintf("%s kubectl wait --for=jsonpath='{.status.phase}'=Succeeded pod/mk8s-c2c-test --timeout=180s", microK8sCmd)
+	_, stderr, err = sshClient.RunCommand(ctx, cmd)
+	if err != nil {
+		logsCmd := fmt.Sprintf("%s kubectl logs mk8s-c2c-test 2>/dev/null || true", microK8sCmd)
+		logs, _, _ := sshClient.RunCommand(ctx, logsCmd)
+		return fmt.Errorf("microk8s pod-to-pod test pod did not succeed: %w, stderr: %s, logs: %s", err, stderr, logs)
+	}
+
+	cmd = fmt.Sprintf("%s kubectl logs mk8s-c2c-test", microK8sCmd)
+	stdout, stderr, err := sshClient.RunCommand(ctx, cmd)
+	if err != nil {
+		return fmt.Errorf("failed to get microk8s pod-to-pod test pod logs: %w, stderr: %s", err, stderr)
+	}
+	if !strings.Contains(stdout, "Welcome to nginx") {
+		return fmt.Errorf("expected successful pod-to-pod communication, got logs: %s", stdout)
+	}
+
+	return nil
+}
+
 // setupDockerCommand ensures Docker is available and returns the command to use (always with sudo)
 func setupDockerCommand(ctx context.Context, sshClient *ssh.Client, instanceID CloudProviderInstanceID) (string, error) {
 	// Check if Docker is available
@@ -301,6 +471,30 @@ func setupDockerCommand(ctx context.Context, sshClient *ssh.Client, instanceID C
 	return "sudo docker", nil
 }
 
+// setupMicroK8sCommand ensures MicroK8s is available and returns the command to use (always with sudo).
+func setupMicroK8sCommand(ctx context.Context, sshClient *ssh.Client, instanceID CloudProviderInstanceID) (string, error) {
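+	// Probe for a ready MicroK8s first; install it via snap only if the probe fails.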
"sudo docker", nil } +// setupMicroK8sCommand ensures MicroK8s is available and returns the command to use (always with sudo). +func setupMicroK8sCommand(ctx context.Context, sshClient *ssh.Client, instanceID CloudProviderInstanceID) (string, error) { + checkCmd := "sudo microk8s status --wait-ready --timeout 120" + _, _, err := sshClient.RunCommand(ctx, checkCmd) + if err != nil { + fmt.Printf("MicroK8s not found or not ready, attempting to install on instance %s\n", instanceID) + _, stderr, installErr := sshClient.RunCommand(ctx, "sudo snap install microk8s --classic") + if installErr != nil { + return "", fmt.Errorf("microk8s not available and failed to install: %w, stderr: %s", installErr, stderr) + } + _, stderr, readyErr := sshClient.RunCommand(ctx, checkCmd) + if readyErr != nil { + return "", fmt.Errorf("microk8s installed but not ready: %w, stderr: %s", readyErr, stderr) + } + } + + _, stderr, err := sshClient.RunCommand(ctx, "sudo microk8s enable dns") + if err != nil && !strings.Contains(stderr, "Nothing to do for dns") && !strings.Contains(stderr, "is already enabled") { + return "", fmt.Errorf("failed to enable microk8s dns addon: %w, stderr: %s", err, stderr) + } + + return "sudo microk8s", nil +} + // waitForDockerService waits for a Docker container's service to be ready and responding func waitForDockerService(ctx context.Context, sshClient *ssh.Client, dockerCmd, containerName string, port int) error { for i := 0; i < 30; i++ { // Try for up to 30 seconds diff --git a/v1/providers/nebius/instance.go b/v1/providers/nebius/instance.go index 3460efdb..3efcdc4f 100644 --- a/v1/providers/nebius/instance.go +++ b/v1/providers/nebius/instance.go @@ -1815,8 +1815,14 @@ func generateIPTablesCommands() []string { "iptables -A DOCKER-USER -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT", "iptables -A DOCKER-USER -i docker0 ! -o docker0 -j ACCEPT", "iptables -A DOCKER-USER -i br+ ! -o br+ -j ACCEPT", + "iptables -A DOCKER-USER -i cni+ ! -o cni+ -j ACCEPT", + "iptables -A DOCKER-USER -i cali+ ! -o cali+ -j ACCEPT", "iptables -A DOCKER-USER -i docker0 -o docker0 -j ACCEPT", "iptables -A DOCKER-USER -i br+ -o br+ -j ACCEPT", + "iptables -A DOCKER-USER -i cni+ -o cni+ -j ACCEPT", + "iptables -A DOCKER-USER -i cali+ -o cali+ -j ACCEPT", + "iptables -A DOCKER-USER -i flannel+ -j ACCEPT", + "iptables -A DOCKER-USER -i vxlan.calico -j ACCEPT", "iptables -A DOCKER-USER -i lo -j ACCEPT", "iptables -A DOCKER-USER -j DROP", "iptables -A DOCKER-USER -j RETURN", // Expected by Docker diff --git a/v1/providers/shadeform/firewall.go b/v1/providers/shadeform/firewall.go index 13a00137..bd2b5017 100644 --- a/v1/providers/shadeform/firewall.go +++ b/v1/providers/shadeform/firewall.go @@ -24,10 +24,16 @@ const ( // Allow containers to initiate outbound traffic (default bridge + user-defined bridges). ipTablesAllowDockerUserOutboundInit0 = "iptables -A DOCKER-USER -i docker0 ! -o docker0 -j ACCEPT" ipTablesAllowDockerUserOutboundInit1 = "iptables -A DOCKER-USER -i br+ ! -o br+ -j ACCEPT" + ipTablesAllowDockerUserOutboundInit2 = "iptables -A DOCKER-USER -i cni+ ! -o cni+ -j ACCEPT" + ipTablesAllowDockerUserOutboundInit3 = "iptables -A DOCKER-USER -i cali+ ! -o cali+ -j ACCEPT" // Allow container-to-container on the same bridge. 
+	ipTablesAllowDockerUserDockerToDocker2 = "iptables -A DOCKER-USER -i cni+ -o cni+ -j ACCEPT"
+	ipTablesAllowDockerUserDockerToDocker3 = "iptables -A DOCKER-USER -i cali+ -o cali+ -j ACCEPT"
+	ipTablesAllowDockerUserFlannel         = "iptables -A DOCKER-USER -i flannel+ -j ACCEPT"
+	ipTablesAllowDockerUserCalicoVXLAN     = "iptables -A DOCKER-USER -i vxlan.calico -j ACCEPT"
 
 	// Allow inbound traffic on the loopback interface.
 	ipTablesAllowDockerUserInpboundLoopback = "iptables -A DOCKER-USER -i lo -j ACCEPT"
@@ -80,8 +86,14 @@ func (c *ShadeformClient) getIPTablesCommands() []string {
 		ipTablesAllowDockerUserOutbound,
 		ipTablesAllowDockerUserOutboundInit0,
 		ipTablesAllowDockerUserOutboundInit1,
+		ipTablesAllowDockerUserOutboundInit2,
+		ipTablesAllowDockerUserOutboundInit3,
 		ipTablesAllowDockerUserDockerToDocker0,
 		ipTablesAllowDockerUserDockerToDocker1,
+		ipTablesAllowDockerUserDockerToDocker2,
+		ipTablesAllowDockerUserDockerToDocker3,
+		ipTablesAllowDockerUserFlannel,
+		ipTablesAllowDockerUserCalicoVXLAN,
 		ipTablesAllowDockerUserInpboundLoopback,
 		ipTablesDropDockerUserInbound,
 		ipTablesReturnDockerUser, // Expected by Docker