|
| 1 | +package io.sentrius.agent.launcher.service; |
| 2 | + |
| 3 | +import java.io.IOException; |
| 4 | +import java.util.ArrayList; |
| 5 | +import java.util.List; |
| 6 | +import java.util.Map; |
| 7 | +import java.util.Optional; |
| 8 | +import java.util.regex.Matcher; |
| 9 | +import java.util.regex.Pattern; |
| 10 | +import io.kubernetes.client.custom.IntOrString; |
| 11 | +import io.kubernetes.client.custom.Quantity; |
| 12 | +import io.kubernetes.client.openapi.ApiClient; |
| 13 | +import io.kubernetes.client.openapi.ApiException; |
| 14 | +import io.kubernetes.client.openapi.apis.CoreV1Api; |
| 15 | +import io.kubernetes.client.openapi.models.V1ConfigMapVolumeSource; |
| 16 | +import io.kubernetes.client.openapi.models.V1Container; |
| 17 | +import io.kubernetes.client.openapi.models.V1ObjectMeta; |
| 18 | +import io.kubernetes.client.openapi.models.V1Pod; |
| 19 | +import io.kubernetes.client.openapi.models.V1PodSpec; |
| 20 | +import io.kubernetes.client.openapi.models.V1PodStatus; |
| 21 | +import io.kubernetes.client.openapi.models.V1ResourceRequirements; |
| 22 | +import io.kubernetes.client.openapi.models.V1Service; |
| 23 | +import io.kubernetes.client.openapi.models.V1ServicePort; |
| 24 | +import io.kubernetes.client.openapi.models.V1ServiceSpec; |
| 25 | +import io.kubernetes.client.openapi.models.V1Volume; |
| 26 | +import io.kubernetes.client.openapi.models.V1VolumeMount; |
| 27 | +import io.kubernetes.client.util.Config; |
| 28 | +import io.sentrius.sso.core.dto.AgentRegistrationDTO; |
| 29 | +import io.sentrius.sso.core.model.AgentStatus; |
| 30 | +import lombok.extern.slf4j.Slf4j; |
| 31 | +import org.springframework.beans.factory.annotation.Value; |
| 32 | +import org.springframework.scheduling.annotation.Async; |
| 33 | +import org.springframework.scheduling.annotation.Scheduled; |
| 34 | +import org.springframework.stereotype.Service; |
| 35 | + |
| 36 | +@Slf4j |
| 37 | +@Service |
| 38 | +public class PodMonitor { |
| 39 | + |
| 40 | + private final CoreV1Api coreV1Api; |
| 41 | + |
| 42 | + @Value("${sentrius.agent.registry}") |
| 43 | + private String agentRegistry; |
| 44 | + |
| 45 | + @Value("${sentrius.agent.namespace}") |
| 46 | + private String agentNamespace; |
| 47 | + |
| 48 | + public PodMonitor() throws IOException { |
| 49 | + ApiClient client = Config.defaultClient(); // in-cluster or kubeconfig |
| 50 | + this.coreV1Api = new CoreV1Api(client); |
| 51 | + } |
| 52 | + |
| 53 | + @Scheduled(fixedDelay = 60000) // Runs every 60 seconds |
| 54 | + @Async |
| 55 | + public void removePodsInErrorState() throws ApiException { |
| 56 | + |
| 57 | + log.info("Identifying pods to be removed"); |
| 58 | + var pods = coreV1Api.listNamespacedPod( |
| 59 | + agentNamespace |
| 60 | + ).execute().getItems(); |
| 61 | + |
| 62 | + List<V1Pod> podsToRemove = new ArrayList<>(); |
| 63 | + for (V1Pod pod : pods) { |
| 64 | + |
| 65 | + var podName = pod.getMetadata().getName(); |
| 66 | + |
| 67 | + if (podName == null || !podName.startsWith("sentrius-agent-")) { |
| 68 | + log.info("Skipping pod {}", podName); |
| 69 | + continue; |
| 70 | + } |
| 71 | + V1PodStatus status = pod.getStatus(); |
| 72 | + if (status == null) { |
| 73 | + log.warn("Pod {} has no status information", podName); |
| 74 | + continue; |
| 75 | + } |
| 76 | + |
| 77 | + String phase = status.getPhase(); // e.g., "Running", "Pending", "Failed", "Succeeded" |
| 78 | + if ("Error".equalsIgnoreCase(phase) || "Failed".equalsIgnoreCase(phase)) { |
| 79 | + log.info("Pod {} is in phase {}, adding to removal list", podName, phase); |
| 80 | + podsToRemove.add(pod); |
| 81 | + } else { |
| 82 | + log.info("Pod {} is in phase {}, skipping", podName, phase); |
| 83 | + } |
| 84 | + } |
| 85 | + |
| 86 | + for (V1Pod pod : podsToRemove) { |
| 87 | + var podName = pod.getMetadata().getName(); |
| 88 | + try { |
| 89 | + assert podName != null; |
| 90 | + coreV1Api.deleteNamespacedPod( |
| 91 | + podName, |
| 92 | + agentNamespace |
| 93 | + ).execute(); |
| 94 | + log.info("Deleted pod {}", podName); |
| 95 | + } catch (ApiException e) { |
| 96 | + log.error("Failed to delete pod {}: {}", podName, e.getResponseBody()); |
| 97 | + } |
| 98 | + } |
| 99 | + |
| 100 | + |
| 101 | + } |
| 102 | +} |
0 commit comments