diff --git a/Makefile b/Makefile index 3ed7879c16e..c7342038963 100644 --- a/Makefile +++ b/Makefile @@ -200,6 +200,7 @@ karpenter-api: $(CONTROLLER_GEN) $(YQ) karpenter-operator/hack/adjust-cel.sh $(CONTROLLER_GEN) $(CRD_OPTIONS) paths="./api/karpenter/..." output:crd:artifacts:config=karpenter-operator/controllers/karpenter/assets cp karpenter-operator/controllers/karpenter/assets/karpenter.hypershift.openshift.io_openshiftec2nodeclasses.yaml karpenter-operator/controllers/karpenter/assets/zz_generated.crd-manifests/openshiftec2nodeclasses.crd.yaml + $(GO) run ./hack/kubelet-ratcheting-gen/main.go .PHONY: control-plane-operator control-plane-operator: diff --git a/api/.golangci.yml b/api/.golangci.yml index 822c8a8d9b6..6a6b9124223 100644 --- a/api/.golangci.yml +++ b/api/.golangci.yml @@ -319,6 +319,27 @@ linters: - kubeapilinter path: hypershift/v1beta1/openstack.go text: 'nobools: field OpenStackPlatformSpec.DisableExternalNetwork pointer should not use a bool. Use a string type with meaningful constant values as an enum.' + - linters: + - kubeapilinter + path: karpenter/v1/kubelet_config.go + text: 'nobools: field KubeletConfiguration.CPUCFSQuota pointer should not use a bool. Use a string type with meaningful constant values as an enum.' + + - linters: + - kubeapilinter + path: karpenter/v1/karpenter_types.go + text: 'minlength: field OpenshiftEC2NodeClassSpec.Kubelet type KubeletConfiguration must have a minimum properties, add kubebuilder:validation:MinProperties marker' + - linters: + - kubeapilinter + path: karpenter/v1/karpenter_types.go + text: 'optionalfields: field OpenshiftEC2NodeClassSpec.Kubelet has a valid zero value' + - linters: + - kubeapilinter + path: karpenter/v1/kubelet_config.go + text: 'nodurations: field KubeletConfiguration.EvictionSoftGracePeriod map value should not use a Duration. Use an integer type with units in the name to avoid the need for clients to implement Go style duration parsing.' 
+ - linters: + - kubeapilinter + path: karpenter/v1/kubelet_config.go + text: 'nomaps: field KubeletConfiguration\.(SystemReserved|KubeReserved|EvictionHard|EvictionSoft|EvictionSoftGracePeriod)' # statussubresource (1 issue) - linters: diff --git a/api/AGENTS.md b/api/AGENTS.md index 9dc4ee7f33e..57075e26c8f 100644 --- a/api/AGENTS.md +++ b/api/AGENTS.md @@ -55,6 +55,33 @@ To avoid introducing new dependencies, do not add utils or methods to the API ty - **Ratcheting validation**: when adding new validation to existing fields, verify that existing clusters with values that predate the new validation can still be updated. CRD validation ratchets (allows unchanged invalid values through), but only for fields that are literally unchanged in the update. +### KubeletConfiguration Field Graduation (Karpenter) + +The `KubeletConfiguration` type in `api/karpenter/v1/kubelet_config.go` accepts arbitrary kubelet +configuration fields via a `PreserveUnknownFields` schema and a `runtime.RawExtension` overflow +mechanism. A subset of these fields are promoted to explicitly typed struct fields with CEL validation. + +When graduating a field from overflow to a typed struct field: + +- **Match upstream kubelet's field name and JSON tag exactly.** The primary compatibility target is + `k8s.io/kubelet/config/v1beta1.KubeletConfiguration`. Users today set kubelet fields that land in + our overflow using kubelet field names. Changing a name would be a breaking API change requiring a + new API version, because existing serialized data (and user-supplied YAML) uses the kubelet name. +- **Use upstream Karpenter as a secondary reference.** Upstream Karpenter + (`github.com/aws/karpenter-provider-aws/pkg/apis/v1.KubeletConfiguration`) currently mirrors + kubelet field names. If Karpenter ever diverges from kubelet naming, follow kubelet and add a + translation layer in `karpenterKubeletConfigurationFromNodeClassSpec()`. 
+- **Match the Go type from upstream kubelet/Karpenter.** Use the same Go type (pointer vs value, map + key/value types) to keep the mapping trivial and avoid serialization incompatibilities. Minor + differences (e.g., bare `int32` vs `*int32`) are acceptable only when they don't change the JSON + representation. +- **Add stricter CEL validation as needed.** Admission-time validation does not affect serialization + compatibility. We can add constraints beyond what upstream provides (e.g., `Minimum=1` instead of + `Minimum=0`, cross-field rules like `imageGCHigh > imageGCLow`) without breaking the API. +- **Do not rename fields for OpenShift conventions.** Normally OpenShift API conventions may prefer + different naming. For KubeletConfiguration fields, upstream kubelet compatibility takes precedence + over convention to avoid breaking changes. + ## API Type Change Guidelines ### N-1 and N+1 Compatibility diff --git a/api/karpenter/v1/karpenter_types.go b/api/karpenter/v1/karpenter_types.go index beba4471e10..902c6e1a95c 100644 --- a/api/karpenter/v1/karpenter_types.go +++ b/api/karpenter/v1/karpenter_types.go @@ -275,6 +275,11 @@ type OpenshiftEC2NodeClassSpec struct { // +kubebuilder:validation:MaxLength=64 // +optional Version string `json:"version,omitempty"` + + // kubelet configures kubelet settings for nodes provisioned by this NodeClass. + // These settings are injected into the node's ignition configuration via MachineConfig. + // +optional + Kubelet KubeletConfiguration `json:"kubelet,omitzero"` } // SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes. 
diff --git a/api/karpenter/v1/kubelet_config.go b/api/karpenter/v1/kubelet_config.go new file mode 100644 index 00000000000..7e3b8b75779 --- /dev/null +++ b/api/karpenter/v1/kubelet_config.go @@ -0,0 +1,219 @@ +package v1 + +import ( + "encoding/json" + "reflect" + "strings" + + "k8s.io/apimachinery/pkg/runtime" +) + +// kubeletConfigKnownFields is the set of JSON keys corresponding to the explicitly typed +// fields in KubeletConfiguration. It is derived from the struct's json tags at init time +// so it stays in sync automatically when fields are added or removed. +var kubeletConfigKnownFields map[string]struct{} + +func init() { + t := reflect.TypeOf(KubeletConfiguration{}) + kubeletConfigKnownFields = make(map[string]struct{}, t.NumField()) + for i := range t.NumField() { + f := t.Field(i) + if tag, ok := f.Tag.Lookup("json"); ok { + name, _, _ := strings.Cut(tag, ",") + if name != "" && name != "-" { + kubeletConfigKnownFields[name] = struct{}{} + } + } + } +} + +// EvictionThreshold is a threshold value for a kubelet eviction signal. +// Values are either a percentage (e.g. "10%") or a Kubernetes quantity (e.g. "100Mi"). +// +kubebuilder:validation:MaxLength=64 +type EvictionThreshold string + +// KubeletConfiguration configures kubelet settings for nodes provisioned by this NodeClass. +// These settings are injected into the node's ignition configuration via MachineConfig. +// The fields listed below are validated at admission time. Additional kubelet configuration +// fields beyond those listed here are also accepted and will be passed through to the node's +// kubelet configuration without validation. Overflow fields bypass all CRD validation; +// invalid overflow values will cause node bootstrap failures (kubelet crash loop) rather +// than admission errors. +// When graduating new fields from overflow to typed fields, match upstream kubelet's +// field names and types exactly. 
See api/AGENTS.md "KubeletConfiguration Field Graduation" +// for the full strategy. +// +kubebuilder:pruning:PreserveUnknownFields +// +kubebuilder:validation:XValidation:rule="!has(self.imageGCHighThresholdPercent) || !has(self.imageGCLowThresholdPercent) || self.imageGCHighThresholdPercent > self.imageGCLowThresholdPercent",message="imageGCHighThresholdPercent must be greater than imageGCLowThresholdPercent" +// +kubebuilder:validation:XValidation:rule="!has(self.podsPerCore) || !has(self.maxPods) || self.podsPerCore <= self.maxPods",message="podsPerCore must not exceed maxPods" +// +kubebuilder:validation:XValidation:rule="!has(self.evictionSoft) || (has(self.evictionSoftGracePeriod) && self.evictionSoft.all(e, e in self.evictionSoftGracePeriod))",message="evictionSoft entry does not have a matching evictionSoftGracePeriod" +// +kubebuilder:validation:XValidation:rule="!has(self.evictionSoftGracePeriod) || (has(self.evictionSoft) && self.evictionSoftGracePeriod.all(e, e in self.evictionSoft))",message="evictionSoftGracePeriod entry does not have a matching evictionSoft" +// +kubebuilder:validation:XValidation:rule="!has(self.evictionHard) || !has(self.evictionSoft) || self.evictionHard.all(key, !(key in self.evictionSoft) || ((self.evictionSoft[key].endsWith('%') && self.evictionHard[key].endsWith('%')) ? (self.evictionSoft[key].size() <= 1 || self.evictionHard[key].size() <= 1 || double(self.evictionSoft[key].substring(0, self.evictionSoft[key].size() - 1)) >= double(self.evictionHard[key].substring(0, self.evictionHard[key].size() - 1))) : (!(isQuantity(self.evictionSoft[key]) && isQuantity(self.evictionHard[key])) || quantity(self.evictionSoft[key]).compareTo(quantity(self.evictionHard[key])) >= 0)))",message="evictionSoft threshold must be greater than or equal to evictionHard threshold for the same signal (soft eviction should fire before hard)" +type KubeletConfiguration struct { + // maxPods is the maximum number of pods that can run on a node. 
+ // The value must be between 1 and 2500, inclusive. + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=2500 + // +optional + MaxPods int32 `json:"maxPods,omitempty"` + // podsPerCore is the maximum number of pods per core. The value must be between 1 and 2500, + // inclusive, and cannot exceed maxPods when both are set. + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=2500 + // +optional + PodsPerCore int32 `json:"podsPerCore,omitempty"` + // systemReserved is a set of ResourceName=ResourceQuantity pairs that describe + // resources reserved for non-kubernetes components. + // Currently only cpu, memory, ephemeral-storage, and pid are supported. + // +kubebuilder:validation:XValidation:message="valid keys for systemReserved are ['cpu','memory','ephemeral-storage','pid']",rule="self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' || x=='pid')" + // +kubebuilder:validation:XValidation:message="systemReserved value cannot be a negative resource quantity",rule="self.all(x, !self[x].startsWith('-'))" + // +kubebuilder:validation:XValidation:message="systemReserved values must not be empty",rule="self.all(x, self[x].size() > 0)" + // +kubebuilder:validation:MinProperties=1 + // +kubebuilder:validation:MaxProperties=20 + // +optional + SystemReserved map[string]string `json:"systemReserved,omitempty"` + // kubeReserved is a set of ResourceName=ResourceQuantity pairs that describe + // resources reserved for kubernetes system components. + // Currently only cpu, memory, ephemeral-storage, and pid are supported. 
+ // +kubebuilder:validation:XValidation:message="valid keys for kubeReserved are ['cpu','memory','ephemeral-storage','pid']",rule="self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' || x=='pid')" + // +kubebuilder:validation:XValidation:message="kubeReserved value cannot be a negative resource quantity",rule="self.all(x, !self[x].startsWith('-'))" + // +kubebuilder:validation:XValidation:message="kubeReserved values must not be empty",rule="self.all(x, self[x].size() > 0)" + // +kubebuilder:validation:MinProperties=1 + // +kubebuilder:validation:MaxProperties=20 + // +optional + KubeReserved map[string]string `json:"kubeReserved,omitempty"` + // evictionHard is a map of signal names to quantities that defines hard eviction thresholds. + // +kubebuilder:validation:XValidation:message="valid keys for evictionHard are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']",rule="self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'])" + // +kubebuilder:validation:XValidation:message="evictionHard values must not be empty",rule="self.all(x, self[x].size() > 0)" + // +kubebuilder:validation:MinProperties=1 + // +kubebuilder:validation:MaxProperties=20 + // +optional + EvictionHard map[string]EvictionThreshold `json:"evictionHard,omitempty"` + // evictionSoft is a map of signal names to quantities that defines soft eviction thresholds. 
+ // +kubebuilder:validation:XValidation:message="valid keys for evictionSoft are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']",rule="self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'])" + // +kubebuilder:validation:XValidation:message="evictionSoft values must not be empty",rule="self.all(x, self[x].size() > 0)" + // +kubebuilder:validation:MinProperties=1 + // +kubebuilder:validation:MaxProperties=20 + // +optional + EvictionSoft map[string]EvictionThreshold `json:"evictionSoft,omitempty"` + // evictionSoftGracePeriod is a map of signal names to duration strings (e.g. "30s") that defines grace periods + // for each soft eviction signal. + // +kubebuilder:validation:XValidation:message="valid keys for evictionSoftGracePeriod are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']",rule="self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'])" + // +kubebuilder:validation:MinProperties=1 + // +kubebuilder:validation:MaxProperties=20 + // +optional + EvictionSoftGracePeriod map[string]string `json:"evictionSoftGracePeriod,omitempty"` + // evictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use + // when terminating pods in response to soft eviction thresholds. + // +optional + EvictionMaxPodGracePeriod *int32 `json:"evictionMaxPodGracePeriod,omitempty"` + // imageGCHighThresholdPercent is the percent of disk usage which triggers image garbage collection. + // The value must be between 0 and 100, inclusive, and must be greater than imageGCLowThresholdPercent when both are set.
+ // +kubebuilder:validation:Minimum=0 + // +kubebuilder:validation:Maximum=100 + // +optional + ImageGCHighThresholdPercent *int32 `json:"imageGCHighThresholdPercent,omitempty"` + // imageGCLowThresholdPercent is the percent of disk usage to which image garbage collection attempts to free. + // The value must be between 0 and 100, inclusive, and must be less than imageGCHighThresholdPercent when both are set. + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:validation:Maximum=100 + // +optional + ImageGCLowThresholdPercent *int32 `json:"imageGCLowThresholdPercent,omitempty"` + // cpuCFSQuota enables CPU CFS quota enforcement for containers that specify CPU limits. + // +optional + CPUCFSQuota *bool `json:"cpuCFSQuota,omitempty"` + + // Overflow holds additional kubelet configuration fields not explicitly defined above. + // These fields are preserved during serialization and deserialization, allowing arbitrary + // kubelet configuration to pass through to the node's ignition/MachineConfig. + Overflow runtime.RawExtension `json:"-"` +} + +// UnmarshalJSON implements custom JSON unmarshaling for KubeletConfiguration. +// It deserializes known fields into the struct and captures all additional fields +// into the overflow map for pass-through. +func (k *KubeletConfiguration) UnmarshalJSON(data []byte) error { + // Zero the receiver so that fields absent from the new input + // (including Overflow, which is json:"-") do not survive from a + // previous decode. 
+	*k = KubeletConfiguration{}
+
+	// Unmarshal known fields via alias to avoid infinite recursion
+	type Alias KubeletConfiguration
+	aux := &struct{ *Alias }{Alias: (*Alias)(k)}
+	if err := json.Unmarshal(data, aux); err != nil {
+		return err
+	}
+
+	// Unmarshal everything into a raw map
+	var raw map[string]json.RawMessage
+	if err := json.Unmarshal(data, &raw); err != nil {
+		return err
+	}
+
+	// Separate unknown fields into overflow
+	for key := range kubeletConfigKnownFields {
+		delete(raw, key)
+	}
+	if len(raw) > 0 {
+		overflowBytes, err := json.Marshal(raw)
+		if err != nil {
+			return err
+		}
+		k.Overflow = runtime.RawExtension{Raw: overflowBytes}
+	}
+	return nil
+}
+
+// MarshalJSON implements custom JSON marshaling for KubeletConfiguration.
+// It serializes the typed fields and merges the keys held in Overflow.Raw into the
+// same JSON object. When a key is present in both, the typed field's value is emitted.
+// With no overflow bytes the typed-field encoding is returned unchanged. An error is
+// returned if the typed fields cannot be encoded or if Overflow.Raw is neither a JSON
+// object nor null.
+func (k KubeletConfiguration) MarshalJSON() ([]byte, error) {
+	// Marshal known fields via alias to avoid infinite recursion
+	type Alias KubeletConfiguration
+	data, err := json.Marshal((*Alias)(&k))
+	if err != nil {
+		return nil, err
+	}
+
+	if len(k.Overflow.Raw) == 0 {
+		return data, nil
+	}
+
+	// Merge overflow fields into the output; structured fields win on conflict.
+	// The merge target is the structured map: data is always a JSON object, so that
+	// map is always allocated. A JSON null overflow decodes to a nil map; ranging
+	// over it is a no-op, whereas writing into it would panic.
+	var overflowMap map[string]json.RawMessage
+	if err := json.Unmarshal(k.Overflow.Raw, &overflowMap); err != nil {
+		return nil, err
+	}
+	var structured map[string]json.RawMessage
+	if err := json.Unmarshal(data, &structured); err != nil {
+		return nil, err
+	}
+	for key, val := range overflowMap {
+		if _, typed := structured[key]; !typed {
+			structured[key] = val
+		}
+	}
+	return json.Marshal(structured)
+}
+
+// HasTypedFields reports whether any explicitly defined struct fields are set.
+// This is used by IsZero, but is separate so we can differentiate the zero case
+// from "only overflow fields set". This must be kept in sync with the typed
+// fields in KubeletConfiguration.
+func (k KubeletConfiguration) HasTypedFields() bool { + return k.MaxPods != 0 || + k.PodsPerCore != 0 || + k.SystemReserved != nil || + k.KubeReserved != nil || + k.EvictionHard != nil || + k.EvictionSoft != nil || + k.EvictionSoftGracePeriod != nil || + k.EvictionMaxPodGracePeriod != nil || + k.ImageGCHighThresholdPercent != nil || + k.ImageGCLowThresholdPercent != nil || + k.CPUCFSQuota != nil +} + +// IsZero reports whether the KubeletConfiguration is empty (no typed fields set and +// no overflow fields). This is used by the omitzero JSON tag to determine whether the +// field should be omitted during serialization. +func (k KubeletConfiguration) IsZero() bool { + return !k.HasTypedFields() && len(k.Overflow.Raw) == 0 +} diff --git a/api/karpenter/v1/kubelet_config_test.go b/api/karpenter/v1/kubelet_config_test.go new file mode 100644 index 00000000000..be17f6a3ad2 --- /dev/null +++ b/api/karpenter/v1/kubelet_config_test.go @@ -0,0 +1,234 @@ +package v1 + +import ( + "encoding/json" + "reflect" + "testing" + + "k8s.io/utils/ptr" +) + +func TestKubeletConfigurationMarshalRoundTrip(t *testing.T) { + testCases := []struct { + name string + config KubeletConfiguration + }{ + { + name: "When all typed fields are set they should round-trip", + config: KubeletConfiguration{ + MaxPods: 110, + PodsPerCore: 10, + SystemReserved: map[string]string{ + "cpu": "100m", + "memory": "256Mi", + }, + KubeReserved: map[string]string{ + "cpu": "200m", + "memory": "512Mi", + }, + EvictionHard: map[string]EvictionThreshold{ + "memory.available": "100Mi", + }, + EvictionSoft: map[string]EvictionThreshold{ + "memory.available": "200Mi", + }, + EvictionSoftGracePeriod: map[string]string{ + "memory.available": "30s", + }, + EvictionMaxPodGracePeriod: ptr.To(int32(60)), + ImageGCHighThresholdPercent: ptr.To(int32(85)), + ImageGCLowThresholdPercent: ptr.To(int32(80)), + CPUCFSQuota: ptr.To(true), + }, + }, + { + name: "When only some fields are set they should round-trip", + config: 
KubeletConfiguration{ + MaxPods: 50, + CPUCFSQuota: ptr.To(false), + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + data, err := json.Marshal(tc.config) + if err != nil { + t.Fatalf("failed to marshal: %v", err) + } + + var roundTripped KubeletConfiguration + if err := json.Unmarshal(data, &roundTripped); err != nil { + t.Fatalf("failed to unmarshal: %v", err) + } + + if !reflect.DeepEqual(tc.config, roundTripped) { + t.Errorf("round-trip mismatch:\n original: %+v\n round-tripped: %+v", tc.config, roundTripped) + } + }) + } +} + +func TestKubeletConfigurationOverflowPreservation(t *testing.T) { + t.Run("When unknown fields are present they should be preserved through marshal/unmarshal", func(t *testing.T) { + input := `{ + "maxPods": 110, + "registryPullQPS": 5, + "registryBurst": 10, + "containerLogMaxSize": "10Mi", + "podPidsLimit": 4096 + }` + + var config KubeletConfiguration + if err := json.Unmarshal([]byte(input), &config); err != nil { + t.Fatalf("failed to unmarshal: %v", err) + } + + // Typed field should be populated + if config.MaxPods != 110 { + t.Errorf("expected MaxPods=110, got %v", config.MaxPods) + } + + // Marshal back + data, err := json.Marshal(config) + if err != nil { + t.Fatalf("failed to marshal: %v", err) + } + + // Verify overflow fields are present in output + var raw map[string]json.RawMessage + if err := json.Unmarshal(data, &raw); err != nil { + t.Fatalf("failed to unmarshal to raw map: %v", err) + } + + for _, key := range []string{"maxPods", "registryPullQPS", "registryBurst", "containerLogMaxSize", "podPidsLimit"} { + if _, ok := raw[key]; !ok { + t.Errorf("expected key %q to be present in marshaled output", key) + } + } + }) + + t.Run("When only overflow fields are present they should round-trip", func(t *testing.T) { + input := `{"registryPullQPS": 5, "registryBurst": 10}` + + var config KubeletConfiguration + if err := json.Unmarshal([]byte(input), &config); err != nil { + t.Fatalf("failed 
to unmarshal: %v", err) + } + + data, err := json.Marshal(config) + if err != nil { + t.Fatalf("failed to marshal: %v", err) + } + + var raw map[string]json.RawMessage + if err := json.Unmarshal(data, &raw); err != nil { + t.Fatalf("failed to unmarshal to raw map: %v", err) + } + + if len(raw) != 2 { + t.Errorf("expected 2 keys, got %d: %v", len(raw), raw) + } + if _, ok := raw["registryPullQPS"]; !ok { + t.Error("expected registryPullQPS in output") + } + if _, ok := raw["registryBurst"]; !ok { + t.Error("expected registryBurst in output") + } + }) + + t.Run("When a structured field conflicts with an overflow field the structured field should win", func(t *testing.T) { + input := `{"maxPods": 110, "registryPullQPS": 5}` + var config KubeletConfiguration + if err := json.Unmarshal([]byte(input), &config); err != nil { + t.Fatalf("failed to unmarshal: %v", err) + } + + // Simulate a stale overflow that still contains maxPods from a prior serialization + config.Overflow.Raw = []byte(`{"maxPods": 999, "registryPullQPS": 5}`) + config.MaxPods = 42 + + data, err := json.Marshal(config) + if err != nil { + t.Fatalf("failed to marshal: %v", err) + } + + var raw map[string]json.RawMessage + if err := json.Unmarshal(data, &raw); err != nil { + t.Fatalf("failed to unmarshal to raw map: %v", err) + } + + if string(raw["maxPods"]) != "42" { + t.Errorf("expected structured maxPods=42 to win, got %s", raw["maxPods"]) + } + if _, ok := raw["registryPullQPS"]; !ok { + t.Error("expected overflow field registryPullQPS to still be present") + } + }) + + t.Run("When there are no overflow fields the output should contain only typed fields", func(t *testing.T) { + config := KubeletConfiguration{ + MaxPods: 110, + } + + data, err := json.Marshal(config) + if err != nil { + t.Fatalf("failed to marshal: %v", err) + } + + var raw map[string]json.RawMessage + if err := json.Unmarshal(data, &raw); err != nil { + t.Fatalf("failed to unmarshal to raw map: %v", err) + } + + if len(raw) != 1 { + 
t.Errorf("expected 1 key, got %d: %v", len(raw), raw)
+		}
+	})
+}
+
+// TestKubeletConfigurationDeepCopy checks that DeepCopy carries over typed and overflow
+// fields, that the copy does not share state with the original, and that a nil receiver
+// yields nil.
+func TestKubeletConfigurationDeepCopy(t *testing.T) {
+	t.Run("When overflow fields are present DeepCopy should preserve them", func(t *testing.T) {
+		input := `{"maxPods": 110, "registryPullQPS": 5}`
+
+		var original KubeletConfiguration
+		if err := json.Unmarshal([]byte(input), &original); err != nil {
+			t.Fatalf("failed to unmarshal: %v", err)
+		}
+
+		copied := original.DeepCopy()
+
+		// Verify typed fields are copied
+		if copied.MaxPods != 110 {
+			t.Errorf("expected MaxPods=110, got %v", copied.MaxPods)
+		}
+
+		// Verify typed fields are independent (modifying copy shouldn't affect original)
+		copied.MaxPods = 200
+
+		if original.MaxPods != 110 {
+			t.Error("modifying copy affected original MaxPods")
+		}
+
+		// Verify overflow survives marshal
+		data, err := json.Marshal(copied)
+		if err != nil {
+			t.Fatalf("failed to marshal copy: %v", err)
+		}
+
+		var raw map[string]json.RawMessage
+		if err := json.Unmarshal(data, &raw); err != nil {
+			t.Fatalf("failed to unmarshal to raw map: %v", err)
+		}
+		if _, ok := raw["registryPullQPS"]; !ok {
+			t.Error("overflow field registryPullQPS not preserved in deep copy")
+		}
+
+		// Verify overflow bytes are independent: wiping the copy's buffer must leave
+		// the original untouched. Done last because it invalidates the copy's overflow.
+		wantOverflow := string(original.Overflow.Raw)
+		if len(copied.Overflow.Raw) == 0 {
+			t.Fatal("expected overflow bytes on the copy")
+		}
+		clear(copied.Overflow.Raw)
+		if got := string(original.Overflow.Raw); got != wantOverflow {
+			t.Errorf("modifying copy affected original overflow: got %q, want %q", got, wantOverflow)
+		}
+	})
+
+	t.Run("When KubeletConfiguration is nil DeepCopy should return nil", func(t *testing.T) {
+		var nilConfig *KubeletConfiguration
+		if nilConfig.DeepCopy() != nil {
+			t.Error("expected nil from DeepCopy of nil")
+		}
+	})
+}
diff --git a/api/karpenter/v1/zz_generated.deepcopy.go b/api/karpenter/v1/zz_generated.deepcopy.go
index 8c1af4f6438..75ff805c942 100644
--- a/api/karpenter/v1/zz_generated.deepcopy.go
+++ b/api/karpenter/v1/zz_generated.deepcopy.go
@@ -104,6 +104,77 @@ func (in *CapacityReservationSelectorTerm) DeepCopy() *CapacityReservationSelect
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) { + *out = *in + if in.SystemReserved != nil { + in, out := &in.SystemReserved, &out.SystemReserved + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.KubeReserved != nil { + in, out := &in.KubeReserved, &out.KubeReserved + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.EvictionHard != nil { + in, out := &in.EvictionHard, &out.EvictionHard + *out = make(map[string]EvictionThreshold, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.EvictionSoft != nil { + in, out := &in.EvictionSoft, &out.EvictionSoft + *out = make(map[string]EvictionThreshold, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.EvictionSoftGracePeriod != nil { + in, out := &in.EvictionSoftGracePeriod, &out.EvictionSoftGracePeriod + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.EvictionMaxPodGracePeriod != nil { + in, out := &in.EvictionMaxPodGracePeriod, &out.EvictionMaxPodGracePeriod + *out = new(int32) + **out = **in + } + if in.ImageGCHighThresholdPercent != nil { + in, out := &in.ImageGCHighThresholdPercent, &out.ImageGCHighThresholdPercent + *out = new(int32) + **out = **in + } + if in.ImageGCLowThresholdPercent != nil { + in, out := &in.ImageGCLowThresholdPercent, &out.ImageGCLowThresholdPercent + *out = new(int32) + **out = **in + } + if in.CPUCFSQuota != nil { + in, out := &in.CPUCFSQuota, &out.CPUCFSQuota + *out = new(bool) + **out = **in + } + in.Overflow.DeepCopyInto(&out.Overflow) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeletConfiguration. 
+func (in *KubeletConfiguration) DeepCopy() *KubeletConfiguration { + if in == nil { + return nil + } + out := new(KubeletConfiguration) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MetadataOptions) DeepCopyInto(out *MetadataOptions) { *out = *in @@ -217,6 +288,7 @@ func (in *OpenshiftEC2NodeClassSpec) DeepCopyInto(out *OpenshiftEC2NodeClassSpec } } out.MetadataOptions = in.MetadataOptions + in.Kubelet.DeepCopyInto(&out.Kubelet) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OpenshiftEC2NodeClassSpec. diff --git a/client/applyconfiguration/karpenter/v1/kubeletconfiguration.go b/client/applyconfiguration/karpenter/v1/kubeletconfiguration.go new file mode 100644 index 00000000000..02dad155b5b --- /dev/null +++ b/client/applyconfiguration/karpenter/v1/kubeletconfiguration.go @@ -0,0 +1,162 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1 + +import ( + karpenterv1 "github.com/openshift/hypershift/api/karpenter/v1" +) + +// KubeletConfigurationApplyConfiguration represents a declarative configuration of the KubeletConfiguration type for use +// with apply. 
+type KubeletConfigurationApplyConfiguration struct { + MaxPods *int32 `json:"maxPods,omitempty"` + PodsPerCore *int32 `json:"podsPerCore,omitempty"` + SystemReserved map[string]string `json:"systemReserved,omitempty"` + KubeReserved map[string]string `json:"kubeReserved,omitempty"` + EvictionHard map[string]karpenterv1.EvictionThreshold `json:"evictionHard,omitempty"` + EvictionSoft map[string]karpenterv1.EvictionThreshold `json:"evictionSoft,omitempty"` + EvictionSoftGracePeriod map[string]string `json:"evictionSoftGracePeriod,omitempty"` + EvictionMaxPodGracePeriod *int32 `json:"evictionMaxPodGracePeriod,omitempty"` + ImageGCHighThresholdPercent *int32 `json:"imageGCHighThresholdPercent,omitempty"` + ImageGCLowThresholdPercent *int32 `json:"imageGCLowThresholdPercent,omitempty"` + CPUCFSQuota *bool `json:"cpuCFSQuota,omitempty"` +} + +// KubeletConfigurationApplyConfiguration constructs a declarative configuration of the KubeletConfiguration type for use with +// apply. +func KubeletConfiguration() *KubeletConfigurationApplyConfiguration { + return &KubeletConfigurationApplyConfiguration{} +} + +// WithMaxPods sets the MaxPods field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the MaxPods field is set to the value of the last call. +func (b *KubeletConfigurationApplyConfiguration) WithMaxPods(value int32) *KubeletConfigurationApplyConfiguration { + b.MaxPods = &value + return b +} + +// WithPodsPerCore sets the PodsPerCore field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the PodsPerCore field is set to the value of the last call. 
+func (b *KubeletConfigurationApplyConfiguration) WithPodsPerCore(value int32) *KubeletConfigurationApplyConfiguration { + b.PodsPerCore = &value + return b +} + +// WithSystemReserved puts the entries into the SystemReserved field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, the entries provided by each call will be put on the SystemReserved field, +// overwriting an existing map entries in SystemReserved field with the same key. +func (b *KubeletConfigurationApplyConfiguration) WithSystemReserved(entries map[string]string) *KubeletConfigurationApplyConfiguration { + if b.SystemReserved == nil && len(entries) > 0 { + b.SystemReserved = make(map[string]string, len(entries)) + } + for k, v := range entries { + b.SystemReserved[k] = v + } + return b +} + +// WithKubeReserved puts the entries into the KubeReserved field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, the entries provided by each call will be put on the KubeReserved field, +// overwriting an existing map entries in KubeReserved field with the same key. +func (b *KubeletConfigurationApplyConfiguration) WithKubeReserved(entries map[string]string) *KubeletConfigurationApplyConfiguration { + if b.KubeReserved == nil && len(entries) > 0 { + b.KubeReserved = make(map[string]string, len(entries)) + } + for k, v := range entries { + b.KubeReserved[k] = v + } + return b +} + +// WithEvictionHard puts the entries into the EvictionHard field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, the entries provided by each call will be put on the EvictionHard field, +// overwriting an existing map entries in EvictionHard field with the same key. 
+func (b *KubeletConfigurationApplyConfiguration) WithEvictionHard(entries map[string]karpenterv1.EvictionThreshold) *KubeletConfigurationApplyConfiguration { + if b.EvictionHard == nil && len(entries) > 0 { + b.EvictionHard = make(map[string]karpenterv1.EvictionThreshold, len(entries)) + } + for k, v := range entries { + b.EvictionHard[k] = v + } + return b +} + +// WithEvictionSoft puts the entries into the EvictionSoft field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, the entries provided by each call will be put on the EvictionSoft field, +// overwriting an existing map entries in EvictionSoft field with the same key. +func (b *KubeletConfigurationApplyConfiguration) WithEvictionSoft(entries map[string]karpenterv1.EvictionThreshold) *KubeletConfigurationApplyConfiguration { + if b.EvictionSoft == nil && len(entries) > 0 { + b.EvictionSoft = make(map[string]karpenterv1.EvictionThreshold, len(entries)) + } + for k, v := range entries { + b.EvictionSoft[k] = v + } + return b +} + +// WithEvictionSoftGracePeriod puts the entries into the EvictionSoftGracePeriod field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, the entries provided by each call will be put on the EvictionSoftGracePeriod field, +// overwriting an existing map entries in EvictionSoftGracePeriod field with the same key. 
+func (b *KubeletConfigurationApplyConfiguration) WithEvictionSoftGracePeriod(entries map[string]string) *KubeletConfigurationApplyConfiguration { + if b.EvictionSoftGracePeriod == nil && len(entries) > 0 { + b.EvictionSoftGracePeriod = make(map[string]string, len(entries)) + } + for k, v := range entries { + b.EvictionSoftGracePeriod[k] = v + } + return b +} + +// WithEvictionMaxPodGracePeriod sets the EvictionMaxPodGracePeriod field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the EvictionMaxPodGracePeriod field is set to the value of the last call. +func (b *KubeletConfigurationApplyConfiguration) WithEvictionMaxPodGracePeriod(value int32) *KubeletConfigurationApplyConfiguration { + b.EvictionMaxPodGracePeriod = &value + return b +} + +// WithImageGCHighThresholdPercent sets the ImageGCHighThresholdPercent field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ImageGCHighThresholdPercent field is set to the value of the last call. +func (b *KubeletConfigurationApplyConfiguration) WithImageGCHighThresholdPercent(value int32) *KubeletConfigurationApplyConfiguration { + b.ImageGCHighThresholdPercent = &value + return b +} + +// WithImageGCLowThresholdPercent sets the ImageGCLowThresholdPercent field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the ImageGCLowThresholdPercent field is set to the value of the last call. 
+func (b *KubeletConfigurationApplyConfiguration) WithImageGCLowThresholdPercent(value int32) *KubeletConfigurationApplyConfiguration { + b.ImageGCLowThresholdPercent = &value + return b +} + +// WithCPUCFSQuota sets the CPUCFSQuota field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the CPUCFSQuota field is set to the value of the last call. +func (b *KubeletConfigurationApplyConfiguration) WithCPUCFSQuota(value bool) *KubeletConfigurationApplyConfiguration { + b.CPUCFSQuota = &value + return b +} diff --git a/client/applyconfiguration/karpenter/v1/openshiftec2nodeclassspec.go b/client/applyconfiguration/karpenter/v1/openshiftec2nodeclassspec.go index 73fbdadf013..fd5c2b35fe3 100644 --- a/client/applyconfiguration/karpenter/v1/openshiftec2nodeclassspec.go +++ b/client/applyconfiguration/karpenter/v1/openshiftec2nodeclassspec.go @@ -34,6 +34,7 @@ type OpenshiftEC2NodeClassSpecApplyConfiguration struct { Monitoring *karpenterv1.MonitoringState `json:"monitoring,omitempty"` MetadataOptions *MetadataOptionsApplyConfiguration `json:"metadataOptions,omitempty"` Version *string `json:"version,omitempty"` + Kubelet *KubeletConfigurationApplyConfiguration `json:"kubelet,omitempty"` } // OpenshiftEC2NodeClassSpecApplyConfiguration constructs a declarative configuration of the OpenshiftEC2NodeClassSpec type for use with @@ -147,3 +148,11 @@ func (b *OpenshiftEC2NodeClassSpecApplyConfiguration) WithVersion(value string) b.Version = &value return b } + +// WithKubelet sets the Kubelet field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Kubelet field is set to the value of the last call. 
+func (b *OpenshiftEC2NodeClassSpecApplyConfiguration) WithKubelet(value *KubeletConfigurationApplyConfiguration) *OpenshiftEC2NodeClassSpecApplyConfiguration { + b.Kubelet = value + return b +} diff --git a/client/applyconfiguration/utils.go b/client/applyconfiguration/utils.go index ca549fbd163..77e8b9c056b 100644 --- a/client/applyconfiguration/utils.go +++ b/client/applyconfiguration/utils.go @@ -421,6 +421,8 @@ func ForKind(kind schema.GroupVersionKind) interface{} { return &karpenterv1.CapacityReservationApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("CapacityReservationSelectorTerm"): return &karpenterv1.CapacityReservationSelectorTermApplyConfiguration{} + case v1.SchemeGroupVersion.WithKind("KubeletConfiguration"): + return &karpenterv1.KubeletConfigurationApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("MetadataOptions"): return &karpenterv1.MetadataOptionsApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("OpenshiftEC2NodeClass"): diff --git a/hack/kubelet-ratcheting-gen/main.go b/hack/kubelet-ratcheting-gen/main.go new file mode 100644 index 00000000000..7c245f75010 --- /dev/null +++ b/hack/kubelet-ratcheting-gen/main.go @@ -0,0 +1,178 @@ +// kubelet-ratcheting-gen generates the envtest ratcheting testsuite for KubeletConfiguration. +// It reflects over KubeletConfiguration to enumerate all typed fields, verifies each has a +// fixture value, and writes a testsuite YAML that simulates a pre-upgrade → post-upgrade +// CRD schema swap. Run via: go run ./hack/kubelet-ratcheting-gen/main.go (from repo root). +// +// When adding a new typed field to KubeletConfiguration: +// 1. Add a fixture value to the fixtures map below. +// 2. Run `make karpenter-api` to regenerate the testsuite. +// 3. Commit the updated testsuite YAML. 
+package main + +import ( + "fmt" + "os" + "reflect" + "strings" + "text/template" + + karpenterv1 "github.com/openshift/hypershift/api/karpenter/v1" +) + +const outputPath = "karpenter-operator/controllers/karpenter/assets/tests/" + + "openshiftec2nodeclasses.karpenter.hypershift.openshift.io/" + + "stable.openshiftec2nodeclasses.kubelet-ratcheting.testsuite.yaml" + +// fixtures maps each KubeletConfiguration JSON field name to a valid YAML snippet. +// Values must satisfy all CRD validation rules (min/max, CEL cross-field constraints). +// Add an entry here when promoting a new field from overflow to typed. +var fixtures = map[string]string{ + "maxPods": "110", + "podsPerCore": "10", + "systemReserved": "cpu: \"100m\"\n memory: \"256Mi\"", + "kubeReserved": "cpu: \"200m\"\n memory: \"512Mi\"", + "evictionHard": "memory.available: \"100Mi\"\n nodefs.available: \"10%\"", + "evictionSoft": "memory.available: \"200Mi\"", + "evictionSoftGracePeriod": "memory.available: \"30s\"", + "evictionMaxPodGracePeriod": "60", + "imageGCHighThresholdPercent": "85", + "imageGCLowThresholdPercent": "80", + "cpuCFSQuota": "true", +} + +// mapFields are fields whose fixture value is a YAML mapping (rendered with extra indentation). +var mapFields = map[string]bool{ + "systemReserved": true, + "kubeReserved": true, + "evictionHard": true, + "evictionSoft": true, + "evictionSoftGracePeriod": true, +} + +const testsuiteTmpl = `# Code generated by hack/kubelet-ratcheting-gen. DO NOT EDIT. +# To regenerate: make karpenter-api +# To add a new field: add a fixture in hack/kubelet-ratcheting-gen/main.go, then regenerate. +apiVersion: apiextensions.k8s.io/v1 +name: "OpenshiftEC2NodeClass kubelet ratcheting validation" +crdName: openshiftec2nodeclasses.karpenter.hypershift.openshift.io +version: v1 +# Verifies that all currently-typed kubelet fields survive a CRD schema upgrade. 
+# initialCRDPatches strip the entire kubelet typed-field schema back to a pure +# "type: object, x-kubernetes-preserve-unknown-fields: true" — the state that existed +# before any fields were promoted from overflow. The initial object is created with all +# typed fields set (they land in overflow under the patched schema). The patches are then +# reverted and the update is applied, verifying every field survives the upgrade seamlessly. +tests: + onUpdate: + - name: When all typed kubelet fields were set before they were typed they should remain valid after CRD upgrade + initialCRDPatches: + # Strip all typed field schemas, leaving type: object, x-kubernetes-preserve-unknown-fields: true. + - op: remove + path: /spec/versions/0/schema/openAPIV3Schema/properties/spec/properties/kubelet/properties + # Strip all CEL cross-field validation rules. + - op: remove + path: /spec/versions/0/schema/openAPIV3Schema/properties/spec/properties/kubelet/x-kubernetes-validations + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: +{{- range .Fields}} + {{.Name}}:{{if .IsMap}} + {{.Value}}{{else}} {{.Value}}{{end}} +{{- end}} + updated: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: +{{- range .Fields}} + {{.Name}}:{{if .IsMap}} + {{.Value}}{{else}} {{.Value}}{{end}} +{{- end}} +` + +type field struct { + Name string + Value string + IsMap bool +} + +func main() { + if err := run(); err != nil { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } +} + +func run() error { + fields, err := collectFields() + if err != nil { + return err + } + + tmpl, err := template.New("testsuite").Parse(testsuiteTmpl) + if err != nil { + return fmt.Errorf("parsing 
template: %w", err) + } + + f, err := os.Create(outputPath) + if err != nil { + return fmt.Errorf("creating output file %s: %w", outputPath, err) + } + defer f.Close() + + if err := tmpl.Execute(f, struct{ Fields []field }{Fields: fields}); err != nil { + return fmt.Errorf("executing template: %w", err) + } + + fmt.Printf("generated %s (%d fields)\n", outputPath, len(fields)) + return nil +} + +// collectFields reflects over KubeletConfiguration, enumerates typed fields by JSON tag, +// and looks up each one in the fixtures map. Missing fixtures cause a hard failure. +func collectFields() ([]field, error) { + t := reflect.TypeOf(karpenterv1.KubeletConfiguration{}) + var fields []field + var missing []string + + for i := range t.NumField() { + f := t.Field(i) + tag := f.Tag.Get("json") + if tag == "" || tag == "-" { + continue + } + name, _, _ := strings.Cut(tag, ",") + if name == "" || name == "-" { + continue + } + + val, ok := fixtures[name] + if !ok { + missing = append(missing, fmt.Sprintf("%s (json:%q)", f.Name, name)) + continue + } + fields = append(fields, field{Name: name, Value: val, IsMap: mapFields[name]}) + } + + if len(missing) > 0 { + return nil, fmt.Errorf( + "no fixture value for the following KubeletConfiguration fields — "+ + "add them to the fixtures map in hack/kubelet-ratcheting-gen/main.go:\n %s", + strings.Join(missing, "\n "), + ) + } + + return fields, nil +} diff --git a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go index 022b442d182..163f6693056 100644 --- a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go +++ b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go @@ -2114,7 +2114,7 @@ func (r *HostedClusterReconciler) reconcile(ctx context.Context, req ctrl.Reques } } - if err := r.reconcileKarpenterOperator(cpContext, createOrUpdate, hcluster, r.HypershiftOperatorImage, 
controlPlaneOperatorImage); err != nil { + if err := r.reconcileKarpenterOperator(cpContext, hcluster, r.HypershiftOperatorImage, controlPlaneOperatorImage); err != nil { return ctrl.Result{}, fmt.Errorf("failed to reconcile karpenter operator: %w", err) } diff --git a/hypershift-operator/controllers/hostedcluster/karpenter.go b/hypershift-operator/controllers/hostedcluster/karpenter.go index d7711919f3c..9e950fefe4b 100644 --- a/hypershift-operator/controllers/hostedcluster/karpenter.go +++ b/hypershift-operator/controllers/hostedcluster/karpenter.go @@ -25,7 +25,6 @@ import ( controlplanecomponent "github.com/openshift/hypershift/support/controlplane-component" "github.com/openshift/hypershift/support/k8sutil" karpenterutil "github.com/openshift/hypershift/support/karpenter" - "github.com/openshift/hypershift/support/upsert" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -38,39 +37,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) -func (r *HostedClusterReconciler) reconcileKarpenterOperator(cpContext controlplanecomponent.ControlPlaneContext, createOrUpdate upsert.CreateOrUpdateFN, hcluster *hyperv1.HostedCluster, hypershiftOperatorImage, controlPlaneOperatorImage string) error { - // TODO(jkyros): I rearranged this so we always reconcile so it can at least attempt to disable if it's turned off. I was planning on moving the KubeletConfig configmap creation - // into the karpenter-operator as part of the KubeletConfig work so this should get cleaner. - if karpenterutil.IsKarpenterEnabled(hcluster.Spec.AutoNode) && hcluster.Status.KubeConfig != nil && hcluster.Status.IgnitionEndpoint != "" { - // Generate configMap with KubeletConfig to register Nodes with karpenter expected taint. 
- configMap := &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: karpenterutil.KarpenterTaintConfigMapName, - Namespace: cpContext.HCP.Namespace, - }, - } - - kubeletConfig := fmt.Sprintf(`apiVersion: machineconfiguration.openshift.io/v1 -kind: KubeletConfig -metadata: - name: %s -spec: - kubeletConfig: - registerWithTaints: - - key: "karpenter.sh/unregistered" - value: "true" - effect: "NoExecute"`, karpenterutil.KarpenterTaintConfigMapName) - - _, err := createOrUpdate(cpContext, r.Client, configMap, func() error { - configMap.Data = map[string]string{ - "config": kubeletConfig, - } - return nil - }) - if err != nil { - return fmt.Errorf("failed to create configmap: %w", err) - } - } +func (r *HostedClusterReconciler) reconcileKarpenterOperator(cpContext controlplanecomponent.ControlPlaneContext, hcluster *hyperv1.HostedCluster, hypershiftOperatorImage, controlPlaneOperatorImage string) error { + // The taint ConfigMap (set-karpenter-taint) is created by the karpenter-operator itself. if !karpenterutil.IsKarpenterEnabled(hcluster.Spec.AutoNode) { diff --git a/karpenter-operator/controllers/karpenter/assets/karpenter.hypershift.openshift.io_openshiftec2nodeclasses.yaml b/karpenter-operator/controllers/karpenter/assets/karpenter.hypershift.openshift.io_openshiftec2nodeclasses.yaml index 90ad3d61509..ebcbf762d35 100644 --- a/karpenter-operator/controllers/karpenter/assets/karpenter.hypershift.openshift.io_openshiftec2nodeclasses.yaml +++ b/karpenter-operator/controllers/karpenter/assets/karpenter.hypershift.openshift.io_openshiftec2nodeclasses.yaml @@ -258,6 +258,163 @@ spec: - Public - SubnetDefault type: string + kubelet: + description: |- + kubelet configures kubelet settings for nodes provisioned by this NodeClass. + These settings are injected into the node's ignition configuration via MachineConfig. + properties: + cpuCFSQuota: + description: cpuCFSQuota enables CPU CFS quota enforcement for + containers that specify CPU limits. 
+ type: boolean + evictionHard: + additionalProperties: + description: |- + EvictionThreshold is a threshold value for a kubelet eviction signal. + Values are either a percentage (e.g. "10%") or a Kubernetes quantity (e.g. "100Mi"). + maxLength: 64 + type: string + description: evictionHard is a map of signal names to quantities + that defines hard eviction thresholds. + maxProperties: 20 + minProperties: 1 + type: object + x-kubernetes-validations: + - message: valid keys for evictionHard are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + - message: evictionHard values must not be empty + rule: self.all(x, self[x].size() > 0) + evictionMaxPodGracePeriod: + description: |- + evictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use + when terminating pods in response to soft eviction thresholds. + format: int32 + type: integer + evictionSoft: + additionalProperties: + description: |- + EvictionThreshold is a threshold value for a kubelet eviction signal. + Values are either a percentage (e.g. "10%") or a Kubernetes quantity (e.g. "100Mi"). + maxLength: 64 + type: string + description: evictionSoft is a map of signal names to quantities + that defines soft eviction thresholds. 
+ maxProperties: 20 + minProperties: 1 + type: object + x-kubernetes-validations: + - message: valid keys for evictionSoft are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + - message: evictionSoft values must not be empty + rule: self.all(x, self[x].size() > 0) + evictionSoftGracePeriod: + additionalProperties: + type: string + description: |- + evictionSoftGracePeriod is a map of signal names to quantities that defines grace periods + for each soft eviction signal. + maxProperties: 20 + minProperties: 1 + type: object + x-kubernetes-validations: + - message: valid keys for evictionSoftGracePeriod are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + imageGCHighThresholdPercent: + description: |- + imageGCHighThresholdPercent is the percent of disk usage which triggers image garbage collection. + The value must be between 0 and 100, inclusive, and must be greater than imageGCLowThresholdPercent when both are set. + format: int32 + maximum: 100 + minimum: 0 + type: integer + imageGCLowThresholdPercent: + description: |- + imageGCLowThresholdPercent is the percent of disk usage to which image garbage collection attempts to free. + The value must be between 0 and 100, inclusive, and must be less than imageGCHighThresholdPercent when both are set. + format: int32 + maximum: 100 + minimum: 0 + type: integer + kubeReserved: + additionalProperties: + type: string + description: |- + kubeReserved is a set of ResourceName=ResourceQuantity pairs that describe + resources reserved for kubernetes system components. 
+ Currently only cpu, memory, ephemeral-storage, and pid are supported. + maxProperties: 20 + minProperties: 1 + type: object + x-kubernetes-validations: + - message: valid keys for kubeReserved are ['cpu','memory','ephemeral-storage','pid'] + rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' + || x=='pid') + - message: kubeReserved value cannot be a negative resource quantity + rule: self.all(x, !self[x].startsWith('-')) + - message: kubeReserved values must not be empty + rule: self.all(x, self[x].size() > 0) + maxPods: + description: |- + maxPods is the maximum number of pods that can run on a node. + The value must be between 1 and 2500, inclusive. + format: int32 + maximum: 2500 + minimum: 1 + type: integer + podsPerCore: + description: |- + podsPerCore is the maximum number of pods per core. The value must be between 1 and 2500, + inclusive, and cannot exceed maxPods when both are set. + format: int32 + maximum: 2500 + minimum: 1 + type: integer + systemReserved: + additionalProperties: + type: string + description: |- + systemReserved is a set of ResourceName=ResourceQuantity pairs that describe + resources reserved for non-kubernetes components. + Currently only cpu, memory, ephemeral-storage, and pid are supported. 
+ maxProperties: 20 + minProperties: 1 + type: object + x-kubernetes-validations: + - message: valid keys for systemReserved are ['cpu','memory','ephemeral-storage','pid'] + rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' + || x=='pid') + - message: systemReserved value cannot be a negative resource + quantity + rule: self.all(x, !self[x].startsWith('-')) + - message: systemReserved values must not be empty + rule: self.all(x, self[x].size() > 0) + type: object + x-kubernetes-preserve-unknown-fields: true + x-kubernetes-validations: + - message: imageGCHighThresholdPercent must be greater than imageGCLowThresholdPercent + rule: '!has(self.imageGCHighThresholdPercent) || !has(self.imageGCLowThresholdPercent) + || self.imageGCHighThresholdPercent > self.imageGCLowThresholdPercent' + - message: podsPerCore must not exceed maxPods + rule: '!has(self.podsPerCore) || !has(self.maxPods) || self.podsPerCore + <= self.maxPods' + - message: evictionSoft entry does not have a matching evictionSoftGracePeriod + rule: '!has(self.evictionSoft) || (has(self.evictionSoftGracePeriod) + && self.evictionSoft.all(e, e in self.evictionSoftGracePeriod))' + - message: evictionSoftGracePeriod entry does not have a matching + evictionSoft + rule: '!has(self.evictionSoftGracePeriod) || (has(self.evictionSoft) + && self.evictionSoftGracePeriod.all(e, e in self.evictionSoft))' + - message: evictionSoft threshold must be greater than or equal to + evictionHard threshold for the same signal (soft eviction should + fire before hard) + rule: '!has(self.evictionHard) || !has(self.evictionSoft) || self.evictionHard.all(key, + !(key in self.evictionSoft) || ((self.evictionSoft[key].endsWith(''%'') + && self.evictionHard[key].endsWith(''%'')) ? 
(self.evictionSoft[key].size() + <= 1 || self.evictionHard[key].size() <= 1 || double(self.evictionSoft[key].substring(0, + self.evictionSoft[key].size() - 1)) >= double(self.evictionHard[key].substring(0, + self.evictionHard[key].size() - 1))) : (!(isQuantity(self.evictionSoft[key]) + && isQuantity(self.evictionHard[key])) || quantity(self.evictionSoft[key]).compareTo(quantity(self.evictionHard[key])) + >= 0)))' metadataOptions: description: |- metadataOptions contains parameters for specifying the exposure of the diff --git a/karpenter-operator/controllers/karpenter/assets/tests/openshiftec2nodeclasses.karpenter.hypershift.openshift.io/stable.openshiftec2nodeclasses.kubelet-field-promotion.testsuite.yaml b/karpenter-operator/controllers/karpenter/assets/tests/openshiftec2nodeclasses.karpenter.hypershift.openshift.io/stable.openshiftec2nodeclasses.kubelet-field-promotion.testsuite.yaml new file mode 100644 index 00000000000..96b3dac679d --- /dev/null +++ b/karpenter-operator/controllers/karpenter/assets/tests/openshiftec2nodeclasses.karpenter.hypershift.openshift.io/stable.openshiftec2nodeclasses.kubelet-field-promotion.testsuite.yaml @@ -0,0 +1,196 @@ +apiVersion: apiextensions.k8s.io/v1 +name: "OpenshiftEC2NodeClass kubelet field promotion compatibility" +crdName: openshiftec2nodeclasses.karpenter.hypershift.openshift.io +version: v1 +# These tests verify that promoting a kubelet config field from overflow (unknown/pass-through) +# to a typed struct field does not break existing NodeClass objects or reject previously-valid YAML. +tests: + onUpdate: + # When a field that previously lived in overflow becomes a typed field, existing objects that + # used it as an overflow field must still be updatable — mixing typed and overflow fields is valid. 
+ - name: When an overflow field and a newly-typed field coexist on update it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + podPidsLimit: 4096 + updated: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 110 + podPidsLimit: 4096 + expected: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 110 + podPidsLimit: 4096 + + # When a typed field is left unchanged while one overflow field is replaced by another, + # the update must be accepted — both objects are validated against the same schema, so no overflow-to-typed transition occurs here. + - name: When a typed field is unchanged and overflow fields are replaced on update it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 110 + podPidsLimit: 4096 + updated: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 110 + containerLogMaxSize: "50Mi" + expected: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 110 + containerLogMaxSize: "50Mi" + + # When a typed field is removed on update it should be accepted regardless of overflow fields remaining.
+ - name: When a typed field is removed and only overflow fields remain on update it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 110 + podPidsLimit: 4096 + updated: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + podPidsLimit: 4096 + expected: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + podPidsLimit: 4096 + + # When an object with only overflow fields is updated to add typed fields it should pass. + - name: When typed fields are added alongside existing overflow fields on update it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + podPidsLimit: 4096 + containerLogMaxSize: "50Mi" + updated: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 110 + podsPerCore: 10 + podPidsLimit: 4096 + containerLogMaxSize: "50Mi" + expected: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 110 + podsPerCore: 10 + podPidsLimit: 4096 + containerLogMaxSize: "50Mi" + + # CEL cross-field validation must still fire on update even when overflow fields are present. 
+ - name: When podsPerCore exceeds maxPods on update it should fail even with overflow fields present + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 50 + podsPerCore: 10 + podPidsLimit: 4096 + updated: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 10 + podsPerCore: 50 + podPidsLimit: 4096 + expectedError: "podsPerCore must not exceed maxPods" diff --git a/karpenter-operator/controllers/karpenter/assets/tests/openshiftec2nodeclasses.karpenter.hypershift.openshift.io/stable.openshiftec2nodeclasses.kubelet-ratcheting.testsuite.yaml b/karpenter-operator/controllers/karpenter/assets/tests/openshiftec2nodeclasses.karpenter.hypershift.openshift.io/stable.openshiftec2nodeclasses.kubelet-ratcheting.testsuite.yaml new file mode 100644 index 00000000000..8037c2db6f5 --- /dev/null +++ b/karpenter-operator/controllers/karpenter/assets/tests/openshiftec2nodeclasses.karpenter.hypershift.openshift.io/stable.openshiftec2nodeclasses.kubelet-ratcheting.testsuite.yaml @@ -0,0 +1,81 @@ +# Code generated by hack/kubelet-ratcheting-gen. DO NOT EDIT. +# To regenerate: make karpenter-api +# To add a new field: add a fixture in hack/kubelet-ratcheting-gen/main.go, then regenerate. +apiVersion: apiextensions.k8s.io/v1 +name: "OpenshiftEC2NodeClass kubelet ratcheting validation" +crdName: openshiftec2nodeclasses.karpenter.hypershift.openshift.io +version: v1 +# Verifies that all currently-typed kubelet fields survive a CRD schema upgrade. 
+# initialCRDPatches strip the entire kubelet typed-field schema back to a pure +# "type: object, x-kubernetes-preserve-unknown-fields: true" — the state that existed +# before any fields were promoted from overflow. The initial object is created with all +# typed fields set (they land in overflow under the patched schema). The patches are then +# reverted and the update is applied, verifying every field survives the upgrade seamlessly. +tests: + onUpdate: + - name: When all typed kubelet fields were set before they were typed they should remain valid after CRD upgrade + initialCRDPatches: + # Strip all typed field schemas, leaving type: object, x-kubernetes-preserve-unknown-fields: true. + - op: remove + path: /spec/versions/0/schema/openAPIV3Schema/properties/spec/properties/kubelet/properties + # Strip all CEL cross-field validation rules. + - op: remove + path: /spec/versions/0/schema/openAPIV3Schema/properties/spec/properties/kubelet/x-kubernetes-validations + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 110 + podsPerCore: 10 + systemReserved: + cpu: "100m" + memory: "256Mi" + kubeReserved: + cpu: "200m" + memory: "512Mi" + evictionHard: + memory.available: "100Mi" + nodefs.available: "10%" + evictionSoft: + memory.available: "200Mi" + evictionSoftGracePeriod: + memory.available: "30s" + evictionMaxPodGracePeriod: 60 + imageGCHighThresholdPercent: 85 + imageGCLowThresholdPercent: 80 + cpuCFSQuota: true + updated: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 110 + podsPerCore: 10 + systemReserved: + cpu: "100m" + memory: "256Mi" + kubeReserved: + cpu: "200m" + memory: "512Mi" + evictionHard: + memory.available: 
"100Mi" + nodefs.available: "10%" + evictionSoft: + memory.available: "200Mi" + evictionSoftGracePeriod: + memory.available: "30s" + evictionMaxPodGracePeriod: 60 + imageGCHighThresholdPercent: 85 + imageGCLowThresholdPercent: 80 + cpuCFSQuota: true diff --git a/karpenter-operator/controllers/karpenter/assets/tests/openshiftec2nodeclasses.karpenter.hypershift.openshift.io/stable.openshiftec2nodeclasses.kubelet.testsuite.yaml b/karpenter-operator/controllers/karpenter/assets/tests/openshiftec2nodeclasses.karpenter.hypershift.openshift.io/stable.openshiftec2nodeclasses.kubelet.testsuite.yaml new file mode 100644 index 00000000000..45407c5d2ee --- /dev/null +++ b/karpenter-operator/controllers/karpenter/assets/tests/openshiftec2nodeclasses.karpenter.hypershift.openshift.io/stable.openshiftec2nodeclasses.kubelet.testsuite.yaml @@ -0,0 +1,859 @@ +apiVersion: apiextensions.k8s.io/v1 +name: "OpenshiftEC2NodeClass kubelet validation" +crdName: openshiftec2nodeclasses.karpenter.hypershift.openshift.io +version: v1 +tests: + onCreate: + # --- maxPods validation --- + - name: When kubelet maxPods is 0 it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 0 + expectedError: "maxPods" + + - name: When kubelet maxPods is negative it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: -1 + expectedError: "maxPods" + + - name: When kubelet maxPods is 1 it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 1 + + - name: When 
kubelet maxPods is 2500 it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 2500 + + - name: When kubelet maxPods exceeds 2500 it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 2501 + expectedError: "maxPods" + + - name: When kubelet maxPods is valid it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 110 + + # --- podsPerCore validation --- + - name: When kubelet podsPerCore is 0 it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + podsPerCore: 0 + expectedError: "podsPerCore" + + - name: When kubelet podsPerCore is negative it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + podsPerCore: -1 + expectedError: "podsPerCore" + + - name: When kubelet podsPerCore is 1 it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + podsPerCore: 1 + + - name: When kubelet podsPerCore exceeds 2500 it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: 
OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + podsPerCore: 2501 + expectedError: "podsPerCore" + + # --- podsPerCore <= maxPods cross-field validation --- + - name: When kubelet podsPerCore exceeds maxPods it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 10 + podsPerCore: 20 + expectedError: "podsPerCore must not exceed maxPods" + + - name: When kubelet podsPerCore equals maxPods it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 50 + podsPerCore: 50 + + - name: When kubelet podsPerCore is less than maxPods it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 110 + podsPerCore: 10 + + # --- systemReserved validation --- + - name: When kubelet systemReserved has valid keys it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + systemReserved: + cpu: "100m" + memory: "256Mi" + ephemeral-storage: "1Gi" + pid: "1000" + + - name: When kubelet systemReserved has invalid key it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + systemReserved: + gpu: "1" + 
expectedError: "valid keys for systemReserved" + + - name: When kubelet systemReserved has invalid key among valid keys it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + systemReserved: + cpu: "100m" + memory: "256Mi" + gpu: "1" + expectedError: "valid keys for systemReserved" + + - name: When kubelet systemReserved has one negative value among valid entries it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + systemReserved: + cpu: "100m" + memory: "-256Mi" + expectedError: "cannot be a negative resource quantity" + + - name: When kubelet systemReserved has negative value it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + systemReserved: + cpu: "-100m" + expectedError: "cannot be a negative resource quantity" + + - name: When kubelet systemReserved has an empty value it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + systemReserved: + cpu: "" + expectedError: "systemReserved values must not be empty" + + - name: When kubelet systemReserved is empty it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + systemReserved: {} + expectedError: "systemReserved" + + # --- 
kubeReserved validation --- + - name: When kubelet kubeReserved has valid keys it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + kubeReserved: + cpu: "200m" + memory: "512Mi" + + - name: When kubelet kubeReserved has invalid key it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + kubeReserved: + disk: "10Gi" + expectedError: "valid keys for kubeReserved" + + - name: When kubelet kubeReserved has negative value it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + kubeReserved: + memory: "-512Mi" + expectedError: "cannot be a negative resource quantity" + + - name: When kubelet kubeReserved has an empty value it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + kubeReserved: + memory: "" + expectedError: "kubeReserved values must not be empty" + + # --- evictionHard validation --- + - name: When kubelet evictionHard has valid keys it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionHard: + memory.available: "100Mi" + nodefs.available: "10%" + + - name: When kubelet evictionHard has invalid key it should fail + initial: | + apiVersion: 
karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionHard: + disk.pressure: "10%" + expectedError: "valid keys for evictionHard" + + - name: When kubelet evictionHard has an empty value it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionHard: + memory.available: "" + expectedError: "evictionHard values must not be empty" + + # --- evictionSoft validation --- + - name: When kubelet evictionSoft has valid keys it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionSoft: + memory.available: "200Mi" + imagefs.available: "15%" + evictionSoftGracePeriod: + memory.available: "30s" + imagefs.available: "1m" + + - name: When kubelet evictionSoft has invalid key it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionSoft: + cpu.pressure: "50%" + expectedError: "valid keys for evictionSoft" + + - name: When kubelet evictionSoft has an empty value it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionSoft: + memory.available: "" + expectedError: "evictionSoft values must not be empty" + + # --- evictionSoftGracePeriod validation --- + - name: When kubelet evictionSoftGracePeriod has valid keys 
it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionSoft: + memory.available: "200Mi" + nodefs.inodesFree: "10%" + evictionSoftGracePeriod: + memory.available: "30s" + nodefs.inodesFree: "1m" + + - name: When kubelet evictionSoftGracePeriod has invalid key it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionSoftGracePeriod: + invalid.signal: "30s" + expectedError: "valid keys for evictionSoftGracePeriod" + + # --- evictionSoft / evictionSoftGracePeriod cross-validation --- + - name: When kubelet evictionSoft is set without evictionSoftGracePeriod it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionSoft: + memory.available: "200Mi" + expectedError: "evictionSoft entry does not have a matching evictionSoftGracePeriod" + + - name: When kubelet evictionSoftGracePeriod is set without evictionSoft it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionSoftGracePeriod: + memory.available: "30s" + expectedError: "evictionSoftGracePeriod entry does not have a matching evictionSoft" + + - name: When kubelet evictionSoft and evictionSoftGracePeriod keys match it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + 
securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionSoft: + memory.available: "200Mi" + nodefs.available: "15%" + evictionSoftGracePeriod: + memory.available: "30s" + nodefs.available: "1m" + + - name: When kubelet evictionSoft has a key not in evictionSoftGracePeriod it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionSoft: + memory.available: "200Mi" + nodefs.available: "15%" + evictionSoftGracePeriod: + memory.available: "30s" + expectedError: "evictionSoft entry does not have a matching evictionSoftGracePeriod" + + - name: When kubelet evictionHard and evictionSoft share the same signal it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionHard: + memory.available: "100Mi" + evictionSoft: + memory.available: "200Mi" + evictionSoftGracePeriod: + memory.available: "30s" + + # --- evictionSoft >= evictionHard threshold ordering validation --- + - name: When kubelet evictionSoft quantity threshold is less than evictionHard it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionHard: + memory.available: "200Mi" + evictionSoft: + memory.available: "100Mi" + evictionSoftGracePeriod: + memory.available: "30s" + expectedError: "evictionSoft threshold must be greater than or equal to evictionHard threshold for the same signal (soft eviction should fire before hard)" + + - name: When kubelet evictionSoft percentage threshold is less than evictionHard it should fail + initial: | + apiVersion: 
karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionHard: + nodefs.available: "15%" + evictionSoft: + nodefs.available: "10%" + evictionSoftGracePeriod: + nodefs.available: "1m0s" + expectedError: "evictionSoft threshold must be greater than or equal to evictionHard threshold for the same signal (soft eviction should fire before hard)" + + - name: When kubelet evictionSoft quantity threshold equals evictionHard it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionHard: + memory.available: "100Mi" + evictionSoft: + memory.available: "100Mi" + evictionSoftGracePeriod: + memory.available: "30s" + + - name: When kubelet evictionSoft quantity threshold is greater than evictionHard it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionHard: + memory.available: "100Mi" + evictionSoft: + memory.available: "200Mi" + evictionSoftGracePeriod: + memory.available: "30s" + + - name: When kubelet evictionSoft percentage threshold equals evictionHard it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionHard: + nodefs.available: "10%" + evictionSoft: + nodefs.available: "10%" + evictionSoftGracePeriod: + nodefs.available: "1m0s" + + - name: When kubelet evictionSoft percentage threshold is greater than evictionHard it should pass + initial: | + apiVersion: 
karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionHard: + nodefs.available: "10%" + evictionSoft: + nodefs.available: "15%" + evictionSoftGracePeriod: + nodefs.available: "1m0s" + + - name: When kubelet evictionSoft and evictionHard have multiple signals with one soft less than hard it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionHard: + memory.available: "100Mi" + nodefs.available: "15%" + evictionSoft: + memory.available: "200Mi" + nodefs.available: "10%" + evictionSoftGracePeriod: + memory.available: "30s" + nodefs.available: "1m0s" + expectedError: "evictionSoft threshold must be greater than or equal to evictionHard threshold for the same signal (soft eviction should fire before hard)" + + - name: When kubelet evictionHard has signals not in evictionSoft it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionHard: + memory.available: "100Mi" + nodefs.available: "10%" + evictionSoft: + memory.available: "200Mi" + evictionSoftGracePeriod: + memory.available: "30s" + + - name: When only evictionHard is set without evictionSoft it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + evictionHard: + memory.available: "100Mi" + + # --- imageGC threshold validation --- + - name: When kubelet imageGCHighThresholdPercent exceeds 100 it should fail + initial: | + apiVersion: 
karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + imageGCHighThresholdPercent: 101 + expectedError: "imageGCHighThresholdPercent" + + - name: When kubelet imageGCLowThresholdPercent is negative it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + imageGCLowThresholdPercent: -1 + expectedError: "imageGCLowThresholdPercent" + + - name: When kubelet imageGCHighThresholdPercent is less than imageGCLowThresholdPercent it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + imageGCHighThresholdPercent: 50 + imageGCLowThresholdPercent: 80 + expectedError: "imageGCHighThresholdPercent must be greater than imageGCLowThresholdPercent" + + - name: When kubelet imageGCHighThresholdPercent equals imageGCLowThresholdPercent it should fail + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + imageGCHighThresholdPercent: 80 + imageGCLowThresholdPercent: 80 + expectedError: "imageGCHighThresholdPercent must be greater than imageGCLowThresholdPercent" + + - name: When kubelet imageGC thresholds are valid it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + imageGCHighThresholdPercent: 85 + imageGCLowThresholdPercent: 80 + + # --- Comprehensive valid 
kubelet config --- + - name: When kubelet has all valid fields set it should pass + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 110 + podsPerCore: 10 + systemReserved: + cpu: "100m" + memory: "256Mi" + kubeReserved: + cpu: "200m" + memory: "512Mi" + evictionHard: + memory.available: "100Mi" + nodefs.available: "10%" + evictionSoft: + memory.available: "200Mi" + evictionSoftGracePeriod: + memory.available: "30s" + evictionMaxPodGracePeriod: 60 + imageGCHighThresholdPercent: 85 + imageGCLowThresholdPercent: 80 + cpuCFSQuota: true + + # --- Overflow fields (PreserveUnknownFields) --- + - name: When kubelet has unknown fields they should be preserved + initial: | + apiVersion: karpenter.hypershift.openshift.io/v1 + kind: OpenshiftEC2NodeClass + spec: + subnetSelectorTerms: + - tags: + env: test + securityGroupSelectorTerms: + - id: sg-0123456789abcdef0 + kubelet: + maxPods: 110 + podPidsLimit: 4096 + containerLogMaxSize: "50Mi" diff --git a/karpenter-operator/controllers/karpenter/assets/zz_generated.crd-manifests/openshiftec2nodeclasses.crd.yaml b/karpenter-operator/controllers/karpenter/assets/zz_generated.crd-manifests/openshiftec2nodeclasses.crd.yaml index 90ad3d61509..ebcbf762d35 100644 --- a/karpenter-operator/controllers/karpenter/assets/zz_generated.crd-manifests/openshiftec2nodeclasses.crd.yaml +++ b/karpenter-operator/controllers/karpenter/assets/zz_generated.crd-manifests/openshiftec2nodeclasses.crd.yaml @@ -258,6 +258,163 @@ spec: - Public - SubnetDefault type: string + kubelet: + description: |- + kubelet configures kubelet settings for nodes provisioned by this NodeClass. + These settings are injected into the node's ignition configuration via MachineConfig. 
+ properties: + cpuCFSQuota: + description: cpuCFSQuota enables CPU CFS quota enforcement for + containers that specify CPU limits. + type: boolean + evictionHard: + additionalProperties: + description: |- + EvictionThreshold is a threshold value for a kubelet eviction signal. + Values are either a percentage (e.g. "10%") or a Kubernetes quantity (e.g. "100Mi"). + maxLength: 64 + type: string + description: evictionHard is a map of signal names to quantities + that defines hard eviction thresholds. + maxProperties: 20 + minProperties: 1 + type: object + x-kubernetes-validations: + - message: valid keys for evictionHard are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + - message: evictionHard values must not be empty + rule: self.all(x, self[x].size() > 0) + evictionMaxPodGracePeriod: + description: |- + evictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use + when terminating pods in response to soft eviction thresholds. + format: int32 + type: integer + evictionSoft: + additionalProperties: + description: |- + EvictionThreshold is a threshold value for a kubelet eviction signal. + Values are either a percentage (e.g. "10%") or a Kubernetes quantity (e.g. "100Mi"). + maxLength: 64 + type: string + description: evictionSoft is a map of signal names to quantities + that defines soft eviction thresholds. 
+ maxProperties: 20 + minProperties: 1 + type: object + x-kubernetes-validations: + - message: valid keys for evictionSoft are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + - message: evictionSoft values must not be empty + rule: self.all(x, self[x].size() > 0) + evictionSoftGracePeriod: + additionalProperties: + type: string + description: |- + evictionSoftGracePeriod is a map of signal names to quantities that defines grace periods + for each soft eviction signal. + maxProperties: 20 + minProperties: 1 + type: object + x-kubernetes-validations: + - message: valid keys for evictionSoftGracePeriod are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'] + rule: self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']) + imageGCHighThresholdPercent: + description: |- + imageGCHighThresholdPercent is the percent of disk usage which triggers image garbage collection. + The value must be between 0 and 100, inclusive, and must be greater than imageGCLowThresholdPercent when both are set. + format: int32 + maximum: 100 + minimum: 0 + type: integer + imageGCLowThresholdPercent: + description: |- + imageGCLowThresholdPercent is the percent of disk usage to which image garbage collection attempts to free. + The value must be between 0 and 100, inclusive, and must be less than imageGCHighThresholdPercent when both are set. + format: int32 + maximum: 100 + minimum: 0 + type: integer + kubeReserved: + additionalProperties: + type: string + description: |- + kubeReserved is a set of ResourceName=ResourceQuantity pairs that describe + resources reserved for kubernetes system components. 
+ Currently only cpu, memory, ephemeral-storage, and pid are supported. + maxProperties: 20 + minProperties: 1 + type: object + x-kubernetes-validations: + - message: valid keys for kubeReserved are ['cpu','memory','ephemeral-storage','pid'] + rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' + || x=='pid') + - message: kubeReserved value cannot be a negative resource quantity + rule: self.all(x, !self[x].startsWith('-')) + - message: kubeReserved values must not be empty + rule: self.all(x, self[x].size() > 0) + maxPods: + description: |- + maxPods is the maximum number of pods that can run on a node. + The value must be between 1 and 2500, inclusive. + format: int32 + maximum: 2500 + minimum: 1 + type: integer + podsPerCore: + description: |- + podsPerCore is the maximum number of pods per core. The value must be between 1 and 2500, + inclusive, and cannot exceed maxPods when both are set. + format: int32 + maximum: 2500 + minimum: 1 + type: integer + systemReserved: + additionalProperties: + type: string + description: |- + systemReserved is a set of ResourceName=ResourceQuantity pairs that describe + resources reserved for non-kubernetes components. + Currently only cpu, memory, ephemeral-storage, and pid are supported. 
+ maxProperties: 20 + minProperties: 1 + type: object + x-kubernetes-validations: + - message: valid keys for systemReserved are ['cpu','memory','ephemeral-storage','pid'] + rule: self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' + || x=='pid') + - message: systemReserved value cannot be a negative resource + quantity + rule: self.all(x, !self[x].startsWith('-')) + - message: systemReserved values must not be empty + rule: self.all(x, self[x].size() > 0) + type: object + x-kubernetes-preserve-unknown-fields: true + x-kubernetes-validations: + - message: imageGCHighThresholdPercent must be greater than imageGCLowThresholdPercent + rule: '!has(self.imageGCHighThresholdPercent) || !has(self.imageGCLowThresholdPercent) + || self.imageGCHighThresholdPercent > self.imageGCLowThresholdPercent' + - message: podsPerCore must not exceed maxPods + rule: '!has(self.podsPerCore) || !has(self.maxPods) || self.podsPerCore + <= self.maxPods' + - message: evictionSoft entry does not have a matching evictionSoftGracePeriod + rule: '!has(self.evictionSoft) || (has(self.evictionSoftGracePeriod) + && self.evictionSoft.all(e, e in self.evictionSoftGracePeriod))' + - message: evictionSoftGracePeriod entry does not have a matching + evictionSoft + rule: '!has(self.evictionSoftGracePeriod) || (has(self.evictionSoft) + && self.evictionSoftGracePeriod.all(e, e in self.evictionSoft))' + - message: evictionSoft threshold must be greater than or equal to + evictionHard threshold for the same signal (soft eviction should + fire before hard) + rule: '!has(self.evictionHard) || !has(self.evictionSoft) || self.evictionHard.all(key, + !(key in self.evictionSoft) || ((self.evictionSoft[key].endsWith(''%'') + && self.evictionHard[key].endsWith(''%'')) ? 
(self.evictionSoft[key].size() + <= 1 || self.evictionHard[key].size() <= 1 || double(self.evictionSoft[key].substring(0, + self.evictionSoft[key].size() - 1)) >= double(self.evictionHard[key].substring(0, + self.evictionHard[key].size() - 1))) : (!(isQuantity(self.evictionSoft[key]) + && isQuantity(self.evictionHard[key])) || quantity(self.evictionSoft[key]).compareTo(quantity(self.evictionHard[key])) + >= 0)))' metadataOptions: description: |- metadataOptions contains parameters for specifying the exposure of the diff --git a/karpenter-operator/controllers/karpenter/karpenter_controller.go b/karpenter-operator/controllers/karpenter/karpenter_controller.go index 3fa06270f4d..9aeb438a996 100644 --- a/karpenter-operator/controllers/karpenter/karpenter_controller.go +++ b/karpenter-operator/controllers/karpenter/karpenter_controller.go @@ -293,6 +293,13 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu } } + // Moved here from hypershift operator. Consumed by our ignition controller to taint our nodes + // on firstboot so our nodes don't get workloads on them until karpenter okays it. Centralized here + // so each nodeclass doesn't need its own separate taint configmap. + if err := r.reconcileTaintConfigMap(ctx, hcp); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to reconcile taint configmap: %w", err) + } + // Reconcile AutoNode status before the release image lookup so node/nodeclaim counts // are always updated even if the release image lookup fails during/after a control plane upgrade. if err := r.reconcileAutoNodeStatus(ctx, hcp); err != nil { @@ -414,6 +421,25 @@ func sumNodeClaimVCPUs(nodeClaims []karpenterv1.NodeClaim, liveNodes map[string] return int32(total) } +// reconcileTaintConfigMap ensures the set-karpenter-taint ConfigMap exists in the HCP namespace. 
+func (r *Reconciler) reconcileTaintConfigMap(ctx context.Context, hcp *hyperv1.HostedControlPlane) error { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: karpenterutil.KarpenterTaintConfigMapName, + Namespace: hcp.Namespace, + }, + } + _, err := r.CreateOrUpdate(ctx, r.ManagementClient, cm, func() error { + manifest, err := karpenterutil.KarpenterTaintConfigManifest() + if err != nil { + return fmt.Errorf("failed to generate taint config manifest: %w", err) + } + cm.Data = map[string]string{"config": manifest} + return nil + }) + return err +} + // reconcileCRDs reconcile the Karpenter CRDs, if onlyCreate is true it uses an only write non cached client. func (r *Reconciler) reconcileCRDs(ctx context.Context, onlyCreate bool) error { log := ctrl.LoggerFrom(ctx) diff --git a/karpenter-operator/controllers/karpenter/karpenter_controller_test.go b/karpenter-operator/controllers/karpenter/karpenter_controller_test.go index 887ac7bd08e..4d1b945535a 100644 --- a/karpenter-operator/controllers/karpenter/karpenter_controller_test.go +++ b/karpenter-operator/controllers/karpenter/karpenter_controller_test.go @@ -25,6 +25,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/log" karpenterv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + "sigs.k8s.io/yaml" "github.com/go-logr/logr/testr" "go.uber.org/mock/gomock" @@ -465,3 +466,91 @@ func nodeClaimWithCapacity(name, nodeName, cpus string) karpenterv1.NodeClaim { } return nc } + +func TestReconcileTaintConfigMap(t *testing.T) { + scheme := api.Scheme + namespace := "clusters-test" + + hcp := &hyperv1.HostedControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hcp", + Namespace: namespace, + }, + } + + t.Run("When taint ConfigMap does not exist it should create it", func(t *testing.T) { + g := NewWithT(t) + ctx := log.IntoContext(t.Context(), testr.New(t)) + + fakeManagementClient := fake.NewClientBuilder().WithScheme(scheme).Build() + r := &Reconciler{ + 
ManagementClient: fakeManagementClient, + CreateOrUpdateProvider: upsert.New(false), + } + + err := r.reconcileTaintConfigMap(ctx, hcp) + g.Expect(err).NotTo(HaveOccurred()) + + cm := &corev1.ConfigMap{} + err = fakeManagementClient.Get(ctx, client.ObjectKey{Name: karpenterutil.KarpenterTaintConfigMapName, Namespace: namespace}, cm) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(cm.Data).To(HaveKey("config")) + var cr map[string]interface{} + err = yaml.Unmarshal([]byte(cm.Data["config"]), &cr) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(cr["apiVersion"]).To(Equal("machineconfiguration.openshift.io/v1")) + g.Expect(cr["kind"]).To(Equal("KubeletConfig")) + metadata, ok := cr["metadata"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + g.Expect(metadata["name"]).To(Equal(karpenterutil.KarpenterTaintConfigMapName)) + spec, ok := cr["spec"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + kubeletConfig, ok := spec["kubeletConfig"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + taints, ok := kubeletConfig["registerWithTaints"].([]interface{}) + g.Expect(ok).To(BeTrue()) + g.Expect(taints).To(HaveLen(len(karpenterutil.KarpenterBaseTaints))) + taint, ok := taints[0].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + g.Expect(taint["key"]).To(Equal(karpenterutil.KarpenterBaseTaints[0].Key)) + g.Expect(taint["value"]).To(Equal(karpenterutil.KarpenterBaseTaints[0].Value)) + g.Expect(taint["effect"]).To(Equal(string(karpenterutil.KarpenterBaseTaints[0].Effect))) + }) + + t.Run("When taint ConfigMap already exists it should be idempotent", func(t *testing.T) { + g := NewWithT(t) + ctx := log.IntoContext(t.Context(), testr.New(t)) + + existingCM := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: karpenterutil.KarpenterTaintConfigMapName, + Namespace: namespace, + }, + Data: map[string]string{"config": "old-data"}, + } + fakeManagementClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(existingCM).Build() + r := 
&Reconciler{ + ManagementClient: fakeManagementClient, + CreateOrUpdateProvider: upsert.New(false), + } + + err := r.reconcileTaintConfigMap(ctx, hcp) + g.Expect(err).NotTo(HaveOccurred()) + + cm := &corev1.ConfigMap{} + err = fakeManagementClient.Get(ctx, client.ObjectKey{Name: karpenterutil.KarpenterTaintConfigMapName, Namespace: namespace}, cm) + g.Expect(err).NotTo(HaveOccurred()) + var cr map[string]interface{} + err = yaml.Unmarshal([]byte(cm.Data["config"]), &cr) + g.Expect(err).NotTo(HaveOccurred()) + spec, ok := cr["spec"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + kubeletConfig, ok := spec["kubeletConfig"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + taints, ok := kubeletConfig["registerWithTaints"].([]interface{}) + g.Expect(ok).To(BeTrue()) + taint, ok := taints[0].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + g.Expect(taint["key"]).To(Equal(karpenterutil.KarpenterBaseTaints[0].Key)) + }) +} diff --git a/karpenter-operator/controllers/karpenterignition/karpenterignition_controller.go b/karpenter-operator/controllers/karpenterignition/karpenterignition_controller.go index 318b720ea9a..e28894a4058 100644 --- a/karpenter-operator/controllers/karpenterignition/karpenterignition_controller.go +++ b/karpenter-operator/controllers/karpenterignition/karpenterignition_controller.go @@ -2,8 +2,10 @@ package karpenterignition import ( "context" + "encoding/json" "errors" "fmt" + "strings" "time" hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" @@ -11,6 +13,7 @@ import ( "github.com/openshift/hypershift/control-plane-operator/controllers/hostedcontrolplane/common" "github.com/openshift/hypershift/hypershift-operator/controllers/nodepool" haproxy "github.com/openshift/hypershift/hypershift-operator/controllers/nodepool/apiserver-haproxy" + "github.com/openshift/hypershift/support/k8sutil" karpenterutil "github.com/openshift/hypershift/support/karpenter" "github.com/openshift/hypershift/support/releaseinfo" 
"github.com/openshift/hypershift/support/supportedversion" @@ -30,10 +33,12 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/cluster" "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" + "sigs.k8s.io/yaml" "github.com/blang/semver" ) @@ -44,6 +49,8 @@ const ( // nodePoolAnnotationCurrentConfigVersion mirrors the annotation from nodepool_controller.go // It's used to track the current config version for outdated token cleanup nodePoolAnnotationCurrentConfigVersion = "hypershift.openshift.io/nodePoolCurrentConfigVersion" + + kubeletConfigFinalizer = "hypershift.openshift.io/karpenter-kubelet-config-finalizer" ) type KarpenterIgnitionReconciler struct { @@ -108,6 +115,11 @@ func (r *KarpenterIgnitionReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{}, fmt.Errorf("failed to get OpenshiftEC2NodeClass: %w", err) } + // Handle deletion: clean up management-cluster ConfigMap before finalizer is removed + if !openshiftEC2NodeClass.DeletionTimestamp.IsZero() { + return r.reconcileDeletedNodeClass(ctx, hcp, openshiftEC2NodeClass) + } + hostedCluster, err := hostedClusterFromHCP(hcp, r.IgnitionEndpoint) if err != nil { return ctrl.Result{}, fmt.Errorf("failed to get HostedCluster: %w", err) @@ -144,6 +156,35 @@ func (r *KarpenterIgnitionReconciler) Reconcile(ctx context.Context, req ctrl.Re skewErr = detectVersionSkew(hostedCluster, version) } + // If spec.kubelet is configured, add a finalizer to clean up the configmap. 
We can't just use owner + // references because this is cross cluster (the configmap lives in the control plane) + if !openshiftEC2NodeClass.Spec.Kubelet.IsZero() { + + if !controllerutil.ContainsFinalizer(openshiftEC2NodeClass, kubeletConfigFinalizer) { + original := openshiftEC2NodeClass.DeepCopy() + controllerutil.AddFinalizer(openshiftEC2NodeClass, kubeletConfigFinalizer) + if err := r.GuestClient.Patch(ctx, openshiftEC2NodeClass, + client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{})); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to add kubelet config finalizer: %w", err) + } + } + } + + if err := r.reconcileKubeletConfigMap(ctx, hcp, openshiftEC2NodeClass); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to reconcile kubelet config configmap: %w", err) + } + + // The reconcile will have deleted the configmap if we make it here, so we can + // remove the finalizer + if openshiftEC2NodeClass.Spec.Kubelet.IsZero() && controllerutil.ContainsFinalizer(openshiftEC2NodeClass, kubeletConfigFinalizer) { + original := openshiftEC2NodeClass.DeepCopy() + controllerutil.RemoveFinalizer(openshiftEC2NodeClass, kubeletConfigFinalizer) + if err := r.GuestClient.Patch(ctx, openshiftEC2NodeClass, + client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{})); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to remove kubelet config finalizer: %w", err) + } + } + if err := r.reconcileNodeClassToken(ctx, hcp, hostedCluster, openshiftEC2NodeClass, releaseImage); err != nil { log.Error(err, "failed to reconcile token for OpenshiftEC2NodeClass", "name", openshiftEC2NodeClass.Name) // Still update version status so conditions are set even when token reconciliation fails. @@ -172,6 +213,45 @@ func (r *KarpenterIgnitionReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{}, nil } +// reconcileDeletedNodeClass handles cleanup when an OpenshiftEC2NodeClass is being deleted. 
+// It deletes the kubelet ConfigMap from the management cluster and removes the finalizer +// to allow the NodeClass deletion to proceed. +func (r *KarpenterIgnitionReconciler) reconcileDeletedNodeClass( + ctx context.Context, + hcp *hyperv1.HostedControlPlane, + openshiftEC2NodeClass *hyperkarpenterv1.OpenshiftEC2NodeClass, +) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + + if !controllerutil.ContainsFinalizer(openshiftEC2NodeClass, kubeletConfigFinalizer) { + return ctrl.Result{}, nil + } + + // Delete the kubelet ConfigMap from the management cluster + configMapName := karpenterutil.KarpenterNodeClassKubeletConfigName(openshiftEC2NodeClass.Name) + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: configMapName, + Namespace: hcp.Namespace, + }, + } + if _, err := k8sutil.DeleteIfNeeded(ctx, r.ManagementClient, cm); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to delete kubelet config configmap %s: %w", configMapName, err) + } + log.Info("Deleted kubelet config ConfigMap", "name", configMapName) + + // Remove the finalizer to allow NodeClass deletion to proceed + original := openshiftEC2NodeClass.DeepCopy() + controllerutil.RemoveFinalizer(openshiftEC2NodeClass, kubeletConfigFinalizer) + if err := r.GuestClient.Patch(ctx, openshiftEC2NodeClass, + client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{})); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to remove kubelet config finalizer: %w", err) + } + log.Info("Removed kubelet config finalizer from OpenshiftEC2NodeClass", "name", openshiftEC2NodeClass.Name) + + return ctrl.Result{}, nil +} + // reconcileNodeClassToken reconciles the ignition token and user-data secrets for an OpenshiftEC2NodeClass. 
func (r *KarpenterIgnitionReconciler) reconcileNodeClassToken( ctx context.Context, @@ -224,6 +304,21 @@ func (r *KarpenterIgnitionReconciler) createInMemoryNodePool( openshiftEC2NodeClass *hyperkarpenterv1.OpenshiftEC2NodeClass, releaseImage string, ) *hyperv1.NodePool { + // When Spec.Kubelet is set, the per-nodeclass kubelet ConfigMap is generated with + // KarpenterBaseTaintMap merged in (so registerWithTaints is always present), so + // set-karpenter-taint must NOT also be included — the MCO bootstrap rejects two + // KubeletConfigs targeting the same MachineConfigPool. + var configRefs []corev1.LocalObjectReference + if !openshiftEC2NodeClass.Spec.Kubelet.IsZero() { + configRefs = []corev1.LocalObjectReference{ + {Name: karpenterutil.KarpenterNodeClassKubeletConfigName(openshiftEC2NodeClass.Name)}, + } + } else { + configRefs = []corev1.LocalObjectReference{ + {Name: karpenterutil.KarpenterTaintConfigMapName}, + } + } + return &hyperv1.NodePool{ ObjectMeta: metav1.ObjectMeta{ Name: karpenterutil.KarpenterNodePoolName(openshiftEC2NodeClass), @@ -240,12 +335,8 @@ func (r *KarpenterIgnitionReconciler) createInMemoryNodePool( Release: hyperv1.Release{ Image: releaseImage, }, - Config: []corev1.LocalObjectReference{ - { - Name: karpenterutil.KarpenterTaintConfigMapName, - }, - }, - Arch: hyperv1.ArchitectureAMD64, // used to find default AMI + Config: configRefs, + Arch: hyperv1.ArchitectureAMD64, // used to find default AMI }, } } @@ -376,6 +467,92 @@ func detectVersionSkew(hostedCluster *hyperv1.HostedCluster, version string) err return supportedversion.ValidateVersionSkew(&hostedClusterVersion, &nodeClassVersion) } +// reconcileKubeletConfigMap creates, updates, or deletes the per-OpenshiftEC2NodeClass KubeletConfig ConfigMap +// in the HCP namespace based on whether the nodeclass has KubeletConfig set. 
+func (r *KarpenterIgnitionReconciler) reconcileKubeletConfigMap( + ctx context.Context, + hcp *hyperv1.HostedControlPlane, + openshiftEC2NodeClass *hyperkarpenterv1.OpenshiftEC2NodeClass, +) error { + configMapName := karpenterutil.KarpenterNodeClassKubeletConfigName(openshiftEC2NodeClass.Name) + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: configMapName, + Namespace: hcp.Namespace, + }, + } + + // Delete the configmap if there are no kubelet settings on the nodeclass + if openshiftEC2NodeClass.Spec.Kubelet.IsZero() { + if _, err := k8sutil.DeleteIfNeeded(ctx, r.ManagementClient, cm); err != nil { + return fmt.Errorf("failed to delete kubelet config configmap %s: %w", configMapName, err) + } + return nil + } + + // Convert user KubeletConfiguration to a plain map (via JSON round-trip to honor json tags + // and omitempty) + nodeClassKubeletRaw, err := json.Marshal(openshiftEC2NodeClass.Spec.Kubelet) + if err != nil { + return fmt.Errorf("failed to marshal user kubelet config: %w", err) + } + var nodeClassKubeletMap map[string]interface{} + if err := json.Unmarshal(nodeClassKubeletRaw, &nodeClassKubeletMap); err != nil { + return fmt.Errorf("failed to unmarshal user kubelet config: %w", err) + } + + // Merge nodeclass with the taint base, our taints go in last so they always win — + // registerWithTaints is not currently user-settable but this ordering ensures our taint + // can never be accidentally overridden + merged := mergeKubeletConfigMaps(nodeClassKubeletMap, karpenterutil.KarpenterBaseTaintMap()) + manifest, err := kubeletConfigManifest(configMapName, merged) + if err != nil { + return fmt.Errorf("failed to generate kubelet config manifest: %w", err) + } + + _, err = r.CreateOrUpdate(ctx, r.ManagementClient, cm, func() error { + if cm.Labels == nil { + cm.Labels = map[string]string{} + } + cm.Labels[karpenterutil.KarpenterNodeClassKubeletConfigLabel] = "true" + cm.Data = map[string]string{ + "config": manifest, + } + return nil + }) + 
return err +} + +// mergeKubeletConfigMaps merges two kubeletConfig maps. Base keys are included unless +// overlay defines the same key, in which case overlay wins. Callers should pass our +// required fields (e.g. taint base) as overlay so they are never clobbered by user config. +func mergeKubeletConfigMaps(base, overlay map[string]interface{}) map[string]interface{} { + result := make(map[string]interface{}, len(base)+len(overlay)) + for k, v := range base { + result[k] = v + } + for k, v := range overlay { + result[k] = v + } + return result +} + +// kubeletConfigManifest serializes a pre-merged kubeletConfig map into a KubeletConfig CR YAML +// string suitable for storage in a ConfigMap "config" key. +func kubeletConfigManifest(name string, kubeletConfig map[string]interface{}) (string, error) { + cr := map[string]interface{}{ + "apiVersion": "machineconfiguration.openshift.io/v1", + "kind": "KubeletConfig", + "metadata": map[string]interface{}{"name": name}, + "spec": map[string]interface{}{"kubeletConfig": kubeletConfig}, + } + out, err := yaml.Marshal(cr) + if err != nil { + return "", err + } + return strings.TrimSuffix(string(out), "\n"), nil +} + // buildConfigGenerator creates a ConfigGenerator for the in-memory NodePool func (r *KarpenterIgnitionReconciler) buildConfigGenerator( ctx context.Context, diff --git a/karpenter-operator/controllers/karpenterignition/karpenterignition_controller_test.go b/karpenter-operator/controllers/karpenterignition/karpenterignition_controller_test.go index 36b57045ef7..19807a66156 100644 --- a/karpenter-operator/controllers/karpenterignition/karpenterignition_controller_test.go +++ b/karpenter-operator/controllers/karpenterignition/karpenterignition_controller_test.go @@ -2,6 +2,7 @@ package karpenterignition import ( "context" + "encoding/json" "fmt" "strings" "testing" @@ -18,6 +19,7 @@ import ( "github.com/openshift/hypershift/support/releaseinfo" "github.com/openshift/hypershift/support/releaseinfo/testutils" 
"github.com/openshift/hypershift/support/thirdparty/library-go/pkg/image/dockerv1client" + "github.com/openshift/hypershift/support/upsert" fakeimagemetadataprovider "github.com/openshift/hypershift/support/util/fakeimagemetadataprovider" configv1 "github.com/openshift/api/config/v1" @@ -25,11 +27,13 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/yaml" "github.com/go-logr/logr/testr" "go.uber.org/mock/gomock" @@ -1139,3 +1143,894 @@ func findCondition(conditions []metav1.Condition, condType string) *metav1.Condi } return nil } + +func TestReconcileKubeletConfigMapOrphanCleanup(t *testing.T) { + scheme := api.Scheme + + baseHCP := func() *hyperv1.HostedControlPlane { + return &hyperv1.HostedControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hcp", + Namespace: testNamespace, + }, + Spec: hyperv1.HostedControlPlaneSpec{ + ReleaseImage: "quay.io/openshift-release-dev/ocp-release:4.17.0-x86_64", + AutoNode: hyperv1.AutoNode{ + Provisioner: hyperv1.ProvisionerConfig{ + Name: hyperv1.ProvisionerKarpenter, + Karpenter: hyperv1.KarpenterConfig{ + Platform: hyperv1.AWSPlatform, + }, + }, + }, + }, + } + } + + kubeletConfig := hyperkarpenterv1.KubeletConfiguration{ + MaxPods: 500, + } + + t.Run("When spec.kubelet is set it should add the finalizer and create the ConfigMap", func(t *testing.T) { + g := NewWithT(t) + + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{Name: testNodeClassName}, + Spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{ + Kubelet: kubeletConfig, + }, + } + hcp := baseHCP() + + fakeManagementClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(hcp).Build() + fakeGuestClient := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(nodeClass). 
+ WithStatusSubresource(&hyperkarpenterv1.OpenshiftEC2NodeClass{}). + Build() + + r := &KarpenterIgnitionReconciler{ + ManagementClient: fakeManagementClient, + GuestClient: fakeGuestClient, + Namespace: testNamespace, + CreateOrUpdateProvider: upsert.New(false), + } + + ctx := log.IntoContext(t.Context(), testr.New(t)) + err := r.reconcileKubeletConfigMap(ctx, hcp, nodeClass) + g.Expect(err).NotTo(HaveOccurred()) + + // Add the finalizer as Reconcile() would + original := nodeClass.DeepCopy() + nodeClass.Finalizers = append(nodeClass.Finalizers, kubeletConfigFinalizer) + err = fakeGuestClient.Patch(ctx, nodeClass, client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{})) + g.Expect(err).NotTo(HaveOccurred()) + + // Verify ConfigMap was created + cm := &corev1.ConfigMap{} + err = fakeManagementClient.Get(ctx, client.ObjectKey{ + Name: karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName), + Namespace: testNamespace, + }, cm) + g.Expect(err).NotTo(HaveOccurred()) + + // Verify finalizer is present on the NodeClass + updated := &hyperkarpenterv1.OpenshiftEC2NodeClass{} + err = fakeGuestClient.Get(ctx, client.ObjectKey{Name: testNodeClassName}, updated) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(updated.Finalizers).To(ContainElement(kubeletConfigFinalizer)) + }) + + t.Run("When spec.kubelet is nil and finalizer is present it should remove the finalizer", func(t *testing.T) { + g := NewWithT(t) + + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: testNodeClassName, + Finalizers: []string{kubeletConfigFinalizer}, + }, + // Spec.Kubelet is nil + } + hcp := baseHCP() + + fakeGuestClient := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(nodeClass). + WithStatusSubresource(&hyperkarpenterv1.OpenshiftEC2NodeClass{}). 
+ Build() + + r := &KarpenterIgnitionReconciler{ + ManagementClient: fake.NewClientBuilder().WithScheme(scheme).WithObjects(hcp).Build(), + GuestClient: fakeGuestClient, + Namespace: testNamespace, + CreateOrUpdateProvider: upsert.New(false), + } + + ctx := log.IntoContext(t.Context(), testr.New(t)) + + // Simulate the finalizer removal branch in Reconcile(): kubelet is nil, finalizer present + original := nodeClass.DeepCopy() + nodeClass.Finalizers = nil + err := r.GuestClient.Patch(ctx, nodeClass, client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{})) + g.Expect(err).NotTo(HaveOccurred()) + + updated := &hyperkarpenterv1.OpenshiftEC2NodeClass{} + err = fakeGuestClient.Get(ctx, client.ObjectKey{Name: testNodeClassName}, updated) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(updated.Finalizers).NotTo(ContainElement(kubeletConfigFinalizer)) + }) + + t.Run("When spec.kubelet is nil and finalizer is absent it should not add the finalizer", func(t *testing.T) { + g := NewWithT(t) + + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{Name: testNodeClassName}, + // Spec.Kubelet is nil, no finalizer + } + hcp := baseHCP() + + fakeManagementClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(hcp).Build() + fakeGuestClient := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(nodeClass). + WithStatusSubresource(&hyperkarpenterv1.OpenshiftEC2NodeClass{}). 
+ Build() + + r := &KarpenterIgnitionReconciler{ + ManagementClient: fakeManagementClient, + GuestClient: fakeGuestClient, + Namespace: testNamespace, + CreateOrUpdateProvider: upsert.New(false), + } + + ctx := log.IntoContext(t.Context(), testr.New(t)) + err := r.reconcileKubeletConfigMap(ctx, hcp, nodeClass) + g.Expect(err).NotTo(HaveOccurred()) + + updated := &hyperkarpenterv1.OpenshiftEC2NodeClass{} + err = fakeGuestClient.Get(ctx, client.ObjectKey{Name: testNodeClassName}, updated) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(updated.Finalizers).NotTo(ContainElement(kubeletConfigFinalizer)) + }) + + // The following tests drive the full Reconcile() path to verify finalizer and + // kubelet ConfigMap behavior end-to-end, rather than calling sub-functions directly. + + fullHCP := func() *hyperv1.HostedControlPlane { + return &hyperv1.HostedControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hcp", + Namespace: testNamespace, + }, + Spec: hyperv1.HostedControlPlaneSpec{ + ReleaseImage: "quay.io/openshift-release-dev/ocp-release:4.17.0-x86_64", + InfraID: "test-infra", + ClusterID: "test-cluster-id", + Platform: hyperv1.PlatformSpec{ + Type: hyperv1.AWSPlatform, + AWS: &hyperv1.AWSPlatformSpec{ + Region: "us-east-1", + }, + }, + Networking: hyperv1.ClusterNetworking{ + ServiceNetwork: []hyperv1.ServiceNetworkEntry{ + {CIDR: *ipnet.MustParseCIDR("172.31.0.0/16")}, + }, + }, + PullSecret: corev1.LocalObjectReference{ + Name: "pull-secret", + }, + AutoNode: hyperv1.AutoNode{ + Provisioner: hyperv1.ProvisionerConfig{ + Name: hyperv1.ProvisionerKarpenter, + Karpenter: hyperv1.KarpenterConfig{ + Platform: hyperv1.AWSPlatform, + }, + }, + }, + }, + Status: hyperv1.HostedControlPlaneStatus{ + Version: "4.17.0", + VersionStatus: &hyperv1.ClusterVersionStatus{ + Desired: configv1.Release{ + Version: "4.17.0", + }, + History: []configv1.UpdateHistory{ + { + State: configv1.CompletedUpdate, + Version: "4.17.0", + CompletionTime: &metav1.Time{Time: time.Now()}, + 
}, + }, + }, + KubeConfig: &hyperv1.KubeconfigSecretRef{ + Name: "admin-kubeconfig", + }, + }, + } + } + + fullManagementObjects := func(hcp *hyperv1.HostedControlPlane, extra ...client.Object) []client.Object { + objs := []client.Object{ + hcp, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "pull-secret", Namespace: testNamespace}, + Data: map[string][]byte{corev1.DockerConfigJsonKey: []byte(`{"auths":{}}`)}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "admin-kubeconfig", Namespace: testNamespace}, + Data: map[string][]byte{ + "kubeconfig": []byte(`apiVersion: v1 +clusters: +- cluster: + server: https://api.test-cluster.example.com:6443 + name: cluster +contexts: +- context: + cluster: cluster + user: "" + namespace: default + name: cluster +current-context: cluster +kind: Config`), + }, + }, + &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "core-ignition-config-1", Namespace: testNamespace, + Labels: map[string]string{"hypershift.openshift.io/core-ignition-config": "true"}, + }, + }, + &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "core-ignition-config-2", Namespace: testNamespace, + Labels: map[string]string{"hypershift.openshift.io/core-ignition-config": "true"}, + }, + }, + &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "set-karpenter-taint", Namespace: testNamespace, + }, + Data: map[string]string{"config": ""}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "ignition-server-ca-cert", Namespace: testNamespace}, + Data: map[string][]byte{"tls.crt": []byte("fake-ca-cert")}, + }, + } + return append(objs, extra...) 
+ } + + fullReconciler := func(t *testing.T, hcp *hyperv1.HostedControlPlane, nodeClass *hyperkarpenterv1.OpenshiftEC2NodeClass, extraMgmt ...client.Object) (*KarpenterIgnitionReconciler, client.Client) { + t.Helper() + mockCtrl := gomock.NewController(t) + mockedReleaseProvider := releaseinfo.NewMockProvider(mockCtrl) + mockedReleaseProvider.EXPECT().Lookup(gomock.Any(), gomock.Any(), gomock.Any()). + Return(testutils.InitReleaseImageOrDie("4.17.0"), nil).AnyTimes() + + fakeImgProvider := &fakeimagemetadataprovider.FakeRegistryClientImageMetadataProvider{ + Result: &dockerv1client.DockerImageConfig{ + Config: &docker10.DockerConfig{ + Labels: map[string]string{ + "io.openshift.hypershift.control-plane-operator-skips-haproxy": "true", + }, + }, + }, + } + + fakeManagementClient := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(fullManagementObjects(hcp, extraMgmt...)...).Build() + fakeGuestClient := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(nodeClass). + WithStatusSubresource(&hyperkarpenterv1.OpenshiftEC2NodeClass{}). 
+ Build() + + r := &KarpenterIgnitionReconciler{ + ManagementClient: fakeManagementClient, + GuestClient: fakeGuestClient, + ReleaseProvider: mockedReleaseProvider, + VersionResolver: &fakeVersionResolver{}, + ImageMetadataProvider: fakeImgProvider, + HypershiftOperatorImage: "test-hypershift-operator-image", + IgnitionEndpoint: testIgnitionEndpoint, + Namespace: testNamespace, + CreateOrUpdateProvider: upsert.New(false), + } + return r, fakeGuestClient + } + + t.Run("When Reconcile runs with spec.kubelet set it should add the finalizer", func(t *testing.T) { + g := NewWithT(t) + + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{Name: testNodeClassName}, + Spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{ + Kubelet: hyperkarpenterv1.KubeletConfiguration{ + MaxPods: 500, + }, + }, + } + + r, fakeGuestClient := fullReconciler(t, fullHCP(), nodeClass) + ctx := log.IntoContext(t.Context(), testr.New(t)) + + _, err := r.Reconcile(ctx, ctrl.Request{ + NamespacedName: client.ObjectKey{Name: testNodeClassName}, + }) + g.Expect(err).NotTo(HaveOccurred()) + + updated := &hyperkarpenterv1.OpenshiftEC2NodeClass{} + err = fakeGuestClient.Get(ctx, client.ObjectKey{Name: testNodeClassName}, updated) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(updated.Finalizers).To(ContainElement(kubeletConfigFinalizer)) + + cm := &corev1.ConfigMap{} + err = r.ManagementClient.Get(ctx, client.ObjectKey{ + Name: karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName), + Namespace: testNamespace, + }, cm) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("When Reconcile runs with spec.kubelet cleared it should remove the finalizer and delete the ConfigMap", func(t *testing.T) { + g := NewWithT(t) + + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: testNodeClassName, + Finalizers: []string{kubeletConfigFinalizer}, + }, + } + + existingCM := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + 
Name: karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName), + Namespace: testNamespace, + }, + Data: map[string]string{"config": "old-data"}, + } + + r, fakeGuestClient := fullReconciler(t, fullHCP(), nodeClass, existingCM) + ctx := log.IntoContext(t.Context(), testr.New(t)) + + _, err := r.Reconcile(ctx, ctrl.Request{ + NamespacedName: client.ObjectKey{Name: testNodeClassName}, + }) + g.Expect(err).NotTo(HaveOccurred()) + + updated := &hyperkarpenterv1.OpenshiftEC2NodeClass{} + err = fakeGuestClient.Get(ctx, client.ObjectKey{Name: testNodeClassName}, updated) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(updated.Finalizers).NotTo(ContainElement(kubeletConfigFinalizer)) + + cm := &corev1.ConfigMap{} + err = r.ManagementClient.Get(ctx, client.ObjectKey{ + Name: karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName), + Namespace: testNamespace, + }, cm) + g.Expect(err).To(HaveOccurred(), "ConfigMap should be deleted") + }) + + t.Run("When Reconcile runs with overflow kubelet fields they should appear in the ConfigMap", func(t *testing.T) { + g := NewWithT(t) + + kubeletJSON := `{"maxPods": 500, "registryPullQPS": 5, "containerLogMaxSize": "50Mi"}` + var kubeletCfg hyperkarpenterv1.KubeletConfiguration + g.Expect(json.Unmarshal([]byte(kubeletJSON), &kubeletCfg)).To(Succeed()) + + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{Name: testNodeClassName}, + Spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{ + Kubelet: kubeletCfg, + }, + } + + r, fakeGuestClient := fullReconciler(t, fullHCP(), nodeClass) + ctx := log.IntoContext(t.Context(), testr.New(t)) + + _, err := r.Reconcile(ctx, ctrl.Request{ + NamespacedName: client.ObjectKey{Name: testNodeClassName}, + }) + g.Expect(err).NotTo(HaveOccurred()) + + // Verify finalizer was added + updated := &hyperkarpenterv1.OpenshiftEC2NodeClass{} + err = fakeGuestClient.Get(ctx, client.ObjectKey{Name: testNodeClassName}, updated) + 
g.Expect(err).NotTo(HaveOccurred()) + g.Expect(updated.Finalizers).To(ContainElement(kubeletConfigFinalizer)) + + // Verify ConfigMap content includes typed and overflow fields + cm := &corev1.ConfigMap{} + err = r.ManagementClient.Get(ctx, client.ObjectKey{ + Name: karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName), + Namespace: testNamespace, + }, cm) + g.Expect(err).NotTo(HaveOccurred()) + + var cr map[string]interface{} + err = yaml.Unmarshal([]byte(cm.Data["config"]), &cr) + g.Expect(err).NotTo(HaveOccurred()) + spec, ok := cr["spec"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + kubeletConfig, ok := spec["kubeletConfig"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + + g.Expect(kubeletConfig["maxPods"]).To(BeEquivalentTo(500)) + g.Expect(kubeletConfig["registryPullQPS"]).To(BeEquivalentTo(5)) + g.Expect(kubeletConfig["containerLogMaxSize"]).To(Equal("50Mi")) + + taints, ok := kubeletConfig["registerWithTaints"].([]interface{}) + g.Expect(ok).To(BeTrue()) + g.Expect(taints).NotTo(BeEmpty()) + }) + + t.Run("When NodeClass is deleted with finalizer it should delete the ConfigMap and remove the finalizer", func(t *testing.T) { + g := NewWithT(t) + + // Use a sentinel finalizer alongside ours so the fake client doesn't auto-delete + // the object after reconcileDeletedNodeClass removes our finalizer. 
+ now := metav1.Now() + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: testNodeClassName, + Finalizers: []string{kubeletConfigFinalizer, "other-controller-finalizer"}, + DeletionTimestamp: &now, + }, + Spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{ + Kubelet: kubeletConfig, + }, + } + hcp := baseHCP() + + existingCM := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName), + Namespace: testNamespace, + }, + Data: map[string]string{"config": "existing-data"}, + } + + fakeManagementClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(hcp, existingCM).Build() + fakeGuestClient := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(nodeClass). + WithStatusSubresource(&hyperkarpenterv1.OpenshiftEC2NodeClass{}). + Build() + + r := &KarpenterIgnitionReconciler{ + ManagementClient: fakeManagementClient, + GuestClient: fakeGuestClient, + Namespace: testNamespace, + CreateOrUpdateProvider: upsert.New(false), + } + + ctx := log.IntoContext(t.Context(), testr.New(t)) + result, err := r.reconcileDeletedNodeClass(ctx, hcp, nodeClass) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(result).To(Equal(ctrl.Result{})) + + // ConfigMap should be deleted + cm := &corev1.ConfigMap{} + err = fakeManagementClient.Get(ctx, client.ObjectKey{ + Name: karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName), + Namespace: testNamespace, + }, cm) + g.Expect(err).To(HaveOccurred()) + g.Expect(err.Error()).To(ContainSubstring("not found")) + + // Our finalizer should be removed; the sentinel remains so the object still exists + updated := &hyperkarpenterv1.OpenshiftEC2NodeClass{} + err = fakeGuestClient.Get(ctx, client.ObjectKey{Name: testNodeClassName}, updated) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(updated.Finalizers).NotTo(ContainElement(kubeletConfigFinalizer)) + }) + + t.Run("When NodeClass is deleted without the finalizer it 
should be a no-op", func(t *testing.T) { + g := NewWithT(t) + + // Use a sentinel finalizer so the fake client accepts the object with DeletionTimestamp. + now := metav1.Now() + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: testNodeClassName, + DeletionTimestamp: &now, + Finalizers: []string{"other-controller-finalizer"}, + // No kubeletConfigFinalizer + }, + } + hcp := baseHCP() + + existingCM := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName), + Namespace: testNamespace, + }, + Data: map[string]string{"config": "should-remain"}, + } + + fakeManagementClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(hcp, existingCM).Build() + fakeGuestClient := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(nodeClass). + WithStatusSubresource(&hyperkarpenterv1.OpenshiftEC2NodeClass{}). + Build() + + r := &KarpenterIgnitionReconciler{ + ManagementClient: fakeManagementClient, + GuestClient: fakeGuestClient, + Namespace: testNamespace, + CreateOrUpdateProvider: upsert.New(false), + } + + ctx := log.IntoContext(t.Context(), testr.New(t)) + result, err := r.reconcileDeletedNodeClass(ctx, hcp, nodeClass) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(result).To(Equal(ctrl.Result{})) + + // ConfigMap should remain untouched + cm := &corev1.ConfigMap{} + err = fakeManagementClient.Get(ctx, client.ObjectKey{ + Name: karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName), + Namespace: testNamespace, + }, cm) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(cm.Data["config"]).To(Equal("should-remain")) + }) + + t.Run("When NodeClass is deleted with finalizer but ConfigMap is already absent it should remove the finalizer without error", func(t *testing.T) { + g := NewWithT(t) + + // Use a sentinel finalizer alongside ours so the fake client doesn't auto-delete + // the object after reconcileDeletedNodeClass removes our 
finalizer. + now := metav1.Now() + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: testNodeClassName, + Finalizers: []string{kubeletConfigFinalizer, "other-controller-finalizer"}, + DeletionTimestamp: &now, + }, + } + hcp := baseHCP() + + // No ConfigMap pre-created + fakeManagementClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(hcp).Build() + fakeGuestClient := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(nodeClass). + WithStatusSubresource(&hyperkarpenterv1.OpenshiftEC2NodeClass{}). + Build() + + r := &KarpenterIgnitionReconciler{ + ManagementClient: fakeManagementClient, + GuestClient: fakeGuestClient, + Namespace: testNamespace, + CreateOrUpdateProvider: upsert.New(false), + } + + ctx := log.IntoContext(t.Context(), testr.New(t)) + result, err := r.reconcileDeletedNodeClass(ctx, hcp, nodeClass) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(result).To(Equal(ctrl.Result{})) + + // Our finalizer should be removed even though the ConfigMap was already gone + updated := &hyperkarpenterv1.OpenshiftEC2NodeClass{} + err = fakeGuestClient.Get(ctx, client.ObjectKey{Name: testNodeClassName}, updated) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(updated.Finalizers).NotTo(ContainElement(kubeletConfigFinalizer)) + }) +} + +func TestCreateInMemoryNodePool(t *testing.T) { + r := &KarpenterIgnitionReconciler{} + hcp := &hyperv1.HostedControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hcp", + Namespace: testNamespace, + }, + Spec: hyperv1.HostedControlPlaneSpec{ + ReleaseImage: "quay.io/openshift-release-dev/ocp-release:4.17.0-x86_64", + }, + } + + t.Run("When kubelet config is nil it should only have taint config ref", func(t *testing.T) { + g := NewWithT(t) + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: testNodeClassName, + }, + // Spec.Kubelet is left at its zero value (no kubelet config) + } + + np := r.createInMemoryNodePool(hcp, nodeClass, hcp.Spec.ReleaseImage) + 
g.Expect(np.Spec.Config).To(HaveLen(1)) + g.Expect(np.Spec.Config[0].Name).To(Equal(karpenterutil.KarpenterTaintConfigMapName)) + g.Expect(np.Name).To(Equal(karpenterutil.KarpenterNodePoolName(nodeClass))) + g.Expect(np.Namespace).To(Equal(hcp.Namespace)) + g.Expect(np.Labels).To(HaveKeyWithValue(karpenterutil.ManagedByKarpenterLabel, "true")) + g.Expect(np.Spec.ClusterName).To(Equal(hcp.Name)) + g.Expect(np.Spec.Replicas).To(Equal(ptr.To[int32](0))) + g.Expect(np.Spec.Release.Image).To(Equal(hcp.Spec.ReleaseImage)) + g.Expect(np.Spec.Arch).To(Equal(hyperv1.ArchitectureAMD64)) + }) + + t.Run("When kubelet config is set it should include only the per-nodeclass kubelet config ref", func(t *testing.T) { + g := NewWithT(t) + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: testNodeClassName, + }, + Spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{ + Kubelet: hyperkarpenterv1.KubeletConfiguration{ + MaxPods: 500, + }, + }, + } + + np := r.createInMemoryNodePool(hcp, nodeClass, hcp.Spec.ReleaseImage) + + // When Spec.Kubelet is set, only the per-nodeclass kubelet config ref is included. + // set-karpenter-taint is omitted because the taint is merged into the per-nodeclass + // manifest via kubeletConfigManifest to avoid two KubeletConfigs targeting + // the same MachineConfigPool, which the MCO bootstrap rejects. 
+ g.Expect(np.Spec.Config).To(HaveLen(1)) + g.Expect(np.Spec.Config[0].Name).To(Equal(karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName))) + g.Expect(np.Name).To(Equal(karpenterutil.KarpenterNodePoolName(nodeClass))) + g.Expect(np.Namespace).To(Equal(hcp.Namespace)) + g.Expect(np.Labels).To(HaveKeyWithValue(karpenterutil.ManagedByKarpenterLabel, "true")) + g.Expect(np.Spec.ClusterName).To(Equal(hcp.Name)) + g.Expect(np.Spec.Replicas).To(Equal(ptr.To[int32](0))) + g.Expect(np.Spec.Release.Image).To(Equal(hcp.Spec.ReleaseImage)) + g.Expect(np.Spec.Arch).To(Equal(hyperv1.ArchitectureAMD64)) + }) +} + +func TestReconcileKubeletConfigMap(t *testing.T) { + t.Run("When kubelet config is nil it should delete the config map", func(t *testing.T) { + g := NewWithT(t) + scheme := api.Scheme + + existingCM := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName), + Namespace: testNamespace, + }, + Data: map[string]string{"config": "old-data"}, + } + fakeManagementClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(existingCM). 
+ Build() + + r := &KarpenterIgnitionReconciler{ + ManagementClient: fakeManagementClient, + CreateOrUpdateProvider: upsert.New(false), + } + hcp := &hyperv1.HostedControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hcp", + Namespace: testNamespace, + }, + } + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{Name: testNodeClassName}, + // Spec.Kubelet is left at its zero value (no kubelet config) + } + + ctx := log.IntoContext(t.Context(), testr.New(t)) + err := r.reconcileKubeletConfigMap(ctx, hcp, nodeClass) + g.Expect(err).NotTo(HaveOccurred()) + + // ConfigMap should be deleted + cm := &corev1.ConfigMap{} + err = fakeManagementClient.Get(ctx, client.ObjectKey{ + Name: karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName), + Namespace: testNamespace, + }, cm) + g.Expect(err).To(HaveOccurred()) + g.Expect(err.Error()).To(ContainSubstring("not found")) + }) + + t.Run("When kubelet config is set it should create config map with manifest including taint", func(t *testing.T) { + g := NewWithT(t) + scheme := api.Scheme + + fakeManagementClient := fake.NewClientBuilder(). + WithScheme(scheme). 
+ Build() + + r := &KarpenterIgnitionReconciler{ + ManagementClient: fakeManagementClient, + CreateOrUpdateProvider: upsert.New(false), + } + hcp := &hyperv1.HostedControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hcp", + Namespace: testNamespace, + }, + } + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{Name: testNodeClassName}, + Spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{ + Kubelet: hyperkarpenterv1.KubeletConfiguration{ + MaxPods: 500, + }, + }, + } + + ctx := log.IntoContext(t.Context(), testr.New(t)) + err := r.reconcileKubeletConfigMap(ctx, hcp, nodeClass) + g.Expect(err).NotTo(HaveOccurred()) + + cm := &corev1.ConfigMap{} + err = fakeManagementClient.Get(ctx, client.ObjectKey{ + Name: karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName), + Namespace: testNamespace, + }, cm) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(cm.Name).To(Equal(karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName))) + g.Expect(cm.Namespace).To(Equal(testNamespace)) + g.Expect(cm.Labels).To(HaveKeyWithValue(karpenterutil.KarpenterNodeClassKubeletConfigLabel, "true")) + g.Expect(cm.Data).To(HaveKey("config")) + var cr map[string]interface{} + err = yaml.Unmarshal([]byte(cm.Data["config"]), &cr) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(cr["apiVersion"]).To(Equal("machineconfiguration.openshift.io/v1")) + g.Expect(cr["kind"]).To(Equal("KubeletConfig")) + spec, ok := cr["spec"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + kubeletConfig, ok := spec["kubeletConfig"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + g.Expect(kubeletConfig["maxPods"]).To(BeEquivalentTo(500)) + // The taint must be merged in so that set-karpenter-taint can be omitted from configRefs + // without losing the registerWithTaints behavior. 
+ taints, ok := kubeletConfig["registerWithTaints"].([]interface{}) + g.Expect(ok).To(BeTrue()) + taint, ok := taints[0].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + g.Expect(taint["key"]).To(Equal(karpenterutil.KarpenterBaseTaints[0].Key)) + }) + + t.Run("When kubelet config has overflow fields they should appear in the config map manifest", func(t *testing.T) { + g := NewWithT(t) + scheme := api.Scheme + + fakeManagementClient := fake.NewClientBuilder(). + WithScheme(scheme). + Build() + + r := &KarpenterIgnitionReconciler{ + ManagementClient: fakeManagementClient, + CreateOrUpdateProvider: upsert.New(false), + } + hcp := &hyperv1.HostedControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hcp", + Namespace: testNamespace, + }, + } + + // Create KubeletConfiguration with both typed and overflow fields + kubeletJSON := `{"maxPods": 500, "registryPullQPS": 5, "containerLogMaxSize": "50Mi"}` + var kubeletCfg hyperkarpenterv1.KubeletConfiguration + g.Expect(json.Unmarshal([]byte(kubeletJSON), &kubeletCfg)).To(Succeed()) + + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{Name: testNodeClassName}, + Spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{ + Kubelet: kubeletCfg, + }, + } + + ctx := log.IntoContext(t.Context(), testr.New(t)) + err := r.reconcileKubeletConfigMap(ctx, hcp, nodeClass) + g.Expect(err).NotTo(HaveOccurred()) + + cm := &corev1.ConfigMap{} + err = fakeManagementClient.Get(ctx, client.ObjectKey{ + Name: karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName), + Namespace: testNamespace, + }, cm) + g.Expect(err).NotTo(HaveOccurred()) + + var cr map[string]interface{} + err = yaml.Unmarshal([]byte(cm.Data["config"]), &cr) + g.Expect(err).NotTo(HaveOccurred()) + spec, ok := cr["spec"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + kubeletConfig, ok := spec["kubeletConfig"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + + // Typed field + 
g.Expect(kubeletConfig["maxPods"]).To(BeEquivalentTo(500)) + // Overflow fields must survive the round-trip + g.Expect(kubeletConfig["registryPullQPS"]).To(BeEquivalentTo(5)) + g.Expect(kubeletConfig["containerLogMaxSize"]).To(Equal("50Mi")) + // Taint must still be merged + taints, ok := kubeletConfig["registerWithTaints"].([]interface{}) + g.Expect(ok).To(BeTrue()) + g.Expect(taints).NotTo(BeEmpty()) + }) + + t.Run("When kubelet config is nil and config map does not exist it should not return an error", func(t *testing.T) { + g := NewWithT(t) + scheme := api.Scheme + + fakeManagementClient := fake.NewClientBuilder(). + WithScheme(scheme). + Build() + + r := &KarpenterIgnitionReconciler{ + ManagementClient: fakeManagementClient, + CreateOrUpdateProvider: upsert.New(false), + } + hcp := &hyperv1.HostedControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hcp", + Namespace: testNamespace, + }, + } + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{Name: testNodeClassName}, + // Spec.Kubelet is left at its zero value (no kubelet config) + } + + ctx := log.IntoContext(t.Context(), testr.New(t)) + err := r.reconcileKubeletConfigMap(ctx, hcp, nodeClass) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("When kubelet config is set and config map already exists it should update the config map", func(t *testing.T) { + g := NewWithT(t) + scheme := api.Scheme + + existingCM := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName), + Namespace: testNamespace, + }, + Data: map[string]string{"config": "stale-data-maxPods-100"}, + } + fakeManagementClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(existingCM). 
+ Build() + + r := &KarpenterIgnitionReconciler{ + ManagementClient: fakeManagementClient, + CreateOrUpdateProvider: upsert.New(false), + } + hcp := &hyperv1.HostedControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hcp", + Namespace: testNamespace, + }, + } + nodeClass := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{Name: testNodeClassName}, + Spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{ + Kubelet: hyperkarpenterv1.KubeletConfiguration{ + MaxPods: 250, + }, + }, + } + + ctx := log.IntoContext(t.Context(), testr.New(t)) + err := r.reconcileKubeletConfigMap(ctx, hcp, nodeClass) + g.Expect(err).NotTo(HaveOccurred()) + + cm := &corev1.ConfigMap{} + err = fakeManagementClient.Get(ctx, client.ObjectKey{ + Name: karpenterutil.KarpenterNodeClassKubeletConfigName(testNodeClassName), + Namespace: testNamespace, + }, cm) + g.Expect(err).NotTo(HaveOccurred()) + var cr map[string]interface{} + err = yaml.Unmarshal([]byte(cm.Data["config"]), &cr) + g.Expect(err).NotTo(HaveOccurred()) + spec, ok := cr["spec"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + kubeletConfig, ok := spec["kubeletConfig"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + g.Expect(kubeletConfig["maxPods"]).To(BeEquivalentTo(250)) + taints, ok := kubeletConfig["registerWithTaints"].([]interface{}) + g.Expect(ok).To(BeTrue()) + taint, ok := taints[0].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + g.Expect(taint["key"]).To(Equal(karpenterutil.KarpenterBaseTaints[0].Key)) + }) +} diff --git a/karpenter-operator/controllers/nodeclass/ec2_nodeclass_controller.go b/karpenter-operator/controllers/nodeclass/ec2_nodeclass_controller.go index 5350cca21fb..e7153ada5ba 100644 --- a/karpenter-operator/controllers/nodeclass/ec2_nodeclass_controller.go +++ b/karpenter-operator/controllers/nodeclass/ec2_nodeclass_controller.go @@ -295,6 +295,7 @@ func reconcileEC2NodeClass(ctx context.Context, ec2NodeClass *awskarpenterv1.EC2 InstanceStorePolicy: 
karpenterInstanceStorePolicyFromNodeClassSpec(openshiftEC2NodeClass.Spec), MetadataOptions: karpenterMetadataOptionsFromNodeClassSpec(openshiftEC2NodeClass.Spec), CapacityReservationSelectorTerms: karpenterCapacityReservationSelectorTermsFromNodeClassSpec(openshiftEC2NodeClass.Spec), + Kubelet: karpenterKubeletConfigurationFromNodeClassSpec(openshiftEC2NodeClass.Spec), } // Set instance profile from HostedCluster annotation (platform-controlled) diff --git a/karpenter-operator/controllers/nodeclass/karpenter_util.go b/karpenter-operator/controllers/nodeclass/karpenter_util.go index 22824689233..124b588c48e 100644 --- a/karpenter-operator/controllers/nodeclass/karpenter_util.go +++ b/karpenter-operator/controllers/nodeclass/karpenter_util.go @@ -3,12 +3,14 @@ package nodeclass import ( "fmt" "strings" + "time" hyperkarpenterv1 "github.com/openshift/hypershift/api/karpenter/v1" awskarpenterv1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" ) @@ -157,9 +159,65 @@ func volumeTypeToKarpenter(vt hyperkarpenterv1.VolumeType) *string { return &v } +func karpenterKubeletConfigurationFromNodeClassSpec(spec hyperkarpenterv1.OpenshiftEC2NodeClassSpec) *awskarpenterv1.KubeletConfiguration { + if !spec.Kubelet.HasTypedFields() { + return nil + } + return &awskarpenterv1.KubeletConfiguration{ + ImageGCHighThresholdPercent: spec.Kubelet.ImageGCHighThresholdPercent, + ImageGCLowThresholdPercent: spec.Kubelet.ImageGCLowThresholdPercent, + MaxPods: ptrIfNonZero(spec.Kubelet.MaxPods), + CPUCFSQuota: spec.Kubelet.CPUCFSQuota, + EvictionHard: evictionThresholdMapToStringMap(spec.Kubelet.EvictionHard), + EvictionSoft: evictionThresholdMapToStringMap(spec.Kubelet.EvictionSoft), + EvictionSoftGracePeriod: evictionSoftGracePeriodToDuration(spec.Kubelet.EvictionSoftGracePeriod), + EvictionMaxPodGracePeriod: spec.Kubelet.EvictionMaxPodGracePeriod, + PodsPerCore: 
ptrIfNonZero(spec.Kubelet.PodsPerCore), + SystemReserved: spec.Kubelet.SystemReserved, + KubeReserved: spec.Kubelet.KubeReserved, + } +} + +// evictionThresholdMapToStringMap converts our EvictionThreshold map to a plain string map. +// EvictionThreshold is a type definition (not a type alias) because controller-gen's deepcopy +// generator doesn't handle *types.Alias (https://github.com/kubernetes-sigs/controller-tools/issues/988), +// so we can't use `type EvictionThreshold = string` which would make this copy unnecessary. +func evictionThresholdMapToStringMap(m map[string]hyperkarpenterv1.EvictionThreshold) map[string]string { + if m == nil { + return nil + } + result := make(map[string]string, len(m)) + for k, v := range m { + result[k] = string(v) + } + return result +} + +func evictionSoftGracePeriodToDuration(m map[string]string) map[string]metav1.Duration { + if m == nil { + return nil + } + result := make(map[string]metav1.Duration, len(m)) + for k, v := range m { + d, err := time.ParseDuration(v) + if err != nil { + continue + } + result[k] = metav1.Duration{Duration: d} + } + return result +} + func ptrIfNonEmpty(s string) *string { if s == "" { return nil } return &s } + +func ptrIfNonZero(v int32) *int32 { + if v == 0 { + return nil + } + return ptr.To(v) +} diff --git a/karpenter-operator/controllers/nodeclass/karpenter_util_test.go b/karpenter-operator/controllers/nodeclass/karpenter_util_test.go new file mode 100644 index 00000000000..a490e047716 --- /dev/null +++ b/karpenter-operator/controllers/nodeclass/karpenter_util_test.go @@ -0,0 +1,113 @@ +package nodeclass + +import ( + "reflect" + "testing" + "time" + + hyperkarpenterv1 "github.com/openshift/hypershift/api/karpenter/v1" + + awskarpenterv1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/utils/ptr" +) + +func TestKarpenterKubeletConfigurationFromNodeClassSpec(t *testing.T) { + testCases := 
[]struct { + name string + spec hyperkarpenterv1.OpenshiftEC2NodeClassSpec + expected *awskarpenterv1.KubeletConfiguration + }{ + { + name: "When Kubelet is nil it should return nil", + spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{}, + expected: nil, + }, + { + name: "When all karpenter-mapped fields are set it should map them", + spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{ + Kubelet: hyperkarpenterv1.KubeletConfiguration{ + MaxPods: 110, + PodsPerCore: 10, + SystemReserved: map[string]string{ + "cpu": "100m", + "memory": "256Mi", + }, + KubeReserved: map[string]string{ + "cpu": "200m", + "memory": "512Mi", + }, + EvictionHard: map[string]hyperkarpenterv1.EvictionThreshold{ + "memory.available": "100Mi", + }, + EvictionSoft: map[string]hyperkarpenterv1.EvictionThreshold{ + "memory.available": "200Mi", + }, + EvictionSoftGracePeriod: map[string]string{ + "memory.available": "30s", + }, + EvictionMaxPodGracePeriod: ptr.To(int32(60)), + ImageGCHighThresholdPercent: ptr.To(int32(85)), + ImageGCLowThresholdPercent: ptr.To(int32(80)), + CPUCFSQuota: ptr.To(true), + }, + }, + expected: &awskarpenterv1.KubeletConfiguration{ + MaxPods: ptr.To(int32(110)), + PodsPerCore: ptr.To(int32(10)), + SystemReserved: map[string]string{ + "cpu": "100m", + "memory": "256Mi", + }, + KubeReserved: map[string]string{ + "cpu": "200m", + "memory": "512Mi", + }, + EvictionHard: map[string]string{ + "memory.available": "100Mi", + }, + EvictionSoft: map[string]string{ + "memory.available": "200Mi", + }, + EvictionSoftGracePeriod: map[string]metav1.Duration{ + "memory.available": {Duration: 30 * time.Second}, + }, + EvictionMaxPodGracePeriod: ptr.To(int32(60)), + ImageGCHighThresholdPercent: ptr.To(int32(85)), + ImageGCLowThresholdPercent: ptr.To(int32(80)), + CPUCFSQuota: ptr.To(true), + }, + }, + { + name: "When only some fields are set it should map only those", + spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{ + Kubelet: hyperkarpenterv1.KubeletConfiguration{ + MaxPods: 50, + }, 
+ }, + expected: &awskarpenterv1.KubeletConfiguration{ + MaxPods: ptr.To(int32(50)), + }, + }, + { + name: "When only overflow fields are set it should return nil", + spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{ + Kubelet: hyperkarpenterv1.KubeletConfiguration{ + Overflow: runtime.RawExtension{Raw: []byte(`{"podPidsLimit":4096}`)}, + }, + }, + expected: nil, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := karpenterKubeletConfigurationFromNodeClassSpec(tc.spec) + if !reflect.DeepEqual(result, tc.expected) { + t.Errorf("expected %+v, got %+v", tc.expected, result) + } + }) + } +} diff --git a/support/karpenter/karpenter.go b/support/karpenter/karpenter.go index add76fc0969..e8a4b5ae246 100644 --- a/support/karpenter/karpenter.go +++ b/support/karpenter/karpenter.go @@ -4,11 +4,15 @@ import ( "context" "errors" "fmt" + "strings" hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" hyperkarpenterv1 "github.com/openshift/hypershift/api/karpenter/v1" + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" ) const ( @@ -18,6 +22,10 @@ const ( // ManagedByKarpenterLabel is a label set on the userData secrets as being managed by Karpenter Operator ManagedByKarpenterLabel = "hypershift.openshift.io/managed-by-karpenter" + + // KarpenterNodeClassKubeletConfigLabel is a label set on per-OpenshiftEC2NodeClass KubeletConfig ConfigMaps + // in the HCP namespace so they can be discovered. + KarpenterNodeClassKubeletConfigLabel = "hypershift.openshift.io/karpenter-nodeclass-kubelet-config" ) const ( @@ -28,6 +36,52 @@ const ( KarpenterSubnetsConfigMapName = "karpenter-subnets" ) +// KarpenterBaseTaints is the set of taints applied to nodes registered by Karpenter. 
+var KarpenterBaseTaints = []corev1.Taint{ + { + Key: "karpenter.sh/unregistered", + Value: "true", + Effect: corev1.TaintEffectNoExecute, + }, +} + +// KarpenterBaseTaintMap returns the registerWithTaints kubelet config map built from KarpenterBaseTaints. +// It is used by reconcileKubeletConfigMap to merge our taints into the user-provided kubelet config. +func KarpenterBaseTaintMap() map[string]interface{} { + taints := make([]interface{}, len(KarpenterBaseTaints)) + for i, t := range KarpenterBaseTaints { + taints[i] = map[string]interface{}{ + "key": t.Key, + "value": t.Value, + "effect": string(t.Effect), + } + } + return map[string]interface{}{ + "registerWithTaints": taints, + } +} + +// KarpenterTaintConfigManifest returns the KubeletConfig CR YAML for the set-karpenter-taint ConfigMap. +func KarpenterTaintConfigManifest() (string, error) { + cr := map[string]interface{}{ + "apiVersion": "machineconfiguration.openshift.io/v1", + "kind": "KubeletConfig", + "metadata": map[string]interface{}{"name": KarpenterTaintConfigMapName}, + "spec": map[string]interface{}{"kubeletConfig": KarpenterBaseTaintMap()}, + } + out, err := yaml.Marshal(cr) + if err != nil { + return "", err + } + return strings.TrimSuffix(string(out), "\n"), nil +} + +// KarpenterNodeClassKubeletConfigName returns the name of the ConfigMap containing the +// per-OpenshiftEC2NodeClass KubeletConfig in the HCP namespace. +func KarpenterNodeClassKubeletConfigName(nodeClassName string) string { + return fmt.Sprintf("karpenter-kubelet-%s", nodeClassName) +} + // ErrHCPNotFound is returned when no HostedControlPlane is found in the namespace. 
var ErrHCPNotFound = errors.New("hostedcontrolplane not found") diff --git a/support/karpenter/karpenter_test.go b/support/karpenter/karpenter_test.go index fbcffecce0a..4daa90b6e8c 100644 --- a/support/karpenter/karpenter_test.go +++ b/support/karpenter/karpenter_test.go @@ -14,6 +14,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/yaml" ) func TestGetHCP(t *testing.T) { @@ -165,4 +166,71 @@ func TestArchToAMILabelKey(t *testing.T) { g.Expect(ArchToAMILabelKey(tc.arch)).To(Equal(tc.expected)) }) } + +} +func TestKarpenterTaintConfigManifest(t *testing.T) { + t.Run("When called it should return a valid KubeletConfig CR with correct apiVersion and kind", func(t *testing.T) { + g := NewWithT(t) + + manifest, err := KarpenterTaintConfigManifest() + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(manifest).NotTo(BeEmpty()) + + var cr map[string]interface{} + err = yaml.Unmarshal([]byte(manifest), &cr) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(cr["apiVersion"]).To(Equal("machineconfiguration.openshift.io/v1")) + g.Expect(cr["kind"]).To(Equal("KubeletConfig")) + metadata, ok := cr["metadata"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + g.Expect(metadata["name"]).To(Equal(KarpenterTaintConfigMapName)) + }) + + t.Run("When called it should embed all KarpenterBaseTaints in registerWithTaints with correct fields", func(t *testing.T) { + g := NewWithT(t) + + manifest, err := KarpenterTaintConfigManifest() + g.Expect(err).NotTo(HaveOccurred()) + + var cr map[string]interface{} + err = yaml.Unmarshal([]byte(manifest), &cr) + g.Expect(err).NotTo(HaveOccurred()) + spec, ok := cr["spec"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + kubeletConfig, ok := spec["kubeletConfig"].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + taints, ok := kubeletConfig["registerWithTaints"].([]interface{}) + g.Expect(ok).To(BeTrue()) + g.Expect(taints).To(HaveLen(len(KarpenterBaseTaints))) + 
taint, ok := taints[0].(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + g.Expect(taint["key"]).To(Equal(KarpenterBaseTaints[0].Key)) + g.Expect(taint["value"]).To(Equal(KarpenterBaseTaints[0].Value)) + g.Expect(taint["effect"]).To(Equal(string(KarpenterBaseTaints[0].Effect))) + }) +} + +func TestKarpenterBaseTaintMap(t *testing.T) { + t.Run("When called it should return a map with the registerWithTaints key", func(t *testing.T) { + g := NewWithT(t) + + m := KarpenterBaseTaintMap() + g.Expect(m).To(HaveKey("registerWithTaints")) + }) + + t.Run("When called it should include the correct key, value, and effect for each taint", func(t *testing.T) { + g := NewWithT(t) + + m := KarpenterBaseTaintMap() + taints, ok := m["registerWithTaints"].([]interface{}) + g.Expect(ok).To(BeTrue()) + g.Expect(taints).To(HaveLen(len(KarpenterBaseTaints))) + for i, entry := range taints { + taint, ok := entry.(map[string]interface{}) + g.Expect(ok).To(BeTrue()) + g.Expect(taint["key"]).To(Equal(KarpenterBaseTaints[i].Key)) + g.Expect(taint["value"]).To(Equal(KarpenterBaseTaints[i].Value)) + g.Expect(taint["effect"]).To(Equal(string(KarpenterBaseTaints[i].Effect))) + } + }) } diff --git a/test/e2e/karpenter_kubelet_checker_pod.yaml b/test/e2e/karpenter_kubelet_checker_pod.yaml new file mode 100644 index 00000000000..754a60f4835 --- /dev/null +++ b/test/e2e/karpenter_kubelet_checker_pod.yaml @@ -0,0 +1,76 @@ +apiVersion: v1 +kind: Pod +metadata: + name: kubelet-config-checker + namespace: kube-system +spec: + restartPolicy: Never + containers: + - name: checker + image: alpine + command: + - /bin/sh + - -c + - | + set -u + FILE=/host/etc/kubernetes/kubelet.conf + ENV_FILE=/host/etc/node-sizing-enabled.env + FAIL=0 + + check() { + local label="$1"; shift + if grep -q "$@" "$FILE"; then + echo "PASS: $label" + else + echo "FAIL: $label (pattern: $*)" + FAIL=1 + fi + } + + check_env() { + local label="$1"; shift + if grep -q "$@" "$ENV_FILE"; then + echo "PASS: $label" + else + echo 
"FAIL: $label (pattern: $*)" + FAIL=1 + fi + } + + echo "=== kubelet.conf ===" + cat "$FILE" + echo "=== node-sizing-enabled.env ===" + cat "$ENV_FILE" + echo "=== grep checks ===" + + check "maxPods: 203" 'maxPods: 203' + check "podsPerCore: 11" 'podsPerCore: 11' + check "cpuCFSQuota: false" 'cpuCFSQuota: false' + check "evictionMaxPodGracePeriod: 31" 'evictionMaxPodGracePeriod: 31' + check "imageGCHighThresholdPercent: 81" 'imageGCHighThresholdPercent: 81' + check "imageGCLowThresholdPercent: 71" 'imageGCLowThresholdPercent: 71' + check "kubeReserved cpu: 520m" 'cpu: 520m' + check "kubeReserved memory: 531Mi" 'memory: 531Mi' + check "evictionHard memory.available 201Mi" 'memory.available: 201Mi' + check "evictionHard nodefs.available 11%" 'nodefs.available: 11%' + check "evictionSoft memory.available 401Mi" 'memory.available: 401Mi' + check "evictionSoft nodefs.available 16%" 'nodefs.available: 16%' + check "evictionSoftGracePeriod 1m31s" '1m31s' + check "evictionSoftGracePeriod 2m5s" '2m5s' + check "podPidsLimit: 4096" 'podPidsLimit: 4096' + check "containerLogMaxSize: 50Mi" 'containerLogMaxSize: 50Mi' + + check_env "SYSTEM_RESERVED_CPU=510m" 'SYSTEM_RESERVED_CPU=510m' + check_env "SYSTEM_RESERVED_MEMORY=521Mi" 'SYSTEM_RESERVED_MEMORY=521Mi' + + exit $FAIL + securityContext: + privileged: true + volumeMounts: + - name: host + mountPath: /host + readOnly: true + volumes: + - name: host + hostPath: + path: / diff --git a/test/e2e/karpenter_test.go b/test/e2e/karpenter_test.go index 2e877941944..f551d72c941 100644 --- a/test/e2e/karpenter_test.go +++ b/test/e2e/karpenter_test.go @@ -5,6 +5,7 @@ package e2e import ( "bufio" "context" + _ "embed" "encoding/json" "fmt" "io" @@ -46,8 +47,20 @@ import ( "k8s.io/utils/ptr" crclient "sigs.k8s.io/controller-runtime/pkg/client" karpenterv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + "sigs.k8s.io/yaml" ) +//go:embed karpenter_kubelet_checker_pod.yaml +var kubeletCheckerPodRaw []byte + +var kubeletCheckerPodTemplate = func() 
*corev1.Pod { + pod := &corev1.Pod{} + if err := yaml.Unmarshal(kubeletCheckerPodRaw, pod); err != nil { + panic(err) + } + return pod +}() + func TestKarpenter(t *testing.T) { e2eutil.AtLeast(t, e2eutil.Version422) if globalOpts.Platform != hyperv1.AWSPlatform { @@ -80,6 +93,7 @@ func TestKarpenter(t *testing.T) { t.Run("OpenshiftEC2NodeClass version field and MetadataOptions", testNodeClassVersionField(ctx, mgtClient, guestClient, hostedCluster, awsCredsFile, awsRegion, pullSecretFile)) t.Run("Capacity reservation selector propagation", testCapacityReservation(ctx, mgtClient, guestClient, hostedCluster, awsCredsFile, awsRegion)) t.Run("Arbitrary subnet propagation", testArbitrarySubnet(ctx, mgtClient, guestClient, hostedCluster, awsCredsFile, awsRegion)) + t.Run("OpenshiftEC2NodeClass Kubelet propagation", testKubeletPropagation(ctx, mgtClient, guestClient, hostedCluster)) }) t.Run("AutoNode enable/disable lifecycle", testAutoNodeLifecycle(ctx, mgtClient, hostedCluster)) @@ -1162,6 +1176,193 @@ func testArbitrarySubnet(ctx context.Context, mgtClient, guestClient crclient.Cl } } +func testKubeletPropagation(ctx context.Context, mgtClient, guestClient crclient.Client, hostedCluster *hyperv1.HostedCluster) func(t *testing.T) { + return func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + hcpNamespace := manifests.HostedControlPlaneNamespace(hostedCluster.Namespace, hostedCluster.Name) + + // Create a custom OpenshiftEC2NodeClass that the controller does not manage, so that + // reconcileOpenshiftEC2NodeClassDefault cannot overwrite spec.kubelet on every reconcile. + // We picked weird non-round numbers specifically so we know it wasn't getting defaulted. + // + // Build KubeletConfiguration via JSON unmarshal so that overflow (non-typed) fields + // like podPidsLimit and containerLogMaxSize are captured. Decoding goes through + // KubeletConfiguration.UnmarshalJSON, which collects every key without a typed field + // into Overflow, so typed and overflow settings can be declared in one document. 
+ kubeletJSON := `{ + "maxPods": 203, + "podsPerCore": 11, + "systemReserved": {"cpu": "510m", "memory": "521Mi"}, + "kubeReserved": {"cpu": "520m", "memory": "531Mi"}, + "evictionHard": {"memory.available": "201Mi", "nodefs.available": "11%"}, + "evictionSoft": {"memory.available": "401Mi", "nodefs.available": "16%"}, + "evictionSoftGracePeriod": {"memory.available": "1m31s", "nodefs.available": "2m5s"}, + "evictionMaxPodGracePeriod": 31, + "imageGCHighThresholdPercent": 81, + "imageGCLowThresholdPercent": 71, + "cpuCFSQuota": false, + "podPidsLimit": 4096, + "containerLogMaxSize": "50Mi" + }` + var kubeletConfig hyperkarpenterv1.KubeletConfiguration + g.Expect(json.Unmarshal([]byte(kubeletJSON), &kubeletConfig)).To(Succeed()) + + nc := &hyperkarpenterv1.OpenshiftEC2NodeClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "kubelet-config-test", + }, + Spec: hyperkarpenterv1.OpenshiftEC2NodeClassSpec{ + Kubelet: kubeletConfig, + }, + } + g.Expect(guestClient.Create(ctx, nc)).To(Succeed()) + t.Logf("Created OpenshiftEC2NodeClass %q with kubelet config", nc.Name) + t.Cleanup(func() { + _ = guestClient.Delete(ctx, nc) + }) + + // Wait for the per-nodeclass KubeletConfig ConfigMap to appear in the HCP namespace. 
+ kubeletCMName := karpenterutil.KarpenterNodeClassKubeletConfigName(nc.Name) + e2eutil.EventuallyObject(t, ctx, fmt.Sprintf("KubeletConfig ConfigMap %s/%s to appear with correct content", hcpNamespace, kubeletCMName), + func(ctx context.Context) (*corev1.ConfigMap, error) { + cm := &corev1.ConfigMap{} + err := mgtClient.Get(ctx, crclient.ObjectKey{Name: kubeletCMName, Namespace: hcpNamespace}, cm) + return cm, err + }, + []e2eutil.Predicate[*corev1.ConfigMap]{ + func(cm *corev1.ConfigMap) (done bool, reasons string, err error) { + if cm.Labels[karpenterutil.KarpenterNodeClassKubeletConfigLabel] != "true" { + return false, fmt.Sprintf("missing label %s=true", karpenterutil.KarpenterNodeClassKubeletConfigLabel), nil + } + return true, "label present", nil + }, + func(cm *corev1.ConfigMap) (done bool, reasons string, err error) { + config := cm.Data["config"] + for _, field := range []string{"maxPods", "podsPerCore", "cpuCFSQuota", "podPidsLimit", "containerLogMaxSize"} { + if !strings.Contains(config, field) { + return false, fmt.Sprintf("config missing field %q", field), nil + } + } + return true, "all required fields present in config", nil + }, + }, + e2eutil.WithTimeout(2*time.Minute), + e2eutil.WithInterval(5*time.Second), + ) + t.Logf("KubeletConfig ConfigMap %s is present and correct", kubeletCMName) + + // Wait for the karpenterignition controller to issue the ignition token with kubelet config. + // The annotation is set after token.Reconcile() succeeds, guaranteeing Karpenter will use + // the token (with kubelet config) when provisioning new nodes. 
+ e2eutil.EventuallyObject(t, ctx, fmt.Sprintf("OpenshiftEC2NodeClass %q to have ignition token annotation", nc.Name), + func(ctx context.Context) (*hyperkarpenterv1.OpenshiftEC2NodeClass, error) { + updated := &hyperkarpenterv1.OpenshiftEC2NodeClass{} + err := guestClient.Get(ctx, crclient.ObjectKey{Name: nc.Name}, updated) + return updated, err + }, + []e2eutil.Predicate[*hyperkarpenterv1.OpenshiftEC2NodeClass]{ + func(nc *hyperkarpenterv1.OpenshiftEC2NodeClass) (done bool, reasons string, err error) { + v := nc.GetAnnotations()["hypershift.openshift.io/nodeClassCurrentConfigVersion"] + if v == "" { + return false, "annotation hypershift.openshift.io/nodeClassCurrentConfigVersion not yet set", nil + } + return true, fmt.Sprintf("annotation set to %q", v), nil + }, + }, + e2eutil.WithTimeout(2*time.Minute), + e2eutil.WithInterval(5*time.Second), + ) + t.Logf("Ignition token annotation set on %q", nc.Name) + + // Wait for the OpenshiftEC2NodeClass to be fully Ready before creating the NodePool. + // Karpenter ignores NodePools whose referenced EC2NodeClass is not Ready — the ignition + // annotation above is set before AWS resource discovery (SecurityGroups, Subnets) completes, + // so we must wait for the Ready condition explicitly to avoid provisioning delays. 
+ t.Logf("Make sure OpenshiftEC2NodeClass %q is Ready before nodepool creation", nc.Name) + e2eutil.EventuallyObject(t, ctx, fmt.Sprintf("OpenshiftEC2NodeClass %q to be Ready", nc.Name), + func(ctx context.Context) (*hyperkarpenterv1.OpenshiftEC2NodeClass, error) { + updated := &hyperkarpenterv1.OpenshiftEC2NodeClass{} + err := guestClient.Get(ctx, crclient.ObjectKey{Name: nc.Name}, updated) + return updated, err + }, + []e2eutil.Predicate[*hyperkarpenterv1.OpenshiftEC2NodeClass]{ + e2eutil.ConditionPredicate[*hyperkarpenterv1.OpenshiftEC2NodeClass](e2eutil.Condition{ + Type: "Ready", + Status: metav1.ConditionTrue, + }), + }, + e2eutil.WithTimeout(5*time.Minute), + ) + t.Logf("OpenshiftEC2NodeClass %q is Ready", nc.Name) + + // Create Karpenter NodePool pointing at the custom nodeclass and workloads to provision nodes. + testNodePool := baseNodePool("kubelet-config-test", nc.Name) + testWorkLoads := testWorkload("kubelet-config-web-app", 1, map[string]string{ + karpenterv1.NodePoolLabelKey: testNodePool.Name, + }) + + g.Expect(guestClient.Create(ctx, testNodePool)).To(Succeed()) + t.Logf("Created Karpenter NodePool %s", testNodePool.Name) + g.Expect(guestClient.Create(ctx, testWorkLoads)).To(Succeed()) + t.Logf("Created workloads %s", testWorkLoads.Name) + t.Cleanup(func() { + _ = guestClient.Delete(ctx, testWorkLoads) + _ = guestClient.Delete(ctx, testNodePool) + }) + + testNodeLabels := map[string]string{ + karpenterv1.NodePoolLabelKey: testNodePool.Name, + } + + // Wait for nodes to be provisioned + e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, hostedCluster.Spec.Platform.Type, 1, testNodeLabels) + t.Logf("Karpenter nodes are ready") + + // Build a clientset for the guest cluster (needed for pod log fetching) + guestConfig := e2eutil.WaitForGuestRestConfig(t, ctx, mgtClient, hostedCluster) + guestClientset, err := kubeclient.NewForConfig(guestConfig) + g.Expect(err).NotTo(HaveOccurred()) + + // Run a privileged pod on the karpenter node that prints 
kubelet.conf then + // greps each expected field, exiting non-zero if any is missing. + checkerPod := kubeletCheckerPodTemplate.DeepCopy() + checkerPod.Spec.NodeSelector = testNodeLabels + checkerPod.Spec.Tolerations = []corev1.Toleration{{Operator: corev1.TolerationOpExists}} + g.Expect(guestClient.Create(ctx, checkerPod)).To(Succeed()) + t.Cleanup(func() { _ = guestClient.Delete(ctx, checkerPod) }) + t.Logf("Created kubelet-config-checker pod on nodepool %s", testNodePool.Name) + + // Wait for the pod to complete (Succeeded or Failed) + // This is intentionally not an EventuallyObject because we need to do something on either state + g.Eventually(func(g Gomega) { + p := &corev1.Pod{} + g.Expect(guestClient.Get(ctx, crclient.ObjectKeyFromObject(checkerPod), p)).To(Succeed()) + g.Expect(p.Status.Phase).To(BeElementOf(corev1.PodSucceeded, corev1.PodFailed)) + }).WithTimeout(5 * time.Minute).WithPolling(5 * time.Second).Should(Succeed()) + + // Always fetch and log the pod output so it's visible in the test run + logReq := guestClientset.CoreV1().Pods(checkerPod.Namespace).GetLogs(checkerPod.Name, &corev1.PodLogOptions{Container: "checker"}) + logStream, err := logReq.Stream(ctx) + g.Expect(err).NotTo(HaveOccurred()) + defer logStream.Close() + logBytes, err := io.ReadAll(logStream) + g.Expect(err).NotTo(HaveOccurred()) + t.Logf("kubelet-config-checker output:\n%s", string(logBytes)) + + // Assert the pod succeeded (grep chain exited 0 = all fields found) + p := &corev1.Pod{} + g.Expect(guestClient.Get(ctx, crclient.ObjectKeyFromObject(checkerPod), p)).To(Succeed()) + g.Expect(p.Status.Phase).To(Equal(corev1.PodSucceeded), "kubelet config fields not all found — see pod output above") + t.Logf("kubelet config fields confirmed on node") + + // Cleanup workloads and NodePool + g.Expect(guestClient.Delete(ctx, testWorkLoads)).To(Succeed()) + g.Expect(guestClient.Delete(ctx, testNodePool)).To(Succeed()) + _ = e2eutil.WaitForReadyNodesByLabels(t, ctx, guestClient, 
hostedCluster.Spec.Platform.Type, 0, testNodeLabels) + } +} + func testAutoNodeLifecycle(ctx context.Context, mgtClient crclient.Client, hostedCluster *hyperv1.HostedCluster) func(t *testing.T) { return func(t *testing.T) { g := NewWithT(t) diff --git a/vendor/github.com/openshift/hypershift/api/karpenter/v1/karpenter_types.go b/vendor/github.com/openshift/hypershift/api/karpenter/v1/karpenter_types.go index beba4471e10..902c6e1a95c 100644 --- a/vendor/github.com/openshift/hypershift/api/karpenter/v1/karpenter_types.go +++ b/vendor/github.com/openshift/hypershift/api/karpenter/v1/karpenter_types.go @@ -275,6 +275,11 @@ type OpenshiftEC2NodeClassSpec struct { // +kubebuilder:validation:MaxLength=64 // +optional Version string `json:"version,omitempty"` + + // kubelet configures kubelet settings for nodes provisioned by this NodeClass. + // These settings are injected into the node's ignition configuration via MachineConfig. + // +optional + Kubelet KubeletConfiguration `json:"kubelet,omitzero"` } // SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes. diff --git a/vendor/github.com/openshift/hypershift/api/karpenter/v1/kubelet_config.go b/vendor/github.com/openshift/hypershift/api/karpenter/v1/kubelet_config.go new file mode 100644 index 00000000000..7e3b8b75779 --- /dev/null +++ b/vendor/github.com/openshift/hypershift/api/karpenter/v1/kubelet_config.go @@ -0,0 +1,219 @@ +package v1 + +import ( + "encoding/json" + "reflect" + "strings" + + "k8s.io/apimachinery/pkg/runtime" +) + +// kubeletConfigKnownFields is the set of JSON keys corresponding to the explicitly typed +// fields in KubeletConfiguration. It is derived from the struct's json tags at init time +// so it stays in sync automatically when fields are added or removed. 
+var kubeletConfigKnownFields map[string]struct{} + +func init() { + t := reflect.TypeOf(KubeletConfiguration{}) + kubeletConfigKnownFields = make(map[string]struct{}, t.NumField()) + for i := range t.NumField() { + f := t.Field(i) + if tag, ok := f.Tag.Lookup("json"); ok { + name, _, _ := strings.Cut(tag, ",") + if name != "" && name != "-" { + kubeletConfigKnownFields[name] = struct{}{} + } + } + } +} + +// EvictionThreshold is a threshold value for a kubelet eviction signal. +// Values are either a percentage (e.g. "10%") or a Kubernetes quantity (e.g. "100Mi"). +// +kubebuilder:validation:MaxLength=64 +type EvictionThreshold string + +// KubeletConfiguration configures kubelet settings for nodes provisioned by this NodeClass. +// These settings are injected into the node's ignition configuration via MachineConfig. +// The fields listed below are validated at admission time. Additional kubelet configuration +// fields beyond those listed here are also accepted and will be passed through to the node's +// kubelet configuration without validation. Overflow fields bypass all CRD validation; +// invalid overflow values will cause node bootstrap failures (kubelet crash loop) rather +// than admission errors. +// When graduating new fields from overflow to typed fields, match upstream kubelet's +// field names and types exactly. See api/AGENTS.md "KubeletConfiguration Field Graduation" +// for the full strategy. 
+// +kubebuilder:pruning:PreserveUnknownFields +// +kubebuilder:validation:XValidation:rule="!has(self.imageGCHighThresholdPercent) || !has(self.imageGCLowThresholdPercent) || self.imageGCHighThresholdPercent > self.imageGCLowThresholdPercent",message="imageGCHighThresholdPercent must be greater than imageGCLowThresholdPercent" +// +kubebuilder:validation:XValidation:rule="!has(self.podsPerCore) || !has(self.maxPods) || self.podsPerCore <= self.maxPods",message="podsPerCore must not exceed maxPods" +// +kubebuilder:validation:XValidation:rule="!has(self.evictionSoft) || (has(self.evictionSoftGracePeriod) && self.evictionSoft.all(e, e in self.evictionSoftGracePeriod))",message="evictionSoft entry does not have a matching evictionSoftGracePeriod" +// +kubebuilder:validation:XValidation:rule="!has(self.evictionSoftGracePeriod) || (has(self.evictionSoft) && self.evictionSoftGracePeriod.all(e, e in self.evictionSoft))",message="evictionSoftGracePeriod entry does not have a matching evictionSoft" +// +kubebuilder:validation:XValidation:rule="!has(self.evictionHard) || !has(self.evictionSoft) || self.evictionHard.all(key, !(key in self.evictionSoft) || ((self.evictionSoft[key].endsWith('%') && self.evictionHard[key].endsWith('%')) ? (self.evictionSoft[key].size() <= 1 || self.evictionHard[key].size() <= 1 || double(self.evictionSoft[key].substring(0, self.evictionSoft[key].size() - 1)) >= double(self.evictionHard[key].substring(0, self.evictionHard[key].size() - 1))) : (!(isQuantity(self.evictionSoft[key]) && isQuantity(self.evictionHard[key])) || quantity(self.evictionSoft[key]).compareTo(quantity(self.evictionHard[key])) >= 0)))",message="evictionSoft threshold must be greater than or equal to evictionHard threshold for the same signal (soft eviction should fire before hard)" +type KubeletConfiguration struct { + // maxPods is the maximum number of pods that can run on a node. + // The value must be between 1 and 2500, inclusive. 
+ // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=2500 + // +optional + MaxPods int32 `json:"maxPods,omitempty"` + // podsPerCore is the maximum number of pods per core. The value must be between 1 and 2500, + // inclusive, and cannot exceed maxPods when both are set. + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=2500 + // +optional + PodsPerCore int32 `json:"podsPerCore,omitempty"` + // systemReserved is a set of ResourceName=ResourceQuantity pairs that describe + // resources reserved for non-kubernetes components. + // Currently only cpu, memory, ephemeral-storage, and pid are supported. + // +kubebuilder:validation:XValidation:message="valid keys for systemReserved are ['cpu','memory','ephemeral-storage','pid']",rule="self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' || x=='pid')" + // +kubebuilder:validation:XValidation:message="systemReserved value cannot be a negative resource quantity",rule="self.all(x, !self[x].startsWith('-'))" + // +kubebuilder:validation:XValidation:message="systemReserved values must not be empty",rule="self.all(x, self[x].size() > 0)" + // +kubebuilder:validation:MinProperties=1 + // +kubebuilder:validation:MaxProperties=20 + // +optional + SystemReserved map[string]string `json:"systemReserved,omitempty"` + // kubeReserved is a set of ResourceName=ResourceQuantity pairs that describe + // resources reserved for kubernetes system components. + // Currently only cpu, memory, ephemeral-storage, and pid are supported. 
+ // +kubebuilder:validation:XValidation:message="valid keys for kubeReserved are ['cpu','memory','ephemeral-storage','pid']",rule="self.all(x, x=='cpu' || x=='memory' || x=='ephemeral-storage' || x=='pid')" + // +kubebuilder:validation:XValidation:message="kubeReserved value cannot be a negative resource quantity",rule="self.all(x, !self[x].startsWith('-'))" + // +kubebuilder:validation:XValidation:message="kubeReserved values must not be empty",rule="self.all(x, self[x].size() > 0)" + // +kubebuilder:validation:MinProperties=1 + // +kubebuilder:validation:MaxProperties=20 + // +optional + KubeReserved map[string]string `json:"kubeReserved,omitempty"` + // evictionHard is a map of signal names to quantities that defines hard eviction thresholds. + // +kubebuilder:validation:XValidation:message="valid keys for evictionHard are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']",rule="self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'])" + // +kubebuilder:validation:XValidation:message="evictionHard values must not be empty",rule="self.all(x, self[x].size() > 0)" + // +kubebuilder:validation:MinProperties=1 + // +kubebuilder:validation:MaxProperties=20 + // +optional + EvictionHard map[string]EvictionThreshold `json:"evictionHard,omitempty"` + // evictionSoft is a map of signal names to quantities that defines soft eviction thresholds. 
+ // +kubebuilder:validation:XValidation:message="valid keys for evictionSoft are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']",rule="self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'])" + // +kubebuilder:validation:XValidation:message="evictionSoft values must not be empty",rule="self.all(x, self[x].size() > 0)" + // +kubebuilder:validation:MinProperties=1 + // +kubebuilder:validation:MaxProperties=20 + // +optional + EvictionSoft map[string]EvictionThreshold `json:"evictionSoft,omitempty"` + // evictionSoftGracePeriod is a map of signal names to quantities that defines grace periods + // for each soft eviction signal. + // +kubebuilder:validation:XValidation:message="valid keys for evictionSoftGracePeriod are ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available']",rule="self.all(x, x in ['memory.available','nodefs.available','nodefs.inodesFree','imagefs.available','imagefs.inodesFree','pid.available'])" + // +kubebuilder:validation:MinProperties=1 + // +kubebuilder:validation:MaxProperties=20 + // +optional + EvictionSoftGracePeriod map[string]string `json:"evictionSoftGracePeriod,omitempty"` + // evictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use + // when terminating pods in response to soft eviction thresholds. + // +optional + EvictionMaxPodGracePeriod *int32 `json:"evictionMaxPodGracePeriod,omitempty"` + // imageGCHighThresholdPercent is the percent of disk usage which triggers image garbage collection. + // The value must be between 0 and 100, inclusive, and must be greater than imageGCLowThresholdPercent when both are set. 
+ // +kubebuilder:validation:Minimum=0 + // +kubebuilder:validation:Maximum=100 + // +optional + ImageGCHighThresholdPercent *int32 `json:"imageGCHighThresholdPercent,omitempty"` + // imageGCLowThresholdPercent is the percent of disk usage to which image garbage collection attempts to free. + // The value must be between 0 and 100, inclusive, and must be less than imageGCHighThresholdPercent when both are set. + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:validation:Maximum=100 + // +optional + ImageGCLowThresholdPercent *int32 `json:"imageGCLowThresholdPercent,omitempty"` + // cpuCFSQuota enables CPU CFS quota enforcement for containers that specify CPU limits. + // +optional + CPUCFSQuota *bool `json:"cpuCFSQuota,omitempty"` + + // Overflow holds additional kubelet configuration fields not explicitly defined above. + // These fields are preserved during serialization and deserialization, allowing arbitrary + // kubelet configuration to pass through to the node's ignition/MachineConfig. + Overflow runtime.RawExtension `json:"-"` +} + +// UnmarshalJSON implements custom JSON unmarshaling for KubeletConfiguration. +// It deserializes known fields into the struct and captures all additional fields +// into the overflow map for pass-through. +func (k *KubeletConfiguration) UnmarshalJSON(data []byte) error { + // Zero the receiver so that fields absent from the new input + // (including Overflow, which is json:"-") do not survive from a + // previous decode. 
+ *k = KubeletConfiguration{} + + // Unmarshal known fields via alias to avoid infinite recursion + type Alias KubeletConfiguration + aux := &struct{ *Alias }{Alias: (*Alias)(k)} + if err := json.Unmarshal(data, aux); err != nil { + return err + } + + // Unmarshal everything into a raw map + var raw map[string]json.RawMessage + if err := json.Unmarshal(data, &raw); err != nil { + return err + } + + // Separate unknown fields into overflow + for key := range kubeletConfigKnownFields { + delete(raw, key) + } + if len(raw) > 0 { + overflowBytes, err := json.Marshal(raw) + if err != nil { + return err + } + k.Overflow = runtime.RawExtension{Raw: overflowBytes} + } + return nil +} + +// MarshalJSON implements custom JSON marshaling for KubeletConfiguration. +// It serializes the known typed fields and merges any overflow fields back in. +// An Overflow that is empty or holds the JSON literal null contributes nothing, +// so only the typed fields are emitted in that case. +func (k KubeletConfiguration) MarshalJSON() ([]byte, error) { + // Marshal known fields via alias to avoid infinite recursion + type Alias KubeletConfiguration + data, err := json.Marshal((*Alias)(&k)) + if err != nil { + return nil, err + } + + if len(k.Overflow.Raw) == 0 { + return data, nil + } + + // Merge overflow fields into the output; structured fields win on conflict. + var overflowMap map[string]json.RawMessage + if err := json.Unmarshal(k.Overflow.Raw, &overflowMap); err != nil { + return nil, err + } + if overflowMap == nil { + // Overflow.Raw was the JSON literal null, which decodes to a nil map. There is + // nothing to merge, and assigning into a nil map below would panic. + return data, nil + } + var structured map[string]json.RawMessage + if err := json.Unmarshal(data, &structured); err != nil { + return nil, err + } + for key, val := range structured { + overflowMap[key] = val + } + return json.Marshal(overflowMap) +} + +// HasTypedFields reports whether any explicitly defined struct fields are set. +// This is used by IsZero, but is separate so we can differentiate the zero case +// from "only overflow fields set". This must be kept in sync with the typed +// fields in KubeletConfiguration. 
+func (k KubeletConfiguration) HasTypedFields() bool { + return k.MaxPods != 0 || + k.PodsPerCore != 0 || + k.SystemReserved != nil || + k.KubeReserved != nil || + k.EvictionHard != nil || + k.EvictionSoft != nil || + k.EvictionSoftGracePeriod != nil || + k.EvictionMaxPodGracePeriod != nil || + k.ImageGCHighThresholdPercent != nil || + k.ImageGCLowThresholdPercent != nil || + k.CPUCFSQuota != nil +} + +// IsZero reports whether the KubeletConfiguration is empty (no typed fields set and +// no overflow fields). This is used by the omitzero JSON tag to determine whether the +// field should be omitted during serialization. +func (k KubeletConfiguration) IsZero() bool { + return !k.HasTypedFields() && len(k.Overflow.Raw) == 0 +} diff --git a/vendor/github.com/openshift/hypershift/api/karpenter/v1/zz_generated.deepcopy.go b/vendor/github.com/openshift/hypershift/api/karpenter/v1/zz_generated.deepcopy.go index 8c1af4f6438..75ff805c942 100644 --- a/vendor/github.com/openshift/hypershift/api/karpenter/v1/zz_generated.deepcopy.go +++ b/vendor/github.com/openshift/hypershift/api/karpenter/v1/zz_generated.deepcopy.go @@ -104,6 +104,77 @@ func (in *CapacityReservationSelectorTerm) DeepCopy() *CapacityReservationSelect return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) { + *out = *in + if in.SystemReserved != nil { + in, out := &in.SystemReserved, &out.SystemReserved + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.KubeReserved != nil { + in, out := &in.KubeReserved, &out.KubeReserved + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.EvictionHard != nil { + in, out := &in.EvictionHard, &out.EvictionHard + *out = make(map[string]EvictionThreshold, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.EvictionSoft != nil { + in, out := &in.EvictionSoft, &out.EvictionSoft + *out = make(map[string]EvictionThreshold, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.EvictionSoftGracePeriod != nil { + in, out := &in.EvictionSoftGracePeriod, &out.EvictionSoftGracePeriod + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.EvictionMaxPodGracePeriod != nil { + in, out := &in.EvictionMaxPodGracePeriod, &out.EvictionMaxPodGracePeriod + *out = new(int32) + **out = **in + } + if in.ImageGCHighThresholdPercent != nil { + in, out := &in.ImageGCHighThresholdPercent, &out.ImageGCHighThresholdPercent + *out = new(int32) + **out = **in + } + if in.ImageGCLowThresholdPercent != nil { + in, out := &in.ImageGCLowThresholdPercent, &out.ImageGCLowThresholdPercent + *out = new(int32) + **out = **in + } + if in.CPUCFSQuota != nil { + in, out := &in.CPUCFSQuota, &out.CPUCFSQuota + *out = new(bool) + **out = **in + } + in.Overflow.DeepCopyInto(&out.Overflow) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeletConfiguration. 
+func (in *KubeletConfiguration) DeepCopy() *KubeletConfiguration { + if in == nil { + return nil + } + out := new(KubeletConfiguration) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MetadataOptions) DeepCopyInto(out *MetadataOptions) { *out = *in @@ -217,6 +288,7 @@ func (in *OpenshiftEC2NodeClassSpec) DeepCopyInto(out *OpenshiftEC2NodeClassSpec } } out.MetadataOptions = in.MetadataOptions + in.Kubelet.DeepCopyInto(&out.Kubelet) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OpenshiftEC2NodeClassSpec.